[InstCombine] Simplify and/or of icmp eq with op replacement (#70335)

and/or in logical (select) form benefit from generic simplifications via simplifyWithOpReplaced(). However, the corresponding fold for plain and/or currently does not exist. Similar to selects, there are two general cases for this fold (illustrated with `and`, but there are `or` conjugates). The basic case is something like `(a == b) & c`, where the replacement of a with b or b with a inside c allows it to fold to true or false. Then the whole operation will fold to either false or `a == b`. The second case is something like `(a != b) & c`, where the replacement inside c allows it to fold to false. In that case, the operand can be replaced with c, because in the case where a == b (and thus the icmp is false), c itself will already be false. As the test diffs show, this catches quite a lot of patterns in existing test coverage. This also obsoletes quite a few existing special-case and/or of icmp folds we have (e.g. simplifyAndOrOfICmpsWithLimitConst), but I haven't removed anything as part of this patch in the interest of risk mitigation. Fixes #69050. Fixes #69091.
llvm · Oct 30, 2023 · 1770a2e · 1770a2e
1 parent 5c8a71d
commit 1770a2e
Show file tree

Hide file tree

Showing 14 changed files with 241 additions and 587 deletions.
diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp
@@ -2025,6 +2025,52 @@ static Value *simplifyAndOrOfCmps(const SimplifyQuery &Q, Value *Op0,
   return nullptr;
 }
 
+static Value *simplifyWithOpReplaced(Value *V, Value *Op, Value *RepOp,
+                                     const SimplifyQuery &Q,
+                                     bool AllowRefinement,
+                                     SmallVectorImpl<Instruction *> *DropFlags,
+                                     unsigned MaxRecurse);
+
+static Value *simplifyAndOrWithICmpEq(unsigned Opcode, Value *Op0, Value *Op1,
+                                      const SimplifyQuery &Q,
+                                      unsigned MaxRecurse) {
+  assert((Opcode == Instruction::And || Opcode == Instruction::Or) &&
+         "Must be and/or");
+  ICmpInst::Predicate Pred;
+  Value *A, *B;
+  if (!match(Op0, m_ICmp(Pred, m_Value(A), m_Value(B))) ||
+      !ICmpInst::isEquality(Pred) || !MaxRecurse--)
+    return nullptr;
+
+  auto Simplify = [&](Value *Res) -> Value * {
+    // and (icmp eq a, b), x implies (a==b) inside x.
+    // or (icmp ne a, b), x implies (a==b) inside x.
+    // If x simplifies to true/false, we can simplify the and/or.
+    if (Pred ==
+        (Opcode == Instruction::And ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE))
+      return simplifyBinOp(Opcode, Op0, Res, Q, MaxRecurse);
+    // If we have and (icmp ne a, b), x and for a==b we can simplify x to false,
+    // then we can drop the icmp, as x will already be false in the case where
+    // the icmp is false. Similar for or and true.
+    if (Res == ConstantExpr::getBinOpAbsorber(Opcode, Res->getType()))
+      return Op1;
+    return nullptr;
+  };
+
+  // Increment MaxRecurse again, because simplifyWithOpReplaced() does its own
+  // decrement.
+  if (Value *Res =
+          simplifyWithOpReplaced(Op1, A, B, Q, /* AllowRefinement */ true,
+                                 /* DropFlags */ nullptr, MaxRecurse + 1))
+    return Simplify(Res);
+  if (Value *Res =
+          simplifyWithOpReplaced(Op1, B, A, Q, /* AllowRefinement */ true,
+                                 /* DropFlags */ nullptr, MaxRecurse + 1))
+    return Simplify(Res);
+
+  return nullptr;
+}
+
 /// Given a bitwise logic op, check if the operands are add/sub with a common
 /// source value and inverted constant (identity: C - X -> ~(X + ~C)).
 static Value *simplifyLogicOfAddSub(Value *Op0, Value *Op1,
@@ -2159,6 +2205,13 @@ static Value *simplifyAndInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       isKnownToBeAPowerOfTwo(Op0, Q.DL, /*OrZero*/ true, 0, Q.AC, Q.CxtI, Q.DT))
     return Constant::getNullValue(Op0->getType());
 
+  if (Value *V =
+          simplifyAndOrWithICmpEq(Instruction::And, Op0, Op1, Q, MaxRecurse))
+    return V;
+  if (Value *V =
+          simplifyAndOrWithICmpEq(Instruction::And, Op1, Op0, Q, MaxRecurse))
+    return V;
+
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, true))
     return V;
 
@@ -2435,6 +2488,13 @@ static Value *simplifyOrInst(Value *Op0, Value *Op1, const SimplifyQuery &Q,
       match(Op0, m_LShr(m_Specific(X), m_Specific(Y))))
     return Op1;
 
+  if (Value *V =
+          simplifyAndOrWithICmpEq(Instruction::Or, Op0, Op1, Q, MaxRecurse))
+    return V;
+  if (Value *V =
+          simplifyAndOrWithICmpEq(Instruction::Or, Op1, Op0, Q, MaxRecurse))
+    return V;
+
   if (Value *V = simplifyAndOrOfCmps(Q, Op0, Op1, false))
     return V;
 

diff --git a/llvm/test/CodeGen/PowerPC/pr45448.ll b/llvm/test/CodeGen/PowerPC/pr45448.ll
@@ -20,20 +20,16 @@ define hidden void @julia_tryparse_internal_45896() #0 {
 ; CHECK-NEXT:  .LBB0_6: # %fail194
 ; CHECK-NEXT:  .LBB0_7: # %L670
 ; CHECK-NEXT:    li r5, -3
-; CHECK-NEXT:    cmpdi r3, 0
 ; CHECK-NEXT:    sradi r4, r3, 63
 ; CHECK-NEXT:    rldic r5, r5, 4, 32
-; CHECK-NEXT:    crnot 4*cr5+lt, eq
 ; CHECK-NEXT:    mulhdu r3, r3, r5
 ; CHECK-NEXT:    maddld r6, r4, r5, r3
 ; CHECK-NEXT:    cmpld cr1, r6, r3
 ; CHECK-NEXT:    mulhdu. r3, r4, r5
-; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_10
-; CHECK-NEXT:  # %bb.8: # %L670
 ; CHECK-NEXT:    crorc 4*cr5+lt, 4*cr1+lt, eq
-; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_10
-; CHECK-NEXT:  # %bb.9: # %L917
-; CHECK-NEXT:  .LBB0_10: # %L994
+; CHECK-NEXT:    bc 4, 4*cr5+lt, .LBB0_9
+; CHECK-NEXT:  # %bb.8: # %L917
+; CHECK-NEXT:  .LBB0_9: # %L994
 top:
   %0 = load i64, ptr undef, align 8
   %1 = icmp ne i64 %0, 0

diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-smul_ov.ll
@@ -47,11 +47,7 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) {
 
 define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @n3_wrong_pred(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[SMUL_OV]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[AND]]
+; CHECK-NEXT:    ret i1 false
 ;
   %cmp = icmp eq i4 %size, 0 ; not 'ne'
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -63,10 +59,7 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
 define i1 @n4_not_and(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @n4_not_and(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[SMUL:%.*]] = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT:    [[SMUL_OV:%.*]] = extractvalue { i4, i1 } [[SMUL]], 1
-; CHECK-NEXT:    [[AND:%.*]] = or i1 [[SMUL_OV]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[AND]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %cmp = icmp ne i4 %size, 0
   %smul = tail call { i4, i1 } @llvm.smul.with.overflow.i4(i4 %size, i4 %nmemb)

diff --git a/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll b/llvm/test/Transforms/InstCombine/div-by-0-guard-before-umul_ov.ll
@@ -47,11 +47,7 @@ define i1 @n2_wrong_size(i4 %size0, i4 %size1, i4 %nmemb) {
 
 define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @n3_wrong_pred(
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT:    [[AND:%.*]] = and i1 [[UMUL_OV]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[AND]]
+; CHECK-NEXT:    ret i1 false
 ;
   %cmp = icmp eq i4 %size, 0 ; not 'ne'
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)
@@ -63,10 +59,7 @@ define i1 @n3_wrong_pred(i4 %size, i4 %nmemb) {
 define i1 @n4_not_and(i4 %size, i4 %nmemb) {
 ; CHECK-LABEL: @n4_not_and(
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i4 [[SIZE:%.*]], 0
-; CHECK-NEXT:    [[UMUL:%.*]] = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 [[SIZE]], i4 [[NMEMB:%.*]])
-; CHECK-NEXT:    [[UMUL_OV:%.*]] = extractvalue { i4, i1 } [[UMUL]], 1
-; CHECK-NEXT:    [[AND:%.*]] = or i1 [[UMUL_OV]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[AND]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %cmp = icmp ne i4 %size, 0
   %umul = tail call { i4, i1 } @llvm.umul.with.overflow.i4(i4 %size, i4 %nmemb)

diff --git a/llvm/test/Transforms/InstCombine/ispow2.ll b/llvm/test/Transforms/InstCombine/ispow2.ll
@@ -392,9 +392,7 @@ define i1 @is_pow2_ctpop_wrong_pred1(i32 %x) {
 ; CHECK-LABEL: @is_pow2_ctpop_wrong_pred1(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ugt i32 [[T0]], 2
-; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ugt i32 %t0, 2
@@ -946,9 +944,7 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) {
 ; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred1(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[T0]], 1
-; CHECK-NEXT:    [[ISZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %t0, 1
@@ -959,11 +955,7 @@ define i1 @is_pow2or0_ctpop_wrong_pred1(i32 %x) {
 
 define i1 @is_pow2or0_ctpop_wrong_pred2(i32 %x) {
 ; CHECK-LABEL: @is_pow2or0_ctpop_wrong_pred2(
-; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[T0]], 1
-; CHECK-NEXT:    [[ISZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %t0, 1
@@ -1149,9 +1141,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) {
 ; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred1(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
-; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp eq i32 %t0, 1
@@ -1162,11 +1152,7 @@ define i1 @isnot_pow2nor0_ctpop_wrong_pred1(i32 %x) {
 
 define i1 @isnot_pow2nor0_ctpop_wrong_pred2(i32 %x) {
 ; CHECK-LABEL: @isnot_pow2nor0_ctpop_wrong_pred2(
-; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]]), !range [[RNG0]]
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[T0]], 1
-; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = and i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 false
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp eq i32 %t0, 1

diff --git a/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll b/llvm/test/Transforms/InstSimplify/and-or-icmp-ctpop.ll
@@ -40,11 +40,7 @@ define <2 x i1> @eq_or_non_0_commute(<2 x i32> %x) {
 
 define i1 @eq_or_non_0_wrong_pred1(i32 %x) {
 ; CHECK-LABEL: @eq_or_non_0_wrong_pred1(
-; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
-; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[T0]], 10
-; CHECK-NEXT:    [[NOTZERO:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = or i1 [[NOTZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 true
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %t0, 10
@@ -90,9 +86,7 @@ define i1 @ne_and_is_0_wrong_pred1(i32 %x) {
 ; CHECK-LABEL: @ne_and_is_0_wrong_pred1(
 ; CHECK-NEXT:    [[T0:%.*]] = tail call i32 @llvm.ctpop.i32(i32 [[X:%.*]])
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[T0]], 10
-; CHECK-NEXT:    [[ISZERO:%.*]] = icmp eq i32 [[X]], 0
-; CHECK-NEXT:    [[R:%.*]] = or i1 [[ISZERO]], [[CMP]]
-; CHECK-NEXT:    ret i1 [[R]]
+; CHECK-NEXT:    ret i1 [[CMP]]
 ;
   %t0 = tail call i32 @llvm.ctpop.i32(i32 %x)
   %cmp = icmp ne i32 %t0, 10