Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Fold select (or/and (icmp eq/ne a, b), other_cond), a, b to select (other_cond), a, b #76203

Closed
wants to merge 2 commits into from

Conversation

XChy
Copy link
Member

@XChy XChy commented Dec 22, 2023

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 22, 2023

@llvm/pr-subscribers-llvm-transforms

Author: XChy (XChy)

Changes

Fixes #75784
Alive2 proof: https://alive2.llvm.org/ce/z/PNBSjJ

Note: when V1 is undef and a == V2, this transform is not valid: https://alive2.llvm.org/ce/z/BDT7Q9


Full diff: https://github.com/llvm/llvm-project/pull/76203.diff

3 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineInternal.h (+1-1)
  • (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+66)
  • (modified) llvm/test/Transforms/InstCombine/select-and-or.ll (+174-24)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
index 9e76a0cf17b183..0667b08af99f51 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
+++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h
@@ -442,7 +442,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombinerImpl final
   // into simplier select instruction using isImpliedCondition.
   Instruction *foldAndOrOfSelectUsingImpliedCond(Value *Op, SelectInst &SI,
                                                  bool IsAnd);
-
+  Instruction *foldSelectOfAndOr(SelectInst &SI);
   Instruction *hoistFNegAboveFMulFDiv(Value *FNegOp, Instruction &FMFSource);
 
 public:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 20bf00344b144b..90d905ac1c69a8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2683,6 +2683,69 @@ Instruction *InstCombinerImpl::foldAndOrOfSelectUsingImpliedCond(Value *Op,
   }
 }
 
+Instruction *InstCombinerImpl::foldSelectOfAndOr(SelectInst &SI) {
+  BinaryOperator *CondVal = dyn_cast<BinaryOperator>(SI.getCondition());
+  if (!CondVal)
+    return nullptr;
+
+  BinaryOperator::BinaryOps BinOpCode = CondVal->getOpcode();
+  bool IsAnd;
+
+  if (BinOpCode == BinaryOperator::Or)
+    IsAnd = false;
+  else if (BinOpCode == BinaryOperator::And)
+    IsAnd = true;
+  else
+    return nullptr;
+
+  Value *TrueVal = SI.getTrueValue(), *FalseVal = SI.getFalseValue();
+
+  // Or form (the value substituted for %a, here %v1, must not be undef):
+  //   %cmp = icmp eq i32 %a, %v1
+  //   %cmp1 = icmp eq i32 %a, %v2
+  //   %cond = or i1 %cmp, %cmp1
+  //   %select = select i1 %cond, i32 %a, i32 %v2
+  // =>
+  //   %select = select i1 %cmp, i32 %v1, i32 %v2
+
+  // And form (inverted; %v1 must likewise not be undef):
+  //   %cmp = icmp ne i32 %a, %v1
+  //   %cmp1 = icmp ne i32 %a, %v2
+  //   %cond = and i1 %cmp, %cmp1
+  //   %select = select i1 %cond, i32 %v2, i32 %a
+  // =>
+  //   %select = select i1 %cmp, i32 %v2, i32 %v1
+  // The roles of %v1 and %v2 may be swapped in either form.
+
+  Value *A, *V1, *V2;
+  CmpInst::Predicate ExpectedPred =
+      IsAnd ? ICmpInst::ICMP_NE : ICmpInst::ICMP_EQ;
+  CmpInst::Predicate Pred1, Pred2;
+  Value *Cmp1 = CondVal->getOperand(0);
+  Value *Cmp2 = CondVal->getOperand(1);
+  if (match(Cmp1, m_c_ICmp(Pred1, m_Value(A), m_Value(V1))) &&
+      Pred1 == ExpectedPred &&
+      match(Cmp2, m_c_ICmp(Pred2, m_Deferred(A), m_Value(V2))) &&
+      Pred2 == ExpectedPred) {
+    Value *NewFalseVal = IsAnd ? TrueVal : FalseVal;
+    Value *ExpectedA = IsAnd ? FalseVal : TrueVal;
+    if (ExpectedA == A && (NewFalseVal == V1 || NewFalseVal == V2)) {
+      Value *NewTrueVal = NewFalseVal == V1 ? V2 : V1;
+      if (!isGuaranteedNotToBeUndef(NewTrueVal, SQ.AC, &SI, &DT))
+        return nullptr;
+
+      Value *NewCond = NewTrueVal == V1 ? Cmp1 : Cmp2;
+      if (!IsAnd)
+        return SelectInst::Create(NewCond, NewTrueVal, NewFalseVal);
+      // The reused compare is `ne` in the and form, so swap the select arms.
+      return SelectInst::Create(NewCond, NewFalseVal, NewTrueVal);
+    }
+    return nullptr;
+  }
+
+  return nullptr;
+}
+
 // Canonicalize select with fcmp to fabs(). -0.0 makes this tricky. We need
 // fast-math-flags (nsz) or fsub with +0.0 (not fneg) for this to work.
 static Instruction *foldSelectWithFCmpToFabs(SelectInst &SI,
@@ -3409,6 +3472,9 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) {
     }
   }
 
+  if (Instruction *I = foldSelectOfAndOr(SI))
+    return I;
+
   auto *SIFPOp = dyn_cast<FPMathOperator>(&SI);
 
   if (auto *FCmp = dyn_cast<FCmpInst>(CondVal)) {
diff --git a/llvm/test/Transforms/InstCombine/select-and-or.ll b/llvm/test/Transforms/InstCombine/select-and-or.ll
index 7edcd767b86ecb..cf1b6377ff156d 100644
--- a/llvm/test/Transforms/InstCombine/select-and-or.ll
+++ b/llvm/test/Transforms/InstCombine/select-and-or.ll
@@ -613,9 +613,9 @@ define i1 @and_or2_wrong_operand(i1 %a, i1 %b, i1 %c, i1 %d) {
 
 define i1 @and_or3(i1 %a, i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @and_or3(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 true, i1 [[A:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], i1 [[TMP1]], i1 false
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[A:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], i1 [[TMP2]], i1 false
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %c = icmp eq i32 %x, %y
@@ -626,9 +626,9 @@ define i1 @and_or3(i1 %a, i1 %b, i32 %x, i32 %y) {
 
 define i1 @and_or3_commuted(i1 %a, i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @and_or3_commuted(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 true, i1 [[A:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], i1 [[TMP1]], i1 false
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 true, i1 [[A:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[B:%.*]], i1 [[TMP2]], i1 false
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %c = icmp eq i32 %x, %y
@@ -665,9 +665,9 @@ define i1 @and_or3_multiuse(i1 %a, i1 %b, i32 %x, i32 %y) {
 
 define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @and_or3_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[A:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[A:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %c = icmp eq <2 x i32> %x, %y
@@ -678,9 +678,9 @@ define <2 x i1> @and_or3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %
 
 define <2 x i1> @and_or3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @and_or3_vec_commuted(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[A:%.*]]
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP1]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[A:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[B:%.*]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %c = icmp eq <2 x i32> %x, %y
@@ -877,9 +877,9 @@ entry:
 
 define i1 @or_and3(i1 %a, i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @or_and3(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 false
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 [[B:%.*]], i1 false
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP2]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %c = icmp eq i32 %x, %y
@@ -890,9 +890,9 @@ define i1 @or_and3(i1 %a, i1 %b, i32 %x, i32 %y) {
 
 define i1 @or_and3_commuted(i1 %a, i1 %b, i32 %x, i32 %y) {
 ; CHECK-LABEL: @or_and3_commuted(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select i1 [[C]], i1 [[B:%.*]], i1 false
-; CHECK-NEXT:    [[R:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i1 [[B:%.*]], i1 false
+; CHECK-NEXT:    [[R:%.*]] = select i1 [[A:%.*]], i1 true, i1 [[TMP2]]
 ; CHECK-NEXT:    ret i1 [[R]]
 ;
   %c = icmp eq i32 %x, %y
@@ -929,9 +929,9 @@ define i1 @or_and3_multiuse(i1 %a, i1 %b, i32 %x, i32 %y) {
 
 define <2 x i1> @or_and3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @or_and3_vec(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %c = icmp eq <2 x i32> %x, %y
@@ -942,9 +942,9 @@ define <2 x i1> @or_and3_vec(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %
 
 define <2 x i1> @or_and3_vec_commuted(<2 x i1> %a, <2 x i1> %b, <2 x i32> %x, <2 x i32> %y) {
 ; CHECK-LABEL: @or_and3_vec_commuted(
-; CHECK-NEXT:    [[C:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
-; CHECK-NEXT:    [[TMP1:%.*]] = select <2 x i1> [[C]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer
-; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[TMP1]]
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[X:%.*]], [[Y:%.*]]
+; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x i1> [[B:%.*]], <2 x i1> zeroinitializer
+; CHECK-NEXT:    [[R:%.*]] = select <2 x i1> [[A:%.*]], <2 x i1> <i1 true, i1 true>, <2 x i1> [[TMP2]]
 ; CHECK-NEXT:    ret <2 x i1> [[R]]
 ;
   %c = icmp eq <2 x i32> %x, %y
@@ -965,3 +965,153 @@ define i1 @or_and3_wrong_operand(i1 %a, i1 %b, i32 %x, i32 %y, i1 %d) {
   %r = select i1 %cond, i1 %d, i1 %b
   ret i1 %r
 }
+
+define i32 @and_eq_v1(i32 %a, i32 noundef %v1, i32 %v2)  {
+; CHECK-LABEL: @and_eq_v1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i32 [[V1]], i32 [[V2:%.*]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp eq i32 %a, %v1
+  %cmp1 = icmp eq i32 %a, %v2
+  %cond = or i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %a, i32 %v2
+  ret i32 %select
+}
+
+define i32 @and_eq_v2(i32 %a, i32 %v1, i32 noundef %v2)  {
+; CHECK-LABEL: @and_eq_v2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], [[V2:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP1]], i32 [[V2]], i32 [[V1:%.*]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp eq i32 %a, %v1
+  %cmp1 = icmp eq i32 %a, %v2
+  %cond = or i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %a, i32 %v1
+  ret i32 %select
+}
+
+define i32 @and_ne_v1(i32 %a, i32 noundef %v1, i32 %v2)  {
+; CHECK-LABEL: @and_ne_v1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP_NOT]], i32 [[V1]], i32 [[V2:%.*]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp ne i32 %a, %v1
+  %cmp1 = icmp ne i32 %a, %v2
+  %cond = and i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %v2, i32 %a
+  ret i32 %select
+}
+
+define i32 @and_ne_v2(i32 %a, i32 %v1, i32 noundef %v2)  {
+; CHECK-LABEL: @and_ne_v2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP1_NOT:%.*]] = icmp eq i32 [[A:%.*]], [[V2:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP1_NOT]], i32 [[V2]], i32 [[V1:%.*]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp ne i32 %a, %v1
+  %cmp1 = icmp ne i32 %a, %v2
+  %cond = and i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %v1, i32 %a
+  ret i32 %select
+}
+
+define i32 @and_eq_v1_multi_use(i32 %a, i32 noundef %v1, i32 %v2)  {
+; CHECK-LABEL: @and_eq_v1_multi_use(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A]], [[V2:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    call void @use(i1 [[CMP]])
+; CHECK-NEXT:    call void @use(i1 [[CMP1]])
+; CHECK-NEXT:    call void @use(i1 [[COND]])
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[CMP]], i32 [[V1]], i32 [[V2]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp eq i32 %a, %v1
+  %cmp1 = icmp eq i32 %a, %v2
+  %cond = or i1 %cmp, %cmp1
+  call void @use(i1 %cmp)
+  call void @use(i1 %cmp1)
+  call void @use(i1 %cond)
+  %select = select i1 %cond, i32 %a, i32 %v2
+  ret i32 %select
+}
+
+define <2 x i32> @and_eq_v1_vec(<2 x i32> %a, <2 x i32> noundef %v1, <2 x i32> %v2)  {
+; CHECK-LABEL: @and_eq_v1_vec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq <2 x i32> [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select <2 x i1> [[CMP]], <2 x i32> [[V1]], <2 x i32> [[V2:%.*]]
+; CHECK-NEXT:    ret <2 x i32> [[SELECT]]
+;
+entry:
+  %cmp = icmp eq <2 x i32> %a, %v1
+  %cmp1 = icmp eq <2 x i32> %a, %v2
+  %cond = or <2 x i1> %cmp, %cmp1
+  %select = select <2 x i1> %cond, <2 x i32> %a, <2 x i32> %v2
+  ret <2 x i32> %select
+}
+
+
+define i32 @and_slt_v1_fail(i32 %a, i32 noundef %v1, i32 noundef %v2)  {
+; CHECK-LABEL: @and_slt_v1_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp slt i32 [[A]], [[V2:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], i32 [[A]], i32 [[V2]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp slt i32 %a, %v1
+  %cmp1 = icmp slt i32 %a, %v2
+  %cond = or i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %a, i32 %v2
+  ret i32 %select
+}
+
+define i32 @and_ne_different_operands_fail(i32 %a, i32 %b, i32 noundef %v1, i32 noundef %v2)  {
+; CHECK-LABEL: @and_ne_different_operands_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp ne i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[B:%.*]], [[V2:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = and i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], i32 [[V1]], i32 [[A]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp ne i32 %a, %v1
+  %cmp1 = icmp ne i32 %b, %v2
+  %cond = and i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %v1, i32 %a
+  ret i32 %select
+}
+
+define i32 @and_eq_v1_undef_fail(i32 %a, i32 %v1, i32 %v2)  {
+; CHECK-LABEL: @and_eq_v1_undef_fail(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A:%.*]], [[V1:%.*]]
+; CHECK-NEXT:    [[CMP1:%.*]] = icmp eq i32 [[A]], [[V2:%.*]]
+; CHECK-NEXT:    [[COND:%.*]] = or i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT:    [[SELECT:%.*]] = select i1 [[COND]], i32 [[A]], i32 [[V2]]
+; CHECK-NEXT:    ret i32 [[SELECT]]
+;
+entry:
+  %cmp = icmp eq i32 %a, %v1
+  %cmp1 = icmp eq i32 %a, %v2
+  %cond = or i1 %cmp, %cmp1
+  %select = select i1 %cond, i32 %a, i32 %v2
+  ret i32 %select
+}

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Dec 22, 2023
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This transform looks unnecessarily specific to me. I think what you actually want to do is this: https://alive2.llvm.org/ce/z/sN48Rm

We only need the one icmp in the condition, the other one is irrelevant. If the other icmp allows an operand replacement in the select, we already have a separate fold that will do that (select value equivalence fold).

@XChy
Copy link
Member Author

XChy commented Dec 22, 2023

This transform looks unnecessarily specific to me. I think what you actually want to do is this: https://alive2.llvm.org/ce/z/sN48Rm

We only need the one icmp in the condition, the other one is irrelevant. If the other icmp allows an operand replacement in the select, we already have a separate fold that will do that (select value equivalence fold).

Sounds great to me! It also applies to and(cmp, icmp ne), with proof. I wonder how you generalize these patterns: do you just replace components with arbitrary arguments?

@XChy XChy changed the title [InstCombine] Fold select (a == V1 | a == V2), a, V2 to select (a == V1), V1, V2 [InstCombine] Fold select (a == b | other_cond), a, b to select (other_cond), a, b Dec 22, 2023
@XChy
Copy link
Member Author

XChy commented Dec 22, 2023

Complete alive2 proof for commuted version: https://alive2.llvm.org/ce/z/XduDgy

@XChy XChy force-pushed the fix75784 branch 2 times, most recently from db1ce41 to c923e04 Compare December 22, 2023 18:08
@XChy XChy requested a review from nikic December 22, 2023 18:09
Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be possible to generalize the fold via simplifyWithOpReplaced(), but this seems like a reasonable starting point.

llvm/test/Transforms/InstCombine/select-and-or.ll Outdated Show resolved Hide resolved
llvm/test/Transforms/InstCombine/select-and-or.ll Outdated Show resolved Hide resolved
@XChy XChy requested a review from nikic December 23, 2023 08:11
dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Dec 23, 2023
@XChy
Copy link
Member Author

XChy commented Dec 28, 2023

ping.

@dtcxzyw
Copy link
Member

dtcxzyw commented Dec 29, 2023

It also holds for logical and/or.
Alive2: https://alive2.llvm.org/ce/z/9Ko8wb

@dtcxzyw dtcxzyw changed the title [InstCombine] Fold select (a == b | other_cond), a, b to select (other_cond), a, b [InstCombine] Fold select (or/and (icmp eq/ne a, b), other_cond), a, b to select (other_cond), a, b Dec 29, 2023
@dtcxzyw
Copy link
Member

dtcxzyw commented Dec 30, 2023

Emm, I seem to have found a better approach. Will post a patch later.

dtcxzyw added a commit that referenced this pull request Dec 31, 2023
…ldable (#76621)

This patch does the following folds:
```
(select A && B, T, F) -> (select A, (select B, T, F), F)
(select A || B, T, F) -> (select A, T, (select B, T, F))
```
if `(select B, T, F)` can be folded into a value or a canonicalized SPF.
Alive2: https://alive2.llvm.org/ce/z/4Bdrbu

The original motivation of this patch is to simplify the following
pattern:
```
%.sroa.speculated.i = tail call i64 @llvm.umax.i64(i64 %sub.ptr.div.i.i, i64 1)
%add.i = add i64 %.sroa.speculated.i, %sub.ptr.div.i.i
%cmp7.i = icmp ult i64 %add.i, %sub.ptr.div.i.i
%cmp9.i = icmp ugt i64 %add.i, 1152921504606846975
%or.cond.i = or i1 %cmp7.i, %cmp9.i
%cond.i = select i1 %or.cond.i, i64 1152921504606846975, i64 %add.i
->
%.sroa.speculated.i = tail call i64 @llvm.umax.i64(i64 %sub.ptr.div.i.i, i64 1)
%add.i = add i64 %.sroa.speculated.i, %sub.ptr.div.i.i
%cmp7.i = icmp ult i64 %add.i, %sub.ptr.div.i.i
%max = call i64 @llvm.umax.i64(i64 %add.i, 1152921504606846975)
%cond.i = select i1 %cmp7.i, i64 1152921504606846975, i64 %max
```
The latter form has better codegen for some backends. It is also more
analysis-friendly than the original one.
Godbolt: https://godbolt.org/z/eK6eb5jf1
Alive2: https://alive2.llvm.org/ce/z/VHlxL2

Compile-time impact:
http://llvm-compile-time-tracker.com/compare.php?from=7c71d3996a72b9b024622f23bf556539b961c88c&to=638ce8666fadaca1ab2639a3c2bc52a4a8508f40&stat=instructions:u

|stage1-O3|stage1-ReleaseThinLTO|stage1-ReleaseLTO-g|stage1-O0-g|stage2-O3|stage2-O0-g|stage2-clang|
|--|--|--|--|--|--|--|
|+0.02%|-0.00%|+0.02%|-0.03%|-0.00%|-0.05%|-0.00%|

It is an alternative to #76203 and #76363 because we can simplify
`select (icmp eq/ne a, b), a, b` into `b` or `a`.
Fixes #75784.
Fixes #76043.

Thanks to @XChy for providing additional tests.
Co-authored-by: XChy <xxs_chy@outlook.com>
@XChy
Copy link
Member Author

XChy commented Dec 31, 2023

#76621 has fixed this issue.

@XChy XChy closed this Dec 31, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

[InstCombine] Missing optimization: fold select (a != V1 & a != V2), V1, a to select (a == V2), V2, V1
4 participants