-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reland "[InstCombine] Extend `foldICmpBinOp` to `add`-like `or`"
#76531
Conversation
1d562df
to
210067c
Compare
@nikic See the last commit for the bug fix. |
@llvm/pr-subscribers-llvm-transforms Author: Mikhail Gudim (mgudim) Changes: The original PR had a typo which was causing a bug. Full diff: https://github.com/llvm/llvm-project/pull/76531.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 0222c93faf24e9..b5a6366219db34 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -4624,27 +4624,35 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
}
bool NoOp0WrapProblem = false, NoOp1WrapProblem = false;
- if (BO0 && isa<OverflowingBinaryOperator>(BO0))
- NoOp0WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO0->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO0->hasNoSignedWrap());
- if (BO1 && isa<OverflowingBinaryOperator>(BO1))
- NoOp1WrapProblem =
- ICmpInst::isEquality(Pred) ||
- (CmpInst::isUnsigned(Pred) && BO1->hasNoUnsignedWrap()) ||
- (CmpInst::isSigned(Pred) && BO1->hasNoSignedWrap());
-
+ bool Op0HasNUW = false, Op1HasNUW = false;
+ bool Op0HasNSW = false, Op1HasNSW = false;
// Analyze the case when either Op0 or Op1 is an add instruction.
// Op0 = A + B (or A and B are null); Op1 = C + D (or C and D are null).
+ auto hasNoWrapProblem = [](const BinaryOperator &BO, CmpInst::Predicate Pred,
+ bool &HasNSW, bool &HasNUW) -> bool {
+ if (isa<OverflowingBinaryOperator>(BO)) {
+ HasNUW = BO.hasNoUnsignedWrap();
+ HasNSW = BO.hasNoSignedWrap();
+ return ICmpInst::isEquality(Pred) ||
+ (CmpInst::isUnsigned(Pred) && HasNUW) ||
+ (CmpInst::isSigned(Pred) && HasNSW);
+ } else if (BO.getOpcode() == Instruction::Or) {
+ HasNUW = true;
+ HasNSW = true;
+ return true;
+ } else {
+ return false;
+ }
+ };
Value *A = nullptr, *B = nullptr, *C = nullptr, *D = nullptr;
- if (BO0 && BO0->getOpcode() == Instruction::Add) {
- A = BO0->getOperand(0);
- B = BO0->getOperand(1);
+
+ if (BO0) {
+ match(BO0, m_AddLike(m_Value(A), m_Value(B)));
+ NoOp0WrapProblem = hasNoWrapProblem(*BO0, Pred, Op0HasNSW, Op0HasNUW);
}
- if (BO1 && BO1->getOpcode() == Instruction::Add) {
- C = BO1->getOperand(0);
- D = BO1->getOperand(1);
+ if (BO1) {
+ match(BO1, m_AddLike(m_Value(C), m_Value(D)));
+ NoOp1WrapProblem = hasNoWrapProblem(*BO1, Pred, Op1HasNSW, Op1HasNUW);
}
// icmp (A+B), A -> icmp B, 0 for equalities or if there is no overflow.
@@ -4764,17 +4772,15 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
APInt AP2Abs = AP2->abs();
if (AP1Abs.uge(AP2Abs)) {
APInt Diff = *AP1 - *AP2;
- bool HasNUW = BO0->hasNoUnsignedWrap() && Diff.ule(*AP1);
- bool HasNSW = BO0->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(A, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ A, C3, "", Op0HasNUW && Diff.ule(*AP1), Op0HasNSW);
return new ICmpInst(Pred, NewAdd, C);
} else {
APInt Diff = *AP2 - *AP1;
- bool HasNUW = BO1->hasNoUnsignedWrap() && Diff.ule(*AP2);
- bool HasNSW = BO1->hasNoSignedWrap();
Constant *C3 = Constant::getIntegerValue(BO0->getType(), Diff);
- Value *NewAdd = Builder.CreateAdd(C, C3, "", HasNUW, HasNSW);
+ Value *NewAdd = Builder.CreateAdd(
+ C, C3, "", Op1HasNUW && Diff.ule(*AP2), Op1HasNSW);
return new ICmpInst(Pred, A, NewAdd);
}
}
@@ -4868,16 +4874,14 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// if Z != 0 and nsw(X * Z) and nsw(Y * Z)
// X * Z eq/ne Y * Z -> X eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoSignedWrap() &&
- BO1->hasNoSignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNSW && Op1HasNSW)
return new ICmpInst(Pred, X, Y);
} else
NonZero = isKnownNonZero(Z, Q.DL, /*Depth=*/0, Q.AC, Q.CxtI, Q.DT);
// If Z != 0 and nuw(X * Z) and nuw(Y * Z)
// X * Z u{lt/le/gt/ge}/eq/ne Y * Z -> X u{lt/le/gt/ge}/eq/ne Y
- if (NonZero && BO0 && BO1 && BO0->hasNoUnsignedWrap() &&
- BO1->hasNoUnsignedWrap())
+ if (NonZero && BO0 && BO1 && Op0HasNUW && Op1HasNUW)
return new ICmpInst(Pred, X, Y);
}
}
@@ -4977,8 +4981,8 @@ Instruction *InstCombinerImpl::foldICmpBinOp(ICmpInst &I,
return new ICmpInst(Pred, BO0->getOperand(0), BO1->getOperand(0));
case Instruction::Shl: {
- bool NUW = BO0->hasNoUnsignedWrap() && BO1->hasNoUnsignedWrap();
- bool NSW = BO0->hasNoSignedWrap() && BO1->hasNoSignedWrap();
+ bool NUW = Op0HasNUW && Op1HasNUW;
+ bool NSW = Op0HasNSW && Op1HasNSW;
if (!NUW && !NSW)
break;
if (!NSW && I.isSigned())
diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll
index 9b2e141bdb0506..299fc9d5232fb8 100644
--- a/llvm/test/Transforms/InstCombine/icmp.ll
+++ b/llvm/test/Transforms/InstCombine/icmp.ll
@@ -3963,10 +3963,9 @@ define <8 x i1> @bitreverse_vec_ne(<8 x i16> %x, <8 x i16> %y) {
define i1 @knownbits1(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits1(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3980,10 +3979,9 @@ define i1 @knownbits1(i8 %a, i8 %b) {
define i1 @knownbits2(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits2(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -3997,10 +3995,9 @@ define i1 @knownbits2(i8 %a, i8 %b) {
define i1 @knownbits3(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits3(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], 1
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 5
@@ -4014,10 +4011,9 @@ define i1 @knownbits3(i8 %a, i8 %b) {
define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits4(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 1, i8 1>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 5, i8 5>
@@ -4033,10 +4029,9 @@ define <2 x i1> @knownbits4(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits5(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits5(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -4050,10 +4045,9 @@ define i1 @knownbits5(i8 %a, i8 %b) {
define i1 @knownbits6(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits6(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A2]], [[B2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[A1]], [[TMP1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -4067,10 +4061,9 @@ define i1 @knownbits6(i8 %a, i8 %b) {
define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
; CHECK-LABEL: @knownbits7(
; CHECK-NEXT: [[A1:%.*]] = and <2 x i8> [[A:%.*]], <i8 -127, i8 -127>
-; CHECK-NEXT: [[A2:%.*]] = or disjoint <2 x i8> [[A1]], <i8 4, i8 4>
; CHECK-NEXT: [[B1:%.*]] = and <2 x i8> [[B:%.*]], <i8 2, i8 2>
-; CHECK-NEXT: [[B2:%.*]] = or disjoint <2 x i8> [[B1]], <i8 5, i8 5>
-; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint <2 x i8> [[B1]], <i8 1, i8 1>
+; CHECK-NEXT: [[C:%.*]] = icmp eq <2 x i8> [[TMP1]], [[A1]]
; CHECK-NEXT: ret <2 x i1> [[C]]
;
%a1 = and <2 x i8> %a, <i8 133, i8 133>
@@ -4084,10 +4077,9 @@ define <2 x i1> @knownbits7(<2 x i8> %a, <2 x i8> %b) {
define i1 @knownbits8(i8 %a, i8 %b) {
; CHECK-LABEL: @knownbits8(
; CHECK-NEXT: [[A1:%.*]] = and i8 [[A:%.*]], -127
-; CHECK-NEXT: [[A2:%.*]] = or disjoint i8 [[A1]], 4
; CHECK-NEXT: [[B1:%.*]] = and i8 [[B:%.*]], 2
-; CHECK-NEXT: [[B2:%.*]] = or disjoint i8 [[B1]], 5
-; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[B2]], [[A2]]
+; CHECK-NEXT: [[TMP1:%.*]] = or disjoint i8 [[B1]], 1
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 [[TMP1]], [[A1]]
; CHECK-NEXT: ret i1 [[C]]
;
%a1 = and i8 %a, 133
@@ -5013,3 +5005,80 @@ define i1 @or_positive_sgt_zero_multi_use(i8 %a) {
%cmp = icmp sgt i8 %b, 0
ret i1 %cmp
}
+
+
+define i1 @disjoint_or_sgt_1(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_sgt_1(
+; CHECK-NEXT: [[B1:%.*]] = add nsw i8 [[B:%.*]], 2
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp sle i8 [[B1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 1
+ %b1 = add nsw i8 %b, 2
+ %icmp_ = icmp sgt i8 %a1, %b1
+ ret i1 %icmp_
+}
+
+define i1 @disjoint_or_sgt_2(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_sgt_2(
+; CHECK-NEXT: [[A1:%.*]] = or disjoint i8 [[A:%.*]], 2
+; CHECK-NEXT: [[B1:%.*]] = add i8 [[B:%.*]], 1
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp sgt i8 [[A1]], [[B1]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 2
+ %b1 = add i8 %b, 1
+ %icmp_ = icmp sgt i8 %a1, %b1
+ ret i1 %icmp_
+}
+
+define i1 @disjoint_or_sgt_3(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_sgt_3(
+; CHECK-NEXT: [[A1:%.*]] = or disjoint i8 [[A:%.*]], 2
+; CHECK-NEXT: [[B1:%.*]] = add nuw i8 [[B:%.*]], 1
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp sgt i8 [[A1]], [[B1]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 2
+ %b1 = add nuw i8 %b, 1
+ %icmp_ = icmp sgt i8 %a1, %b1
+ ret i1 %icmp_
+}
+
+define i1 @disjoint_or_ugt_1(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_ugt_1(
+; CHECK-NEXT: [[B1:%.*]] = add nsw i8 [[B:%.*]], 2
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp ule i8 [[B1]], [[A:%.*]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 1
+ %b1 = add nsw i8 %b, 2
+ %icmp_ = icmp ugt i8 %a1, %b1
+ ret i1 %icmp_
+}
+
+define i1 @disjoint_or_ugt_2(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_ugt_2(
+; CHECK-NEXT: [[A1:%.*]] = or disjoint i8 [[A:%.*]], 2
+; CHECK-NEXT: [[B1:%.*]] = add i8 [[B:%.*]], 1
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp ugt i8 [[A1]], [[B1]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 2
+ %b1 = add i8 %b, 1
+ %icmp_ = icmp ugt i8 %a1, %b1
+ ret i1 %icmp_
+}
+
+define i1 @disjoint_or_ugt_3(i8 %a, i8 %b) {
+; CHECK-LABEL: @disjoint_or_ugt_3(
+; CHECK-NEXT: [[A1:%.*]] = or disjoint i8 [[A:%.*]], 2
+; CHECK-NEXT: [[B1:%.*]] = add nuw i8 [[B:%.*]], 1
+; CHECK-NEXT: [[ICMP_:%.*]] = icmp ugt i8 [[A1]], [[B1]]
+; CHECK-NEXT: ret i1 [[ICMP_]]
+;
+ %a1 = or disjoint i8 %a, 2
+ %b1 = add nuw i8 %b, 1
+ %icmp_ = icmp ugt i8 %a1, %b1
+ ret i1 %icmp_
+}
|
@mgudim Can you please also add a test for the bug fix? |
Yes, sorry I was about to do that. |
Done. See last commit. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
InstCombine canonicalizes `add` to `or` when possible, but this causes some optimizations that apply to `add` to be missed, because they do not recognize that the `or` is equivalent to an `add`. In this patch we generalize `foldICmpBinOp` to handle such cases.
cba6a64
to
ddd5dea
Compare
The original PR had a typo which was causing a bug.