-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
[InstCombine] Add folds for (icmp eq/ne (and (add/sub/xor A, P2), P2), 0/P2) #67836
[InstCombine] Add folds for (icmp eq/ne (and (add/sub/xor A, P2), P2), 0/P2) #67836
Conversation
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-pgo Changes
All associated proofs: https://alive2.llvm.org/ce/z/45pq2x Full diff: https://github.com/llvm/llvm-project/pull/67836.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 9f034aba874a8c4..cb5b8a1377a80bc 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -5453,6 +5453,45 @@ Instruction *InstCombinerImpl::foldICmpEquality(ICmpInst &I) {
m_CombineAnd(m_Value(B), m_Unless(m_ImmConstant())))))
return new ICmpInst(Pred, Builder.CreateXor(A, B), Cst);
+ {
+ auto MatchAndP2OfAddSubXor = [&](unsigned Opc) -> std::optional<bool> {
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
+ std::optional<bool> Matched = std::nullopt;
+ if (match(&I, m_c_ICmp(
+ PredUnused,
+ m_OneUse(m_c_And(m_Value(A), m_c_BinOp(Opc, m_Value(B),
+ m_Deferred(A)))),
+ m_Deferred(A))))
+ Matched = false;
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
+ else if (match(&I, m_ICmp(PredUnused,
+ m_OneUse(m_c_And(
+ m_Value(A),
+ m_c_BinOp(Opc, m_Value(B), m_Deferred(A)))),
+ m_Zero())))
+ Matched = true;
+
+ if (Matched && isKnownToBeAPowerOfTwo(A, /* OrZero */ true, 0, &I))
+ return Matched;
+ return std::nullopt;
+ };
+ std::optional<bool> IsZero = MatchAndP2OfAddSubXor(Instruction::Add);
+ if (!IsZero)
+ IsZero = MatchAndP2OfAddSubXor(Instruction::Sub);
+ if (!IsZero)
+ IsZero = MatchAndP2OfAddSubXor(Instruction::Xor);
+
+ if (IsZero) {
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), P2)
+ // -> (icmp eq/ne (and X, P2), 0)
+ // (icmp eq/ne (and (add/sub/xor X, P2), P2), 0)
+ // -> (icmp eq/ne (and X, P2), P2)
+ return new ICmpInst(Pred, Builder.CreateAnd(A, B),
+ *IsZero ? A
+ : ConstantInt::getNullValue(A->getType()));
+ }
+ }
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/icmp-and-add-sub-xor-p2.ll b/llvm/test/Transforms/InstCombine/icmp-and-add-sub-xor-p2.ll
new file mode 100644
index 000000000000000..590113179bab418
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/icmp-and-add-sub-xor-p2.ll
@@ -0,0 +1,121 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -passes=instcombine -S -o - %s | FileCheck %s
+
+declare void @use.i8(i8)
+define i1 @src_add_eq_p2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_add_eq_p2(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[YY:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[NY]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], [[YY]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %yy
+ %y = and i8 %ny, %yy
+ %x1 = add i8 %x, %y
+ %v = and i8 %x1, %y
+ %r = icmp eq i8 %v, %y
+ ret i1 %r
+}
+
+define i1 @src_add_eq_p2_fail_multiuse(i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_add_eq_p2_fail_multiuse(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[YY:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = and i8 [[NY]], [[YY]]
+; CHECK-NEXT: [[X1:%.*]] = add i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[V:%.*]] = and i8 [[X1]], [[Y]]
+; CHECK-NEXT: call void @use.i8(i8 [[V]])
+; CHECK-NEXT: [[R:%.*]] = icmp eq i8 [[V]], [[Y]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %yy
+ %y = and i8 %ny, %yy
+ %x1 = add i8 %x, %y
+ %v = and i8 %x1, %y
+ call void @use.i8(i8 %v)
+ %r = icmp eq i8 %v, %y
+ ret i1 %r
+}
+
+define i1 @src_xor_ne_zero(i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_xor_ne_zero(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[YY:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = and i8 [[NY]], [[YY]]
+; CHECK-NEXT: [[X1:%.*]] = xor i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: call void @use.i8(i8 [[X1]])
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y]], [[X]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[TMP1]], [[Y]]
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %yy
+ %y = and i8 %ny, %yy
+ %x1 = xor i8 %x, %y
+ call void @use.i8(i8 %x1)
+ %v = and i8 %x1, %y
+ %r = icmp ne i8 %v, 0
+ ret i1 %r
+}
+
+define i1 @src_xor_ne_zero_fail_different_p2(i8 %x, i8 %yy) {
+; CHECK-LABEL: @src_xor_ne_zero_fail_different_p2(
+; CHECK-NEXT: [[NY:%.*]] = sub i8 0, [[YY:%.*]]
+; CHECK-NEXT: [[Y:%.*]] = and i8 [[NY]], [[YY]]
+; CHECK-NEXT: [[Y2:%.*]] = shl i8 [[Y]], 1
+; CHECK-NEXT: [[X1:%.*]] = xor i8 [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[V:%.*]] = and i8 [[X1]], [[Y2]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[V]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %ny = sub i8 0, %yy
+ %y = and i8 %ny, %yy
+ %y2 = shl i8 %y, 1
+ %x1 = xor i8 %x, %y
+ %v = and i8 %x1, %y2
+ %r = icmp ne i8 %v, 0
+ ret i1 %r
+}
+
+define <2 x i1> @src_sub_ne_p2(<2 x i8> %x, <2 x i8> %yy) {
+; CHECK-LABEL: @src_sub_ne_p2(
+; CHECK-NEXT: [[NY:%.*]] = sub <2 x i8> zeroinitializer, [[YY:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[NY]], [[X:%.*]]
+; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i8> [[TMP1]], [[YY]]
+; CHECK-NEXT: [[R:%.*]] = icmp ne <2 x i8> [[TMP2]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %ny = sub <2 x i8> zeroinitializer, %yy
+ %y = and <2 x i8> %ny, %yy
+ %x1 = sub <2 x i8> %x, %y
+ %v = and <2 x i8> %x1, %y
+ %r = icmp ne <2 x i8> %v, %y
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @src_sub_eq_zero(<2 x i8> %x, <2 x i8> %yy) {
+; CHECK-LABEL: @src_sub_eq_zero(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i8> <i8 1, i8 2>, [[YY:%.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[Y]], [[X:%.*]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[TMP1]], [[Y]]
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %y = shl <2 x i8> <i8 1, i8 2>, %yy
+ %x1 = sub <2 x i8> %x, %y
+ %v = and <2 x i8> %x1, %y
+ %r = icmp eq <2 x i8> %v, zeroinitializer
+ ret <2 x i1> %r
+}
+
+define <2 x i1> @src_sub_eq_zero_fail_non_p2(<2 x i8> %x, <2 x i8> %yy) {
+; CHECK-LABEL: @src_sub_eq_zero_fail_non_p2(
+; CHECK-NEXT: [[Y:%.*]] = shl <2 x i8> <i8 1, i8 3>, [[YY:%.*]]
+; CHECK-NEXT: [[X1:%.*]] = sub <2 x i8> [[X:%.*]], [[Y]]
+; CHECK-NEXT: [[V:%.*]] = and <2 x i8> [[X1]], [[Y]]
+; CHECK-NEXT: [[R:%.*]] = icmp eq <2 x i8> [[V]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %y = shl <2 x i8> <i8 1, i8 3>, %yy
+ %x1 = sub <2 x i8> %x, %y
+ %v = and <2 x i8> %x1, %y
+ %r = icmp eq <2 x i8> %v, zeroinitializer
+ ret <2 x i1> %r
+}
diff --git a/llvm/test/Transforms/InstCombine/pr25342.ll b/llvm/test/Transforms/InstCombine/pr25342.ll
index e1a6822e7908474..2f85f99c4ce003e 100644
--- a/llvm/test/Transforms/InstCombine/pr25342.ll
+++ b/llvm/test/Transforms/InstCombine/pr25342.ll
@@ -78,7 +78,7 @@ define void @multi_phi(i32 signext %n) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[TMP0:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP6:%.*]], [[ODD_BB:%.*]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[TMP7:%.*]], [[ODD_BB:%.*]] ]
; CHECK-NEXT: [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[ODD_BB]] ]
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[I_0]], [[N:%.*]]
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
@@ -92,14 +92,14 @@ define void @multi_phi(i32 signext %n) {
; CHECK-NEXT: [[SUB_I:%.*]] = fsub float [[MUL_I]], [[MUL4_I]]
; CHECK-NEXT: [[ADD_I:%.*]] = fadd float [[SUB_I]], [[TMP0]]
; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_0]], 1
-; CHECK-NEXT: [[BIT0:%.*]] = and i32 [[INC]], 1
-; CHECK-NEXT: [[EVEN_NOT_NOT:%.*]] = icmp eq i32 [[BIT0]], 0
-; CHECK-NEXT: br i1 [[EVEN_NOT_NOT]], label [[EVEN_BB:%.*]], label [[ODD_BB]]
+; CHECK-NEXT: [[TMP5:%.*]] = and i32 [[I_0]], 1
+; CHECK-NEXT: [[EVEN_NOT_NOT_NOT:%.*]] = icmp eq i32 [[TMP5]], 0
+; CHECK-NEXT: br i1 [[EVEN_NOT_NOT_NOT]], label [[ODD_BB]], label [[EVEN_BB:%.*]]
; CHECK: even.bb:
-; CHECK-NEXT: [[TMP5:%.*]] = fadd float [[SUB_I]], [[ADD_I]]
+; CHECK-NEXT: [[TMP6:%.*]] = fadd float [[SUB_I]], [[ADD_I]]
; CHECK-NEXT: br label [[ODD_BB]]
; CHECK: odd.bb:
-; CHECK-NEXT: [[TMP6]] = phi float [ [[ADD_I]], [[FOR_BODY]] ], [ [[TMP5]], [[EVEN_BB]] ]
+; CHECK-NEXT: [[TMP7]] = phi float [ [[ADD_I]], [[FOR_BODY]] ], [ [[TMP6]], [[EVEN_BB]] ]
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
; CHECK-NEXT: store float [[TMP0]], ptr @dd, align 4
diff --git a/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll b/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll
index b4a27ee65613365..7040bac6a4c43f0 100644
--- a/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll
+++ b/llvm/test/Transforms/PGOProfile/cspgo_profile_summary.ll
@@ -104,7 +104,7 @@ for.end:
; CSPGOSUMMARY-LABEL: @foo
; CSPGOSUMMARY: %even.odd.i = select i1 %tobool.i{{[0-9]*}}, ptr @even, ptr @odd
; CSPGOSUMMARY-SAME: !prof ![[BW_CSPGO_BAR]]
-; CSPGOSUMMARY: %even.odd.i2 = select i1 %tobool.i{{[0-9]*}}, ptr @even, ptr @odd
+; CSPGOSUMMARY: %even.odd.i2 = select i1 %tobool.i{{[0-9]*}}, ptr @odd, ptr @even
; CSPGOSUMMARY-SAME: !prof ![[BW_CSPGO_BAR]]
declare dso_local i32 @bar_m(i32)
|
43ca1e7
to
914ee8e
Compare
…P2), P2), 0/P2); NFC
…, 0/P2) - `(icmp eq/ne (and (add/sub/xor X, P2), P2), P2)` -> `(icmp eq/ne (and X, P2), 0)` - `(icmp eq/ne (and (add/sub/xor X, P2), P2), 0)` -> `(icmp eq/ne (and X, P2), P2)` Folds like this come up with reasonable regularity in odd/even loops. Proofs: https://alive2.llvm.org/ce/z/45pq2x
914ee8e
to
9e8f9fc
Compare
…power of 2 (#67915) This patch canonicalizes the pattern `(X +/- Y) & Y` into `~X & Y` when `Y` is a power of 2 or zero. It will reduce the patterns to match in #67836 and exploit more optimization opportunities. Alive2: https://alive2.llvm.org/ce/z/LBpvRF
#67915 has been merged. Please update pre-commit tests. |
No diff. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
ping. |
Just to double check: Is there real world motivation for this pattern? As this is a multi-use-only pattern, we should not handle it without specific motivation. (As you can imagine, you can construct a huge number of multi-use-only patterns by combining any number of use-restricted patterns.) |
The motivating case was essentially an odd/even pattern where I noticed the cmp happened after the increment/decrement. The
All associated proofs: https://alive2.llvm.org/ce/z/45pq2x