-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[InstCombine] Fold (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0 #157072
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: Shamshura Egor (egorshamshura) Changes: Fixes: #152797 alive2: https://alive2.llvm.org/ce/z/h8HYTo Full diff: https://github.com/llvm/llvm-project/pull/157072.diff 4 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a13d3ceb61320..c238142524f4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2476,6 +2476,20 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
}
+ // (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
+ if (match(Op0, m_Add(m_Value(X), m_Value(Y)))) {
+ const APInt *PowerC;
+ if (match(Op1, m_Power2(PowerC)) && !PowerC->isOne()) {
+ KnownBits YKnown = computeKnownBits(Y, &I);
+ unsigned ShiftAmount = PowerC->logBase2() + 1;
+
+ APInt YMod = YKnown.Zero;
+ if (YMod.getLoBits(ShiftAmount).isZero()) {
+ return BinaryOperator::CreateAnd(X, Op1);
+ }
+ }
+ }
+
// Canonicalize:
// (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
diff --git a/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll b/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll
new file mode 100644
index 0000000000000..f698ea6ec30a6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @addition_and_bitwise1(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise1(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[V0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[V1]], 2
+; CHECK-NEXT: [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[V6]]
+;
+ %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+ %v1 = load i32, ptr %v0, align 4
+ %v2 = zext i32 %v1 to i64
+ %v3 = ptrtoint ptr %v0 to i64
+ %v4 = add i64 %v2, %v3
+ %v5 = and i64 %v4, 2
+ %v6 = icmp eq i64 %v5, 0
+ ret i1 %v6
+}
+
+define i1 @addition_and_bitwise2(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise2(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT: [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT: [[V1:%.*]] = load i32, ptr [[V0]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[V1]], 4
+; CHECK-NEXT: [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[V6]]
+;
+ %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+ %v1 = load i32, ptr %v0, align 16
+ %v2 = zext i32 %v1 to i64
+ %v3 = ptrtoint ptr %v0 to i64
+ %v4 = add i64 %v2, %v3
+ %v5 = and i64 %v4, 4
+ %v6 = icmp eq i64 %v5, 0
+ ret i1 %v6
+}
diff --git a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
index 920497c07e380..de589198c52eb 100644
--- a/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
+++ b/llvm/test/Transforms/InstCombine/rem-mul-shl.ll
@@ -903,8 +903,7 @@ define i64 @urem_shl_vscale_overlap() vscale_range(1,16) {
; CHECK-LABEL: @urem_shl_vscale_overlap(
; CHECK-NEXT: [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[SHIFT:%.*]] = shl nuw nsw i64 [[VSCALE]], 10
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[SHIFT]], 2047
-; CHECK-NEXT: [[REM:%.*]] = and i64 [[TMP1]], 1024
+; CHECK-NEXT: [[REM:%.*]] = and i64 [[SHIFT]], 1024
; CHECK-NEXT: ret i64 [[REM]]
;
%vscale = call i64 @llvm.vscale.i64()
@@ -956,10 +955,7 @@ define i32 @and_add_shl_vscale_not_power2_negative() vscale_range(1,16) {
; Negative test: the %sign may be 0, https://alive2.llvm.org/ce/z/WU_j4a
define i32 @and_add_and (i32 %x) {
; CHECK-LABEL: @and_add_and(
-; CHECK-NEXT: [[TMP1:%.*]] = shl i32 [[X:%.*]], 24
-; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -2147483648
-; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP2]], -2147483648
-; CHECK-NEXT: ret i32 [[AND]]
+; CHECK-NEXT: ret i32 0
;
%x1 = lshr i32 %x, 7
%sign = and i32 %x1, 1 ; %sign = (%x >> 7) & 1
diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll
index cbf9cc11d9009..3a2ff4381d07e 100644
--- a/llvm/test/Transforms/LoopVectorize/induction.ll
+++ b/llvm/test/Transforms/LoopVectorize/induction.ll
@@ -1854,8 +1854,8 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
; INTERLEAVE-NEXT: [[TMP18:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[DOTIDX]]
; INTERLEAVE-NEXT: [[DOTIDX5:%.*]] = shl nsw i64 [[TMP14]], 4
; INTERLEAVE-NEXT: [[TMP19:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[DOTIDX5]]
-; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP18]], align 1
-; INTERLEAVE-NEXT: [[WIDE_VEC3:%.*]] = load <16 x i32>, ptr [[TMP19]], align 1
+; INTERLEAVE-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr [[TMP18]], align 1, !alias.scope [[META17:![0-9]+]]
+; INTERLEAVE-NEXT: [[WIDE_VEC3:%.*]] = load <16 x i32>, ptr [[TMP19]], align 1, !alias.scope [[META17]]
; INTERLEAVE-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], ptr [[P]], i64 [[INDEX]], i32 1
; INTERLEAVE-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP11]], i32 1
; INTERLEAVE-NEXT: [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP12]], i32 1
@@ -1865,21 +1865,21 @@ define void @scalarize_induction_variable_04(ptr %a, ptr %p, i32 %n) {
; INTERLEAVE-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP16]], i32 1
; INTERLEAVE-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], ptr [[P]], i64 [[TMP17]], i32 1
; INTERLEAVE-NEXT: [[TMP28:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 0
-; INTERLEAVE-NEXT: store i32 [[TMP28]], ptr [[TMP20]], align 1, !alias.scope [[META17:![0-9]+]], !noalias [[META20:![0-9]+]]
+; INTERLEAVE-NEXT: store i32 [[TMP28]], ptr [[TMP20]], align 1, !alias.scope [[META20:![0-9]+]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 4
-; INTERLEAVE-NEXT: store i32 [[TMP29]], ptr [[TMP21]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP29]], ptr [[TMP21]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP30:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 8
-; INTERLEAVE-NEXT: store i32 [[TMP30]], ptr [[TMP22]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP30]], ptr [[TMP22]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP31:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 12
-; INTERLEAVE-NEXT: store i32 [[TMP31]], ptr [[TMP23]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP31]], ptr [[TMP23]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP32:%.*]] = extractelement <16 x i32> [[WIDE_VEC3]], i64 0
-; INTERLEAVE-NEXT: store i32 [[TMP32]], ptr [[TMP24]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP32]], ptr [[TMP24]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP33:%.*]] = extractelement <16 x i32> [[WIDE_VEC3]], i64 4
-; INTERLEAVE-NEXT: store i32 [[TMP33]], ptr [[TMP25]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP33]], ptr [[TMP25]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <16 x i32> [[WIDE_VEC3]], i64 8
-; INTERLEAVE-NEXT: store i32 [[TMP34]], ptr [[TMP26]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP34]], ptr [[TMP26]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[TMP35:%.*]] = extractelement <16 x i32> [[WIDE_VEC3]], i64 12
-; INTERLEAVE-NEXT: store i32 [[TMP35]], ptr [[TMP27]], align 1, !alias.scope [[META17]], !noalias [[META20]]
+; INTERLEAVE-NEXT: store i32 [[TMP35]], ptr [[TMP27]], align 1, !alias.scope [[META20]], !noalias [[META17]]
; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT: [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT: br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
@@ -4332,10 +4332,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; IND-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; IND: vector.scevcheck:
-; IND-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; IND-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; IND-NEXT: [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; IND-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; IND-NEXT: [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; IND-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; IND-NEXT: [[TMP4:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; IND-NEXT: br i1 [[TMP4]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; IND: vector.ph:
-; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967294
+; IND-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 6442450942
; IND-NEXT: br label [[VECTOR_BODY:%.*]]
; IND: vector.body:
; IND-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4372,10 +4376,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; UNROLL-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; UNROLL: vector.scevcheck:
-; UNROLL-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; UNROLL-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; UNROLL-NEXT: [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; UNROLL-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; UNROLL-NEXT: [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; UNROLL-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; UNROLL-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; UNROLL-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL: vector.ph:
-; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967292
+; UNROLL-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 6442450940
; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]]
; UNROLL: vector.body:
; UNROLL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -4460,10 +4468,14 @@ define void @trunciv(ptr nocapture %a, i32 %start, i64 %k) {
; INTERLEAVE-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; INTERLEAVE: vector.scevcheck:
-; INTERLEAVE-NEXT: [[DOTNOT:%.*]] = icmp ult i64 [[K]], 2147483649
-; INTERLEAVE-NEXT: br i1 [[DOTNOT]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
+; INTERLEAVE-NEXT: [[TMP5:%.*]] = and i64 [[K]], 2147483648
+; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp ne i64 [[TMP5]], 0
+; INTERLEAVE-NEXT: [[TMP7:%.*]] = add i64 [[K]], -4294967297
+; INTERLEAVE-NEXT: [[TMP8:%.*]] = icmp ult i64 [[TMP7]], -4294967296
+; INTERLEAVE-NEXT: [[TMP9:%.*]] = or i1 [[TMP6]], [[TMP8]]
+; INTERLEAVE-NEXT: br i1 [[TMP9]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; INTERLEAVE: vector.ph:
-; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 4294967288
+; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i64 [[K]], 6442450936
; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]]
; INTERLEAVE: vector.body:
; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
|
; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -2147483648 | ||
; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP2]], -2147483648 | ||
; CHECK-NEXT: ret i32 [[AND]] | ||
; CHECK-NEXT: ret i32 0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is incorrect: https://alive2.llvm.org/ce/z/Bsn7-J
unsigned ShiftAmount = PowerC->logBase2() + 1; | ||
|
||
APInt YMod = YKnown.Zero; | ||
if (YMod.getLoBits(ShiftAmount).isZero()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use `YMod.countMinTrailingZeros() > PowerC->logBase2()`.
} | ||
|
||
// (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0 | ||
if (match(Op0, m_Add(m_Value(X), m_Value(Y)))) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please add a commuted test (i.e., `(x + y) & (2^C) -> y & 2^C` when `x % 2^(C+1) == 0`).
Fixes: #152797
alive2: https://alive2.llvm.org/ce/z/h8HYTo
godbolt: https://godbolt.org/z/Mqzs9W8q4