diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index a13d3ceb61320..c238142524f4f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -2476,6 +2476,20 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) {
     return SelectInst::Create(Cmp, ConstantInt::getNullValue(Ty), Y);
   }
 
+  // (x + y) & (2^C) -> x & 2^C when y % 2^(C+1) == 0
+  if (match(Op0, m_Add(m_Value(X), m_Value(Y)))) {
+    const APInt *PowerC;
+    if (match(Op1, m_Power2(PowerC)) && !PowerC->isOne()) {
+      KnownBits YKnown = computeKnownBits(Y, &I);
+      unsigned ShiftAmount = PowerC->logBase2() + 1;
+
+      // Y's low C+1 bits must be known zero, i.e. Y % 2^(C+1) == 0.
+      if (YKnown.countMinTrailingZeros() >= ShiftAmount) {
+        return BinaryOperator::CreateAnd(X, Op1);
+      }
+    }
+  }
+
   // Canonicalize:
   // (X +/- Y) & Y --> ~X & Y when Y is a power of 2.
   if (match(&I, m_c_And(m_Value(Y), m_OneUse(m_CombineOr(
diff --git a/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll b/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll
new file mode 100644
index 0000000000000..f698ea6ec30a6
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/redundant-sum-in-and.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt < %s -passes=instcombine -S | FileCheck %s
+
+define i1 @addition_and_bitwise1(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise1(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 2
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 4
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 2
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}
+
+define i1 @addition_and_bitwise2(ptr %0) {
+; CHECK-LABEL: define i1 @addition_and_bitwise2(
+; CHECK-SAME: ptr [[TMP0:%.*]]) {
+; CHECK-NEXT:    [[V0:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP0]], i64 4
+; CHECK-NEXT:    [[V1:%.*]] = load i32, ptr [[V0]], align 16
+; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[V1]], 4
+; CHECK-NEXT:    [[V6:%.*]] = icmp eq i32 [[TMP2]], 0
+; CHECK-NEXT:    ret i1 [[V6]]
+;
+  %v0 = getelementptr inbounds nuw i8, ptr %0, i64 4
+  %v1 = load i32, ptr %v0, align 16
+  %v2 = zext i32 %v1 to i64
+  %v3 = ptrtoint ptr %v0 to i64
+  %v4 = add i64 %v2, %v3
+  %v5 = and i64 %v4, 4
+  %v6 = icmp eq i64 %v5, 0
+  ret i1 %v6
+}