diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index b33359c9bb0d6..18c2bef90f08f 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -145,6 +145,16 @@ inline int_pred_ty m_AllOnes() { return int_pred_ty(); } +struct is_zero_int { + bool isValue(const APInt &C) const { return C.isZero(); } +}; + +/// Match an integer 0 or a vector with all elements equal to 0. +/// For vectors, this includes constants with undefined elements. +inline int_pred_ty m_ZeroInt() { + return int_pred_ty(); +} + /// Matching combinators template struct match_combine_or { LTy L; @@ -397,6 +407,13 @@ m_c_Mul(const Op0_t &Op0, const Op1_t &Op1) { return m_c_Binary(Op0, Op1); } +/// Match a binary AND operation. +template +inline AllRecipe_commutative_match +m_c_BinaryAnd(const Op0_t &Op0, const Op1_t &Op1) { + return m_c_Binary(Op0, Op1); +} + /// Match a binary OR operation. Note that while conceptually the operands can /// be matched commutatively, \p Commutative defaults to false in line with the /// IR-based pattern matching infrastructure. Use m_c_BinaryOr for a commutative diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d32d2a9ad11f7..6c5f9b7302292 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1084,6 +1084,13 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + // AND x, 0 -> 0 + if (match(&R, m_c_BinaryAnd(m_VPValue(X), m_ZeroInt()))) { + Def->replaceAllUsesWith(R.getOperand(0) == X ? R.getOperand(1) + : R.getOperand(0)); + return; + } + if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return Def->replaceAllUsesWith(X); diff --git a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll index da1a5aa3a9f04..db54ca61f715b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/constant-fold.ll @@ -243,3 +243,188 @@ loop.latch: exit: ret void } + +define void @redundant_and_1(ptr %dst, i1 %c.0, i1 %c.1) { +; CHECK-LABEL: @redundant_and_1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT]], splat (i1 true) +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_1:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = select <4 x i1> , <4 x i1> [[BROADCAST_SPLAT2]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = select <4 x i1> [[TMP2]], <4 x i1> [[TMP5]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 +; CHECK-NEXT: store i32 0, ptr [[TMP9]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 +; CHECK-NEXT: store i32 0, ptr [[TMP12]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[TMP15]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK: pred.store.if7: +; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 +; CHECK-NEXT: store i32 0, ptr [[TMP18]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.continue8: +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK: then.1: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 +; CHECK-NEXT: [[OR:%.*]] = or i1 [[CMP]], false +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1]], i1 false +; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] +; CHECK: then.2: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c.0, label %loop.latch, label %then.1 + +then.1: + %cmp = icmp eq i32 %iv, 2 + %or = or i1 %cmp, false + %cond = select i1 %or, i1 %c.1, i1 false + br i1 %cond, label %then.2, label %loop.latch + +then.2: + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 0, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 3 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} + +define void @redundant_and_2(ptr %dst, i1 %c.0, i1 %c.1) { +; CHECK-LABEL: @redundant_and_2( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i1> poison, i1 [[C_0:%.*]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT1]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP0:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT2]], splat (i1 true) +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> , <4 x i1> [[TMP0]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i1> zeroinitializer, <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i32 0 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] +; CHECK: pred.store.if: +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[DST:%.*]], i32 0 +; CHECK-NEXT: store i32 0, ptr [[TMP7]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE]] +; CHECK: pred.store.continue: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP4]], i32 1 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_STORE_IF3:%.*]], label [[PRED_STORE_CONTINUE4:%.*]] +; CHECK: pred.store.if1: +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 1 +; CHECK-NEXT: store i32 0, ptr [[TMP10]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE4]] +; CHECK: pred.store.continue2: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i32 2 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_STORE_IF5:%.*]], label [[PRED_STORE_CONTINUE6:%.*]] +; CHECK: pred.store.if3: +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 2 +; CHECK-NEXT: store i32 0, ptr [[TMP13]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE6]] +; CHECK: pred.store.continue4: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP4]], i32 3 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF7:%.*]], label [[PRED_STORE_CONTINUE8:%.*]] +; CHECK: pred.store.if5: +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 3 +; CHECK-NEXT: store i32 0, ptr [[TMP16]], align 4 +; CHECK-NEXT: br label [[PRED_STORE_CONTINUE8]] +; CHECK: pred.store.continue6: +; CHECK-NEXT: br label [[MIDDLE_BLOCK:%.*]] +; CHECK: middle.block: +; CHECK-NEXT: br label [[EXIT:%.*]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: loop.header: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: br i1 [[C_0]], label [[LOOP_LATCH]], label [[THEN_1:%.*]] +; CHECK: then.1: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[IV]], 2 +; CHECK-NEXT: [[OR:%.*]] = and i1 false, [[CMP]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR]], i1 [[C_1:%.*]], i1 false +; CHECK-NEXT: br i1 [[COND]], label [[THEN_2:%.*]], label [[LOOP_LATCH]] +; CHECK: then.2: +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i32, ptr [[DST]], i32 [[IV]] +; CHECK-NEXT: store i32 0, ptr [[GEP]], align 4 +; CHECK-NEXT: br label [[LOOP_LATCH]] +; CHECK: loop.latch: +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV_NEXT]], 3 +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: ret void +; +entry: + br label %loop.header + +loop.header: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ] + br i1 %c.0, label %loop.latch, label %then.1 + +then.1: + %cmp = icmp eq i32 %iv, 2 + %or = and i1 false, %cmp + %cond = select i1 %or, i1 %c.1, i1 false + br i1 %cond, label %then.2, label %loop.latch + +then.2: + %gep = getelementptr inbounds i32, ptr %dst, i32 %iv + store i32 0, ptr %gep, align 4 + br label %loop.latch + +loop.latch: + %iv.next = add nuw nsw i32 %iv, 1 + %ec = icmp eq i32 %iv.next, 3 + br i1 %ec, label %exit, label %loop.header + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll index aecfc668cf293..7fa911feb8db5 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-constant-known-via-scev.ll @@ -77,14 +77,12 @@ define i64 @second_lshr_operand_zero_via_scev() { ; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND2]], splat (i32 2) ; CHECK-NEXT: [[TMP0:%.*]] = icmp eq <2 x i64> [[VEC_IND]], zeroinitializer ; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i64> [[STEP_ADD]], zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = and <2 x i64> [[VEC_IND]], zeroinitializer -; CHECK-NEXT: [[TMP3:%.*]] = and <2 x i64> [[STEP_ADD]], zeroinitializer ; CHECK-NEXT: [[TMP4:%.*]] = lshr <2 x i32> [[VEC_IND2]], zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = lshr <2 x i32> [[STEP_ADD4]], zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = zext <2 x i32> [[TMP4]] to <2 x i64> ; CHECK-NEXT: [[TMP7:%.*]] = zext <2 x i32> [[TMP5]] to <2 x i64> -; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> [[TMP2]], <2 x i64> [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> [[TMP3]], <2 x i64> [[TMP7]] +; CHECK-NEXT: [[TMP8:%.*]] = select <2 x i1> [[TMP0]], <2 x i64> zeroinitializer, <2 x i64> [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = select <2 x i1> [[TMP1]], <2 x i64> zeroinitializer, <2 x i64> [[TMP7]] ; CHECK-NEXT: [[TMP10]] = or <2 x i64> [[TMP8]], [[VEC_PHI]] ; CHECK-NEXT: [[TMP11]] = or <2 x i64> [[TMP9]], [[VEC_PHI1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 diff --git a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll index 9e94768fc2cbc..089fc99ff8ba5 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-extended-icmps.ll @@ -129,16 +129,8 @@ define void @ext_cmp(ptr %src.1, ptr %src.2, ptr noalias %dst) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[SRC_1]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, ptr [[TMP1]], align 2 -; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt <4 x i16> zeroinitializer, [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[SRC_2]], i64 [[INDEX]] -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, ptr [[TMP4]], align 2 -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i16> -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP6]] -; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i16> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] -; CHECK-NEXT: store <4 x i16> [[TMP8]], ptr [[TMP9]], align 2 +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i16, ptr [[DST]], i64 [[INDEX]] +; CHECK-NEXT: store <4 x i16> zeroinitializer, ptr [[TMP0]], align 2 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000 ; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]