diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 1f10058ab4a9a..83d33e73c6145 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -1722,7 +1722,9 @@ class VPHistogramRecipe : public VPRecipeBase { #endif }; -/// A recipe for widening select instructions. +/// A recipe for widening select instructions. Supports both wide vector and +/// single-scalar conditions, matching the behavior of LLVM IR's select +/// instruction. struct LLVM_ABI_FOR_TEST VPWidenSelectRecipe : public VPRecipeWithIRFlags, public VPIRMetadata { VPWidenSelectRecipe(SelectInst &I, ArrayRef Operands) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index bde62dd6dd4bc..8a78ea65aff6e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -655,7 +655,9 @@ Value *VPInstruction::generate(VPTransformState &State) { } case Instruction::Select: { bool OnlyFirstLaneUsed = vputils::onlyFirstLaneUsed(this); - Value *Cond = State.get(getOperand(0), OnlyFirstLaneUsed); + Value *Cond = + State.get(getOperand(0), + OnlyFirstLaneUsed || vputils::isSingleScalar(getOperand(0))); Value *Op1 = State.get(getOperand(1), OnlyFirstLaneUsed); Value *Op2 = State.get(getOperand(2), OnlyFirstLaneUsed); return Builder.CreateSelect(Cond, Op1, Op2, Name); @@ -1962,16 +1964,13 @@ void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, getOperand(1)->printAsOperand(O, SlotTracker); O << ", "; getOperand(2)->printAsOperand(O, SlotTracker); - O << (isInvariantCond() ? " (condition is loop invariant)" : ""); + O << (vputils::isSingleScalar(getCond()) ? " (condition is single-scalar)" + : ""); } #endif void VPWidenSelectRecipe::execute(VPTransformState &State) { - // The condition can be loop invariant but still defined inside the - // loop. This means that we can't just use the original 'cond' value. - // We have to take the 'vectorized' value and pick the first lane. - // Instcombine will make this a no-op. - Value *Cond = State.get(getCond(), isInvariantCond()); + Value *Cond = State.get(getCond(), vputils::isSingleScalar(getCond())); Value *Op0 = State.get(getOperand(1)); Value *Op1 = State.get(getOperand(2)); diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 986c801abf684..4ff627051671e 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1282,6 +1282,21 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { return; } + if (match(Def, m_BuildVector()) && all_equal(R.operands())) { + Def->replaceAllUsesWith( + Builder.createNaryOp(VPInstruction::Broadcast, Def->getOperand(0))); + return; + } + + // Look through broadcast of single-scalar when used as select conditions; in + // that case the scalar condition can be used directly. + if (match(Def, + m_Select(m_Broadcast(m_VPValue(C)), m_VPValue(), m_VPValue())) && + vputils::isSingleScalar(C)) { + Def->setOperand(0, C); + return; + } + if (auto *Phi = dyn_cast(Def)) { if (Phi->getNumOperands() == 1) Phi->replaceAllUsesWith(Phi->getOperand(0)); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll index dc52e644742e2..a49f089bd2085 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-iv-select-cmp.ll @@ -150,10 +150,11 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_2:%.*]] = add <4 x i32> [[STEP_ADD]], splat (i32 4) ; CHECK-NEXT: [[STEP_ADD_3:%.*]] = add <4 x i32> [[STEP_ADD_2]], splat (i32 4) -; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[TMP4]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] -; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] -; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP1]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], <4 x i32> [[VEC_IND]], <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[TMP4]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD]], <4 x i32> [[VEC_PHI2]] +; CHECK-NEXT: [[TMP5]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_2]], <4 x i32> [[VEC_PHI3]] +; CHECK-NEXT: [[TMP6]] = select i1 [[TMP2]], <4 x i32> [[STEP_ADD_3]], <4 x i32> [[VEC_PHI4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD_3]], splat (i32 4) ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -191,7 +192,8 @@ define i32 @select_icmp_var_start_iv_trunc(i32 %N, i32 %start) #0 { ; CHECK-NEXT: [[INDEX11:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT17:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI12:%.*]] = phi <4 x i32> [ [[DOTSPLAT14]], %[[VEC_EPILOG_PH]] ], [ [[TMP14:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ [[INDUCTION]], %[[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT16:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP14]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0 +; CHECK-NEXT: [[TMP14]] = select i1 [[TMP13]], <4 x i32> [[VEC_IND15]], <4 x i32> [[VEC_PHI12]] ; CHECK-NEXT: [[INDEX_NEXT17]] = add nuw i64 [[INDEX11]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], splat (i32 4) ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT17]], [[N_VEC8]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll index 56a1abd2384c8..3c83c01929aae 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmax-without-fast-math-flags.ll @@ -66,13 +66,11 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP15:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP18]], [[TMP15]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll index d4f1227a38bda..711a9cd03ac15 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/fmin-without-fast-math-flags.ll @@ -66,13 +66,11 @@ define float @fminnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.minnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll index bfee39eac0ae2..068f82c7db670 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/force-target-instruction-cost.ll @@ -365,8 +365,8 @@ define void @invalid_legacy_cost(i64 %N, ptr %x) #0 { ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP6:%.*]] = alloca i8, i64 0, align 16 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i32 0 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x ptr> [[TMP7]], ptr [[TMP6]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP6]], i64 0 +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP9:%.*]] = getelementptr ptr, ptr [[X]], i64 [[INDEX]] ; CHECK-NEXT: store <2 x ptr> [[TMP8]], ptr [[TMP9]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll index d80fdd1ce7270..9dfb987bd24a6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/invariant-replicate-region.ll @@ -11,8 +11,6 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_UREM_CONTINUE6:.*]] ] @@ -43,8 +41,8 @@ define i32 @test_invariant_replicate_region(i32 %x, i1 %c) { ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP13]], i32 3 ; CHECK-NEXT: br label %[[PRED_UREM_CONTINUE6]] ; CHECK: [[PRED_UREM_CONTINUE6]]: -; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP11]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP14]], %[[PRED_UREM_IF5]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP15]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ [[TMP11]], %[[PRED_UREM_CONTINUE4]] ], [ [[TMP14]], %[[PRED_UREM_IF5]] ] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll index ea0148952f51b..0a9494e4c7ade 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/licm-calls.ll @@ -10,8 +10,8 @@ define void @licm_replicate_call(double %x, ptr %dst) { ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP1:%.*]] = tail call double @llvm.pow.f64(double [[X]], double 3.000000e+00) -; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[TMP1]], i32 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll index 157b78704234a..3311cbc11881b 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/masked-call-scalarize.ll @@ -64,11 +64,12 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE6:%.*]] ] ; TFCOMMON-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE6]] ] ; TFCOMMON-NEXT: [[LD:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR3:[0-9]+]] -; TFCOMMON-NEXT: [[TMP7:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i32 0 -; TFCOMMON-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[TMP7]], double [[TMP5]], i32 1 +; TFCOMMON-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[LD]]) #[[ATTR2:[0-9]+]] +; TFCOMMON-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFCOMMON-NEXT: [[TMP8:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFCOMMON-NEXT: [[TMP9:%.*]] = fcmp ogt <2 x double> [[TMP8]], zeroinitializer -; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP9]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) +; TFCOMMON-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP9]], i32 0 +; TFCOMMON-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP7]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFCOMMON-NEXT: [[TMP16:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; TFCOMMON-NEXT: br i1 [[TMP16]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TFCOMMON: pred.store.if: @@ -79,7 +80,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFCOMMON-NEXT: [[TMP14:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFCOMMON-NEXT: br i1 [[TMP14]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.if1: -; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 1 +; TFCOMMON-NEXT: [[TMP19:%.*]] = extractelement <2 x double> [[PREDPHI]], i32 0 ; TFCOMMON-NEXT: store double [[TMP19]], ptr [[P]], align 8 ; TFCOMMON-NEXT: br label [[PRED_STORE_CONTINUE6]] ; TFCOMMON: pred.store.continue2: @@ -105,11 +106,12 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[ACTIVE_LANE_MASK2:%.*]] = phi <2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY1]], [[ENTRY]] ], [ [[ACTIVE_LANE_MASK_NEXT10:%.*]], [[PRED_STORE_CONTINUE9]] ] ; TFA_INTERLEAVE-NEXT: [[TMP4:%.*]] = load double, ptr [[P2:%.*]], align 8 -; TFA_INTERLEAVE-NEXT: [[TMP9:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR3:[0-9]+]] -; TFA_INTERLEAVE-NEXT: [[TMP11:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i32 0 -; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = insertelement <2 x double> [[TMP11]], double [[TMP9]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP5:%.*]] = tail call double @llvm.exp.f64(double [[TMP4]]) #[[ATTR2:[0-9]+]] +; TFA_INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0 +; TFA_INTERLEAVE-NEXT: [[TMP12:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer ; TFA_INTERLEAVE-NEXT: [[TMP14:%.*]] = fcmp ogt <2 x double> [[TMP12]], zeroinitializer -; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select <2 x i1> [[TMP14]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) +; TFA_INTERLEAVE-NEXT: [[TMP7:%.*]] = extractelement <2 x i1> [[TMP14]], i32 0 +; TFA_INTERLEAVE-NEXT: [[PREDPHI3:%.*]] = select i1 [[TMP7]], <2 x double> zeroinitializer, <2 x double> splat (double 1.000000e+00) ; TFA_INTERLEAVE-NEXT: [[TMP19:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 0 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP19]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]] ; TFA_INTERLEAVE: pred.store.if: @@ -120,7 +122,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP29:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP29]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]] ; TFA_INTERLEAVE: pred.store.if3: -; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP22:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP22]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE5]] ; TFA_INTERLEAVE: pred.store.continue4: @@ -134,7 +136,7 @@ define void @test_widen_exp_v2(ptr noalias %p2, ptr noalias %p, i64 %n) #5 { ; TFA_INTERLEAVE-NEXT: [[TMP25:%.*]] = extractelement <2 x i1> [[ACTIVE_LANE_MASK2]], i32 1 ; TFA_INTERLEAVE-NEXT: br i1 [[TMP25]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.if7: -; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 1 +; TFA_INTERLEAVE-NEXT: [[TMP34:%.*]] = extractelement <2 x double> [[PREDPHI3]], i32 0 ; TFA_INTERLEAVE-NEXT: store double [[TMP34]], ptr [[P]], align 8 ; TFA_INTERLEAVE-NEXT: br label [[PRED_STORE_CONTINUE9]] ; TFA_INTERLEAVE: pred.store.continue8: diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll index 01b4502308c95..f88778b991b0b 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/divrem.ll @@ -285,7 +285,8 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = udiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] @@ -304,14 +305,16 @@ define void @predicated_udiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = udiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[PREDPHI2:%.*]] = select i1 [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -366,7 +369,8 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; CHECK-NEXT: [[TMP16:%.*]] = select [[TMP15]], [[TMP6]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = select [[TMP16]], [[BROADCAST_SPLAT]], splat (i64 1) ; CHECK-NEXT: [[TMP11:%.*]] = sdiv [[WIDE_LOAD]], [[TMP10]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[TMP6]], [[TMP11]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9:%.*]] = extractelement [[TMP6]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], [[TMP11]], [[WIDE_LOAD]] ; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0( [[PREDPHI]], ptr align 8 [[TMP8]], splat (i1 true), i32 [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = zext i32 [[TMP12]] to i64 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[TMP13]], [[INDEX]] @@ -385,14 +389,16 @@ define void @predicated_sdiv(ptr noalias nocapture %a, i64 %v, i64 %n) { ; FIXED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[V:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[TMP0:%.*]] = icmp ne <4 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; FIXED-NEXT: [[TMP5:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) +; FIXED-NEXT: [[TMP1:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[TMP5:%.*]] = select i1 [[TMP1]], <4 x i64> [[BROADCAST_SPLAT]], <4 x i64> splat (i64 1) ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; FIXED-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[INDEX]] ; FIXED-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i64>, ptr [[TMP2]], align 8 ; FIXED-NEXT: [[TMP8:%.*]] = sdiv <4 x i64> [[WIDE_LOAD1]], [[TMP5]] -; FIXED-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[TMP0]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] +; FIXED-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0 +; FIXED-NEXT: [[PREDPHI2:%.*]] = select i1 [[TMP6]], <4 x i64> [[TMP8]], <4 x i64> [[WIDE_LOAD1]] ; FIXED-NEXT: store <4 x i64> [[PREDPHI2]], ptr [[TMP2]], align 8 ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; FIXED-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 @@ -661,14 +667,12 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i8 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector [[BROADCAST_SPLATINSERT3]], poison, zeroinitializer ; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i32 [[N_VEC]] to i16 ; CHECK-NEXT: [[TMP4:%.*]] = add i16 -12, [[DOTCAST]] ; CHECK-NEXT: [[DOTCAST5:%.*]] = trunc i32 [[N_VEC]] to i8 ; CHECK-NEXT: [[TMP5:%.*]] = add i8 -12, [[DOTCAST5]] -; CHECK-NEXT: [[TMP6:%.*]] = select [[BROADCAST_SPLAT4]], splat (i8 1), [[BROADCAST_SPLAT2]] -; CHECK-NEXT: [[TMP7:%.*]] = select [[BROADCAST_SPLAT4]], splat (i16 1), [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[C:%.*]], splat (i8 1), [[BROADCAST_SPLAT2]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[C]], splat (i16 1), [[BROADCAST_SPLAT]] ; CHECK-NEXT: [[TMP8:%.*]] = call @llvm.stepvector.nxv2i8() ; CHECK-NEXT: [[TMP9:%.*]] = mul [[TMP8]], splat (i8 1) ; CHECK-NEXT: [[INDUCTION:%.*]] = add splat (i8 -12), [[TMP9]] @@ -683,7 +687,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; CHECK-NEXT: [[TMP12:%.*]] = zext [[TMP11]] to ; CHECK-NEXT: [[TMP13:%.*]] = sdiv [[TMP12]], [[TMP7]] ; CHECK-NEXT: [[TMP14:%.*]] = sext [[TMP13]] to -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[BROADCAST_SPLAT4]], zeroinitializer, [[TMP14]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], zeroinitializer, [[TMP14]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]] ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[BROADCAST_SPLAT7]] ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -727,10 +731,8 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; FIXED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer ; FIXED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i8> poison, i8 [[X:%.*]], i64 0 ; FIXED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT1]], <4 x i8> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; FIXED-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT3]], <4 x i1> poison, <4 x i32> zeroinitializer -; FIXED-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i8> splat (i8 1), <4 x i8> [[BROADCAST_SPLAT2]] -; FIXED-NEXT: [[TMP1:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i16> splat (i16 1), <4 x i16> [[BROADCAST_SPLAT]] +; FIXED-NEXT: [[TMP0:%.*]] = select i1 [[C:%.*]], <4 x i8> splat (i8 1), <4 x i8> [[BROADCAST_SPLAT2]] +; FIXED-NEXT: [[TMP1:%.*]] = select i1 [[C]], <4 x i16> splat (i16 1), <4 x i16> [[BROADCAST_SPLAT]] ; FIXED-NEXT: br label [[VECTOR_BODY:%.*]] ; FIXED: vector.body: ; FIXED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -739,7 +741,7 @@ define i32 @udiv_sdiv_with_invariant_divisors(i8 %x, i16 %y, i1 %c) { ; FIXED-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[TMP2]] to <4 x i16> ; FIXED-NEXT: [[TMP4:%.*]] = sdiv <4 x i16> [[TMP3]], [[TMP1]] ; FIXED-NEXT: [[TMP5:%.*]] = sext <4 x i16> [[TMP4]] to <4 x i32> -; FIXED-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT4]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] +; FIXED-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> zeroinitializer, <4 x i32> [[TMP5]] ; FIXED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; FIXED-NEXT: [[VEC_IND_NEXT]] = add <4 x i8> [[VEC_IND]], splat (i8 4) ; FIXED-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 12 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index 1c6954c187e5f..212a5c99676f4 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -40,7 +40,7 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK-NEXT: [[AVL:%.*]] = phi i32 [ [[TMP3]], [[VECTOR_PH]] ], [ [[AVL_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[AVL]], i32 4, i1 true) ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv4i32.nxv4p0( align 4 [[BROADCAST_SPLAT]], [[TMP13]], i32 [[TMP10]]), !alias.scope [[META0:![0-9]+]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select [[BROADCAST_SPLAT1]], zeroinitializer, [[WIDE_MASKED_GATHER]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], zeroinitializer, [[WIDE_MASKED_GATHER]] ; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0( [[PREDPHI]], align 4 [[BROADCAST_SPLAT4]], splat (i1 true), i32 [[TMP10]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]] ; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i32 [[AVL]], [[TMP10]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[AVL_NEXT]], 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll index baedf34b5548f..6ec010cdcc248 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll @@ -1193,7 +1193,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK: pred.udiv.continue62: ; CHECK-NEXT: [[TMP161:%.*]] = phi <32 x i32> [ [[TMP156]], [[PRED_UDIV_CONTINUE60]] ], [ [[TMP160]], [[PRED_UDIV_IF61]] ] ; CHECK-NEXT: [[TMP162:%.*]] = zext <32 x i32> [[TMP161]] to <32 x i64> -; CHECK-NEXT: [[PREDPHI:%.*]] = select <32 x i1> [[BROADCAST_SPLAT]], <32 x i64> zeroinitializer, <32 x i64> [[TMP162]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <32 x i64> zeroinitializer, <32 x i64> [[TMP162]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) ; CHECK-NEXT: [[TMP163:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 @@ -1289,7 +1289,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK: pred.udiv.continue84: ; CHECK-NEXT: [[TMP206:%.*]] = phi <8 x i32> [ [[TMP201]], [[PRED_UDIV_CONTINUE82]] ], [ [[TMP205]], [[PRED_UDIV_IF83]] ] ; CHECK-NEXT: [[TMP207:%.*]] = zext <8 x i32> [[TMP206]] to <8 x i64> -; CHECK-NEXT: [[PREDPHI85:%.*]] = select <8 x i1> [[BROADCAST_SPLAT64]], <8 x i64> zeroinitializer, <8 x i64> [[TMP207]] +; CHECK-NEXT: [[PREDPHI85:%.*]] = select i1 [[C]], <8 x i64> zeroinitializer, <8 x i64> [[TMP207]] ; CHECK-NEXT: [[INDEX_NEXT86]] = add nuw i32 [[INDEX67]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT87]] = add <8 x i32> [[VEC_IND68]], splat (i32 8) ; CHECK-NEXT: [[TMP208:%.*]] = icmp eq i32 [[INDEX_NEXT86]], 1000 diff --git a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll index 0bc86fff9831b..7e5964ac30cba 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/invariant-load-gather.ll @@ -37,7 +37,8 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX]] ; CHECK-NEXT: store <16 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP2]], align 4, !alias.scope [[META0:![0-9]+]], !noalias [[META3:![0-9]+]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> align 4 [[BROADCAST_SPLAT]], <16 x i1> [[TMP1]], <16 x i32> poison), !alias.scope [[META3]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <16 x i1> [[TMP1]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP9]], <16 x i32> [[WIDE_MASKED_GATHER]], <16 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 ; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] @@ -63,7 +64,8 @@ define i32 @inv_load_conditional(ptr %a, i64 %n, ptr %b, i32 %k) { ; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[INDEX10]] ; CHECK-NEXT: store <8 x i32> [[BROADCAST_SPLAT12]], ptr [[TMP6]], align 4, !alias.scope [[META0]], !noalias [[META3]] ; CHECK-NEXT: [[WIDE_MASKED_GATHER13:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> align 4 [[BROADCAST_SPLAT9]], <8 x i1> [[TMP5]], <8 x i32> poison), !alias.scope [[META3]] -; CHECK-NEXT: [[PREDPHI14:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP5]], i32 0 +; CHECK-NEXT: [[PREDPHI14:%.*]] = select i1 [[TMP10]], <8 x i32> [[WIDE_MASKED_GATHER13]], <8 x i32> splat (i32 1) ; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX10]], 8 ; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC7]] ; CHECK-NEXT: br i1 [[TMP7]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll index 619693abf51e4..57cbe7f4c241b 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/pr141968-instsimplifyfolder.ll @@ -97,8 +97,7 @@ define i8 @pr141968(i1 %cond, i8 %v) { ; CHECK: [[PRED_SDIV_IF29]]: ; CHECK-NEXT: br label %[[PRED_SDIV_CONTINUE30]] ; CHECK: [[PRED_SDIV_CONTINUE30]]: -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <16 x i1> [[BROADCAST_SPLAT]], i32 0 -; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP18]], i8 0, i8 [[V]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], i8 0, i8 [[V]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 16 ; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 ; CHECK-NEXT: br i1 [[TMP17]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll index 03087bb883464..715d6db50488f 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicate-recipe-with-only-first-lane-used.ll @@ -199,10 +199,6 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[INDEX]], 7 ; CHECK-NEXT: [[TMP6:%.*]] = load float, ptr [[A]], align 4 ; CHECK-NEXT: [[TMP10:%.*]] = fcmp ogt float [[TMP6]], 0.000000e+00 -; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i32 0 -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i1> [[TMP8]], i1 [[TMP10]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = insertelement <4 x i1> [[TMP9]], i1 [[TMP10]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i1> [[TMP13]], i1 [[TMP10]], i32 3 ; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP2]] ; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP3]] ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds float, ptr [[B]], i64 [[TMP4]] @@ -211,7 +207,7 @@ define float @uniform_load_replicating_select(ptr %A, ptr %B, i64 %1) { ; CHECK-NEXT: [[TMP20:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP16]] ; CHECK-NEXT: [[TMP21:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP17]] ; CHECK-NEXT: [[TMP22:%.*]] = select i1 [[TMP10]], ptr [[A]], ptr [[TMP18]] -; CHECK-NEXT: [[TMP23:%.*]] = select <4 x i1> [[TMP14]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00) +; CHECK-NEXT: [[TMP23:%.*]] = select i1 [[TMP10]], <4 x float> splat (float 1.000000e+01), <4 x float> splat (float 1.000000e+00) ; CHECK-NEXT: [[TMP24:%.*]] = load float, ptr [[TMP19]], align 4 ; CHECK-NEXT: [[TMP25:%.*]] = load float, ptr [[TMP20]], align 4 ; CHECK-NEXT: [[TMP26:%.*]] = load float, ptr [[TMP21]], align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll index 22eb0ca380033..9cd5625e5f8e6 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/vector-scalar-select-cost.ll @@ -23,8 +23,8 @@ define void @scalarselect(i1 %cond) { %7 = getelementptr inbounds [2048 x i32], ptr @a, i64 0, i64 %indvars.iv ; CHECK: cost of 1 for VF 1 {{.*}} select i1 %cond, i32 %6, i32 0 -; CHECK: Cost of 2 for VF 2: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is loop invariant) -; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is loop invariant) +; CHECK: Cost of 2 for VF 2: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is single-scalar) +; CHECK: Cost of 2 for VF 4: WIDEN-SELECT ir<%sel> = select ir<%cond>, ir<%6>, ir<0> (condition is single-scalar) %sel = select i1 %cond, i32 %6, i32 zeroinitializer store i32 %sel, ptr %7, align 4 diff --git a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll index deef94aa3fe9d..67fe87a328976 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/x86-predication.ll @@ -39,7 +39,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; CHECK: pred.sdiv.continue2: ; CHECK-NEXT: [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_SDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_SDIV_IF1]] ] ; CHECK-NEXT: [[TMP15:%.*]] = add nsw <2 x i32> [[TMP14]], [[WIDE_LOAD]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP15]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP17]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 @@ -127,7 +127,7 @@ define i32 @predicated_sdiv_masked_load(ptr %a, ptr %b, i32 %x, i1 %c) { ; SINK-GATHER: pred.sdiv.continue14: ; SINK-GATHER-NEXT: [[TMP44:%.*]] = phi <8 x i32> [ [[TMP39]], [[PRED_SDIV_CONTINUE12]] ], [ [[TMP43]], [[PRED_SDIV_IF13]] ] ; SINK-GATHER-NEXT: [[TMP45:%.*]] = add nsw <8 x i32> [[TMP44]], [[WIDE_LOAD]] -; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP45]], <8 x i32> [[WIDE_LOAD]] +; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <8 x i32> [[TMP45]], <8 x i32> [[WIDE_LOAD]] ; SINK-GATHER-NEXT: [[TMP47]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; SINK-GATHER-NEXT: [[TMP48:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 @@ -179,15 +179,13 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i1> poison, i1 [[TMP1:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT4]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE4:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE4]] ] -; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[TMP1:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP0]], 777 @@ -199,7 +197,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: pred.udiv.continue: ; CHECK-NEXT: [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; CHECK-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4]] -; CHECK: pred.udiv.if3: +; CHECK: pred.udiv.if1: ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP7]], 777 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] @@ -207,9 +205,9 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]] ; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE4]] -; CHECK: pred.udiv.continue4: +; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF3]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <2 x i32> [[TMP16]], <2 x i32> [[BROADCAST_SPLAT4]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] @@ -250,8 +248,6 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER: vector.ph: ; SINK-GATHER-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 8 ; SINK-GATHER-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT16:%.*]] = insertelement <8 x i1> poison, i1 [[TMP1:%.*]], i64 0 -; SINK-GATHER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i1> [[BROADCAST_SPLATINSERT16]], <8 x i1> poison, <8 x i32> zeroinitializer ; SINK-GATHER-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i32> poison, i32 [[X:%.*]], i64 0 ; SINK-GATHER-NEXT: [[BROADCAST_SPLAT16:%.*]] = shufflevector <8 x i32> [[BROADCAST_SPLATINSERT1]], <8 x i32> poison, <8 x i32> zeroinitializer ; SINK-GATHER-NEXT: br label [[VECTOR_BODY:%.*]] @@ -260,7 +256,7 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE16]] ] ; SINK-GATHER-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP66:%.*]], [[PRED_UDIV_CONTINUE16]] ] ; SINK-GATHER-NEXT: [[TMP0:%.*]] = mul <8 x i64> [[VEC_IND]], splat (i64 777) -; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; SINK-GATHER-NEXT: br i1 [[TMP1:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; SINK-GATHER: pred.udiv.if: ; SINK-GATHER-NEXT: [[TMP2:%.*]] = extractelement <8 x i64> [[TMP0]], i32 0 ; SINK-GATHER-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP2]] @@ -271,76 +267,76 @@ define i32 @scalarize_and_sink_gather(ptr %a, i1 %c, i32 %x, i64 %n) { ; SINK-GATHER: pred.udiv.continue: ; SINK-GATHER-NEXT: [[TMP8:%.*]] = phi <8 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]] -; SINK-GATHER: pred.udiv.if3: +; SINK-GATHER: pred.udiv.if1: ; SINK-GATHER-NEXT: [[TMP10:%.*]] = extractelement <8 x i64> [[TMP0]], i32 1 ; SINK-GATHER-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP10]] ; SINK-GATHER-NEXT: [[TMP12:%.*]] = load i32, ptr [[TMP11]], align 4 ; SINK-GATHER-NEXT: [[TMP13:%.*]] = udiv i32 [[TMP12]], [[X]] ; SINK-GATHER-NEXT: [[TMP14:%.*]] = insertelement <8 x i32> [[TMP8]], i32 [[TMP13]], i32 1 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE4]] -; SINK-GATHER: pred.udiv.continue4: +; SINK-GATHER: pred.udiv.continue2: ; SINK-GATHER-NEXT: [[TMP16:%.*]] = phi <8 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP14]], [[PRED_UDIV_IF5]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]] -; SINK-GATHER: pred.udiv.if5: +; SINK-GATHER: pred.udiv.if3: ; SINK-GATHER-NEXT: [[TMP18:%.*]] = extractelement <8 x i64> [[TMP0]], i32 2 ; SINK-GATHER-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP18]] ; SINK-GATHER-NEXT: [[TMP20:%.*]] = load i32, ptr [[TMP19]], align 4 ; SINK-GATHER-NEXT: [[TMP21:%.*]] = udiv i32 [[TMP20]], [[X]] ; SINK-GATHER-NEXT: [[TMP22:%.*]] = insertelement <8 x i32> [[TMP16]], i32 [[TMP21]], i32 2 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE6]] -; SINK-GATHER: pred.udiv.continue6: +; SINK-GATHER: pred.udiv.continue4: ; SINK-GATHER-NEXT: [[TMP24:%.*]] = phi <8 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP22]], [[PRED_UDIV_IF6]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]] -; SINK-GATHER: pred.udiv.if7: +; SINK-GATHER: pred.udiv.if5: ; SINK-GATHER-NEXT: [[TMP26:%.*]] = extractelement <8 x i64> [[TMP0]], i32 3 ; SINK-GATHER-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP26]] ; SINK-GATHER-NEXT: [[TMP28:%.*]] = load i32, ptr [[TMP27]], align 4 ; SINK-GATHER-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], [[X]] ; SINK-GATHER-NEXT: [[TMP30:%.*]] = insertelement <8 x i32> [[TMP24]], i32 [[TMP29]], i32 3 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE8]] -; SINK-GATHER: pred.udiv.continue8: +; SINK-GATHER: pred.udiv.continue6: ; SINK-GATHER-NEXT: [[TMP32:%.*]] = phi <8 x i32> [ [[TMP24]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP30]], [[PRED_UDIV_IF7]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]] -; SINK-GATHER: pred.udiv.if9: +; SINK-GATHER: pred.udiv.if7: ; SINK-GATHER-NEXT: [[TMP34:%.*]] = extractelement <8 x i64> [[TMP0]], i32 4 ; SINK-GATHER-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP34]] ; SINK-GATHER-NEXT: [[TMP36:%.*]] = load i32, ptr [[TMP35]], align 4 ; SINK-GATHER-NEXT: [[TMP37:%.*]] = udiv i32 [[TMP36]], [[X]] ; SINK-GATHER-NEXT: [[TMP38:%.*]] = insertelement <8 x i32> [[TMP32]], i32 [[TMP37]], i32 4 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE10]] -; SINK-GATHER: pred.udiv.continue10: +; SINK-GATHER: pred.udiv.continue8: ; SINK-GATHER-NEXT: [[TMP40:%.*]] = phi <8 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP38]], [[PRED_UDIV_IF9]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]] -; SINK-GATHER: pred.udiv.if11: +; SINK-GATHER: pred.udiv.if9: ; SINK-GATHER-NEXT: [[TMP42:%.*]] = extractelement <8 x i64> [[TMP0]], i32 5 ; SINK-GATHER-NEXT: [[TMP43:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP42]] ; SINK-GATHER-NEXT: [[TMP44:%.*]] = load i32, ptr [[TMP43]], align 4 ; SINK-GATHER-NEXT: [[TMP45:%.*]] = udiv i32 [[TMP44]], [[X]] ; SINK-GATHER-NEXT: [[TMP46:%.*]] = insertelement <8 x i32> [[TMP40]], i32 [[TMP45]], i32 5 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE12]] -; SINK-GATHER: pred.udiv.continue12: +; SINK-GATHER: pred.udiv.continue10: ; SINK-GATHER-NEXT: [[TMP48:%.*]] = phi <8 x i32> [ [[TMP40]], [[PRED_UDIV_CONTINUE10]] ], [ [[TMP46]], [[PRED_UDIV_IF11]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]] -; SINK-GATHER: pred.udiv.if13: +; SINK-GATHER: pred.udiv.if11: ; SINK-GATHER-NEXT: [[TMP50:%.*]] = extractelement <8 x i64> [[TMP0]], i32 6 ; SINK-GATHER-NEXT: [[TMP51:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP50]] ; SINK-GATHER-NEXT: [[TMP52:%.*]] = load i32, ptr [[TMP51]], align 4 ; SINK-GATHER-NEXT: [[TMP53:%.*]] = udiv i32 [[TMP52]], [[X]] ; SINK-GATHER-NEXT: [[TMP54:%.*]] = insertelement <8 x i32> [[TMP48]], i32 [[TMP53]], i32 6 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE14]] -; SINK-GATHER: pred.udiv.continue14: +; SINK-GATHER: pred.udiv.continue12: ; SINK-GATHER-NEXT: [[TMP56:%.*]] = phi <8 x i32> [ [[TMP48]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP54]], [[PRED_UDIV_IF13]] ] ; SINK-GATHER-NEXT: br i1 [[TMP1]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16]] -; SINK-GATHER: pred.udiv.if15: +; SINK-GATHER: pred.udiv.if13: ; SINK-GATHER-NEXT: [[TMP58:%.*]] = extractelement <8 x i64> [[TMP0]], i32 7 ; SINK-GATHER-NEXT: [[TMP59:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP58]] ; SINK-GATHER-NEXT: [[TMP60:%.*]] = load i32, ptr [[TMP59]], align 4 ; SINK-GATHER-NEXT: [[TMP61:%.*]] = udiv i32 [[TMP60]], [[X]] ; SINK-GATHER-NEXT: [[TMP62:%.*]] = insertelement <8 x i32> [[TMP56]], i32 [[TMP61]], i32 7 ; SINK-GATHER-NEXT: br label [[PRED_UDIV_CONTINUE16]] -; SINK-GATHER: pred.udiv.continue16: +; SINK-GATHER: pred.udiv.continue14: ; SINK-GATHER-NEXT: [[TMP64:%.*]] = phi <8 x i32> [ [[TMP56]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP62]], [[PRED_UDIV_IF15]] ] -; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select <8 x i1> [[BROADCAST_SPLAT]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] +; SINK-GATHER-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP1]], <8 x i32> [[TMP64]], <8 x i32> [[BROADCAST_SPLAT16]] ; SINK-GATHER-NEXT: [[TMP66]] = add <8 x i32> [[VEC_PHI]], [[PREDPHI]] ; SINK-GATHER-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; SINK-GATHER-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll index 198a30af814ba..8a579734a06e1 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll @@ -137,7 +137,8 @@ define i16 @for_phi_removed(ptr %src) { ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 ; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104 ; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -202,7 +203,8 @@ define i16 @for_phi_removed(ptr %src) { ; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer ; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer -; SINK-AFTER-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer +; SINK-AFTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0 +; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer ; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108 ; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll index 5b7c27a0b5f1b..af648df9fc5c7 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags-interleave.ll @@ -66,13 +66,11 @@ define float @fmaxnum(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP18:%.*]] = freeze <4 x i1> [[TMP4]] ; CHECK-NEXT: [[TMP5:%.*]] = or <4 x i1> [[TMP15]], [[TMP18]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = or i1 [[TMP6]], [[TMP9]] ; CHECK-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP11:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] -; CHECK-NEXT: [[TMP12:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP7]] +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI1]], <4 x float> [[TMP8]] ; CHECK-NEXT: [[TMP14:%.*]] = select i1 [[TMP6]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[RDX_MINMAX_SELECT:%.*]] = call <4 x float> @llvm.maxnum.v4f32(<4 x float> [[TMP11]], <4 x float> [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[RDX_MINMAX_SELECT]]) diff --git a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll index 8b6a6e1e46101..242df1fcf7618 100644 --- a/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmax-without-fast-math-flags.ll @@ -209,12 +209,10 @@ define float @fmaxnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -276,12 +274,10 @@ define float @fmaxnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -345,12 +341,10 @@ define float @fmaxnum_induction_starts_at_10(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 10, [[TMP9]] @@ -415,12 +409,10 @@ define float @fmaxnum_induction_starts_at_value(ptr %src, i64 %start, i64 %n) { ; CHECK-NEXT: [[TMP5:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP12:%.*]] = freeze <4 x i1> [[TMP5]] ; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP12]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP6]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP6]], [[TMP4]] ; CHECK-NEXT: br i1 [[TMP7]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] +; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[TMP6]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP3]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP6]], i64 [[INDEX]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP10:%.*]] = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> [[TMP8]]) ; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[START]], [[TMP9]] diff --git a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll index 211d3bf4c1f6a..7f65306bcbe52 100644 --- a/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll +++ b/llvm/test/Transforms/LoopVectorize/fmin-without-fast-math-flags.ll @@ -209,12 +209,10 @@ define float @fminnum_1(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] @@ -276,12 +274,10 @@ define float @fminnum_2(ptr %src, i64 %n) { ; CHECK-NEXT: [[TMP2:%.*]] = fcmp uno <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP10:%.*]] = freeze <4 x i1> [[TMP2]] ; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP10]]) -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP3]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = or i1 [[TMP3]], [[TMP5]] ; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP7:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] +; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP3]], <4 x float> [[VEC_PHI]], <4 x float> [[TMP4]] ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP3]], i64 [[IV]], i64 [[N_VEC]] ; CHECK-NEXT: [[TMP8:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP7]]) ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll index 7b9fcebb34049..c236b0af2a61d 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-non-void.ll @@ -716,15 +716,13 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = add nsw i32 [[TMP4]], [[X:%.*]] @@ -744,7 +742,7 @@ define i32 @predicated_udiv_scalarized_operand(ptr %a, i1 %c, i32 %x, i64 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP16:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP15]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP16]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP18]] = add <2 x i32> [[VEC_PHI]], [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] diff --git a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll index c164c4a46bd94..ae555a61f6032 100644 --- a/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll +++ b/llvm/test/Transforms/LoopVectorize/if-pred-stores.ll @@ -296,8 +296,6 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]] ; VEC-NEXT: [[IND_END:%.*]] = add i64 [[V_1]], [[N_VEC]] ; VEC-NEXT: [[TMP5:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[V_2:%.*]], i32 0 -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[COND_2:%.*]], i64 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label [[VECTOR_BODY:%.*]] ; VEC: vector.body: ; VEC-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ] @@ -305,7 +303,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: [[OFFSET_IDX:%.*]] = add i64 [[V_1]], [[INDEX]] ; VEC-NEXT: [[TMP7:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR:%.*]], i64 0, i64 [[OFFSET_IDX]] ; VEC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP7]], align 4 -; VEC-NEXT: br i1 [[COND_2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] +; VEC-NEXT: br i1 [[COND_2:%.*]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.if: ; VEC-NEXT: [[INDVARS_IV3:%.*]] = add i64 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [768 x i32], ptr [[PTR]], i64 0, i64 [[INDVARS_IV3]] @@ -318,7 +316,7 @@ define void @bug18724(i1 %cond, ptr %ptr, i1 %cond.2, i64 %v.1, i32 %v.2) { ; VEC-NEXT: br label [[PRED_STORE_CONTINUE2]] ; VEC: pred.store.continue2: ; VEC-NEXT: [[TMP15:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 1) -; VEC-NEXT: [[PREDPHI]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP15]], <2 x i32> [[VEC_PHI]] +; VEC-NEXT: [[PREDPHI]] = select i1 [[COND_2]], <2 x i32> [[TMP15]], <2 x i32> [[VEC_PHI]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 ; VEC-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; VEC-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/induction.ll b/llvm/test/Transforms/LoopVectorize/induction.ll index e33995327b856..66e4de5da7955 100644 --- a/llvm/test/Transforms/LoopVectorize/induction.ll +++ b/llvm/test/Transforms/LoopVectorize/induction.ll @@ -1959,15 +1959,13 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK: vector.ph: ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2 ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[PRED_UDIV_CONTINUE2]] ] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; CHECK-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; CHECK: pred.udiv.if: ; CHECK-NEXT: [[TMP4:%.*]] = add i32 [[INDEX]], 0 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 @@ -1985,7 +1983,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; CHECK-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; CHECK: pred.udiv.continue2: ; CHECK-NEXT: [[TMP11:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2024,8 +2022,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; IND: vector.ph: ; IND-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646 -; IND-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; IND-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; IND-NEXT: br label [[VECTOR_BODY:%.*]] ; IND: vector.body: ; IND-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ] @@ -2033,7 +2029,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 ; IND-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] ; IND-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 -; IND-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; IND-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; IND: pred.udiv.if: ; IND-NEXT: [[TMP2:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 ; IND-NEXT: [[TMP3:%.*]] = udiv i32 [[TMP2]], [[INDEX]] @@ -2049,8 +2045,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; IND-NEXT: [[TMP9:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP8]], i64 1 ; IND-NEXT: br label [[PRED_UDIV_CONTINUE2]] ; IND: pred.udiv.continue2: -; IND-NEXT: [[TMP10:%.*]] = phi <2 x i32> [ [[TMP5]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ] -; IND-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP10]], <2 x i32> [[WIDE_LOAD]] +; IND-NEXT: [[PREDPHI:%.*]] = phi <2 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF1]] ] ; IND-NEXT: [[TMP13]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; IND-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; IND-NEXT: [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] @@ -2090,8 +2085,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; UNROLL: vector.ph: ; UNROLL-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644 -; UNROLL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; UNROLL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; UNROLL-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL: vector.body: ; UNROLL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2102,7 +2095,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 8 ; UNROLL-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP1]], align 4 ; UNROLL-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; UNROLL-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; UNROLL-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL: pred.udiv.if: ; UNROLL-NEXT: [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0 ; UNROLL-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]] @@ -2136,9 +2129,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NEXT: [[TMP20:%.*]] = insertelement <2 x i32> [[TMP16]], i32 [[TMP19]], i64 1 ; UNROLL-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL: pred.udiv.continue8: -; UNROLL-NEXT: [[TMP21:%.*]] = phi <2 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ] -; UNROLL-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP11]], <2 x i32> [[WIDE_LOAD]] -; UNROLL-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP21]], <2 x i32> [[WIDE_LOAD2]] +; UNROLL-NEXT: [[PREDPHI9:%.*]] = phi <2 x i32> [ [[WIDE_LOAD2]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ] +; UNROLL-NEXT: [[PREDPHI:%.*]] = phi <2 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP11]], [[PRED_UDIV_IF7]] ] ; UNROLL-NEXT: [[TMP22]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NEXT: [[TMP23]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2181,8 +2173,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC: vector.ph: ; UNROLL-NO-IC-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4 ; UNROLL-NO-IC-NEXT: [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]] -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0 -; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; UNROLL-NO-IC-NEXT: br label [[VECTOR_BODY:%.*]] ; UNROLL-NO-IC: vector.body: ; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ] @@ -2192,7 +2182,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP0]], i32 2 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i32>, ptr [[TMP0]], align 4 ; UNROLL-NO-IC-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i32>, ptr [[TMP2]], align 4 -; UNROLL-NO-IC-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; UNROLL-NO-IC-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; UNROLL-NO-IC: pred.udiv.if: ; UNROLL-NO-IC-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 ; UNROLL-NO-IC-NEXT: [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0 @@ -2228,8 +2218,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; UNROLL-NO-IC-NEXT: br label [[PRED_UDIV_CONTINUE8]] ; UNROLL-NO-IC: pred.udiv.continue8: ; UNROLL-NO-IC-NEXT: [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP28]], [[PRED_UDIV_IF7]] ] -; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] -; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select <2 x i1> [[BROADCAST_SPLAT]], <2 x i32> [[TMP29]], <2 x i32> [[WIDE_LOAD2]] +; UNROLL-NO-IC-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <2 x i32> [[TMP17]], <2 x i32> [[WIDE_LOAD]] +; UNROLL-NO-IC-NEXT: [[PREDPHI9:%.*]] = select i1 [[C]], <2 x i32> [[TMP29]], <2 x i32> [[WIDE_LOAD2]] ; UNROLL-NO-IC-NEXT: [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]] ; UNROLL-NO-IC-NEXT: [[TMP33]] = add <2 x i32> [[PREDPHI9]], [[VEC_PHI1]] ; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -2270,8 +2260,6 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; INTERLEAVE: vector.ph: ; INTERLEAVE-NEXT: [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640 -; INTERLEAVE-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; INTERLEAVE-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; INTERLEAVE-NEXT: br label [[VECTOR_BODY:%.*]] ; INTERLEAVE: vector.body: ; INTERLEAVE-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE16:%.*]] ] @@ -2282,7 +2270,7 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i8, ptr [[TMP1]], i64 16 ; INTERLEAVE-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 ; INTERLEAVE-NEXT: [[WIDE_LOAD2:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4 -; INTERLEAVE-NEXT: br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] +; INTERLEAVE-NEXT: br i1 [[C:%.*]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]] ; INTERLEAVE: pred.udiv.if: ; INTERLEAVE-NEXT: [[TMP3:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]] @@ -2352,9 +2340,8 @@ define i32 @scalarize_induction_variable_05(ptr %a, i32 %x, i1 %c, i32 %n) { ; INTERLEAVE-NEXT: [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3 ; INTERLEAVE-NEXT: br label [[PRED_UDIV_CONTINUE16]] ; INTERLEAVE: pred.udiv.continue16: -; INTERLEAVE-NEXT: [[TMP41:%.*]] = phi <4 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ] -; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP21]], <4 x i32> [[WIDE_LOAD]] -; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP41]], <4 x i32> [[WIDE_LOAD2]] +; INTERLEAVE-NEXT: [[PREDPHI17:%.*]] = phi <4 x i32> [ [[WIDE_LOAD2]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP40]], [[PRED_UDIV_IF15]] ] +; INTERLEAVE-NEXT: [[PREDPHI:%.*]] = phi <4 x i32> [ [[WIDE_LOAD]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP21]], [[PRED_UDIV_IF15]] ] ; INTERLEAVE-NEXT: [[TMP42]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]] ; INTERLEAVE-NEXT: [[TMP43]] = add <4 x i32> [[PREDPHI17]], [[VEC_PHI1]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8 diff --git a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll index a1cb361d20bee..9921f2916ce00 100644 --- a/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll +++ b/llvm/test/Transforms/LoopVectorize/pr44488-predication.ll @@ -36,7 +36,8 @@ define i16 @test_true_and_false_branch_equal() { ; CHECK-NEXT: br label [[PRED_SREM_CONTINUE2]] ; CHECK: pred.srem.continue2: ; CHECK-NEXT: [[TMP10:%.*]] = phi <2 x i16> [ [[TMP6]], [[PRED_SREM_CONTINUE]] ], [ [[TMP9]], [[PRED_SREM_IF1]] ] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[TMP13]], <2 x i16> [[TMP10]], <2 x i16> splat (i16 5786) ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i16> [[PREDPHI]], i32 1 ; CHECK-NEXT: store i16 [[TMP11]], ptr @v_39, align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 diff --git a/llvm/test/Transforms/LoopVectorize/pr45525.ll b/llvm/test/Transforms/LoopVectorize/pr45525.ll index f32de2d75cdef..b05cf6ef76675 100644 --- a/llvm/test/Transforms/LoopVectorize/pr45525.ll +++ b/llvm/test/Transforms/LoopVectorize/pr45525.ll @@ -9,14 +9,12 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK-NEXT: [[BB_0:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[COND]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[VEC_IND]], splat (i32 3) -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> splat (i32 7), <4 x i32> [[TMP5]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[COND]], <4 x i32> splat (i32 7), <4 x i32> [[TMP5]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i32 [[INDEX]] ; CHECK-NEXT: store <4 x i32> [[PREDPHI]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 @@ -24,8 +22,9 @@ define void @main(i1 %cond, ptr %arr) { ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 32 ; CHECK-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: br [[BB_4:label %.*]] -; CHECK: [[SCALAR_PH:.*:]] +; CHECK-NEXT: br label %[[BB_4:.*]] +; CHECK: [[BB_4]]: +; CHECK-NEXT: ret void ; bb.0: br label %bb.1 diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index 7b3500933314a..ebd532aa5032c 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -23,9 +23,9 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: [[TMP0:%.*]] = add <2 x i32> [[VEC_PHI]], splat (i32 10) ; CHECK-NEXT: [[TMP1:%.*]] = add <2 x i32> [[TMP0]], splat (i32 20) ; CHECK-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP1]], [[TMP2]] -; CHECK-NEXT: [[PREDPHI5:%.*]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) +; CHECK-NEXT: [[PREDPHI5:%.*]] = select i1 [[C_2]], <2 x i32> [[VEC_IND]], <2 x i32> splat (i32 9) ; CHECK-NEXT: [[PREDPHI6:%.*]] = select <2 x i1> [[TMP5]], <2 x i32> [[TMP0]], <2 x i32> [[TMP3]] -; CHECK-NEXT: [[PREDPHI7]] = select <2 x i1> [[BROADCAST_SPLAT4]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] +; CHECK-NEXT: [[PREDPHI7]] = select i1 [[C_2]], <2 x i32> [[VEC_PHI]], <2 x i32> [[PREDPHI6]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 176 diff --git a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll index 13cc1b657d231..de764226ba8b5 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-small-size.ll @@ -86,11 +86,9 @@ define i8 @PR34687_no_undef(i1 %c, i32 %x, i32 %n) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT2]], <4 x i1> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) +; CHECK-NEXT: [[TMP0:%.*]] = select i1 [[C:%.*]], <4 x i32> [[BROADCAST_SPLAT2]], <4 x i32> splat (i32 1) ; CHECK-NEXT: [[TMP1:%.*]] = sdiv <4 x i32> splat (i32 99), [[TMP0]] -; CHECK-NEXT: [[PREDPHI:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[C]], <4 x i32> [[TMP1]], <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] diff --git a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll index 71311db33cf1a..3b515a2acb1a7 100644 --- a/llvm/test/Transforms/LoopVectorize/uniform-blend.ll +++ b/llvm/test/Transforms/LoopVectorize/uniform-blend.ll @@ -96,13 +96,11 @@ define void @blend_chain_iv(i1 %c) { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: br label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[PREDPHI1:%.*]] = phi <4 x i64> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[PREDPHI2:%.*]] = select <4 x i1> [[BROADCAST_SPLAT]], <4 x i64> [[PREDPHI1]], <4 x i64> poison +; CHECK-NEXT: [[PREDPHI2:%.*]] = select i1 [[C]], <4 x i64> [[PREDPHI1]], <4 x i64> poison ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x i64> [[PREDPHI2]], i32 2