diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index d05c22e3aeb61..6451620186f65 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1425,32 +1425,33 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) { continue; } - // Skip recipes that aren't single scalars or don't have only their - // scalar results used. In the latter case, we would introduce extra - // broadcasts. + // Skip recipes that aren't single scalars or when conversion to + // single-scalar does not introduce additional broadcasts. That is, either + // only the scalars of the recipe are used, or at least one of the + // operands would require a broadcast. In the latter case, the + // single-scalar may need to be broadcasted, but another broadcast is + // removed. scalar results used. In the latter case, we would introduce + // extra broadcasts. if (!vputils::isSingleScalar(RepOrWidenR) || - !all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) { - if (auto *Store = dyn_cast(U)) { - // VPWidenStore doesn't have users, and stores are always - // profitable to widen: hence, permitting address and mask - // operands, and single-scalar stored values is an important leaf - // condition. The assert must hold as we checked the RepOrWidenR - // operand against vputils::isSingleScalar. - assert(RepOrWidenR != Store->getStoredValue() || - vputils::isSingleScalar(Store->getStoredValue())); - return true; - } - - if (auto *VPI = dyn_cast(U)) { - unsigned Opcode = VPI->getOpcode(); - if (Opcode == VPInstruction::ExtractLastElement || - Opcode == VPInstruction::ExtractLastLanePerPart || - Opcode == VPInstruction::ExtractPenultimateElement) - return true; - } - - return U->usesScalars(RepOrWidenR); - })) + (!all_of(RepOrWidenR->users(), + [RepOrWidenR](const VPUser *U) { + if (auto *VPI = dyn_cast(U)) { + unsigned Opcode = VPI->getOpcode(); + if (Opcode == VPInstruction::ExtractLastElement || + Opcode == VPInstruction::ExtractLastLanePerPart || + Opcode == VPInstruction::ExtractPenultimateElement) + return true; + } + + return U->usesScalars(RepOrWidenR); + }) && + none_of(RepOrWidenR->operands(), [RepOrWidenR](VPValue *Op) { + return Op->getSingleUser() == RepOrWidenR && + ((Op->isLiveIn() && + !isa(Op->getLiveInIRValue())) || + (isa(Op) && + cast(Op)->isSingleScalar())); + }))) continue; auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(), diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll index 2f7e3568d5654..042341f27b873 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll @@ -531,10 +531,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2 -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0 +; DEFAULT-NEXT: [[TMP2:%.*]] = or i16 [[TMP1]], 1 +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP2]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1) -; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double> +; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[BROADCAST_SPLAT]] to <8 x double> ; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8 ; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 @@ -563,10 +563,10 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 { ; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8 ; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]] ; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2 -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP12]], i64 0 +; PRED-NEXT: [[TMP11:%.*]] = or i16 [[TMP12]], 1 +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i16 [[TMP11]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; PRED-NEXT: [[TMP13:%.*]] = or [[BROADCAST_SPLAT]], splat (i16 1) -; PRED-NEXT: [[TMP14:%.*]] = uitofp [[TMP13]] to +; PRED-NEXT: [[TMP14:%.*]] = uitofp [[BROADCAST_SPLAT]] to ; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0( [[TMP14]], ptr align 8 [[NEXT_GEP]], [[ACTIVE_LANE_MASK]]) ; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]] ; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]]) @@ -672,10 +672,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP15]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer ; DEFAULT-NEXT: [[TMP16:%.*]] = load float, ptr [[SRC_2]], align 4 -; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP16]], i64 0 +; DEFAULT-NEXT: [[TMP17:%.*]] = fmul float [[TMP16]], 0.000000e+00 +; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP17]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer -; DEFAULT-NEXT: [[TMP17:%.*]] = fmul <8 x float> [[BROADCAST_SPLAT]], zeroinitializer -; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[TMP17]]) +; DEFAULT-NEXT: [[TMP18:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[BROADCAST_SPLAT]]) ; DEFAULT-NEXT: [[TMP19:%.*]] = load float, ptr [[SRC_3]], align 4 ; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0 ; DEFAULT-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer @@ -857,10 +857,10 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias ; PRED-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <8 x float> poison, float [[TMP18]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT9:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT8]], <8 x float> poison, <8 x i32> zeroinitializer ; PRED-NEXT: [[TMP19:%.*]] = load float, ptr [[SRC_2]], align 4 -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP19]], i64 0 +; PRED-NEXT: [[TMP20:%.*]] = fmul float [[TMP19]], 0.000000e+00 +; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x float> poison, float [[TMP20]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT]], <8 x float> poison, <8 x i32> zeroinitializer -; PRED-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[BROADCAST_SPLAT]], zeroinitializer -; PRED-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[TMP20]]) +; PRED-NEXT: [[TMP21:%.*]] = call <8 x float> @llvm.fmuladd.v8f32(<8 x float> [[BROADCAST_SPLAT9]], <8 x float> zeroinitializer, <8 x float> [[BROADCAST_SPLAT]]) ; PRED-NEXT: [[TMP22:%.*]] = load float, ptr [[SRC_3]], align 4 ; PRED-NEXT: [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <8 x float> poison, float [[TMP22]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT11:%.*]] = shufflevector <8 x float> [[BROADCAST_SPLATINSERT10]], <8 x float> poison, <8 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll index 44ae1757ce6e6..f2c0ca30a6c18 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/reduction-recurrence-costs-sve.ll @@ -59,8 +59,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[TMP4:%.*]] = mul nuw i64 [[TMP3]], 8 ; VSCALEFORTUNING2-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], [[TMP4]] ; VSCALEFORTUNING2-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] -; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[Z]], i64 0 -; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[X]], i64 0 ; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; VSCALEFORTUNING2-NEXT: [[TMP7:%.*]] = add i64 [[Y]], 1 @@ -68,7 +66,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; VSCALEFORTUNING2-NEXT: [[TMP9:%.*]] = lshr [[BROADCAST_SPLAT]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP10:%.*]] = shl [[BROADCAST_SPLAT]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP11:%.*]] = or [[TMP9]], [[TMP10]] -; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = or [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]] +; VSCALEFORTUNING2-NEXT: [[TMP16:%.*]] = or i32 [[Z]], [[X]] +; VSCALEFORTUNING2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP16]], i64 0 +; VSCALEFORTUNING2-NEXT: [[TMP12:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; VSCALEFORTUNING2-NEXT: [[TMP13:%.*]] = and [[TMP12]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP14:%.*]] = xor [[TMP13]], splat (i32 1) ; VSCALEFORTUNING2-NEXT: [[TMP15:%.*]] = zext [[TMP14]] to @@ -180,8 +180,6 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED: [[VECTOR_PH]]: ; PRED-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() ; PRED-NEXT: [[TMP2:%.*]] = mul nuw i64 [[TMP1]], 4 -; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[Z]], i64 0 -; PRED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; PRED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[X]], i64 0 ; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer ; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() @@ -195,7 +193,9 @@ define i32 @chained_recurrences(i32 %x, i64 %y, ptr %src.1, i32 %z, ptr %src.2) ; PRED-NEXT: [[TMP13:%.*]] = lshr [[BROADCAST_SPLAT]], splat (i32 1) ; PRED-NEXT: [[TMP14:%.*]] = shl [[BROADCAST_SPLAT]], splat (i32 1) ; PRED-NEXT: [[TMP15:%.*]] = or [[TMP13]], [[TMP14]] -; PRED-NEXT: [[TMP16:%.*]] = or [[BROADCAST_SPLAT2]], [[BROADCAST_SPLAT]] +; PRED-NEXT: [[TMP20:%.*]] = or i32 [[Z]], [[X]] +; PRED-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement poison, i32 [[TMP20]], i64 0 +; PRED-NEXT: [[TMP16:%.*]] = shufflevector [[BROADCAST_SPLATINSERT2]], poison, zeroinitializer ; PRED-NEXT: [[TMP17:%.*]] = and [[TMP16]], splat (i32 1) ; PRED-NEXT: [[TMP18:%.*]] = xor [[TMP17]], splat (i32 1) ; PRED-NEXT: [[TMP19:%.*]] = zext [[TMP18]] to diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll index 8d4d282a5236d..0723f16677090 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/pr88802.ll @@ -9,10 +9,10 @@ define void @test(ptr %p, i64 %a, i8 %b) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[A]], 48 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP5:%.*]] = shl [[BROADCAST_SPLAT2]], splat (i64 48) -; CHECK-NEXT: [[TMP6:%.*]] = ashr [[TMP5]], splat (i64 52) +; CHECK-NEXT: [[TMP6:%.*]] = ashr [[BROADCAST_SPLAT2]], splat (i64 52) ; CHECK-NEXT: [[TMP7:%.*]] = trunc [[TMP6]] to ; CHECK-NEXT: [[TMP8:%.*]] = zext [[BROADCAST_SPLAT]] to ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement poison, ptr [[P]], i64 0 diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll index 6ec010cdcc248..651e2ad5e74da 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-conditional-branches.ll @@ -686,16 +686,256 @@ exit: ; Test for https://github.com/llvm/llvm-project/issues/129236. define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) { ; CHECK-LABEL: @cost_ashr_with_op_known_invariant_via_scev( -; CHECK-NEXT: entry: +; CHECK-NEXT: iter.check: ; CHECK-NEXT: [[CMP_I:%.*]] = icmp eq i16 0, 0 ; CHECK-NEXT: [[CONV_I:%.*]] = sext i16 0 to i32 ; CHECK-NEXT: [[CONV5_I:%.*]] = sext i8 [[A:%.*]] to i32 +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <32 x i1> poison, i1 [[CMP_I]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <32 x i1> [[BROADCAST_SPLATINSERT]], <32 x i1> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP60:%.*]] = xor <32 x i1> [[BROADCAST_SPLAT]], splat (i1 true) ; CHECK-NEXT: br label [[LOOP_HEADER:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE62:%.*]] ] +; CHECK-NEXT: [[TMP61:%.*]] = extractelement <32 x i1> [[TMP60]], i32 0 +; CHECK-NEXT: br i1 [[TMP61]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]] +; CHECK: pred.urem.if: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE]] +; CHECK: pred.urem.continue: +; CHECK-NEXT: [[TMP2:%.*]] = extractelement <32 x i1> [[TMP60]], i32 1 +; CHECK-NEXT: br i1 [[TMP2]], label [[PRED_UREM_IF1:%.*]], label [[PRED_UREM_CONTINUE2:%.*]] +; CHECK: pred.urem.if1: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE2]] +; CHECK: pred.urem.continue2: +; CHECK-NEXT: [[TMP3:%.*]] = extractelement <32 x i1> [[TMP60]], i32 2 +; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4:%.*]] +; CHECK: pred.urem.if3: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE4]] +; CHECK: pred.urem.continue4: +; CHECK-NEXT: [[TMP4:%.*]] = extractelement <32 x i1> [[TMP60]], i32 3 +; CHECK-NEXT: br i1 [[TMP4]], label [[PRED_UREM_IF5:%.*]], label [[PRED_UREM_CONTINUE6:%.*]] +; CHECK: pred.urem.if5: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE6]] +; CHECK: pred.urem.continue6: +; CHECK-NEXT: [[TMP5:%.*]] = extractelement <32 x i1> [[TMP60]], i32 4 +; CHECK-NEXT: br i1 [[TMP5]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]] +; CHECK: pred.urem.if7: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE8]] +; CHECK: pred.urem.continue8: +; CHECK-NEXT: [[TMP6:%.*]] = extractelement <32 x i1> [[TMP60]], i32 5 +; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]] +; CHECK: pred.urem.if9: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE10]] +; CHECK: pred.urem.continue10: +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <32 x i1> [[TMP60]], i32 6 +; CHECK-NEXT: br i1 [[TMP7]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]] +; CHECK: pred.urem.if11: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE12]] +; CHECK: pred.urem.continue12: +; CHECK-NEXT: [[TMP8:%.*]] = extractelement <32 x i1> [[TMP60]], i32 7 +; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]] +; CHECK: pred.urem.if13: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE14]] +; CHECK: pred.urem.continue14: +; CHECK-NEXT: [[TMP9:%.*]] = extractelement <32 x i1> [[TMP60]], i32 8 +; CHECK-NEXT: br i1 [[TMP9]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]] +; CHECK: pred.urem.if15: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE16]] +; CHECK: pred.urem.continue16: +; CHECK-NEXT: [[TMP10:%.*]] = extractelement <32 x i1> [[TMP60]], i32 9 +; CHECK-NEXT: br i1 [[TMP10]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]] +; CHECK: pred.urem.if17: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE18]] +; CHECK: pred.urem.continue18: +; CHECK-NEXT: [[TMP11:%.*]] = extractelement <32 x i1> [[TMP60]], i32 10 +; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20:%.*]] +; CHECK: pred.urem.if19: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE20]] +; CHECK: pred.urem.continue20: +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <32 x i1> [[TMP60]], i32 11 +; CHECK-NEXT: br i1 [[TMP12]], label [[PRED_UREM_IF21:%.*]], label [[PRED_UREM_CONTINUE22:%.*]] +; CHECK: pred.urem.if21: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE22]] +; CHECK: pred.urem.continue22: +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <32 x i1> [[TMP60]], i32 12 +; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_UREM_IF23:%.*]], label [[PRED_UREM_CONTINUE24:%.*]] +; CHECK: pred.urem.if23: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE24]] +; CHECK: pred.urem.continue24: +; CHECK-NEXT: [[TMP14:%.*]] = extractelement <32 x i1> [[TMP60]], i32 13 +; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_UREM_IF25:%.*]], label [[PRED_UREM_CONTINUE26:%.*]] +; CHECK: pred.urem.if25: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE26]] +; CHECK: pred.urem.continue26: +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <32 x i1> [[TMP60]], i32 14 +; CHECK-NEXT: br i1 [[TMP15]], label [[PRED_UREM_IF27:%.*]], label [[PRED_UREM_CONTINUE28:%.*]] +; CHECK: pred.urem.if27: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE28]] +; CHECK: pred.urem.continue28: +; CHECK-NEXT: [[TMP16:%.*]] = extractelement <32 x i1> [[TMP60]], i32 15 +; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_UREM_IF29:%.*]], label [[PRED_UREM_CONTINUE30:%.*]] +; CHECK: pred.urem.if29: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE30]] +; CHECK: pred.urem.continue30: +; CHECK-NEXT: [[TMP17:%.*]] = extractelement <32 x i1> [[TMP60]], i32 16 +; CHECK-NEXT: br i1 [[TMP17]], label [[PRED_UREM_IF31:%.*]], label [[PRED_UREM_CONTINUE32:%.*]] +; CHECK: pred.urem.if31: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE32]] +; CHECK: pred.urem.continue32: +; CHECK-NEXT: [[TMP18:%.*]] = extractelement <32 x i1> [[TMP60]], i32 17 +; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_UREM_IF33:%.*]], label [[PRED_UREM_CONTINUE34:%.*]] +; CHECK: pred.urem.if33: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE34]] +; CHECK: pred.urem.continue34: +; CHECK-NEXT: [[TMP19:%.*]] = extractelement <32 x i1> [[TMP60]], i32 18 +; CHECK-NEXT: br i1 [[TMP19]], label [[PRED_UREM_IF35:%.*]], label [[PRED_UREM_CONTINUE36:%.*]] +; CHECK: pred.urem.if35: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE36]] +; CHECK: pred.urem.continue36: +; CHECK-NEXT: [[TMP20:%.*]] = extractelement <32 x i1> [[TMP60]], i32 19 +; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_UREM_IF37:%.*]], label [[PRED_UREM_CONTINUE38:%.*]] +; CHECK: pred.urem.if37: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE38]] +; CHECK: pred.urem.continue38: +; CHECK-NEXT: [[TMP21:%.*]] = extractelement <32 x i1> [[TMP60]], i32 20 +; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_UREM_IF39:%.*]], label [[PRED_UREM_CONTINUE40:%.*]] +; CHECK: pred.urem.if39: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE40]] +; CHECK: pred.urem.continue40: +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <32 x i1> [[TMP60]], i32 21 +; CHECK-NEXT: br i1 [[TMP22]], label [[PRED_UREM_IF41:%.*]], label [[PRED_UREM_CONTINUE42:%.*]] +; CHECK: pred.urem.if41: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE42]] +; CHECK: pred.urem.continue42: +; CHECK-NEXT: [[TMP23:%.*]] = extractelement <32 x i1> [[TMP60]], i32 22 +; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_UREM_IF43:%.*]], label [[PRED_UREM_CONTINUE44:%.*]] +; CHECK: pred.urem.if43: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE44]] +; CHECK: pred.urem.continue44: +; CHECK-NEXT: [[TMP24:%.*]] = extractelement <32 x i1> [[TMP60]], i32 23 +; CHECK-NEXT: br i1 [[TMP24]], label [[PRED_UREM_IF45:%.*]], label [[PRED_UREM_CONTINUE46:%.*]] +; CHECK: pred.urem.if45: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE46]] +; CHECK: pred.urem.continue46: +; CHECK-NEXT: [[TMP25:%.*]] = extractelement <32 x i1> [[TMP60]], i32 24 +; CHECK-NEXT: br i1 [[TMP25]], label [[PRED_UREM_IF47:%.*]], label [[PRED_UREM_CONTINUE48:%.*]] +; CHECK: pred.urem.if47: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE48]] +; CHECK: pred.urem.continue48: +; CHECK-NEXT: [[TMP26:%.*]] = extractelement <32 x i1> [[TMP60]], i32 25 +; CHECK-NEXT: br i1 [[TMP26]], label [[PRED_UREM_IF49:%.*]], label [[PRED_UREM_CONTINUE50:%.*]] +; CHECK: pred.urem.if49: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE50]] +; CHECK: pred.urem.continue50: +; CHECK-NEXT: [[TMP27:%.*]] = extractelement <32 x i1> [[TMP60]], i32 26 +; CHECK-NEXT: br i1 [[TMP27]], label [[PRED_UREM_IF51:%.*]], label [[PRED_UREM_CONTINUE52:%.*]] +; CHECK: pred.urem.if51: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE52]] +; CHECK: pred.urem.continue52: +; CHECK-NEXT: [[TMP28:%.*]] = extractelement <32 x i1> [[TMP60]], i32 27 +; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_UREM_IF53:%.*]], label [[PRED_UREM_CONTINUE54:%.*]] +; CHECK: pred.urem.if53: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE54]] +; CHECK: pred.urem.continue54: +; CHECK-NEXT: [[TMP29:%.*]] = extractelement <32 x i1> [[TMP60]], i32 28 +; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_UREM_IF55:%.*]], label [[PRED_UREM_CONTINUE56:%.*]] +; CHECK: pred.urem.if55: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE56]] +; CHECK: pred.urem.continue56: +; CHECK-NEXT: [[TMP30:%.*]] = extractelement <32 x i1> [[TMP60]], i32 29 +; CHECK-NEXT: br i1 [[TMP30]], label [[PRED_UREM_IF57:%.*]], label [[PRED_UREM_CONTINUE58:%.*]] +; CHECK: pred.urem.if57: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE58]] +; CHECK: pred.urem.continue58: +; CHECK-NEXT: [[TMP31:%.*]] = extractelement <32 x i1> [[TMP60]], i32 30 +; CHECK-NEXT: br i1 [[TMP31]], label [[PRED_UREM_IF59:%.*]], label [[PRED_UREM_CONTINUE60:%.*]] +; CHECK: pred.urem.if59: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE60]] +; CHECK: pred.urem.continue60: +; CHECK-NEXT: [[TMP32:%.*]] = extractelement <32 x i1> [[TMP60]], i32 31 +; CHECK-NEXT: br i1 [[TMP32]], label [[PRED_UREM_IF61:%.*]], label [[PRED_UREM_CONTINUE62]] +; CHECK: pred.urem.if61: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE62]] +; CHECK: pred.urem.continue62: +; CHECK-NEXT: [[TMP33:%.*]] = select <32 x i1> [[TMP60]], <32 x i1> poison, <32 x i1> zeroinitializer +; CHECK-NEXT: [[TMP34:%.*]] = or <32 x i1> [[TMP33]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[PREDPHI:%.*]] = select i1 [[CMP_I]], <32 x i32> zeroinitializer, <32 x i32> poison +; CHECK-NEXT: [[TMP35:%.*]] = extractelement <32 x i32> [[PREDPHI]], i32 0 +; CHECK-NEXT: [[TMP36:%.*]] = ashr i32 [[CONV5_I]], [[TMP35]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <32 x i32> poison, i32 [[TMP36]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT64:%.*]] = shufflevector <32 x i32> [[BROADCAST_SPLATINSERT63]], <32 x i32> poison, <32 x i32> zeroinitializer +; CHECK-NEXT: [[TMP37:%.*]] = icmp eq <32 x i32> [[BROADCAST_SPLAT64]], zeroinitializer +; CHECK-NEXT: [[TMP38:%.*]] = shl <32 x i32> [[PREDPHI]], splat (i32 24) +; CHECK-NEXT: [[TMP39:%.*]] = ashr exact <32 x i32> [[TMP38]], splat (i32 24) +; CHECK-NEXT: [[TMP40:%.*]] = extractelement <32 x i1> [[TMP37]], i32 0 +; CHECK-NEXT: [[TMP41:%.*]] = select i1 [[TMP40]], <32 x i32> [[TMP39]], <32 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI65:%.*]] = select <32 x i1> [[TMP34]], <32 x i32> [[TMP41]], <32 x i32> zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 +; CHECK-NEXT: [[TMP42:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96 +; CHECK-NEXT: br i1 [[TMP42]], label [[MIDDLE_BLOCK:%.*]], label [[LOOP_HEADER]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP43:%.*]] = extractelement <32 x i32> [[PREDPHI65]], i32 31 +; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11:![0-9]+]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 96, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT66:%.*]] = insertelement <4 x i1> poison, i1 [[CMP_I]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT67:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT66]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLAT67]], splat (i1 true) +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX68:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT81:%.*]], [[PRED_UREM_CONTINUE76:%.*]] ] +; CHECK-NEXT: [[TMP45:%.*]] = extractelement <4 x i1> [[TMP44]], i32 0 +; CHECK-NEXT: br i1 [[TMP45]], label [[PRED_UREM_IF69:%.*]], label [[PRED_UREM_CONTINUE70:%.*]] +; CHECK: pred.urem.if69: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE70]] +; CHECK: pred.urem.continue70: +; CHECK-NEXT: [[TMP46:%.*]] = extractelement <4 x i1> [[TMP44]], i32 1 +; CHECK-NEXT: br i1 [[TMP46]], label [[PRED_UREM_IF71:%.*]], label [[PRED_UREM_CONTINUE72:%.*]] +; CHECK: pred.urem.if71: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE72]] +; CHECK: pred.urem.continue72: +; CHECK-NEXT: [[TMP47:%.*]] = extractelement <4 x i1> [[TMP44]], i32 2 +; CHECK-NEXT: br i1 [[TMP47]], label [[PRED_UREM_IF73:%.*]], label [[PRED_UREM_CONTINUE74:%.*]] +; CHECK: pred.urem.if73: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE74]] +; CHECK: pred.urem.continue74: +; CHECK-NEXT: [[TMP48:%.*]] = extractelement <4 x i1> [[TMP44]], i32 3 +; CHECK-NEXT: br i1 [[TMP48]], label [[PRED_UREM_IF75:%.*]], label [[PRED_UREM_CONTINUE76]] +; CHECK: pred.urem.if75: +; CHECK-NEXT: br label [[PRED_UREM_CONTINUE76]] +; CHECK: pred.urem.continue76: +; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i1> zeroinitializer +; CHECK-NEXT: [[TMP50:%.*]] = or <4 x i1> [[TMP49]], [[BROADCAST_SPLAT67]] +; CHECK-NEXT: [[PREDPHI77:%.*]] = select i1 [[CMP_I]], <4 x i32> zeroinitializer, <4 x i32> poison +; CHECK-NEXT: [[TMP51:%.*]] = extractelement <4 x i32> [[PREDPHI77]], i32 0 +; CHECK-NEXT: [[TMP52:%.*]] = ashr i32 [[CONV5_I]], [[TMP51]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT78:%.*]] = insertelement <4 x i32> poison, i32 [[TMP52]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT79:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT78]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[TMP53:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT79]], zeroinitializer +; CHECK-NEXT: [[TMP54:%.*]] = shl <4 x i32> [[PREDPHI77]], splat (i32 24) +; CHECK-NEXT: [[TMP55:%.*]] = ashr exact <4 x i32> [[TMP54]], splat (i32 24) +; CHECK-NEXT: [[TMP56:%.*]] = extractelement <4 x i1> [[TMP53]], i32 0 +; CHECK-NEXT: [[TMP57:%.*]] = select i1 [[TMP56]], <4 x i32> [[TMP55]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[PREDPHI80:%.*]] = select <4 x i1> [[TMP50]], <4 x i32> [[TMP57]], <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT81]] = add nuw i32 [[INDEX68]], 4 +; CHECK-NEXT: [[TMP58:%.*]] = icmp eq i32 [[INDEX_NEXT81]], 100 +; CHECK-NEXT: br i1 [[TMP58]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP59:%.*]] = extractelement <4 x i32> [[PREDPHI80]], i32 3 +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ 4, [[VEC_EPILOG_ITER_CHECK]] ], [ 100, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: br label [[LOOP_HEADER1:%.*]] ; CHECK: loop.header: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 100, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: br i1 [[CMP_I]], label [[THEN:%.*]], label [[ELSE:%.*]] ; CHECK: then: -; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[REM_I:%.*]], [[ELSE]] ], [ 0, [[LOOP_HEADER]] ] +; CHECK-NEXT: [[P_1:%.*]] = phi i32 [ [[REM_I:%.*]], [[ELSE]] ], [ 0, [[LOOP_HEADER1]] ] ; CHECK-NEXT: [[SHR_I:%.*]] = ashr i32 [[CONV5_I]], [[P_1]] ; CHECK-NEXT: [[TOBOOL6_NOT_I:%.*]] = icmp eq i32 [[SHR_I]], 0 ; CHECK-NEXT: [[SEXT_I:%.*]] = shl i32 [[P_1]], 24 @@ -710,9 +950,9 @@ define i32 @cost_ashr_with_op_known_invariant_via_scev(i8 %a) { ; CHECK-NEXT: [[P_2:%.*]] = phi i32 [ 0, [[ELSE]] ], [ [[TMP1]], [[THEN]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i8 [[IV]], -1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i8 [[IV_NEXT]], 0 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_HEADER]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER1]], !llvm.loop [[LOOP13:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ] +; CHECK-NEXT: [[P_2_LCSSA:%.*]] = phi i32 [ [[P_2]], [[LOOP_LATCH]] ], [ [[TMP43]], [[MIDDLE_BLOCK]] ], [ [[TMP59]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[P_2_LCSSA]] ; entry: @@ -838,7 +1078,7 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 { ; CHECK-NEXT: store <8 x i32> [[PREDPHI]], ptr [[TMP42]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 ; CHECK-NEXT: [[TMP43:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16 -; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP43]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br label [[SCALAR_PH:%.*]] ; CHECK: scalar.ph: @@ -858,7 +1098,7 @@ define void @sdiv_by_zero(ptr noalias %src, ptr noalias %dst, i32 %d) #2 { ; CHECK-NEXT: store i32 [[MERGE]], ptr [[GEP_DST]], align 4 ; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp ult i64 [[IV]], 16 -; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP11:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[LOOP_HEADER]], label [[EXIT:%.*]], !llvm.loop [[LOOP15:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: ret void ; @@ -1197,12 +1437,12 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 32 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) ; CHECK-NEXT: [[TMP163:%.*]] = icmp eq i32 [[INDEX_NEXT]], 992 -; CHECK-NEXT: br i1 [[TMP163]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP163]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: [[TMP164:%.*]] = extractelement <32 x i64> [[PREDPHI]], i32 31 ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF13:![0-9]+]] +; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF17:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 992, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BROADCAST_SPLATINSERT63:%.*]] = insertelement <8 x i1> poison, i1 [[C]], i64 0 @@ -1293,7 +1533,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK-NEXT: [[INDEX_NEXT86]] = add nuw i32 [[INDEX67]], 8 ; CHECK-NEXT: [[VEC_IND_NEXT87]] = add <8 x i32> [[VEC_IND68]], splat (i32 8) ; CHECK-NEXT: [[TMP208:%.*]] = icmp eq i32 [[INDEX_NEXT86]], 1000 -; CHECK-NEXT: br i1 [[TMP208]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] +; CHECK-NEXT: br i1 [[TMP208]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]] ; CHECK: vec.epilog.middle.block: ; CHECK-NEXT: [[TMP209:%.*]] = extractelement <8 x i64> [[PREDPHI85]], i32 7 ; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] @@ -1312,7 +1552,7 @@ define i64 @test_predicated_udiv(i32 %d, i1 %c) #2 { ; CHECK-NEXT: [[MERGE:%.*]] = phi i64 [ [[ZEXT]], [[THEN]] ], [ 0, [[LOOP_HEADER]] ] ; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 ; CHECK-NEXT: [[EC:%.*]] = icmp eq i32 [[IV]], 1000 -; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP15:![0-9]+]] +; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]] ; CHECK: exit: ; CHECK-NEXT: [[MERGE_LCSSA:%.*]] = phi i64 [ [[MERGE]], [[LOOP_LATCH]] ], [ [[TMP164]], [[MIDDLE_BLOCK]] ], [ [[TMP209]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i64 [[MERGE_LCSSA]] diff --git a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll index 725fa49c0930c..2bffbdf4bf039 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/cost-model.ll @@ -334,7 +334,7 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK-NEXT: [[TMP1:%.*]] = freeze i64 [[TMP0]] ; CHECK-NEXT: [[UMIN7:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP1]], i64 [[A:%.*]]) ; CHECK-NEXT: [[TMP2:%.*]] = add nuw i64 [[UMIN7]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 28 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP2]], 14 ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]] ; CHECK: vector.scevcheck: ; CHECK-NEXT: [[UMAX:%.*]] = call i64 @llvm.umax.i64(i64 [[B]], i64 1) @@ -369,13 +369,13 @@ define void @multi_exit(ptr %dst, ptr %src.1, ptr %src.2, i64 %A, i64 %B) #0 { ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP13:%.*]] = load i64, ptr [[SRC_1]], align 8, !alias.scope [[META6:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP13]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = load i64, ptr [[SRC_2]], align 8, !alias.scope [[META9:![0-9]+]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <2 x i64> poison, i64 [[TMP14]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT10:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT9]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP15:%.*]] = icmp eq <2 x i64> [[BROADCAST_SPLAT]], zeroinitializer -; CHECK-NEXT: [[TMP16:%.*]] = icmp ne <2 x i64> [[BROADCAST_SPLAT10]], zeroinitializer +; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[TMP13]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT8:%.*]] = insertelement <2 x i1> poison, i1 [[TMP21]], i64 0 +; CHECK-NEXT: [[TMP15:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT8]], <2 x i1> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP22:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[TMP22]], i64 0 +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP17:%.*]] = and <2 x i1> [[TMP16]], [[TMP15]] ; CHECK-NEXT: [[TMP18:%.*]] = zext <2 x i1> [[TMP17]] to <2 x i8> ; CHECK-NEXT: [[TMP19:%.*]] = extractelement <2 x i8> [[TMP18]], i32 1 @@ -725,10 +725,10 @@ define i64 @cost_loop_invariant_recipes(i1 %x, i64 %y) { ; CHECK: vector.ph: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[Y:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X:%.*]], i64 0 +; CHECK-NEXT: [[X:%.*]] = xor i1 [[X1:%.*]], true +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[X]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], splat (i1 true) -; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[TMP0]] to <2 x i64> +; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> [[BROADCAST_SPLAT]] to <2 x i64> ; CHECK-NEXT: [[TMP2:%.*]] = shl <2 x i64> [[BROADCAST_SPLAT2]], [[TMP1]] ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll index 14a83176f02ff..83d8fbbea2a45 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/replicating-load-store-costs.ll @@ -483,16 +483,16 @@ define double @test_load_used_by_other_load_scev(ptr %ptr.a, ptr %ptr.b, ptr %pt ; I64-NEXT: [[TMP8:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP6]] ; I64-NEXT: [[TMP9:%.*]] = getelementptr double, ptr [[PTR_B]], i64 [[TMP7]] ; I64-NEXT: [[TMP10:%.*]] = load double, ptr [[PTR_A]], align 8 -; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP10]], i64 0 +; I64-NEXT: [[TMP11:%.*]] = fadd double [[TMP10]], 0.000000e+00 +; I64-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP11]], i64 0 ; I64-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer -; I64-NEXT: [[TMP11:%.*]] = fadd <2 x double> [[BROADCAST_SPLAT]], zeroinitializer ; I64-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[TMP2]], i64 8 ; I64-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[TMP3]], i64 8 ; I64-NEXT: [[TMP14:%.*]] = load double, ptr [[TMP12]], align 8 ; I64-NEXT: [[TMP15:%.*]] = load double, ptr [[TMP13]], align 8 ; I64-NEXT: [[TMP16:%.*]] = insertelement <2 x double> poison, double [[TMP14]], i32 0 ; I64-NEXT: [[TMP17:%.*]] = insertelement <2 x double> [[TMP16]], double [[TMP15]], i32 1 -; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[TMP11]], zeroinitializer +; I64-NEXT: [[TMP18:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], zeroinitializer ; I64-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x double> poison, double [[ACCUM]], i64 0 ; I64-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT1]], <2 x double> poison, <2 x i32> zeroinitializer ; I64-NEXT: [[TMP19:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLAT2]], <2 x double> [[TMP18]], <2 x i32> diff --git a/llvm/test/Transforms/LoopVectorize/cse-casts.ll b/llvm/test/Transforms/LoopVectorize/cse-casts.ll index fb45745eff1cb..910a25ea938c9 100644 --- a/llvm/test/Transforms/LoopVectorize/cse-casts.ll +++ b/llvm/test/Transforms/LoopVectorize/cse-casts.ll @@ -14,9 +14,9 @@ define i8 @preserve_flags_when_cloning_trunc(i8 %start, ptr noalias %src, ptr no ; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ [[TMP0]], %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i8> [ splat (i8 1), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[SRC]], align 4 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP1]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp ne i32 [[TMP1]], 0 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP10]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i1> [[TMP2]] to <4 x i16> ; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i16, ptr [[DST]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i16, ptr [[TMP4]], i32 4 diff --git a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll index 1376a687b38b8..3fdf344f9621e 100644 --- a/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll +++ b/llvm/test/Transforms/LoopVectorize/first-order-recurrence-chains-vplan.ll @@ -180,7 +180,7 @@ define i32 @test_chained_first_order_recurrences_4(ptr %base, i64 %x) { ; CHECK-NEXT: Successor(s): scalar.ph, vector.ph ; CHECK-EMPTY: ; CHECK-NEXT: vector.ph: -; CHECK-NEXT: WIDEN ir<%for.x.next> = mul ir<%x>, ir<2> +; CHECK-NEXT: CLONE ir<%for.x.next> = mul ir<%x>, ir<2> ; CHECK-NEXT: Successor(s): vector loop ; CHECK-EMPTY: ; CHECK-NEXT: vector loop: { diff --git a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll index 440309d246899..13e0c4bd7e926 100644 --- a/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll +++ b/llvm/test/Transforms/LoopVectorize/narrow-to-single-scalar.ll @@ -161,18 +161,16 @@ define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { ; VF4IC1-NEXT: br label %[[VECTOR_PH:.*]] ; VF4IC1: [[VECTOR_PH]]: ; VF4IC1-NEXT: [[TMP0:%.*]] = add i32 [[X]], 1 -; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 -; VF4IC1-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; VF4IC1-NEXT: [[TMP5:%.*]] = mul i32 [[TMP0]], 3 -; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 +; VF4IC1-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; VF4IC1-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer +; VF4IC1-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP1]], splat (i32 3) ; VF4IC1-NEXT: br label %[[VECTOR_BODY:.*]] ; VF4IC1: [[VECTOR_BODY]]: ; VF4IC1-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF4IC1-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] ; VF4IC1-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] -; VF4IC1-NEXT: store <4 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 -; VF4IC1-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4 +; VF4IC1-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP2]], align 4 +; VF4IC1-NEXT: store <4 x i32> [[TMP5]], ptr [[TMP3]], align 4 ; VF4IC1-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; VF4IC1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; VF4IC1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] @@ -187,22 +185,20 @@ define void @narrow_widen_store_user(i32 %x, ptr noalias %A, ptr noalias %B) { ; VF2IC2-NEXT: br label %[[VECTOR_PH:.*]] ; VF2IC2: [[VECTOR_PH]]: ; VF2IC2-NEXT: [[TMP0:%.*]] = add i32 [[X]], 1 -; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 -; VF2IC2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer -; VF2IC2-NEXT: [[TMP7:%.*]] = mul i32 [[TMP0]], 3 -; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0 +; VF2IC2-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0 ; VF2IC2-NEXT: [[TMP1:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer +; VF2IC2-NEXT: [[TMP7:%.*]] = mul <2 x i32> [[TMP1]], splat (i32 3) ; VF2IC2-NEXT: br label %[[VECTOR_BODY:.*]] ; VF2IC2: [[VECTOR_BODY]]: ; VF2IC2-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VF2IC2-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A]], i32 [[INDEX]] ; VF2IC2-NEXT: [[TMP3:%.*]] = getelementptr i32, ptr [[B]], i32 [[INDEX]] ; VF2IC2-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[TMP2]], i32 2 -; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP2]], align 4 -; VF2IC2-NEXT: store <2 x i32> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP2]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP4]], align 4 ; VF2IC2-NEXT: [[TMP5:%.*]] = getelementptr i32, ptr [[TMP3]], i32 2 -; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP3]], align 4 -; VF2IC2-NEXT: store <2 x i32> [[TMP1]], ptr [[TMP5]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP3]], align 4 +; VF2IC2-NEXT: store <2 x i32> [[TMP7]], ptr [[TMP5]], align 4 ; VF2IC2-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 ; VF2IC2-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 ; VF2IC2-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] diff --git a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll index ebd532aa5032c..567d508ae2ff8 100644 --- a/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll +++ b/llvm/test/Transforms/LoopVectorize/pr55167-fold-tail-live-out.ll @@ -6,14 +6,14 @@ define i32 @test(i32 %a, i1 %c.1, i1 %c.2 ) #0 { ; CHECK-NEXT: bb: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i1> poison, i1 [[C_1:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT1]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x i1> poison, i1 [[C_2:%.*]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT3]], <2 x i1> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT4]], splat (i1 true) -; CHECK-NEXT: [[TMP2:%.*]] = xor <2 x i32> [[BROADCAST_SPLAT5]], splat (i32 1) +; CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[A:%.*]], 1 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0 +; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer ; CHECK-NEXT: [[TMP5:%.*]] = select <2 x i1> [[TMP6]], <2 x i1> [[BROADCAST_SPLAT2]], <2 x i1> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: diff --git a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll index 52555d550f3d9..c94e42ddb718a 100644 --- a/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll +++ b/llvm/test/Transforms/LoopVectorize/predicatedinst-loop-invariant.ll @@ -9,10 +9,10 @@ define void @loop_invariant_store(ptr %p, i64 %a, i8 %b) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[A]], 48 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 52) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] @@ -97,10 +97,10 @@ define void @loop_invariant_srem(ptr %p, i64 %a, i8 %b) { ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i8> [[BROADCAST_SPLATINSERT]], <4 x i8> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[A]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = shl i64 [[A]], 48 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i64> poison, i64 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT1]], <4 x i64> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 48) -; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[TMP0]], splat (i64 52) +; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i64> [[BROADCAST_SPLAT2]], splat (i64 52) ; CHECK-NEXT: [[TMP2:%.*]] = trunc <4 x i64> [[TMP1]] to <4 x i32> ; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i32> ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll index 183462f71d480..b55c563162e13 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -371,8 +371,8 @@ define i32 @cond_rdx_pred(i32 %cond, ptr noalias %a, i64 %N) { ; CHECK-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15 ; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], -16 ; CHECK-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = add i64 [[N]], -1 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[BROADCAST_SPLATINSERT7]], splat (i32 7) +; CHECK-NEXT: [[TMP5:%.*]] = icmp sgt i32 [[COND:%.*]], 7 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i1> poison, i1 [[TMP5]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer diff --git a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll index 498c58d1bfd82..d34b14bf7be71 100644 --- a/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll +++ b/llvm/test/Transforms/LoopVectorize/trunc-shifts.ll @@ -9,10 +9,10 @@ define void @test_pr47927_lshr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = lshr i32 [[F]], 18 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = lshr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -53,10 +53,10 @@ define void @test_shl_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = shl i32 [[F]], 18 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = shl <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] @@ -97,10 +97,10 @@ define void @test_ashr_const_shift_ops(ptr %dst, i32 %f) { ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[F]], i64 0 +; CHECK-NEXT: [[TMP0:%.*]] = ashr i32 [[F]], 18 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0 ; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer -; CHECK-NEXT: [[TMP0:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 18) -; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[TMP0]] to <4 x i8> +; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i32> [[BROADCAST_SPLAT]] to <4 x i8> ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]