diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 2cee36003a39e..212da55439131 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -8812,6 +8812,29 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( DenseMap IVEndValues; addScalarResumePhis(RecipeBuilder, *Plan, IVEndValues); + // Optimize iv step users in exit blocks by replacing extracts with scalar end + // values. + for (const auto &[Phi, ID] : Legal->getInductionVars()) { + auto *IVInc = cast( + Phi->getIncomingValueForBlock(OrigLoop->getLoopLatch())); + if (isa(IVInc)) + continue; + VPValue *VPIVInc = RecipeBuilder.getRecipe(IVInc)->getVPSingleValue(); + for (auto *U : VPIVInc->users()) { + using namespace llvm::VPlanPatternMatch; + VPValue *Mask; + // Replace ExtractLastElement with precomputed IV end value. + if (match(U, m_VPInstruction( + m_Specific(VPIVInc)))) { + VPWidenInductionRecipe *WideIV = + cast(RecipeBuilder.getRecipe(Phi)); + cast(U)->replaceAllUsesWith(IVEndValues[WideIV]); + } + // TODO: Replace extract-lane with first-active-lane using scalar + // computation. + } + } + // --------------------------------------------------------------------------- // Transform initial VPlan: Apply previously taken decisions, in order, to // bring the VPlan to its final state. diff --git a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll index fea027d6803c6..4b4103e9806b9 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/induction-step.ll @@ -111,7 +111,6 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: -; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i16> [[TMP10]], i32 3 ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: @@ -128,7 +127,7 @@ define i16 @wide_sub_induction_step_live_in(ptr %dst, i64 %N, i16 %off) { ; CHECK-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] ; CHECK-NEXT: br i1 [[EC]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP5:![0-9]+]] ; CHECK: exit: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i16 [ [[SUB]], [[LOOP]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i16 [[SUB_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll index 97d33858bd830..cb7cee6463e21 100644 --- a/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll +++ b/llvm/test/Transforms/LoopVectorize/iv_outside_user.ll @@ -1090,13 +1090,10 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC: [[VECTOR_PH]]: ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: -; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 -; VEC-NEXT: [[TMP6:%.*]] = add i32 [[INDEX]], 1 +; VEC-NEXT: [[TMP0:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP0]] ; VEC-NEXT: store <2 x i16> zeroinitializer, ptr [[TMP1]], align 2 -; VEC-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP6]] -; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 +; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[TMP0]], 2 ; VEC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; VEC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: @@ -1112,7 +1109,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i32 [[RES]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification( @@ -1130,7 +1127,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP1]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1147,7 +1143,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification(ptr %dst) { ; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[RES]] ; entry: @@ -1176,12 +1172,9 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[STEP_2:%.*]] = add nsw i32 [[STEP_1]], 1 ; VEC-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; VEC: [[VECTOR_PH]]: -; VEC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP_2]], i64 0 -; VEC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer ; VEC-NEXT: br label %[[VECTOR_BODY:.*]] ; VEC: [[VECTOR_BODY]]: ; VEC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; VEC-NEXT: [[VEC_IND:%.*]] = phi <2 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] ; VEC-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[INDEX]], 2 ; VEC-NEXT: [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0 ; VEC-NEXT: [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2 @@ -1189,14 +1182,10 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; VEC-NEXT: store i16 0, ptr [[TMP2]], align 2 ; VEC-NEXT: store i16 0, ptr [[TMP3]], align 2 -; VEC-NEXT: [[TMP4:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 1) -; VEC-NEXT: [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]] ; VEC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 -; VEC-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], splat (i32 4) ; VEC-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; VEC-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; VEC: [[MIDDLE_BLOCK]]: -; VEC-NEXT: [[TMP7:%.*]] = extractelement <2 x i32> [[TMP5]], i32 1 ; VEC-NEXT: br label %[[E_EXIT:.*]] ; VEC: [[SCALAR_PH]]: ; VEC-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, %[[ENTRY]] ] @@ -1210,7 +1199,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; VEC-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; VEC-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; VEC: [[E_EXIT]]: -; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP7]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i32 [[RES]] ; ; INTERLEAVE-LABEL: define i32 @test_iv_uniform_with_outside_use_scev_simplification_2( @@ -1229,8 +1218,6 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP3:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[TMP1]] ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP2]], align 2 ; INTERLEAVE-NEXT: store i16 0, ptr [[TMP3]], align 2 -; INTERLEAVE-NEXT: [[TMP4:%.*]] = add i32 [[TMP1]], 1 -; INTERLEAVE-NEXT: [[TMP5:%.*]] = add i32 [[STEP_2]], [[TMP4]] ; INTERLEAVE-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2 ; INTERLEAVE-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 4 ; INTERLEAVE-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} @@ -1248,7 +1235,7 @@ define i32 @test_iv_uniform_with_outside_use_scev_simplification_2(ptr %dst) { ; INTERLEAVE-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 8 ; INTERLEAVE-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[E_EXIT]]: -; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP5]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ 8, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i32 [[RES]] ; entry: @@ -1372,8 +1359,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[TMP0]], i32 0 ; VEC-NEXT: [[TMP2:%.*]] = getelementptr i16, ptr [[TMP1]], i32 -1 ; VEC-NEXT: store <2 x i16> splat (i16 1), ptr [[TMP2]], align 2 -; VEC-NEXT: [[TMP5:%.*]] = add i64 1, -1 -; VEC-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 1 ; VEC-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; VEC: [[MIDDLE_BLOCK]]: ; VEC-NEXT: br label %[[EXIT:.*]] @@ -1391,7 +1376,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; VEC-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 ; VEC-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} ; VEC: [[EXIT]]: -; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP6]], %[[MIDDLE_BLOCK]] ] +; VEC-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ] ; VEC-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] ; ; INTERLEAVE-LABEL: define i64 @test_iv_increment_incremented( @@ -1405,8 +1390,6 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE-NEXT: [[TMP1:%.*]] = getelementptr i16, ptr [[DST]], i64 2 ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP0]], align 2 ; INTERLEAVE-NEXT: store i16 1, ptr [[TMP1]], align 2 -; INTERLEAVE-NEXT: [[TMP2:%.*]] = add i64 1, -1 -; INTERLEAVE-NEXT: [[TMP3:%.*]] = add i64 [[TMP2]], 1 ; INTERLEAVE-NEXT: br label %[[MIDDLE_BLOCK:.*]] ; INTERLEAVE: [[MIDDLE_BLOCK]]: ; INTERLEAVE-NEXT: br label %[[EXIT:.*]] @@ -1424,7 +1407,7 @@ define i64 @test_iv_increment_incremented(ptr %dst) { ; INTERLEAVE-NEXT: [[IV_1_NEXT]] = add i64 [[IV_2_NEXT]], 1 ; INTERLEAVE-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], {{!llvm.loop ![0-9]+}} ; INTERLEAVE: [[EXIT]]: -; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ [[TMP3]], %[[MIDDLE_BLOCK]] ] +; INTERLEAVE-NEXT: [[IV_1_NEXT_LCSSA:%.*]] = phi i64 [ [[IV_1_NEXT]], %[[LOOP]] ], [ 1, %[[MIDDLE_BLOCK]] ] ; INTERLEAVE-NEXT: ret i64 [[IV_1_NEXT_LCSSA]] ; entry: diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index cdb9e9952586c..5a6d29dca1861 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -618,18 +618,14 @@ define i32 @pr45526_pgso() !prof !14 { ; NPGSO-NEXT: br label %[[VECTOR_BODY:.*]] ; NPGSO: [[VECTOR_BODY]]: ; NPGSO-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NPGSO-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; NPGSO-NEXT: [[TMP0:%.*]] = add nuw nsw <4 x i32> [[VEC_IND]], splat (i32 1) ; NPGSO-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 -; NPGSO-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4) ; NPGSO-NEXT: [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 508 ; NPGSO-NEXT: br i1 [[TMP1]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]] ; NPGSO: [[MIDDLE_BLOCK]]: -; NPGSO-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3 ; NPGSO-NEXT: br label %[[SCALAR_PH]] ; NPGSO: [[SCALAR_PH]]: ; NPGSO-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 508, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] -; NPGSO-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 5, %[[ENTRY]] ] +; NPGSO-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 508, %[[MIDDLE_BLOCK]] ], [ 5, %[[ENTRY]] ] ; NPGSO-NEXT: br label %[[LOOP:.*]] ; NPGSO: [[LOOP]]: ; NPGSO-NEXT: [[PIV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[PIVPLUS1:%.*]], %[[LOOP]] ] diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll index d2c53f47a6670..4575e40780263 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction.ll @@ -640,22 +640,8 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED-NEXT: br label [[VECTOR_BODY:%.*]] ; STRIDED: vector.body: ; STRIDED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; STRIDED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], [[TMP1]] -; STRIDED-NEXT: [[TMP4:%.*]] = mul i64 0, [[TMP1]] -; STRIDED-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], [[TMP4]] -; STRIDED-NEXT: [[TMP6:%.*]] = mul i64 1, [[TMP1]] -; STRIDED-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], [[TMP6]] -; STRIDED-NEXT: [[TMP8:%.*]] = mul i64 2, [[TMP1]] -; STRIDED-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], [[TMP8]] -; STRIDED-NEXT: [[TMP10:%.*]] = mul i64 3, [[TMP1]] -; STRIDED-NEXT: [[TMP11:%.*]] = add i64 [[OFFSET_IDX]], [[TMP10]] -; STRIDED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr null, i64 [[TMP5]] -; STRIDED-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr null, i64 [[TMP7]] -; STRIDED-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr null, i64 [[TMP9]] -; STRIDED-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr null, i64 [[TMP11]] ; STRIDED-NEXT: [[TMP12:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[INDEX]] ; STRIDED-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP12]], align 8 -; STRIDED-NEXT: [[TMP14:%.*]] = getelementptr i64, ptr [[NEXT_GEP3]], i64 [[STRIDE]] ; STRIDED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 ; STRIDED-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; STRIDED-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]] @@ -678,7 +664,7 @@ define i64 @ivopt_widen_ptr_indvar_3(ptr noalias %a, i64 %stride, i64 %n) { ; STRIDED-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]] ; STRIDED: exit: ; STRIDED-NEXT: [[DOTLCSSA:%.*]] = phi i64 [ [[TMP17]], [[FOR_BODY]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ] -; STRIDED-NEXT: [[PTR_IV_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_IV_NEXT]], [[FOR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ] +; STRIDED-NEXT: [[PTR_IV_NEXT_LCSSA:%.*]] = phi ptr [ [[PTR_IV_NEXT]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ] ; STRIDED-NEXT: [[CAST_PTR:%.*]] = ptrtoint ptr [[PTR_IV_NEXT_LCSSA]] to i64 ; STRIDED-NEXT: [[RESULT:%.*]] = add i64 [[CAST_PTR]], [[DOTLCSSA]] ; STRIDED-NEXT: ret i64 [[RESULT]] diff --git a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll index e251c2a853b9a..f09fddb4c1b2a 100644 --- a/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll +++ b/llvm/test/Transforms/LoopVectorize/scalable-iv-outside-user.ll @@ -13,38 +13,22 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] ; CHECK: [[VECTOR_PH]]: ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i32 [[TMP4]], 2 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TMP5]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer -; CHECK-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 2 +; CHECK-NEXT: [[TMP6:%.*]] = mul nuw i32 [[TMP4]], 4 ; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 2000, [[TMP6]] ; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 2000, [[N_MOD_VF]] -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[STEP_2]], i64 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.stepvector.nxv2i32() -; CHECK-NEXT: [[TMP8:%.*]] = mul [[TMP7]], splat (i32 1) -; CHECK-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP8]] ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i16, ptr [[DST]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP13:%.*]] = mul nuw i64 [[TMP12]], 2 ; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i16, ptr [[TMP10]], i64 [[TMP13]] ; CHECK-NEXT: store zeroinitializer, ptr [[TMP10]], align 2 ; CHECK-NEXT: store zeroinitializer, ptr [[TMP14]], align 2 -; CHECK-NEXT: [[TMP15:%.*]] = add [[BROADCAST_SPLAT]], [[STEP_ADD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[BROADCAST_SPLAT2]] ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: -; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vscale.i32() -; CHECK-NEXT: [[TMP18:%.*]] = mul nuw i32 [[TMP17]], 2 -; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP18]], 1 -; CHECK-NEXT: [[TMP20:%.*]] = extractelement [[TMP15]], i32 [[TMP19]] ; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 2000, [[N_VEC]] ; CHECK-NEXT: br i1 [[CMP_N]], label %[[E_EXIT:.*]], label %[[SCALAR_PH]] ; CHECK: [[SCALAR_PH]]: @@ -58,7 +42,7 @@ define i32 @iv_live_out_wide(ptr %dst) { ; CHECK-NEXT: [[CMP_I:%.*]] = icmp slt i32 [[IV_NEXT]], 2000 ; CHECK-NEXT: br i1 [[CMP_I]], label %[[LOOP]], label %[[E_EXIT]], !llvm.loop [[LOOP3:![0-9]+]] ; CHECK: [[E_EXIT]]: -; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[TMP20]], %[[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i32 [ [[IV_NEXT]], %[[LOOP]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RES]] ; entry: