diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index cbfbc29360b0b..826f80acd9294 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -7655,7 +7655,7 @@ createWidenInductionRecipes(VPInstruction *PhiR, } VPHeaderPHIRecipe * -VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) { +VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI) { auto *Phi = cast(VPI->getUnderlyingInstr()); // Check if this is an integer or fp induction. If so, build the recipe that @@ -7666,14 +7666,9 @@ VPRecipeBuilder::tryToOptimizeInductionPHI(VPInstruction *VPI, VFRange &Range) { // Check if this is pointer induction. If so, build the recipe for it. if (auto *II = Legal->getPointerInductionDescriptor(Phi)) { VPValue *Step = vputils::getOrCreateVPValueForSCEVExpr(Plan, II->getStep()); - return new VPWidenPointerInductionRecipe( - Phi, VPI->getOperand(0), Step, &Plan.getVFxUF(), *II, - LoopVectorizationPlanner::getDecisionAndClampRange( - [&](ElementCount VF) { - return CM.isScalarAfterVectorization(Phi, VF); - }, - Range), - VPI->getDebugLoc()); + return new VPWidenPointerInductionRecipe(Phi, VPI->getOperand(0), Step, + &Plan.getVFxUF(), *II, + VPI->getDebugLoc()); } return nullptr; } @@ -8162,7 +8157,7 @@ VPRecipeBase *VPRecipeBuilder::tryToCreateWidenRecipe(VPSingleDefRecipe *R, "Non-header phis should have been handled during predication"); auto *Phi = cast(R->getUnderlyingInstr()); assert(R->getNumOperands() == 2 && "Must have 2 operands for header phis"); - if ((Recipe = tryToOptimizeInductionPHI(PhiR, Range))) + if ((Recipe = tryToOptimizeInductionPHI(PhiR))) return Recipe; VPHeaderPHIRecipe *PhiRecipe = nullptr; diff --git a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h index a7000aff06379..c6019e733fe77 100644 --- a/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h +++ b/llvm/lib/Transforms/Vectorize/VPRecipeBuilder.h @@ -100,8 +100,7 @@ class VPRecipeBuilder { /// Check if an induction recipe should be constructed for \p VPI. If so build /// and return it. If not, return null. - VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI, - VFRange &Range); + VPHeaderPHIRecipe *tryToOptimizeInductionPHI(VPInstruction *VPI); /// Optimize the special case where the operand of \p VPI is a constant /// integer induction variable. diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index ea88eaa42d945..3fd6ec1f24255 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -2211,19 +2211,15 @@ class VPWidenIntOrFpInductionRecipe : public VPWidenInductionRecipe { }; class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { - bool IsScalarAfterVectorization; - public: /// Create a new VPWidenPointerInductionRecipe for \p Phi with start value \p /// Start and the number of elements unrolled \p NumUnrolledElems, typically /// VF*UF. VPWidenPointerInductionRecipe(PHINode *Phi, VPValue *Start, VPValue *Step, VPValue *NumUnrolledElems, - const InductionDescriptor &IndDesc, - bool IsScalarAfterVectorization, DebugLoc DL) + const InductionDescriptor &IndDesc, DebugLoc DL) : VPWidenInductionRecipe(VPDef::VPWidenPointerInductionSC, Phi, Start, - Step, IndDesc, DL), - IsScalarAfterVectorization(IsScalarAfterVectorization) { + Step, IndDesc, DL) { addOperand(NumUnrolledElems); } @@ -2232,8 +2228,7 @@ class VPWidenPointerInductionRecipe : public VPWidenInductionRecipe { VPWidenPointerInductionRecipe *clone() override { return new VPWidenPointerInductionRecipe( cast(getUnderlyingInstr()), getOperand(0), getOperand(1), - getOperand(2), getInductionDescriptor(), IsScalarAfterVectorization, - getDebugLoc()); + getOperand(2), getInductionDescriptor(), getDebugLoc()); } VP_CLASSOF_IMPL(VPDef::VPWidenPointerInductionSC) diff --git a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp index aa85bd435ee9e..5071941eb1413 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp @@ -4310,7 +4310,7 @@ void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, #endif bool VPWidenPointerInductionRecipe::onlyScalarsGenerated(bool IsScalable) { - return IsScalarAfterVectorization && + return vputils::onlyScalarValuesUsed(this) && (!IsScalable || vputils::onlyFirstLaneUsed(this)); } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll index d23e3c29b59e5..28b2d30b03fd6 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-widen-inductions.ll @@ -11,21 +11,14 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK: vector.main.loop.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 10000 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 -; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 3 -; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[PTR_START_1:%.*]], i64 [[TMP0]] -; CHECK-NEXT: [[NEXT_GEP1:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP1]] -; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP]], i32 0 -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x ptr> [[TMP4]], ptr [[NEXT_GEP1]], i32 1 -; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP2]] -; CHECK-NEXT: [[NEXT_GEP3:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP3]] -; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP2]], i32 0 -; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x ptr> [[TMP6]], ptr [[NEXT_GEP3]], i32 1 +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[PTR_START_1]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> +; CHECK-NEXT: [[NEXT_GEP:%.*]] = extractelement <2 x ptr> [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, <2 x ptr> [[TMP5]], <2 x i64> splat (i64 2) ; CHECK-NEXT: [[TMP8:%.*]] = icmp ne <2 x ptr> [[TMP5]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x i1> [[TMP8]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x i1> [[TMP8]], i32 1 @@ -40,25 +33,24 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP]], align 1 ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 4 ; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10000 ; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: middle.block: ; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END4:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3:![0-9]+]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START_1]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 10000 ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: ; CHECK-NEXT: [[INDEX6:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT9:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP17:%.*]] = add i64 [[INDEX6]], 0 -; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[INDEX6]], 1 -; CHECK-NEXT: [[NEXT_GEP7:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP17]] -; CHECK-NEXT: [[NEXT_GEP8:%.*]] = getelementptr i8, ptr [[PTR_START_1]], i64 [[TMP18]] -; CHECK-NEXT: [[TMP19:%.*]] = insertelement <2 x ptr> poison, ptr [[NEXT_GEP7]], i32 0 -; CHECK-NEXT: [[TMP20:%.*]] = insertelement <2 x ptr> [[TMP19]], ptr [[NEXT_GEP8]], i32 1 +; CHECK-NEXT: [[POINTER_PHI2:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], [[VEC_EPILOG_PH]] ], [ [[PTR_IND5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[POINTER_PHI2]], <2 x i64> +; CHECK-NEXT: [[NEXT_GEP7:%.*]] = extractelement <2 x ptr> [[TMP20]], i32 0 ; CHECK-NEXT: [[TMP21:%.*]] = icmp ne <2 x ptr> [[TMP20]], zeroinitializer ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x i1> [[TMP21]], i32 0 ; CHECK-NEXT: [[TMP23:%.*]] = extractelement <2 x i1> [[TMP21]], i32 1 @@ -66,6 +58,7 @@ define void @test_widen_ptr_induction(ptr %ptr.start.1) { ; CHECK-NEXT: tail call void @llvm.assume(i1 [[TMP23]]) ; CHECK-NEXT: store <2 x i8> zeroinitializer, ptr [[NEXT_GEP7]], align 1 ; CHECK-NEXT: [[INDEX_NEXT9]] = add nuw i64 [[INDEX6]], 2 +; CHECK-NEXT: [[PTR_IND5]] = getelementptr i8, ptr [[POINTER_PHI2]], i64 2 ; CHECK-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT9]], 10000 ; CHECK-NEXT: br i1 [[TMP25]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], {{!llvm.loop ![0-9]+}} ; CHECK: vec.epilog.middle.block: @@ -136,7 +129,7 @@ define void @test_widen_induction(ptr %A, i64 %N) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 @@ -221,7 +214,7 @@ define void @test_widen_induction_variable_start(ptr %A, i64 %N, i64 %start) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[IND_END5:%.*]] = add i64 [[START]], [[N_VEC]] ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] @@ -304,7 +297,7 @@ define void @test_widen_induction_step_2(ptr %A, i64 %N, i32 %step) { ; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 2 -; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[IND_END4]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[N]], 2 @@ -416,7 +409,7 @@ define void @test_widen_truncated_induction(ptr %A) { ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] ; CHECK: vec.epilog.iter.check: -; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF3]] ; CHECK: vec.epilog.ph: ; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 10000, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[VEC_EPILOG_RESUME_VAL]] to i8 diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll index f223786a07cdf..456c03824106a 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll @@ -112,15 +112,11 @@ define void @pointer_induction(ptr noalias %start, i64 %N) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX2:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP12:%.*]] = call @llvm.stepvector.nxv2i64() -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], [[TMP12]] -; CHECK-NEXT: [[TMP15:%.*]] = extractelement [[VECTOR_GEP]], i32 0 +; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[START]], i64 [[INDEX2]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP15]], align 1 ; CHECK-NEXT: [[TMP17:%.*]] = add [[WIDE_LOAD]], splat (i8 1) ; CHECK-NEXT: store [[TMP17]], ptr [[TMP15]], align 1 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX2]], [[TMP4]] -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP4]] ; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll index 3a07bcca523ce..13da121fe2dc2 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/interleave-opaque-pointers.ll @@ -25,17 +25,14 @@ define void @test_pr55375_interleave_opaque_ptr(ptr %start, ptr %end) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 16 -; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 0 -; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 16 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x ptr> poison, ptr [[TMP7]], i32 0 -; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x ptr> [[TMP9]], ptr [[TMP8]], i32 1 +; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[START]], [[VECTOR_PH]] ], [ [[PTR_IND:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <2 x i64> +; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x ptr> [[TMP10]], i32 0 ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x ptr> zeroinitializer, <2 x ptr> [[TMP10]], <4 x i32> ; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x ptr> [[TMP12]], <4 x ptr> poison, <4 x i32> ; CHECK-NEXT: store <4 x ptr> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2 +; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 32 ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: diff --git a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll index cebf90ae9f8ce..94392f856c386 100644 --- a/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll +++ b/llvm/test/Transforms/LoopVectorize/pointer-induction-index-width-smaller-than-iv-width.ll @@ -15,16 +15,20 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[POINTER_PHI:%.*]] = phi ptr [ [[SRC]], %[[VECTOR_PH]] ], [ [[PTR_IND:%.*]], %[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VECTOR_GEP:%.*]] = getelementptr i8, ptr [[POINTER_PHI]], <4 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 0 -; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 1 -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 2 -; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x ptr> [[VECTOR_GEP]], i32 3 ; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 ; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 1 ; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 2 ; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 3 +; CHECK-NEXT: [[DOTCAST:%.*]] = trunc i64 [[INDEX]] to i32 +; CHECK-NEXT: [[OFFSET_IDX:%.*]] = mul i32 [[DOTCAST]], 8 +; CHECK-NEXT: [[TMP11:%.*]] = add i32 [[OFFSET_IDX]], 0 +; CHECK-NEXT: [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], 8 +; CHECK-NEXT: [[TMP16:%.*]] = add i32 [[OFFSET_IDX]], 16 +; CHECK-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 24 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP11]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP6]] +; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP16]] +; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[SRC]], i32 [[TMP17]] ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i64>, ptr [[TMP5]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[DST_0]], i64 [[TMP2]] @@ -36,7 +40,6 @@ define void @wide_ptr_induction_index_width_smaller_than_iv_width(ptr noalias %s ; CHECK-NEXT: store ptr [[TMP13]], ptr [[TMP9]], align 8 ; CHECK-NEXT: store ptr [[TMP14]], ptr [[TMP10]], align 8 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 -; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i32 32 ; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100 ; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: