diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ca092dcfcb492..60b485df73e86 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -9629,6 +9629,16 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, // vectorizing the epilogue loop. for (VPRecipeBase &R : Header->phis()) { if (auto *IV = dyn_cast(&R)) { + // If we didn't find any PHIs, due to a simplification where all incoming + // values were equal (and necessarily zero), it means that the vector trip + // count is zero. + // TODO: We should not choose VF * UF so the main vector loop is known to + // be dead. + if (L->getLoopPreheader()->phis().empty()) { + EPI.VectorTripCount = ConstantInt::get(IV->getScalarType(), 0); + continue; + } + // When vectorizing the epilogue loop, the canonical induction start // value needs to be changed from zero to the value after the main // vector loop. Find the resume value created during execution of the main @@ -9644,19 +9654,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L, "Must only have a single non-zero incoming value"); EPI.VectorTripCount = Inc; } - // If we didn't find a non-zero vector trip count, all incoming values - // must be zero, which also means the vector trip count is zero. Pick the - // first zero as vector trip count. - // TODO: We should not choose VF * UF so the main vector loop is known to - // be dead. - if (!EPI.VectorTripCount) { - assert( - EPResumeVal->getNumIncomingValues() > 0 && - all_of(EPResumeVal->incoming_values(), - [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) && - "all incoming values must be 0"); - EPI.VectorTripCount = EPResumeVal->getOperand(0); - } + assert(EPI.VectorTripCount && "Must have an epilog vector trip-count"); VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); assert(all_of(IV->users(), [](const VPUser *U) { diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 84f02059743c3..85c7b4b55a1cc 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1225,8 +1225,8 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { } if (auto *Phi = dyn_cast(Def)) { - if (Phi->getNumOperands() == 1) - Phi->replaceAllUsesWith(Phi->getOperand(0)); + if (all_equal(Phi->incoming_values())) + Phi->replaceAllUsesWith(Phi->getIncomingValue(0)); return; } diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll index b83d3af3a0d65..8586e126b75b1 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll @@ -374,10 +374,9 @@ define void @small_trip_count_loop(ptr %arg, ptr %arg2) { ; CHECK: vec.epilog.iter.check: ; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] ; CHECK: vec.epilog.ph: -; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i32 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] ; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] ; CHECK: vec.epilog.vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i32 [[INDEX]] ; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i8>, ptr [[TMP11]], align 1 ; CHECK-NEXT: [[TMP12:%.*]] = add <16 x i8> [[WIDE_LOAD7]], splat (i8 10) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll index efce4bdf712a0..9f766f3000322 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/predicated-costs.ll @@ -268,10 +268,9 @@ define void @test_predicated_load_cast_hint(ptr %dst.1, ptr %dst.2, ptr %src, i8 ; CHECK: [[MIDDLE_BLOCK]]: ; CHECK-NEXT: br label %[[EXIT:.*]] ; CHECK: [[SCALAR_PH]]: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label %[[LOOP:.*]] ; CHECK: [[LOOP]]: -; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ 0, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] ; CHECK-NEXT: [[L:%.*]] = load i8, ptr [[SRC]], align 1 ; CHECK-NEXT: [[L_EXT:%.*]] = zext i8 [[L]] to i64 ; CHECK-NEXT: [[ADD:%.*]] = or i64 [[L_EXT]], 1 diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll index ea2ccb07b388b..7d4487f793116 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/gather-scatter-cost.ll @@ -48,10 +48,9 @@ define void @predicated_uniform_load(ptr %src, i32 %n, ptr %dst, i1 %cond) { ; CHECK: middle.block: ; CHECK-NEXT: br label [[EXIT:%.*]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; CHECK-NEXT: br label [[LOOP:%.*]] ; CHECK: loop: -; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ] ; CHECK-NEXT: br i1 [[COND]], label [[LOOP_THEN:%.*]], label [[LOOP_ELSE:%.*]] ; CHECK: loop.then: ; CHECK-NEXT: br label [[LOOP_LATCH]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll index a165dde0d217e..3797203cf0224 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/riscv-vector-reverse.ll @@ -242,8 +242,6 @@ define void @vector_reverse_i64(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-NEXT: br label %[[FOR_BODY:.*]] ; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP]] @@ -493,8 +491,6 @@ define void @vector_reverse_f32(ptr nocapture noundef writeonly %A, ptr nocaptur ; RV64: [[MIDDLE_BLOCK]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP_LOOPEXIT:.*]] ; RV64: [[SCALAR_PH]]: -; RV64-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[TMP0]], %[[VECTOR_SCEVCHECK]] ], [ [[TMP0]], %[[VECTOR_MEMCHECK]] ] -; RV64-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i32 [ [[N]], %[[VECTOR_SCEVCHECK]] ], [ [[N]], %[[VECTOR_MEMCHECK]] ] ; RV64-NEXT: br label %[[FOR_BODY:.*]] ; RV64: [[FOR_COND_CLEANUP_LOOPEXIT]]: ; RV64-NEXT: br label %[[FOR_COND_CLEANUP]] diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll index 4c84913eea23d..a8b0be4b4b251 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/strided-accesses.ll @@ -684,10 +684,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; NOSTRIDED: middle.block: ; NOSTRIDED-NEXT: br label [[EXIT:%.*]] ; NOSTRIDED: scalar.ph: -; NOSTRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK]] ] ; NOSTRIDED-NEXT: br label [[LOOP:%.*]] ; NOSTRIDED: loop: -; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; NOSTRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; NOSTRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]] ; NOSTRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] ; NOSTRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4 @@ -842,10 +841,9 @@ define void @double_stride_int_scaled(ptr %p, ptr %p2, i64 %stride) { ; STRIDED: middle.block: ; STRIDED-NEXT: br label [[EXIT:%.*]] ; STRIDED: scalar.ph: -; STRIDED-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[VECTOR_MEMCHECK1]] ] ; STRIDED-NEXT: br label [[LOOP:%.*]] ; STRIDED: loop: -; STRIDED-NEXT: [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] +; STRIDED-NEXT: [[I:%.*]] = phi i64 [ 0, [[SCALAR_PH]] ], [ [[NEXTI:%.*]], [[LOOP]] ] ; STRIDED-NEXT: [[OFFSET:%.*]] = mul nuw nsw i64 [[I]], [[STRIDE]] ; STRIDED-NEXT: [[Q0:%.*]] = getelementptr i32, ptr [[P]], i64 [[OFFSET]] ; STRIDED-NEXT: [[X0:%.*]] = load i32, ptr [[Q0]], align 4