Skip to content

Commit

Permalink
Revert "[LV] Skip VFs < iterations remaining for epilogue vectorizati…
Browse files Browse the repository at this point in the history
…on."

This reverts commit 7cc0be0.

The title of the commit is incorrect, revert to fix the commit message.
  • Loading branch information
fhahn committed Jul 7, 2023
1 parent 147c064 commit aee851f
Show file tree
Hide file tree
Showing 5 changed files with 92 additions and 81 deletions.
2 changes: 1 addition & 1 deletion llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ class LoopVectorizationPlanner {
/// for vectorizing the epilogue. Returns VectorizationFactor::Disabled if
/// epilogue vectorization is not supported for the loop.
VectorizationFactor
selectEpilogueVectorizationFactor(const ElementCount MaxVF, unsigned IC);
selectEpilogueVectorizationFactor(const ElementCount MaxVF);

protected:
/// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
Expand Down
23 changes: 2 additions & 21 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5630,7 +5630,7 @@ bool LoopVectorizationCostModel::isEpilogueVectorizationProfitable(
}

VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
const ElementCount MainLoopVF, unsigned IC) {
const ElementCount MainLoopVF) {
VectorizationFactor Result = VectorizationFactor::Disabled();
if (!EnableEpilogueVectorization) {
LLVM_DEBUG(dbgs() << "LEV: Epilogue vectorization is disabled.\n");
Expand Down Expand Up @@ -5686,9 +5686,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
EstimatedRuntimeVF *= *VScale;
}

ScalarEvolution &SE = *PSE.getSE();
Type *TCType = Legal->getWidestInductionType();
const SCEV *RemainingIterations = nullptr;
for (auto &NextVF : ProfitableVFs) {
// Skip candidate VFs without a corresponding VPlan.
if (!hasPlanWithVF(NextVF.Width))
Expand All @@ -5701,22 +5698,6 @@ VectorizationFactor LoopVectorizationPlanner::selectEpilogueVectorizationFactor(
ElementCount::isKnownGE(NextVF.Width, MainLoopVF))
continue;

// If NextVF is greater than the number of remaining iterations, the
// epilogue loop would be dead. Skip such factors.
if (!MainLoopVF.isScalable() && !NextVF.Width.isScalable()) {
// TODO: extend to support scalable VFs.
if (!RemainingIterations) {
const SCEV *TC = createTripCountSCEV(TCType, PSE, OrigLoop);
RemainingIterations = SE.getURemExpr(
TC, SE.getConstant(TCType, MainLoopVF.getKnownMinValue() * IC));
}
if (SE.isKnownPredicate(
CmpInst::ICMP_UGT,
SE.getConstant(TCType, NextVF.Width.getKnownMinValue()),
RemainingIterations))
continue;
}

if (Result.Width.isScalar() || isMoreProfitable(NextVF, Result))
Result = NextVF;
}
Expand Down Expand Up @@ -10489,7 +10470,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {

// Consider vectorizing the epilogue too if it's profitable.
VectorizationFactor EpilogueVF =
LVP.selectEpilogueVectorizationFactor(VF.Width, IC);
LVP.selectEpilogueVectorizationFactor(VF.Width);
if (EpilogueVF.Width.isVector()) {

// The first pass vectorizes the main loop and creates a scalar epilogue
Expand Down
34 changes: 17 additions & 17 deletions llvm/test/Transforms/LoopVectorize/X86/gather_scatter.ll
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ target triple = "x86_64-pc_linux"
; Function Attrs: nounwind uwtable
define void @foo1(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr noalias %index) {
; AVX512-LABEL: @foo1(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = add i64 [[INDEX1]], 0
; AVX512-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], i64 [[TMP0]]
; AVX512-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[TMP1]], i32 0
Expand Down Expand Up @@ -164,11 +164,11 @@ for.end:

define void @foo2(ptr noalias %in, ptr noalias %out, ptr noalias %trigger, ptr noalias %index) #0 {
; AVX512-LABEL: @foo2(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
Expand Down Expand Up @@ -311,11 +311,11 @@ for.end:

define void @foo3(ptr noalias %in, ptr noalias %out, ptr noalias %trigger) {
; AVX512-LABEL: @foo3(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
Expand Down Expand Up @@ -445,11 +445,11 @@ declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <1

define void @foo2_addrspace(ptr addrspace(1) noalias %in, ptr addrspace(1) noalias %out, ptr noalias %trigger, ptr noalias %index) #0 {
; AVX512-LABEL: @foo2_addrspace(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
Expand Down Expand Up @@ -578,11 +578,11 @@ for.end:

define void @foo2_addrspace2(ptr addrspace(1) noalias %in, ptr addrspace(0) noalias %out, ptr noalias %trigger, ptr noalias %index) {
; AVX512-LABEL: @foo2_addrspace2(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
Expand Down Expand Up @@ -711,11 +711,11 @@ for.end:

define void @foo2_addrspace3(ptr addrspace(0) noalias %in, ptr addrspace(1) noalias %out, ptr noalias %trigger, ptr noalias %index) {
; AVX512-LABEL: @foo2_addrspace3(
; AVX512-NEXT: entry:
; AVX512-NEXT: iter.check:
; AVX512-NEXT: br label [[VECTOR_BODY:%.*]]
; AVX512: vector.body:
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[ITER_CHECK:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [ <i64 0, i64 16, i64 32, i64 48, i64 64, i64 80, i64 96, i64 112, i64 128, i64 144, i64 160, i64 176, i64 192, i64 208, i64 224, i64 240>, [[ITER_CHECK]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; AVX512-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, ptr [[TRIGGER:%.*]], <16 x i64> [[VEC_IND]]
; AVX512-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> [[TMP0]], i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> poison)
; AVX512-NEXT: [[TMP1:%.*]] = icmp sgt <16 x i32> [[WIDE_MASKED_GATHER]], zeroinitializer
Expand Down
Loading

0 comments on commit aee851f

Please sign in to comment.