Skip to content

Commit

Permalink
[LV] Support widened induction variables in epilogue vectorization.
Browse files Browse the repository at this point in the history
Code generation now uses the start VPValue of induction recipes.

This makes it possible to adjust the start value of the epilogue
vector loop to use the 'resume' value of the main vector loop.

Fixes #59459.

Reviewed By: Ayal

Differential Revision: https://reviews.llvm.org/D92132
  • Loading branch information
fhahn committed Dec 21, 2022
1 parent 84733b0 commit f69ac9a
Show file tree
Hide file tree
Showing 9 changed files with 700 additions and 238 deletions.
70 changes: 47 additions & 23 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -5487,14 +5487,6 @@ bool LoopVectorizationCostModel::isCandidateForEpilogueVectorization(
return false;
}

// Induction variables that are widened require special handling that is
// currently not supported.
if (any_of(Legal->getInductionVars(), [&](auto &Entry) {
return !(this->isScalarAfterVectorization(Entry.first, VF) ||
this->isProfitableToScalarize(Entry.first, VF));
}))
return false;

// Epilogue vectorization code has not been auditted to ensure it handles
// non-latch exits properly. It may be fine, but it needs auditted and
// tested.
Expand Down Expand Up @@ -7764,9 +7756,9 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
EPI.VectorTripCount = getOrCreateVectorTripCount(LoopVectorPreHeader);

// Skip induction resume value creation here because they will be created in
// the second pass. If we created them here, they wouldn't be used anyway,
// because the vplan in the second pass still contains the inductions from the
// original loop.
// the second pass for the scalar loop. The induction resume values for the
// inductions in the epilogue loop are created before executing the plan for
// the epilogue loop.

return {completeLoopSkeleton(), nullptr};
}
Expand Down Expand Up @@ -7897,31 +7889,40 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
DT->changeImmediateDominator(LoopExitBlock,
EPI.EpilogueIterationCountCheck);

// Keep track of bypass blocks, as they feed start values to the induction
// phis in the scalar loop preheader.
// Keep track of bypass blocks, as they feed start values to the induction and
// reduction phis in the scalar loop preheader.
if (EPI.SCEVSafetyCheck)
LoopBypassBlocks.push_back(EPI.SCEVSafetyCheck);
if (EPI.MemSafetyCheck)
LoopBypassBlocks.push_back(EPI.MemSafetyCheck);
LoopBypassBlocks.push_back(EPI.EpilogueIterationCountCheck);

// The vec.epilog.iter.check block may contain Phi nodes from reductions which
// merge control-flow from the latch block and the middle block. Update the
// incoming values here and move the Phi into the preheader.
// The vec.epilog.iter.check block may contain Phi nodes from inductions or
// reductions which merge control-flow from the latch block and the middle
// block. Update the incoming values here and move the Phi into the preheader.
SmallVector<PHINode *, 4> PhisInBlock;
for (PHINode &Phi : VecEpilogueIterationCountCheck->phis())
PhisInBlock.push_back(&Phi);

for (PHINode *Phi : PhisInBlock) {
Phi->moveBefore(LoopVectorPreHeader->getFirstNonPHI());
Phi->replaceIncomingBlockWith(
VecEpilogueIterationCountCheck->getSinglePredecessor(),
VecEpilogueIterationCountCheck);

// If the phi doesn't have an incoming value from the
// EpilogueIterationCountCheck, we are done. Otherwise remove the incoming
// value and also those from other check blocks. This is needed for
// reduction phis only.
if (none_of(Phi->blocks(), [&](BasicBlock *IncB) {
return EPI.EpilogueIterationCountCheck == IncB;
}))
continue;
Phi->removeIncomingValue(EPI.EpilogueIterationCountCheck);
if (EPI.SCEVSafetyCheck)
Phi->removeIncomingValue(EPI.SCEVSafetyCheck);
if (EPI.MemSafetyCheck)
Phi->removeIncomingValue(EPI.MemSafetyCheck);
Phi->moveBefore(LoopVectorPreHeader->getFirstNonPHI());
}

// Generate a resume induction for the vector epilogue and put it in the
Expand Down Expand Up @@ -10488,16 +10489,39 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VPBasicBlock *Header = VectorLoop->getEntryBasicBlock();
Header->setName("vec.epilog.vector.body");

// Ensure that the start values for any VPReductionPHIRecipes are
// updated before vectorising the epilogue loop.
// Ensure that the start values for any VPWidenIntOrFpInductionRecipe,
// VPWidenPointerInductionRecipe and VPReductionPHIRecipes are updated
// before vectorizing the epilogue loop.
for (VPRecipeBase &R : Header->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;

Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
Value *Resume = MainILV.getReductionResumeValue(
ResumeV = MainILV.getReductionResumeValue(
ReductionPhi->getRecurrenceDescriptor());
assert(Resume && "Must have a resume value.");
VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(Resume);
ReductionPhi->setOperand(0, StartVal);
} else {
// Create induction resume values for both widened pointer and
// integer/fp inductions and update the start value of the induction
// recipes to use the resume value.
PHINode *IndPhi = nullptr;
const InductionDescriptor *ID;
if (auto *Ind = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
IndPhi = cast<PHINode>(Ind->getUnderlyingValue());
ID = &Ind->getInductionDescriptor();
} else {
auto *WidenInd = cast<VPWidenIntOrFpInductionRecipe>(&R);
IndPhi = WidenInd->getPHINode();
ID = &WidenInd->getInductionDescriptor();
}

ResumeV = MainILV.createInductionResumeValue(
IndPhi, *ID, {EPI.MainLoopIterationCountCheck});
}
assert(ResumeV && "Must have a resume value");
VPValue *StartVal = BestEpiPlan.getOrAddExternalDef(ResumeV);
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}

LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/Transforms/Vectorize/VPlan.h
Expand Up @@ -1167,6 +1167,9 @@ class VPHeaderPHIRecipe : public VPRecipeBase, public VPValue {
return getNumOperands() == 0 ? nullptr : getOperand(0);
}

/// Update the start value of the recipe.
void setStartValue(VPValue *V) { setOperand(0, V); }

/// Returns the incoming value from the loop backedge.
VPValue *getBackedgeValue() {
return getOperand(1);
Expand Down Expand Up @@ -1209,6 +1212,9 @@ class VPWidenPointerInductionRecipe : public VPHeaderPHIRecipe {
/// Returns true if only scalar values will be generated.
bool onlyScalarsGenerated(ElementCount VF);

/// Returns the induction descriptor for the recipe.
const InductionDescriptor &getInductionDescriptor() const { return IndDesc; }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
Expand Down

0 comments on commit f69ac9a

Please sign in to comment.