-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Make canonical IV part of the region #156262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[VPlan] Make canonical IV part of the region #156262
Conversation
f58f46b
to
bc336d2
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
bc336d2
to
5262ecd
Compare
✅ With the latest revision this PR passed the undef deprecator. |
e3ad483
to
f5ff37c
Compare
Instead of re-setting the start value of the canonical IV when vectorizing the epilogue we can emit an Add VPInstruction to provide canonical IV value, adjusted by the resume value from the main loop. This is in preparation to make the canonical IV a VPValue defined by loop regions. It ensures that the canonical IV always starts at 0.
f5ff37c
to
49676de
Compare
@llvm/pr-subscribers-vectorizers Author: Florian Hahn (fhahn) ChangesThe canonical IV is directly tied to a loop region. To directly ensure there's a single, unique canonical IV, directly define it by the region. Depends on #161589 (include in the PR) Patch is 179.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156262.diff 45 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fa5be21dc2b8a..8e4d70aaf7538 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPScalarIVStepsSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
- case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPVectorEndPointerSC:
case VPDef::VPExpandSCEVSC:
@@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+ Plan->getCanonicalIVInfo().HasNUW = false;
}
// ---------------------------------------------------------------------------
@@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
@@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
@@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
for (VPRecipeBase &R :
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
@@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPPhi *ResumePhi = nullptr;
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+ Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
ResumePhi = ScalarPHBuilder.createScalarPhi(
- {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
+ {VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
"vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
@@ -9523,7 +9519,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
- VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
// When vectorizing the epilogue loop, the canonical induction start
// value needs to be changed from zero to the value after the main
// vector loop. Find the resume value created during execution of the main
@@ -9552,6 +9548,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+ VPValue *IV = VectorLoop->getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
@@ -9562,11 +9559,14 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+ IV->replaceAllUsesWith(Add);
+ Add->setOperand(0, IV);
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
- for (VPRecipeBase &R : drop_begin(Header->phis())) {
+ for (VPRecipeBase &R : Header->phis()) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
@@ -9596,12 +9596,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
- // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
- // an adjustment to the resume value. The resume value is adjusted to
- // the sentinel value when the final value from the main vector loop
- // equals the start value. This ensures correctness when the start value
- // might not be less than the minimum value of a monotonically
- // increasing induction variable.
+ // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions
+ // requires an adjustment to the resume value. The resume value is
+ // adjusted to the sentinel value when the final value from the main
+ // vector loop equals the start value. This ensures correctness when
+ // the start value might not be less than the minimum value of a
+ // monotonically increasing induction variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02eb6375aac41..a36656ede623e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -768,10 +768,17 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ auto *NewRegion =
+ getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
+
+ if (CanIVInfo.CanIV) {
+ NewRegion->CanIVInfo.CanIV = new VPRegionValue();
+ NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW;
+ NewRegion->CanIVInfo.DL = CanIVInfo.DL;
+ }
+
return NewRegion;
}
@@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
auto NewIndent = Indent + " ";
+ if (auto *CanIV = getCanonicalIV()) {
+ O << '\n';
+ CanIV->print(O, SlotTracker);
+ O << '\n';
+ }
for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
O << '\n';
BlockBase->print(O, NewIndent, SlotTracker);
@@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
void VPRegionBlock::dissolveToCFGLoop() {
auto *Header = cast<VPBasicBlock>(getEntry());
- if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
- assert(this == getPlan()->getVectorLoopRegion() &&
- "Canonical IV must be in the entry of the top-level loop region");
- auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
- {CanIV->getStartValue(), CanIV->getBackedgeValue()},
- CanIV->getDebugLoc(), "index");
+ auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
+ VPValue *CanIV = getCanonicalIV();
+ if (CanIV && CanIV->getNumUsers() > 0) {
+ auto *ExitingTerm = ExitingLatch->getTerminator();
+ VPInstruction *CanIVInc = nullptr;
+ // Check if there's a canonical IV increment via an existing terminator.
+ if (match(ExitingTerm,
+ m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
+ assert(match(CanIVInc,
+ m_Add(m_CombineOr(m_Specific(CanIV),
+ m_Add(m_Specific(CanIV), m_LiveIn())),
+ m_VPValue())) &&
+ "invalid existing IV increment");
+ }
+ VPlan &Plan = *getPlan();
+ if (!CanIVInc) {
+ CanIVInc = VPBuilder(ExitingTerm)
+ .createOverflowingOp(
+ Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next");
+ }
+ Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV);
+ auto *ScalarR =
+ VPBuilder(Header, Header->begin())
+ .createScalarPhi(
+ {Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc},
+ CanIVInfo.DL, "index");
CanIV->replaceAllUsesWith(ScalarR);
- CanIV->eraseFromParent();
}
VPBlockBase *Preheader = getSinglePredecessor();
- auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPBlockBase *Middle = getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, this);
VPBlockUtils::disconnectBlocks(this, Middle);
@@ -916,7 +947,10 @@ VPlan::~VPlan() {
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
R.setOperand(I, &DummyValue);
}
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
+ CanIV->replaceAllUsesWith(&DummyValue);
}
+
delete VPB;
}
for (VPValue *VPV : getLiveIns())
@@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
+ if (auto *LoopRegion = getVectorLoopRegion()) {
+ Old2NewVPValues[LoopRegion->getCanonicalIV()] =
+ NewPlan->getVectorLoopRegion()->getCanonicalIV();
+ }
+
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Initialize remaining fields of cloned VPlan.
@@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
/// Returns true if there is a vector loop region and \p VPV is defined in a
/// loop region.
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+ if (isa<VPRegionValue>(VPV))
+ return true;
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion());
@@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
- assignNames(VPBB);
+ for (const VPBlockBase *VPB : RPOT) {
+ if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+ assignNames(VPBB);
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
+ assignName(CanIV);
+ }
}
void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c167dd7f65fac..926bb88995348 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -551,7 +551,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
- case VPRecipeBase::VPCanonicalIVPHISC:
case VPRecipeBase::VPActiveLaneMaskPHISC:
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
case VPRecipeBase::VPWidenPHISC:
@@ -1957,12 +1956,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
-/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
-/// starting at a specified value (zero for the main vector loop, the resume
-/// value for the epilogue vector loop) and stepping by 1. The induction
-/// controls exiting of the vector loop by comparing against the vector trip
-/// count. Produces a single scalar PHI for the induction value per
-/// iteration.
/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
/// floating point inductions with arbitrary start and step values. Produces
/// a vector PHI per-part.
@@ -3435,63 +3428,6 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *getSCEV() const { return Expr; }
};
-/// Canonical scalar induction phi of the vector loop. Starting at the specified
-/// start value (either 0 or the resume value when vectorizing the epilogue
-/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
-/// canonical induction variable.
-class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
-public:
- VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
-
- ~VPCanonicalIVPHIRecipe() override = default;
-
- VPCanonicalIVPHIRecipe *clone() override {
- auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
- R->addOperand(getBackedgeValue());
- return R;
- }
-
- VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
-
- void execute(VPTransformState &State) override {
- llvm_unreachable("cannot execute this recipe, should be replaced by a "
- "scalar phi recipe");
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Returns the scalar type of the induction.
- Type *getScalarType() const {
- return getStartValue()->getLiveInIRValue()->getType();
- }
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Return the cost of this VPCanonicalIVPHIRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // For now, match the behavior of the legacy cost model.
- return 0;
- }
-};
-
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
@@ -3570,14 +3506,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
public VPUnrollPartAccessor<1> {
public:
- VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
+ VPWidenCanonicalIVRecipe(VPValue *CanonicalIV)
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
~VPWidenCanonicalIVRecipe() override = default;
VPWidenCanonicalIVRecipe *clone() override {
- return new VPWidenCanonicalIVRecipe(
- cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+ return new VPWidenCanonicalIVRecipe(getOperand(0));
}
VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
@@ -3616,8 +3551,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
- VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
- const Twine &Name = "")
+ VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "")
: VPDerivedIVRecipe(
IndDesc.getKind(),
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
@@ -3963,6 +3897,23 @@ class VPIRBasicBlock : public VPBasicBlock {
BasicBlock *getIRBasicBlock() const { return IRBB; }
};
+/// Track information about the canonical IV value of a region.
+struct VPCanonicalIVInfo {
+ VPRegionValue *CanIV = nullptr;
+ bool HasNUW = true;
+ DebugLoc DL = DebugLoc::getUnknown();
+
+ VPCanonicalIVInfo(VPRegionValue *CanIV, bool HasNUW, DebugLoc DL)
+ : CanIV(CanIV), HasNUW(HasNUW), DL(DL) {}
+
+ VPCanonicalIVInfo() {}
+
+ ~VPCanonicalIVInfo() {
+ if (CanIV)
+ delete CanIV;
+ }
+};
+
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
@@ -3981,23 +3932,35 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// VPRegionBlock.
VPBlockBase *Exiting;
- /// An indicator whether this region is to generate multiple replicated
- /// instances of output IR corresponding to its VPBlockBases.
- bool IsReplicator;
+ /// Canonical IV of the loop region. If CanIV is nullptr, the region is a
+ /// replicating region.
+ VPCanonicalIVInfo CanIVInfo;
/// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "", bool IsReplicator = false)
+ const std::string &Name = "")
+ : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
+ CanIVInfo() {
+ assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
+ assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
+ Entry->setParent(this);
+ Exiting->setParent(this);
+ }
+
+ VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const VPCanonicalIVInfo &CanIVInfo,
+ const std::string &Name = "")
: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
- IsReplicator(IsReplicator) {
+ CanIVInfo(CanIVInfo) {
assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
Entry->setParent(this);
Exiting->setParent(this);
}
- VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
+
+ VPRegionBlock(DebugLoc DL, const std::string &Name = "")
: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
- IsReplicator(IsReplicator) {}
+ CanIVInfo(new VPRegionValue(), true, DL) {}
public:
~VPRegionBlock() override {}
@@ -4039,7 +4002,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
- bool isReplicator() const { return IsReplicator; }
+ bool isReplicator() const { return !getCanonicalIV(); }
/// The method which generates the output IR instructions that correspond to
/// this VPRegionBlock, thereby "executing" the VPlan.
@@ -4067,6 +4030,13 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// Remove the current region from its VPlan, connecting its predecessor to
/// its entry, and its exiting block to its successor.
void dissolveToCFGLoop();
+
+ /// Return the canonical induction variable of the region, null for
+ /// replicating regions.
+ VPValue *getCanonicalIV() { return CanIVInfo.CanIV; }
+ const VPValue *getCanonicalIV() const { return CanIVInfo.CanIV; }
+
+ VPCanonicalIVInfo &getCanonicalIVInfo() { return CanIVInfo; }
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4378,14 +4348,10 @@ class VPlan {
...
[truncated]
|
@llvm/pr-subscribers-llvm-transforms Author: Florian Hahn (fhahn) ChangesThe canonical IV is directly tied to a loop region. To directly ensure there's a single, unique canonical IV, directly define it by the region. Depends on #161589 (include in the PR) Patch is 179.72 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156262.diff 45 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index fa5be21dc2b8a..8e4d70aaf7538 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPScalarIVStepsSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
- case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPVectorEndPointerSC:
case VPDef::VPExpandSCEVSC:
@@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+ Plan->getCanonicalIVInfo().HasNUW = false;
}
// ---------------------------------------------------------------------------
@@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
@@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
@@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
for (VPRecipeBase &R :
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
@@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPPhi *ResumePhi = nullptr;
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
+ Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
ResumePhi = ScalarPHBuilder.createScalarPhi(
- {VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
+ {VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
"vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
@@ -9523,7 +9519,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
- VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();
+
// When vectorizing the epilogue loop, the canonical induction start
// value needs to be changed from zero to the value after the main
// vector loop. Find the resume value created during execution of the main
@@ -9552,6 +9548,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+ VPValue *IV = VectorLoop->getCanonicalIV();
assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
@@ -9562,11 +9559,14 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ VPBuilder Builder(Header, Header->getFirstNonPhi());
+ VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
+ IV->replaceAllUsesWith(Add);
+ Add->setOperand(0, IV);
DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
- for (VPRecipeBase &R : drop_begin(Header->phis())) {
+ for (VPRecipeBase &R : Header->phis()) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
@@ -9596,12 +9596,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);
- // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
- // an adjustment to the resume value. The resume value is adjusted to
- // the sentinel value when the final value from the main vector loop
- // equals the start value. This ensures correctness when the start value
- // might not be less than the minimum value of a monotonically
- // increasing induction variable.
+ // VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions
+ // requires an adjustment to the resume value. The resume value is
+ // adjusted to the sentinel value when the final value from the main
+ // vector loop equals the start value. This ensures correctness when
+ // the start value might not be less than the minimum value of a
+ // monotonically increasing induction variable.
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 02eb6375aac41..a36656ede623e 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -768,10 +768,17 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ auto *NewRegion =
+ getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
+
+ if (CanIVInfo.CanIV) {
+ NewRegion->CanIVInfo.CanIV = new VPRegionValue();
+ NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW;
+ NewRegion->CanIVInfo.DL = CanIVInfo.DL;
+ }
+
return NewRegion;
}
@@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
auto NewIndent = Indent + " ";
+ if (auto *CanIV = getCanonicalIV()) {
+ O << '\n';
+ CanIV->print(O, SlotTracker);
+ O << '\n';
+ }
for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
O << '\n';
BlockBase->print(O, NewIndent, SlotTracker);
@@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
void VPRegionBlock::dissolveToCFGLoop() {
auto *Header = cast<VPBasicBlock>(getEntry());
- if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
- assert(this == getPlan()->getVectorLoopRegion() &&
- "Canonical IV must be in the entry of the top-level loop region");
- auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
- {CanIV->getStartValue(), CanIV->getBackedgeValue()},
- CanIV->getDebugLoc(), "index");
+ auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
+ VPValue *CanIV = getCanonicalIV();
+ if (CanIV && CanIV->getNumUsers() > 0) {
+ auto *ExitingTerm = ExitingLatch->getTerminator();
+ VPInstruction *CanIVInc = nullptr;
+ // Check if there's a canonical IV increment via an existing terminator.
+ if (match(ExitingTerm,
+ m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
+ assert(match(CanIVInc,
+ m_Add(m_CombineOr(m_Specific(CanIV),
+ m_Add(m_Specific(CanIV), m_LiveIn())),
+ m_VPValue())) &&
+ "invalid existing IV increment");
+ }
+ VPlan &Plan = *getPlan();
+ if (!CanIVInc) {
+ CanIVInc = VPBuilder(ExitingTerm)
+ .createOverflowingOp(
+ Instruction::Add, {CanIV, &Plan.getVFxUF()},
+ {CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next");
+ }
+ Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV);
+ auto *ScalarR =
+ VPBuilder(Header, Header->begin())
+ .createScalarPhi(
+ {Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc},
+ CanIVInfo.DL, "index");
CanIV->replaceAllUsesWith(ScalarR);
- CanIV->eraseFromParent();
}
VPBlockBase *Preheader = getSinglePredecessor();
- auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPBlockBase *Middle = getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, this);
VPBlockUtils::disconnectBlocks(this, Middle);
@@ -916,7 +947,10 @@ VPlan::~VPlan() {
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
R.setOperand(I, &DummyValue);
}
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
+ CanIV->replaceAllUsesWith(&DummyValue);
}
+
delete VPB;
}
for (VPValue *VPV : getLiveIns())
@@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
+ if (auto *LoopRegion = getVectorLoopRegion()) {
+ Old2NewVPValues[LoopRegion->getCanonicalIV()] =
+ NewPlan->getVectorLoopRegion()->getCanonicalIV();
+ }
+
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Initialize remaining fields of cloned VPlan.
@@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
/// Returns true if there is a vector loop region and \p VPV is defined in a
/// loop region.
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+ if (isa<VPRegionValue>(VPV))
+ return true;
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion());
@@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
- assignNames(VPBB);
+ for (const VPBlockBase *VPB : RPOT) {
+ if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+ assignNames(VPBB);
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
+ assignName(CanIV);
+ }
}
void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index c167dd7f65fac..926bb88995348 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -551,7 +551,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
- case VPRecipeBase::VPCanonicalIVPHISC:
case VPRecipeBase::VPActiveLaneMaskPHISC:
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
case VPRecipeBase::VPWidenPHISC:
@@ -1957,12 +1956,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
-/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
-/// starting at a specified value (zero for the main vector loop, the resume
-/// value for the epilogue vector loop) and stepping by 1. The induction
-/// controls exiting of the vector loop by comparing against the vector trip
-/// count. Produces a single scalar PHI for the induction value per
-/// iteration.
/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
/// floating point inductions with arbitrary start and step values. Produces
/// a vector PHI per-part.
@@ -3435,63 +3428,6 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *getSCEV() const { return Expr; }
};
-/// Canonical scalar induction phi of the vector loop. Starting at the specified
-/// start value (either 0 or the resume value when vectorizing the epilogue
-/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
-/// canonical induction variable.
-class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
-public:
- VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
-
- ~VPCanonicalIVPHIRecipe() override = default;
-
- VPCanonicalIVPHIRecipe *clone() override {
- auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
- R->addOperand(getBackedgeValue());
- return R;
- }
-
- VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
-
- void execute(VPTransformState &State) override {
- llvm_unreachable("cannot execute this recipe, should be replaced by a "
- "scalar phi recipe");
- }
-
-#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
- /// Print the recipe.
- void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
-#endif
-
- /// Returns the scalar type of the induction.
- Type *getScalarType() const {
- return getStartValue()->getLiveInIRValue()->getType();
- }
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Return the cost of this VPCanonicalIVPHIRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // For now, match the behavior of the legacy cost model.
- return 0;
- }
-};
-
/// A recipe for generating the active lane mask for the vector loop that is
/// used to predicate the vector operations.
/// TODO: It would be good to use the existing VPWidenPHIRecipe instead and
@@ -3570,14 +3506,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
public VPUnrollPartAccessor<1> {
public:
- VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
+ VPWidenCanonicalIVRecipe(VPValue *CanonicalIV)
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
~VPWidenCanonicalIVRecipe() override = default;
VPWidenCanonicalIVRecipe *clone() override {
- return new VPWidenCanonicalIVRecipe(
- cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+ return new VPWidenCanonicalIVRecipe(getOperand(0));
}
VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
@@ -3616,8 +3551,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
- VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
- const Twine &Name = "")
+ VPValue *CanonicalIV, VPValue *Step, const Twine &Name = "")
: VPDerivedIVRecipe(
IndDesc.getKind(),
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp()),
@@ -3963,6 +3897,23 @@ class VPIRBasicBlock : public VPBasicBlock {
BasicBlock *getIRBasicBlock() const { return IRBB; }
};
+/// Track information about the canonical IV value of a region.
+struct VPCanonicalIVInfo {
+ VPRegionValue *CanIV = nullptr;
+ bool HasNUW = true;
+ DebugLoc DL = DebugLoc::getUnknown();
+
+ VPCanonicalIVInfo(VPRegionValue *CanIV, bool HasNUW, DebugLoc DL)
+ : CanIV(CanIV), HasNUW(HasNUW), DL(DL) {}
+
+ VPCanonicalIVInfo() {}
+
+ ~VPCanonicalIVInfo() {
+ if (CanIV)
+ delete CanIV;
+ }
+};
+
/// VPRegionBlock represents a collection of VPBasicBlocks and VPRegionBlocks
/// which form a Single-Entry-Single-Exiting subgraph of the output IR CFG.
/// A VPRegionBlock may indicate that its contents are to be replicated several
@@ -3981,23 +3932,35 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// VPRegionBlock.
VPBlockBase *Exiting;
- /// An indicator whether this region is to generate multiple replicated
- /// instances of output IR corresponding to its VPBlockBases.
- bool IsReplicator;
+ /// Canonical IV of the loop region. If CanIV is nullptr, the region is a
+ /// replicating region.
+ VPCanonicalIVInfo CanIVInfo;
/// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "", bool IsReplicator = false)
+ const std::string &Name = "")
+ : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
+ CanIVInfo() {
+ assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
+ assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
+ Entry->setParent(this);
+ Exiting->setParent(this);
+ }
+
+ VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
+ const VPCanonicalIVInfo &CanIVInfo,
+ const std::string &Name = "")
: VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting),
- IsReplicator(IsReplicator) {
+ CanIVInfo(CanIVInfo) {
assert(Entry->getPredecessors().empty() && "Entry block has predecessors.");
assert(Exiting->getSuccessors().empty() && "Exit block has successors.");
Entry->setParent(this);
Exiting->setParent(this);
}
- VPRegionBlock(const std::string &Name = "", bool IsReplicator = false)
+
+ VPRegionBlock(DebugLoc DL, const std::string &Name = "")
: VPBlockBase(VPRegionBlockSC, Name), Entry(nullptr), Exiting(nullptr),
- IsReplicator(IsReplicator) {}
+ CanIVInfo(new VPRegionValue(), true, DL) {}
public:
~VPRegionBlock() override {}
@@ -4039,7 +4002,7 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// An indicator whether this region is to generate multiple replicated
/// instances of output IR corresponding to its VPBlockBases.
- bool isReplicator() const { return IsReplicator; }
+ bool isReplicator() const { return !getCanonicalIV(); }
/// The method which generates the output IR instructions that correspond to
/// this VPRegionBlock, thereby "executing" the VPlan.
@@ -4067,6 +4030,13 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// Remove the current region from its VPlan, connecting its predecessor to
/// its entry, and its exiting block to its successor.
void dissolveToCFGLoop();
+
+ /// Return the canonical induction variable of the region, null for
+ /// replicating regions.
+ VPValue *getCanonicalIV() { return CanIVInfo.CanIV; }
+ const VPValue *getCanonicalIV() const { return CanIVInfo.CanIV; }
+
+ VPCanonicalIVInfo &getCanonicalIVInfo() { return CanIVInfo; }
};
/// VPlan models a candidate for vectorization, encoding various decisions take
@@ -4378,14 +4348,10 @@ class VPlan {
...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks overall a good approach to me! Adding initial set of various comments, to be continued.
// Ensure that the start values for all header phi recipes are updated before | ||
// vectorizing the epilogue loop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment is moved below.
// VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions | ||
// requires an adjustment to the resume value. The resume value is | ||
// adjusted to the sentinel value when the final value from the main | ||
// vector loop equals the start value. This ensures correctness when | ||
// the start value might not be less than the minimum value of a | ||
// monotonically increasing induction variable. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Independent NFC?
NewRegion->CanIVInfo.CanIV = new VPRegionValue(); | ||
NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW; | ||
NewRegion->CanIVInfo.DL = CanIVInfo.DL; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Suggests clone() of CanIVInfo?
// When vectorizing the epilogue loop, the canonical induction start value | ||
// needs to be changed from zero to the value after the main vector loop. Find | ||
// the resume value created during execution of the main VPlan. | ||
// FIXME: Improve modeling for canonical IV start values in the epilogue loop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// FIXME: Improve modeling for canonical IV start values in the epilogue loop. | |
// It must be the first phi in the loop preheader. | |
// FIXME: Improve modeling for canonical IV start values in the epilogue loop. |
worth retaining this helpful comment if it still holds?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
is this movement needed?
return TTICapture.getRegUsageForType(VectorType::get(Ty, VF)); | ||
}; | ||
|
||
if (auto *CanIV = LoopRegion->getCanonicalIV()) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
When/Can CanIV be null / LoopRegion be a replicating one?
static void createLoopRegion(VPlan &Plan, VPBlockBase *HeaderVPB) { | ||
auto *PreheaderVPBB = HeaderVPB->getPredecessors()[0]; | ||
auto *LatchVPBB = HeaderVPB->getPredecessors()[1]; | ||
auto *LatchVPBB = cast<VPBasicBlock>(HeaderVPB->getPredecessors()[1]); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why/Is this needed?
auto *CanonicalIVIncrement = Builder.createOverflowingOp( | ||
auto CanonicalIVIncrement = Builder.createOverflowingOp( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropping * needed?
/// Returns true if this VPValue is a live-in, i.e. defined outside the VPlan. | ||
bool isLiveIn() const { return !hasDefiningRecipe(); } | ||
bool isLiveIn() const { | ||
return !hasDefiningRecipe() && SubclassID != VPRegionValueSC; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps more consistent to have both check SubclassID
}; | ||
|
||
/// VPValues defined by a VPRegionBlock, like the canonical IV. | ||
class VPRegionValue : public VPValue { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
struct?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Overall approach looks good to me! Adding initial set of various comments, to be continued.
The canonical IV is directly tied to a loop region. To directly ensure there's a single, unique canonical IV, directly define it by the region.
Depends on #161589 (include in the PR)