-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[VPlan] Make canonical IV part of the region #156262
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF, | |
case VPDef::VPScalarIVStepsSC: | ||
case VPDef::VPReplicateSC: | ||
case VPDef::VPInstructionSC: | ||
case VPDef::VPCanonicalIVPHISC: | ||
case VPDef::VPVectorPointerSC: | ||
case VPDef::VPVectorEndPointerSC: | ||
case VPDef::VPExpandSCEVSC: | ||
|
@@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( | |
m_Specific(Plan->getCanonicalIV()), m_VPValue())) && | ||
"Did not find the canonical IV increment"); | ||
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags(); | ||
Plan->getCanonicalIVInfo().HasNUW = false; | ||
} | ||
|
||
// --------------------------------------------------------------------------- | ||
|
@@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes( | |
// latter are added above for masking. | ||
// FIXME: Migrate code relying on the underlying instruction from VPlan0 | ||
// to construct recipes below to not use the underlying instruction. | ||
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>( | ||
&R) || | ||
if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) || | ||
(isa<VPInstruction>(&R) && !UnderlyingValue)) | ||
continue; | ||
|
||
|
@@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) { | |
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE, | ||
Builder, BlockMaskCache, nullptr /*LVer*/); | ||
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | ||
if (isa<VPCanonicalIVPHIRecipe>(&R)) | ||
continue; | ||
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R); | ||
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR); | ||
} | ||
|
@@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { | |
SmallPtrSet<PHINode *, 2> EpiWidenedPhis; | ||
for (VPRecipeBase &R : | ||
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) { | ||
if (isa<VPCanonicalIVPHIRecipe>(&R)) | ||
continue; | ||
EpiWidenedPhis.insert( | ||
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue())); | ||
} | ||
|
@@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) { | |
VPPhi *ResumePhi = nullptr; | ||
if (ResumePhiIter == MainScalarPH->phis().end()) { | ||
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin()); | ||
Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC); | ||
ResumePhi = ScalarPHBuilder.createScalarPhi( | ||
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {}, | ||
{VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {}, | ||
"vec.epilog.resume.val"); | ||
} else { | ||
ResumePhi = cast<VPPhi>(&*ResumePhiIter); | ||
|
@@ -9523,7 +9519,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
|
||
// Ensure that the start values for all header phi recipes are updated before | ||
// vectorizing the epilogue loop. | ||
Comment on lines
9520
to
9521
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This comment is moved below. |
||
VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV(); | ||
|
||
// When vectorizing the epilogue loop, the canonical induction start | ||
// value needs to be changed from zero to the value after the main | ||
// vector loop. Find the resume value created during execution of the main | ||
|
@@ -9552,6 +9548,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
EPI.VectorTripCount = EPResumeVal->getOperand(0); | ||
} | ||
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal); | ||
VPValue *IV = VectorLoop->getCanonicalIV(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This replaces an |
||
assert(all_of(IV->users(), | ||
[](const VPUser *U) { | ||
return isa<VPScalarIVStepsRecipe>(U) || | ||
|
@@ -9562,11 +9559,14 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
}) && | ||
"the canonical IV should only be used by its increment or " | ||
"ScalarIVSteps when resetting the start value"); | ||
IV->setOperand(0, VPV); | ||
VPBuilder Builder(Header, Header->getFirstNonPhi()); | ||
VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV}); | ||
IV->replaceAllUsesWith(Add); | ||
Add->setOperand(0, IV); | ||
|
||
DenseMap<Value *, Value *> ToFrozen; | ||
SmallVector<Instruction *> InstsToMove; | ||
for (VPRecipeBase &R : drop_begin(Header->phis())) { | ||
for (VPRecipeBase &R : Header->phis()) { | ||
Value *ResumeV = nullptr; | ||
// TODO: Move setting of resume values to prepareToExecute. | ||
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) { | ||
|
@@ -9596,12 +9596,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop( | |
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock( | ||
EPI.MainLoopIterationCountCheck); | ||
|
||
// VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires | ||
// an adjustment to the resume value. The resume value is adjusted to | ||
// the sentinel value when the final value from the main vector loop | ||
// equals the start value. This ensures correctness when the start value | ||
// might not be less than the minimum value of a monotonically | ||
// increasing induction variable. | ||
// VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions | ||
// requires an adjustment to the resume value. The resume value is | ||
// adjusted to the sentinel value when the final value from the main | ||
// vector loop equals the start value. This ensures correctness when | ||
// the start value might not be less than the minimum value of a | ||
// monotonically increasing induction variable. | ||
Comment on lines
+9599
to
+9604
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could this be an independent NFC change?
Comment on lines
+9599
to
+9604
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can be done independently? |
||
BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent(); | ||
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt()); | ||
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -768,10 +768,17 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) { | |
|
||
VPRegionBlock *VPRegionBlock::clone() { | ||
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry()); | ||
auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting, | ||
getName(), isReplicator()); | ||
auto *NewRegion = | ||
getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName()); | ||
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry)) | ||
Block->setParent(NewRegion); | ||
|
||
if (CanIVInfo.CanIV) { | ||
NewRegion->CanIVInfo.CanIV = new VPRegionValue(); | ||
NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW; | ||
NewRegion->CanIVInfo.DL = CanIVInfo.DL; | ||
Comment on lines
+777
to
+779
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This suggests adding a clone() for CanIVInfo? |
||
} | ||
|
||
return NewRegion; | ||
} | ||
|
||
|
@@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
VPSlotTracker &SlotTracker) const { | ||
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {"; | ||
auto NewIndent = Indent + " "; | ||
if (auto *CanIV = getCanonicalIV()) { | ||
O << '\n'; | ||
CanIV->print(O, SlotTracker); | ||
O << '\n'; | ||
} | ||
for (auto *BlockBase : vp_depth_first_shallow(Entry)) { | ||
O << '\n'; | ||
BlockBase->print(O, NewIndent, SlotTracker); | ||
|
@@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
|
||
void VPRegionBlock::dissolveToCFGLoop() { | ||
auto *Header = cast<VPBasicBlock>(getEntry()); | ||
if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) { | ||
assert(this == getPlan()->getVectorLoopRegion() && | ||
"Canonical IV must be in the entry of the top-level loop region"); | ||
auto *ScalarR = VPBuilder(CanIV).createScalarPhi( | ||
{CanIV->getStartValue(), CanIV->getBackedgeValue()}, | ||
CanIV->getDebugLoc(), "index"); | ||
auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); | ||
VPValue *CanIV = getCanonicalIV(); | ||
if (CanIV && CanIV->getNumUsers() > 0) { | ||
auto *ExitingTerm = ExitingLatch->getTerminator(); | ||
VPInstruction *CanIVInc = nullptr; | ||
// Check if there's a canonical IV increment via an existing terminator. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps CanIVInfo could cache CanIVInc? |
||
if (match(ExitingTerm, | ||
m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) { | ||
assert(match(CanIVInc, | ||
m_Add(m_CombineOr(m_Specific(CanIV), | ||
m_Add(m_Specific(CanIV), m_LiveIn())), | ||
m_VPValue())) && | ||
"invalid existing IV increment"); | ||
} | ||
VPlan &Plan = *getPlan(); | ||
if (!CanIVInc) { | ||
CanIVInc = VPBuilder(ExitingTerm) | ||
.createOverflowingOp( | ||
Instruction::Add, {CanIV, &Plan.getVFxUF()}, | ||
{CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next"); | ||
} | ||
Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Perhaps CanIVInfo could cache CanIVTy? |
||
auto *ScalarR = | ||
VPBuilder(Header, Header->begin()) | ||
.createScalarPhi( | ||
{Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc}, | ||
CanIVInfo.DL, "index"); | ||
CanIV->replaceAllUsesWith(ScalarR); | ||
CanIV->eraseFromParent(); | ||
} | ||
|
||
VPBlockBase *Preheader = getSinglePredecessor(); | ||
auto *ExitingLatch = cast<VPBasicBlock>(getExiting()); | ||
VPBlockBase *Middle = getSingleSuccessor(); | ||
VPBlockUtils::disconnectBlocks(Preheader, this); | ||
VPBlockUtils::disconnectBlocks(this, Middle); | ||
|
@@ -916,7 +947,10 @@ VPlan::~VPlan() { | |
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++) | ||
R.setOperand(I, &DummyValue); | ||
} | ||
} else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) { | ||
CanIV->replaceAllUsesWith(&DummyValue); | ||
} | ||
|
||
delete VPB; | ||
} | ||
for (VPValue *VPV : getLiveIns()) | ||
|
@@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() { | |
// else NewTripCount will be created and inserted into Old2NewVPValues when | ||
// TripCount is cloned. In any case NewPlan->TripCount is updated below. | ||
|
||
if (auto *LoopRegion = getVectorLoopRegion()) { | ||
Old2NewVPValues[LoopRegion->getCanonicalIV()] = | ||
NewPlan->getVectorLoopRegion()->getCanonicalIV(); | ||
} | ||
|
||
remapOperands(Entry, NewEntry, Old2NewVPValues); | ||
|
||
// Initialize remaining fields of cloned VPlan. | ||
|
@@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) { | |
/// Returns true if there is a vector loop region and \p VPV is defined in a | ||
/// loop region. | ||
static bool isDefinedInsideLoopRegions(const VPValue *VPV) { | ||
if (isa<VPRegionValue>(VPV)) | ||
return true; | ||
const VPRecipeBase *DefR = VPV->getDefiningRecipe(); | ||
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() || | ||
DefR->getParent()->getEnclosingLoopRegion()); | ||
|
@@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) { | |
|
||
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> | ||
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); | ||
for (const VPBasicBlock *VPBB : | ||
VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT)) | ||
assignNames(VPBB); | ||
for (const VPBlockBase *VPB : RPOT) { | ||
if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) { | ||
assignNames(VPBB); | ||
} else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Better to be consistent in the use of braces ({}). |
||
assignName(CanIV); | ||
} | ||
} | ||
|
||
void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This comment is moved below.