Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 17 additions & 17 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4030,7 +4030,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPScalarIVStepsSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPVectorEndPointerSC:
case VPDef::VPExpandSCEVSC:
Expand Down Expand Up @@ -8428,6 +8427,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
Plan->getCanonicalIVInfo().HasNUW = false;
}

// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -8491,8 +8491,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
&R) ||
if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;

Expand Down Expand Up @@ -8679,8 +8678,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
Expand Down Expand Up @@ -9430,8 +9427,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
for (VPRecipeBase &R :
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
if (isa<VPCanonicalIVPHIRecipe>(&R))
continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
Expand Down Expand Up @@ -9492,8 +9487,9 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
VPPhi *ResumePhi = nullptr;
if (ResumePhiIter == MainScalarPH->phis().end()) {
VPBuilder ScalarPHBuilder(MainScalarPH, MainScalarPH->begin());
Type *Ty = VPTypeAnalysis(MainPlan).inferScalarType(VectorTC);
ResumePhi = ScalarPHBuilder.createScalarPhi(
{VectorTC, MainPlan.getCanonicalIV()->getStartValue()}, {},
{VectorTC, MainPlan.getOrAddLiveIn(Constant::getNullValue(Ty))}, {},
"vec.epilog.resume.val");
} else {
ResumePhi = cast<VPPhi>(&*ResumePhiIter);
Expand Down Expand Up @@ -9523,7 +9519,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(

// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
Comment on lines 9520 to 9521
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is moved below.

Comment on lines 9520 to 9521
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This comment is moved below.

VPCanonicalIVPHIRecipe *IV = Plan.getCanonicalIV();

// When vectorizing the epilogue loop, the canonical induction start
// value needs to be changed from zero to the value after the main
// vector loop. Find the resume value created during execution of the main
Expand Down Expand Up @@ -9552,6 +9548,7 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
EPI.VectorTripCount = EPResumeVal->getOperand(0);
}
VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
VPValue *IV = VectorLoop->getCanonicalIV();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This replaces an if (auto *IV = dyn_cast...

assert(all_of(IV->users(),
[](const VPUser *U) {
return isa<VPScalarIVStepsRecipe>(U) ||
Expand All @@ -9562,11 +9559,14 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
IV->setOperand(0, VPV);
VPBuilder Builder(Header, Header->getFirstNonPhi());
VPInstruction *Add = Builder.createNaryOp(Instruction::Add, {IV, VPV});
IV->replaceAllUsesWith(Add);
Add->setOperand(0, IV);

DenseMap<Value *, Value *> ToFrozen;
SmallVector<Instruction *> InstsToMove;
for (VPRecipeBase &R : drop_begin(Header->phis())) {
for (VPRecipeBase &R : Header->phis()) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
Expand Down Expand Up @@ -9596,12 +9596,12 @@ static SmallVector<Instruction *> preparePlanForEpilogueVectorLoop(
ToFrozen[StartV] = cast<PHINode>(ResumeV)->getIncomingValueForBlock(
EPI.MainLoopIterationCountCheck);

// VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions requires
// an adjustment to the resume value. The resume value is adjusted to
// the sentinel value when the final value from the main vector loop
// equals the start value. This ensures correctness when the start value
// might not be less than the minimum value of a monotonically
// increasing induction variable.
// VPReductionPHIRecipe for FindFirstIV/FindLastIV reductions
// requires an adjustment to the resume value. The resume value is
// adjusted to the sentinel value when the final value from the main
// vector loop equals the start value. This ensures correctness when
// the start value might not be less than the minimum value of a
// monotonically increasing induction variable.
Comment on lines +9599 to +9604
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could this comment reflow be split out as an independent NFC (no functional change) patch?

Comment on lines +9599 to +9604
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can this be done independently?

BasicBlock *ResumeBB = cast<Instruction>(ResumeV)->getParent();
IRBuilder<> Builder(ResumeBB, ResumeBB->getFirstNonPHIIt());
Value *Cmp = Builder.CreateICmpEQ(ResumeV, ToFrozen[StartV]);
Expand Down
70 changes: 57 additions & 13 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -768,10 +768,17 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {

VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
getName(), isReplicator());
auto *NewRegion =
getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);

if (CanIVInfo.CanIV) {
NewRegion->CanIVInfo.CanIV = new VPRegionValue();
NewRegion->CanIVInfo.HasNUW = CanIVInfo.HasNUW;
NewRegion->CanIVInfo.DL = CanIVInfo.DL;
Comment on lines +777 to +779
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This suggests adding a clone() method to CanIVInfo instead of copying its fields one by one here?

}

return NewRegion;
}

Expand Down Expand Up @@ -856,6 +863,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
auto NewIndent = Indent + " ";
if (auto *CanIV = getCanonicalIV()) {
O << '\n';
CanIV->print(O, SlotTracker);
O << '\n';
}
for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
O << '\n';
BlockBase->print(O, NewIndent, SlotTracker);
Expand All @@ -868,18 +880,37 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,

void VPRegionBlock::dissolveToCFGLoop() {
auto *Header = cast<VPBasicBlock>(getEntry());
if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
assert(this == getPlan()->getVectorLoopRegion() &&
"Canonical IV must be in the entry of the top-level loop region");
auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
{CanIV->getStartValue(), CanIV->getBackedgeValue()},
CanIV->getDebugLoc(), "index");
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPValue *CanIV = getCanonicalIV();
if (CanIV && CanIV->getNumUsers() > 0) {
auto *ExitingTerm = ExitingLatch->getTerminator();
VPInstruction *CanIVInc = nullptr;
// Check if there's a canonical IV increment via an existing terminator.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps CanIVInfo could cache CanIVInc?
Should CanIVInc also be a VPRegionValue?

if (match(ExitingTerm,
m_BranchOnCount(m_VPInstruction(CanIVInc), m_VPValue()))) {
assert(match(CanIVInc,
m_Add(m_CombineOr(m_Specific(CanIV),
m_Add(m_Specific(CanIV), m_LiveIn())),
m_VPValue())) &&
"invalid existing IV increment");
}
VPlan &Plan = *getPlan();
if (!CanIVInc) {
CanIVInc = VPBuilder(ExitingTerm)
.createOverflowingOp(
Instruction::Add, {CanIV, &Plan.getVFxUF()},
{CanIVInfo.HasNUW, false}, CanIVInfo.DL, "index.next");
}
Type *CanIVTy = VPTypeAnalysis(Plan).inferScalarType(CanIV);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps CanIVInfo could cache CanIVTy?

auto *ScalarR =
VPBuilder(Header, Header->begin())
.createScalarPhi(
{Plan.getOrAddLiveIn(ConstantInt::get(CanIVTy, 0)), CanIVInc},
CanIVInfo.DL, "index");
CanIV->replaceAllUsesWith(ScalarR);
CanIV->eraseFromParent();
}

VPBlockBase *Preheader = getSinglePredecessor();
auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPBlockBase *Middle = getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, this);
VPBlockUtils::disconnectBlocks(this, Middle);
Expand Down Expand Up @@ -916,7 +947,10 @@ VPlan::~VPlan() {
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
R.setOperand(I, &DummyValue);
}
} else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
CanIV->replaceAllUsesWith(&DummyValue);
}

delete VPB;
}
for (VPValue *VPV : getLiveIns())
Expand Down Expand Up @@ -1224,6 +1258,11 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.

if (auto *LoopRegion = getVectorLoopRegion()) {
Old2NewVPValues[LoopRegion->getCanonicalIV()] =
NewPlan->getVectorLoopRegion()->getCanonicalIV();
}

remapOperands(Entry, NewEntry, Old2NewVPValues);

// Initialize remaining fields of cloned VPlan.
Expand Down Expand Up @@ -1404,6 +1443,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
/// Returns true if there is a vector loop region and \p VPV is defined in a
/// loop region.
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
if (isa<VPRegionValue>(VPV))
return true;
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion());
Expand Down Expand Up @@ -1513,9 +1554,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {

ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
for (const VPBasicBlock *VPBB :
VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
assignNames(VPBB);
for (const VPBlockBase *VPB : RPOT) {
if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
assignNames(VPBB);
} else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better to be consistent with braces ({}): the `if` branch uses them, so the `else if` branch should too.

assignName(CanIV);
}
}

void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
Expand Down
Loading
Loading