Skip to content

Commit

Permalink
Recommit "[LV] Remove unneeded createHeaderBranch.(NFCI)"
Browse files Browse the repository at this point in the history
This reverts the revert commit 2760cdc.

This version pulls in the code to create the vector loop object in VPlan
from D121624.

This is needed because otherwise existing LoopInfo verification will
fail, as a loop block doesn't have in-loop successors now that we
do not replace the branch.

Now that we do not add new loops during skeleton construction, there's
also no need to verify LI there.
  • Loading branch information
fhahn committed Mar 31, 2022
1 parent 19246b0 commit 8378a71
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 70 deletions.
66 changes: 9 additions & 57 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Expand Up @@ -569,11 +569,6 @@ class InnerLoopVectorizer {
Value *CountRoundDown, Value *EndValue,
BasicBlock *MiddleBlock, BasicBlock *VectorHeader);

/// Introduce a conditional branch (on true, condition to be set later) at the
/// end of the header=latch connecting it to itself (across the backedge) and
/// to the exit block of \p L.
void createHeaderBranch(Loop *L);

/// Handle all cross-iteration phis in the header.
void fixCrossIterationPHIs(VPTransformState &State);

Expand Down Expand Up @@ -630,9 +625,8 @@ class InnerLoopVectorizer {
BasicBlock *emitMemRuntimeChecks(BasicBlock *Bypass);

/// Emit basic blocks (prefixed with \p Prefix) for the iteration check,
/// vector loop preheader, middle block and scalar preheader. Also
/// allocate a loop object for the new vector loop and return it.
Loop *createVectorLoopSkeleton(StringRef Prefix);
/// vector loop preheader, middle block and scalar preheader.
void createVectorLoopSkeleton(StringRef Prefix);

/// Create new phi nodes for the induction variables to resume iteration count
/// in the scalar epilogue, from where the vectorized loop left off.
Expand Down Expand Up @@ -2833,23 +2827,6 @@ void InnerLoopVectorizer::scalarizeInstruction(Instruction *Instr,
PredicatedInstructions.push_back(Cloned);
}

/// Replace the terminator of the header of \p L with a conditional branch.
/// The branch is created with a constant-true condition, the unique exit
/// block of \p L as its first successor and the header itself as its second
/// successor. Asserts that \p L does not have a latch yet.
void InnerLoopVectorizer::createHeaderBranch(Loop *L) {
BasicBlock *Header = L->getHeader();
assert(!L->getLoopLatch() && "loop should not have a latch at this point");

// Position the builder at the header's current terminator, so the new branch
// is inserted in front of it.
IRBuilder<> B(Header->getTerminator());
// Derive the debug location for the builder from the primary induction.
// NOTE(review): presumably this falls back to the induction's operands when
// the induction itself has no location -- confirm against the helper.
Instruction *OldInst =
getDebugLocFromInstOrOperands(Legal->getPrimaryInduction());
setDebugLocFromInst(OldInst, &B);

// Connect the header to the exit and header blocks and replace the old
// terminator.
B.CreateCondBr(B.getTrue(), L->getUniqueExitBlock(), Header);

// Now we have two terminators. Remove the old one from the block.
Header->getTerminator()->eraseFromParent();
}

Value *InnerLoopVectorizer::getOrCreateTripCount(BasicBlock *InsertBlock) {
if (TripCount)
return TripCount;
Expand Down Expand Up @@ -3092,7 +3069,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {
return MemCheckBlock;
}

Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
void InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
LoopScalarBody = OrigLoop->getHeader();
LoopVectorPreHeader = OrigLoop->getLoopPreheader();
assert(LoopVectorPreHeader && "Invalid loop structure");
Expand Down Expand Up @@ -3124,33 +3101,15 @@ Loop *InnerLoopVectorizer::createVectorLoopSkeleton(StringRef Prefix) {
BrInst->setDebugLoc(ScalarLatchTerm->getDebugLoc());
ReplaceInstWithInst(LoopMiddleBlock->getTerminator(), BrInst);

// We intentionally don't let SplitBlock update LoopInfo since
// LoopVectorBody should belong to another loop than LoopVectorPreHeader.
// LoopVectorBody is explicitly added to the correct place a few lines later.
BasicBlock *LoopVectorBody =
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
nullptr, nullptr, Twine(Prefix) + "vector.body");
SplitBlock(LoopVectorPreHeader, LoopVectorPreHeader->getTerminator(), DT,
nullptr, nullptr, Twine(Prefix) + "vector.body");

// Update dominator for loop exit.
if (!Cost->requiresScalarEpilogue(VF))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
DT->changeImmediateDominator(LoopExitBlock, LoopMiddleBlock);

// Create and register the new vector loop.
Loop *Lp = LI->AllocateLoop();
Loop *ParentLoop = OrigLoop->getParentLoop();

// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop) {
ParentLoop->addChildLoop(Lp);
} else {
LI->addTopLevelLoop(Lp);
}
Lp->addBasicBlockToLoop(LoopVectorBody, *LI);
return Lp;
}

void InnerLoopVectorizer::createInductionResumeValues(
Expand Down Expand Up @@ -3262,7 +3221,6 @@ BasicBlock *InnerLoopVectorizer::completeLoopSkeleton(MDNode *OrigLoopID) {

#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
LI->verify(*DT);
#endif

return LoopVectorPreHeader;
Expand Down Expand Up @@ -3316,7 +3274,7 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {

// Create an empty vector loop, and prepare basic blocks for the runtime
// checks.
Loop *Lp = createVectorLoopSkeleton("");
createVectorLoopSkeleton("");

// Now, compare the new count to zero. If it is zero skip the vector loop and
// jump to the scalar loop. This check also covers the case where the
Expand All @@ -3334,8 +3292,6 @@ InnerLoopVectorizer::createVectorizedLoopSkeleton() {
// faster.
emitMemRuntimeChecks(LoopScalarPreHeader);

createHeaderBranch(Lp);

// Emit phis for the new starting index of the scalar loop.
createInductionResumeValues();

Expand Down Expand Up @@ -7622,7 +7578,7 @@ void LoopVectorizationPlanner::executePlan(ElementCount BestVF, unsigned BestUF,
// 1. Create a new empty loop. Unlink the old loop and connect the new one.
VPTransformState State{BestVF, BestUF, LI, DT, ILV.Builder, &ILV, &BestVPlan};
Value *CanonicalIVStartValue;
std::tie(State.CFG.PrevBB, CanonicalIVStartValue) =
std::tie(State.CFG.VectorPreHeader, CanonicalIVStartValue) =
ILV.createVectorizedLoopSkeleton();
ILV.collectPoisonGeneratingRecipes(State);

Expand Down Expand Up @@ -7739,7 +7695,7 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) { return V; }
std::pair<BasicBlock *, Value *>
EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
Loop *Lp = createVectorLoopSkeleton("");
createVectorLoopSkeleton("");

// Generate the code to check the minimum iteration count of the vector
// epilogue (see below).
Expand Down Expand Up @@ -7768,7 +7724,6 @@ EpilogueVectorizerMainLoop::createEpilogueVectorizedLoopSkeleton() {
// Generate the induction variable.
Value *CountRoundDown = getOrCreateVectorTripCount(LoopVectorPreHeader);
EPI.VectorTripCount = CountRoundDown;
createHeaderBranch(Lp);

// Skip induction resume value creation here because they will be created in
// the second pass. If we created them here, they wouldn't be used anyway,
Expand Down Expand Up @@ -7860,7 +7815,7 @@ EpilogueVectorizerMainLoop::emitMinimumIterationCountCheck(BasicBlock *Bypass,
std::pair<BasicBlock *, Value *>
EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
MDNode *OrigLoopID = OrigLoop->getLoopID();
Loop *Lp = createVectorLoopSkeleton("vec.epilog.");
createVectorLoopSkeleton("vec.epilog.");

// Now, compare the remaining count and if there aren't enough iterations to
// execute the vectorized epilogue skip to the scalar part.
Expand Down Expand Up @@ -7941,9 +7896,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton() {
EPResumeVal->addIncoming(ConstantInt::get(IdxTy, 0),
EPI.MainLoopIterationCountCheck);

// Generate the induction variable.
createHeaderBranch(Lp);

// Generate induction resume values. These variables save the new starting
// indexes for the scalar loop. They are used to test if there are any tail
// iterations left once the vector loop has completed.
Expand Down
38 changes: 25 additions & 13 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Expand Up @@ -318,11 +318,16 @@ void VPBasicBlock::execute(VPTransformState *State) {
// Temporarily terminate with unreachable until CFG is rewired.
UnreachableInst *Terminator = State->Builder.CreateUnreachable();
State->Builder.SetInsertPoint(Terminator);
// Register NewBB in its loop. In innermost loops it's the same for all BBs.
State->CurrentVectorLoop->addBasicBlockToLoop(NewBB, *State->LI);
State->CFG.PrevBB = NewBB;
}

if (State->CurrentVectorLoop &&
!State->CurrentVectorLoop->contains(State->CFG.PrevBB)) {
// Register NewBB in its loop. In innermost loops it's the same for all BBs.
State->CurrentVectorLoop->addBasicBlockToLoop(State->CFG.PrevBB,
*State->LI);
}

// 2. Fill the IR basic block with IR instructions.
LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
<< " in BB:" << NewBB->getName() << '\n');
Expand Down Expand Up @@ -447,6 +452,17 @@ void VPRegionBlock::execute(VPTransformState *State) {
ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);

if (!isReplicator()) {
// Create and register the new vector loop.
State->CurrentVectorLoop = State->LI->AllocateLoop();
Loop *ParentLoop = State->LI->getLoopFor(State->CFG.VectorPreHeader);

// Insert the new loop into the loop nest and register the new basic blocks
// before calling any utilities such as SCEV that require valid LoopInfo.
if (ParentLoop)
ParentLoop->addChildLoop(State->CurrentVectorLoop);
else
State->LI->addTopLevelLoop(State->CurrentVectorLoop);

// Visit the VPBlocks connected to "this", starting from it.
for (VPBlockBase *Block : RPOT) {
if (EnableVPlanNativePath) {
Expand Down Expand Up @@ -857,7 +873,7 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,

// Check if the backedge taken count is needed, and if so build it.
if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
IRBuilder<> Builder(State.CFG.VectorPreHeader->getTerminator());
auto *TCMO = Builder.CreateSub(TripCountV,
ConstantInt::get(TripCountV->getType(), 1),
"trip.count.minus.1");
Expand Down Expand Up @@ -898,17 +914,16 @@ void VPlan::prepareToExecute(Value *TripCountV, Value *VectorTripCountV,
/// LoopVectorBody basic-block was created for this. Introduce additional
/// basic-blocks as needed, and fill them all.
void VPlan::execute(VPTransformState *State) {
// 0. Set the reverse mapping from VPValues to Values for code generation.
// Set the reverse mapping from VPValues to Values for code generation.
for (auto &Entry : Value2VPValue)
State->VPValue2Value[Entry.second] = Entry.first;

BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
State->CFG.VectorPreHeader = VectorPreHeaderBB;
BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
assert(VectorHeaderBB && "Loop preheader does not have a single successor.");

// Initialize CFG state.
State->CFG.PrevVPBB = nullptr;
BasicBlock *VectorHeaderBB = State->CFG.VectorPreHeader->getSingleSuccessor();
State->CFG.PrevBB = VectorHeaderBB;
State->CFG.ExitBB = VectorHeaderBB->getSingleSuccessor();
State->CurrentVectorLoop = State->LI->getLoopFor(VectorHeaderBB);
State->CFG.ExitBB = State->CurrentVectorLoop->getExitBlock();

// Remove the edge between Header and Latch to allow other connections.
// Temporarily terminate with unreachable until CFG is rewired.
Expand All @@ -920,9 +935,6 @@ void VPlan::execute(VPTransformState *State) {
State->Builder.SetInsertPoint(Terminator);

// Generate code in loop body.
State->CFG.PrevVPBB = nullptr;
State->CFG.PrevBB = VectorHeaderBB;

for (VPBlockBase *Block : depth_first(Entry))
Block->execute(State);

Expand Down

0 comments on commit 8378a71

Please sign in to comment.