Skip to content

Commit

Permalink
[NFC] [LoopPeel] Change the way DT is updated for loop exits
Browse files Browse the repository at this point in the history
When peeling a loop, we assume that the latch has a `br` terminator and
that all loop exits are either terminated with an `unreachable` or have
a terminating deoptimize call. So when we peel off the 1st iteration, we
change the IDom of all loop exits to the peeled copy of
`NCD(IDom(Exit), Latch)`. This works now, but if we add logic to support
loops with exits that are followed by a block with an `unreachable` or a
terminating deoptimize call, changing the exit's idom wouldn't be enough
and DT would be broken.

For example, let `Exit1` and `Exit2` be loop exits, each of which
unconditionally branches to the same `unreachable`-terminated block. Then
neither of the exits dominates this unreachable block. If we change the
IDoms of the exits to some peeled loop block, we don't update the
dominators of the unreachable block. Currently we just don't get to the
peeling logic, saying that we can't peel such loops.

With this NFC change, we just insert edges from the cloned exiting blocks to
their exits after peeling each iteration (we accumulate the insertion updates
and apply them to the DT once peeling is done).

This patch was a part of D110922.

Patch by Dmitry Makogon!

Differential Revision: https://reviews.llvm.org/D111611
Reviewed By: mkazantsev
  • Loading branch information
xortator committed Oct 18, 2021
1 parent c900b0a commit fa16329
Showing 1 changed file with 34 additions and 56 deletions.
90 changes: 34 additions & 56 deletions llvm/lib/Transforms/Utils/LoopPeel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,11 @@

#include "llvm/Transforms/Utils/LoopPeel.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopIterator.h"
Expand Down Expand Up @@ -555,11 +557,13 @@ static void fixupBranchWeights(BasicBlock *Header, BranchInst *LatchBR,
/// \param LoopBlocks A helper for DFS-traversal of the loop.
/// \param LVMap A value-map that maps instructions from the original loop to
/// instructions in the last peeled-off iteration.
/// \param LoopBlocksIDoms Immediate dominators of the original loop blocks.
static void cloneLoopBlocks(
Loop *L, unsigned IterNumber, BasicBlock *InsertTop, BasicBlock *InsertBot,
SmallVectorImpl<std::pair<BasicBlock *, BasicBlock *>> &ExitEdges,
SmallVectorImpl<BasicBlock *> &NewBlocks, LoopBlocksDFS &LoopBlocks,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DominatorTree *DT,
ValueToValueMapTy &VMap, ValueToValueMapTy &LVMap, DomTreeUpdater &DTU,
const SmallDenseMap<BasicBlock *, BasicBlock *> &LoopBlocksIDoms,
LoopInfo *LI, ArrayRef<MDNode *> LoopLocalNoAliasDeclScopes) {
BasicBlock *Header = L->getHeader();
BasicBlock *Latch = L->getLoopLatch();
Expand All @@ -585,14 +589,13 @@ static void cloneLoopBlocks(
VMap[*BB] = NewBB;

// If dominator tree is available, insert nodes to represent cloned blocks.
if (DT) {
if (Header == *BB)
DT->addNewBlock(NewBB, InsertTop);
else {
DomTreeNode *IDom = DT->getNode(*BB)->getIDom();
// VMap must contain entry for IDom, as the iteration order is RPO.
DT->addNewBlock(NewBB, cast<BasicBlock>(VMap[IDom->getBlock()]));
}
if (Header == *BB)
DTU.applyUpdates({{DominatorTree::Insert, InsertTop, NewBB}});
else {
BasicBlock *IDom = LoopBlocksIDoms.lookup(*BB);
// VMap must contain entry for IDom, as the iteration order is RPO.
DTU.applyUpdates(
{{DominatorTree::Insert, cast<BasicBlock>(VMap[IDom]), NewBB}});
}
}

Expand Down Expand Up @@ -629,8 +632,8 @@ static void cloneLoopBlocks(
LatchBR->setSuccessor(idx, InsertBot);
break;
}
if (DT)
DT->changeImmediateDominator(InsertBot, NewLatch);
DTU.applyUpdates({{DominatorTree::Insert, NewLatch, InsertBot},
{DominatorTree::Delete, InsertTop, InsertBot}});

// The new copy of the loop body starts with a bunch of PHI nodes
// that pick an incoming value from either the preheader, or the previous
Expand Down Expand Up @@ -732,36 +735,8 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdges;
L->getExitEdges(ExitEdges);

DenseMap<BasicBlock *, BasicBlock *> ExitIDom;
if (DT) {
// We'd like to determine the idom of exit block after peeling one
// iteration.
// Let Exit is exit block.
// Let ExitingSet - is a set of predecessors of Exit block. They are exiting
// blocks.
// Let Latch' and ExitingSet' are copies after a peeling.
// We'd like to find an idom'(Exit) - idom of Exit after peeling.
// It is an evident that idom'(Exit) will be the nearest common dominator
// of ExitingSet and ExitingSet'.
// idom(Exit) is a nearest common dominator of ExitingSet.
// idom(Exit)' is a nearest common dominator of ExitingSet'.
// Taking into account that we have a single Latch, Latch' will dominate
// Header and idom(Exit).
// So the idom'(Exit) is nearest common dominator of idom(Exit)' and Latch'.
// All these basic blocks are in the same loop, so what we find is
// (nearest common dominator of idom(Exit) and Latch)'.
// In the loop below we remember nearest common dominator of idom(Exit) and
// Latch to update idom of Exit later.
assert(L->hasDedicatedExits() && "No dedicated exits?");
for (auto Edge : ExitEdges) {
if (ExitIDom.count(Edge.second))
continue;
BasicBlock *BB = DT->findNearestCommonDominator(
DT->getNode(Edge.second)->getIDom()->getBlock(), Latch);
assert(L->contains(BB) && "IDom is not in a loop");
ExitIDom[Edge.second] = BB;
}
}
SmallDenseSet<std::pair<BasicBlock *, BasicBlock *>, 4> ExitEdgesSet(
ExitEdges.begin(), ExitEdges.end());

Function *F = Header->getParent();

Expand Down Expand Up @@ -834,31 +809,31 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SmallVector<MDNode *, 6> LoopLocalNoAliasDeclScopes;
identifyNoAliasScopesToClone(L->getBlocks(), LoopLocalNoAliasDeclScopes);

DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);

// Fill map with the loop blocks IDoms to later update the DT when cloning the
// loop blocks.
SmallDenseMap<BasicBlock *, BasicBlock *> LoopBlocksIDoms;
for (auto *BB : L->blocks())
LoopBlocksIDoms[BB] = DT->getNode(BB)->getIDom()->getBlock();

// For each peeled-off iteration, make a copy of the loop.
for (unsigned Iter = 0; Iter < PeelCount; ++Iter) {
SmallVector<BasicBlock *, 8> NewBlocks;
ValueToValueMapTy VMap;

cloneLoopBlocks(L, Iter, InsertTop, InsertBot, ExitEdges, NewBlocks,
LoopBlocks, VMap, LVMap, DT, LI,
LoopBlocks, VMap, LVMap, DTU, LoopBlocksIDoms, LI,
LoopLocalNoAliasDeclScopes);

// Remap to use values from the current iteration instead of the
// previous one.
remapInstructionsInBlocks(NewBlocks, VMap);

if (DT) {
// Latches of the cloned loops dominate over the loop exit, so idom of the
// latter is the first cloned loop body, as original PreHeader dominates
// the original loop body.
if (Iter == 0)
for (auto Exit : ExitIDom)
DT->changeImmediateDominator(Exit.first,
cast<BasicBlock>(LVMap[Exit.second]));
#ifdef EXPENSIVE_CHECKS
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
#endif
}
// If DT is available, insert edges from cloned exiting blocks to the exits
for (auto Exit : ExitEdgesSet)
DTU.applyUpdates({{DominatorTree::Insert,
cast<BasicBlock>(LVMap[Exit.first]), Exit.second}});

auto *LatchBRCopy = cast<BranchInst>(VMap[LatchBR]);
updateBranchWeights(InsertBot, LatchBRCopy, ExitWeight, FallThroughWeight);
Expand All @@ -867,7 +842,7 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
LatchBRCopy->setMetadata(LLVMContext::MD_loop, nullptr);

InsertTop = InsertBot;
InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), DT, LI);
InsertBot = SplitBlock(InsertBot, InsertBot->getTerminator(), &DTU, LI);
InsertBot->setName(Header->getName() + ".peel.next");

F->getBasicBlockList().splice(InsertTop->getIterator(),
Expand Down Expand Up @@ -902,7 +877,10 @@ bool llvm::peelLoop(Loop *L, unsigned PeelCount, LoopInfo *LI,
SE->forgetTopmostLoop(L);

// Finally, the DomTree must be correct.
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
if (DTU.hasDomTree()) {
DTU.flush();
assert(DT->verify(DominatorTree::VerificationLevel::Fast));
}

// FIXME: Incrementally update loop-simplify
simplifyLoop(L, DT, LI, SE, AC, nullptr, PreserveLCSSA);
Expand Down

0 comments on commit fa16329

Please sign in to comment.