Skip to content

Commit

Permalink
[LoopUnroll] Add support for loops with exiting headers and uncond la…
Browse files Browse the repository at this point in the history
…tches.

This patch generalizes the UnrollLoop utility to support loops that exit
from the header instead of the latch. Usually, LoopRotate would take care
of most of those cases, but in some cases (e.g. -Oz), LoopRotate does
not kick in.

Codesize impact looks relatively neutral on ARM64 with -Oz + LTO.

Program                                         master     patch     diff
 External/S.../CFP2006/447.dealII/447.dealII   629060.00  627676.00  -0.2%
 External/SPEC/CINT2000/176.gcc/176.gcc        1245916.00 1244932.00 -0.1%
 MultiSourc...Prolangs-C/simulator/simulator   86100.00   86156.00    0.1%
 MultiSourc...arks/Rodinia/backprop/backprop   66212.00   66252.00    0.1%
 MultiSourc...chmarks/Prolangs-C++/life/life   67276.00   67312.00    0.1%
 MultiSourc...s/Prolangs-C/compiler/compiler   69824.00   69788.00   -0.1%
 MultiSourc...Prolangs-C/assembler/assembler   86672.00   86696.00    0.0%

Reviewers: efriedma, vsk, paquette

Reviewed By: paquette

Differential Revision: https://reviews.llvm.org/D61962

llvm-svn: 364398
  • Loading branch information
fhahn committed Jun 26, 2019
1 parent 46ce9e4 commit 4c11b52
Show file tree
Hide file tree
Showing 6 changed files with 523 additions and 64 deletions.
230 changes: 170 additions & 60 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Expand Up @@ -44,6 +44,8 @@ using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
"conditional latch (completely or otherwise)");

static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -295,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}

// The current loop unroll pass can only unroll loops with a single latch
// The current loop unroll pass can unroll loops with a single latch or header
// that's a conditional branch exiting the loop.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

if (!BI || BI->isUnconditional()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
// FIXME: Support loops without conditional latch and multiple exiting blocks.
if (!BI ||
(BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
L->getExitingBlock() != Header))) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
"branch in the latch or header.\n");
return LoopUnrollResult::Unmodified;
}

auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
return BI->isConditional() && BI->getSuccessor(S1) == Header &&
!L->contains(BI->getSuccessor(S2));
};

// If we have a conditional latch, it must exit the loop.
if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
!CheckLatchSuccessors(1, 0)) {
LLVM_DEBUG(
dbgs()
<< " Can't unroll; loop not terminated by a conditional branch.\n");
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}

auto CheckSuccessors = [&](unsigned S1, unsigned S2) {
return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2));
auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
return HeaderBI && HeaderBI->isConditional() &&
L->contains(HeaderBI->getSuccessor(S1)) &&
!L->contains(HeaderBI->getSuccessor(S2));
};

if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) {
LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch"
" exiting the loop can be unrolled\n");
// If we do not have a conditional latch, the header must exit the loop.
if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
!CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
return LoopUnrollResult::Unmodified;
}

Expand Down Expand Up @@ -503,8 +523,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SE->forgetTopmostLoop(L);
}

bool ContinueOnTrue = L->contains(BI->getSuccessor(0));
BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue);
bool ContinueOnTrue;
bool LatchIsExiting = BI->isConditional();
BasicBlock *LoopExit = nullptr;
if (LatchIsExiting) {
ContinueOnTrue = L->contains(BI->getSuccessor(0));
LoopExit = BI->getSuccessor(ContinueOnTrue);
} else {
NumUnrolledWithHeader++;
ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
}

// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
Expand All @@ -514,11 +543,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
OrigPHINode.push_back(cast<PHINode>(I));
}

std::vector<BasicBlock*> Headers;
std::vector<BasicBlock*> Latches;
std::vector<BasicBlock *> Headers;
std::vector<BasicBlock *> HeaderSucc;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);

if (!LatchIsExiting) {
auto *Term = cast<BranchInst>(Header->getTerminator());
if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
assert(L->contains(Term->getSuccessor(0)));
HeaderSucc.push_back(Term->getSuccessor(0));
} else {
assert(L->contains(Term->getSuccessor(1)));
HeaderSucc.push_back(Term->getSuccessor(1));
}
}

// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
// exit.
Expand Down Expand Up @@ -608,6 +649,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (*BB == LatchBlock)
Latches.push_back(New);

// Keep track of the successor of the new header in the current iteration.
for (auto *Pred : predecessors(*BB))
if (Pred == Header) {
HeaderSucc.push_back(New);
break;
}

NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);

Expand Down Expand Up @@ -657,51 +705,21 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}
}

// Now that all the basic blocks for the unrolled iterations are in place,
// set up the branches to connect them.
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
// The original branch was replicated in each unrolled iteration.
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

// The branch destination.
unsigned j = (i + 1) % e;
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;

if (RuntimeTripCount && j != 0) {
NeedConditional = false;
}

// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll) {
if (j == 0)
Dest = LoopExit;
// If using trip count upper bound to completely unroll, we need to keep
// the conditional branch except the last one because the loop may exit
// after any iteration.
assert(NeedConditional &&
"NeedCondition cannot be modified by both complete "
"unrolling and runtime unrolling");
NeedConditional =
(ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
} else if (j != BreakoutTrip &&
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
// If we know the trip count or a multiple of it, we can safely use an
// unconditional branch for some iterations.
NeedConditional = false;
}

auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest,
ArrayRef<BasicBlock *> NextBlocks,
BasicBlock *CurrentHeader,
bool NeedConditional) {
auto *Term = cast<BranchInst>(Src->getTerminator());
if (NeedConditional) {
// Update the conditional branch's successor for the following
// iteration.
Term->setSuccessor(!ContinueOnTrue, Dest);
} else {
// Remove phi operands at this loop exit
if (Dest != LoopExit) {
BasicBlock *BB = Latches[i];
for (BasicBlock *Succ: successors(BB)) {
if (Succ == Headers[i])
BasicBlock *BB = Src;
for (BasicBlock *Succ : successors(BB)) {
if (Succ == CurrentHeader)
continue;
for (PHINode &Phi : Succ->phis())
Phi.removeIncomingValue(BB, false);
Expand All @@ -711,6 +729,90 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
BranchInst::Create(Dest, Term);
Term->eraseFromParent();
}
};

// Now that all the basic blocks for the unrolled iterations are in place,
// set up the branches to connect them.
if (LatchIsExiting) {
// Set up latches to branch to the new header in the unrolled iterations or
// the loop exit for the last latch in a fully unrolled loop.
for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
// The branch destination.
unsigned j = (i + 1) % e;
BasicBlock *Dest = Headers[j];
bool NeedConditional = true;

if (RuntimeTripCount && j != 0) {
NeedConditional = false;
}

// For a complete unroll, make the last iteration end with a branch
// to the exit block.
if (CompletelyUnroll) {
if (j == 0)
Dest = LoopExit;
// If using trip count upper bound to completely unroll, we need to keep
// the conditional branch except the last one because the loop may exit
// after any iteration.
assert(NeedConditional &&
"NeedCondition cannot be modified by both complete "
"unrolling and runtime unrolling");
NeedConditional =
(ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0));
} else if (j != BreakoutTrip &&
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) {
// If we know the trip count or a multiple of it, we can safely use an
// unconditional branch for some iterations.
NeedConditional = false;
}

setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional);
}
} else {
// Set up headers to branch to their new successors in the unrolled
// iterations.
for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
// The branch destination.
unsigned j = (i + 1) % e;
BasicBlock *Dest = HeaderSucc[i];
bool NeedConditional = true;

if (RuntimeTripCount && j != 0)
NeedConditional = false;

if (CompletelyUnroll)
// We cannot drop the conditional branch for the last condition, as we
// may have to execute the loop body depending on the condition.
NeedConditional = j == 0 || ULO.PreserveCondBr;
else if (j != BreakoutTrip &&
(ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0))
// If we know the trip count or a multiple of it, we can safely use an
// unconditional branch for some iterations.
NeedConditional = false;

setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional);
}

// Set up latches to branch to the header of the following iteration (the
// last latch wraps around to the first header). When completely unrolling,
// the last latch is terminated with an unreachable instruction instead.

for (unsigned i = 0, e = Latches.size(); i != e; ++i) {
// The original branch was replicated in each unrolled iteration.
BranchInst *Term = cast<BranchInst>(Latches[i]->getTerminator());

// The branch destination.
unsigned j = (i + 1) % e;
BasicBlock *Dest = Headers[j];

// When completely unrolling, the last latch becomes unreachable.
if (CompletelyUnroll && j == 0)
new UnreachableInst(Term->getContext(), Term);
else
// Replace the conditional branch with an unconditional one.
BranchInst::Create(Dest, Term);

Term->eraseFromParent();
}
}

// Update dominators of blocks we might reach through exits.
Expand All @@ -727,19 +829,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
if (BB == LatchBlock) {
BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
auto &TermBlocks = LatchIsExiting ? Latches : Headers;
if (BB == TermBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
// Since the latch is always at the bottom of the loop, if the latch
// dominated an exit before unrolling, the new dominator of that exit
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
NewIDom = Latches.back();
for (BasicBlock *IterLatch : Latches) {
Instruction *Term = IterLatch->getTerminator();
// For loops exiting from the header, we limit the supported loops
// to have a single exiting block.
NewIDom = TermBlocks.back();
for (BasicBlock *Iter : TermBlocks) {
Instruction *Term = Iter->getTerminator();
if (isa<BranchInst>(Term) && cast<BranchInst>(Term)->isConditional()) {
NewIDom = IterLatch;
NewIDom = Iter;
break;
}
}
Expand All @@ -756,13 +862,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}

assert(!DT || !UnrollVerifyDomtree ||
DT->verify(DominatorTree::VerificationLevel::Fast));
DT->verify(DominatorTree::VerificationLevel::Fast));

DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
// Merge adjacent basic blocks, if possible.
for (BasicBlock *Latch : Latches) {
BranchInst *Term = cast<BranchInst>(Latch->getTerminator());
if (Term->isUnconditional()) {
BranchInst *Term = dyn_cast<BranchInst>(Latch->getTerminator());
assert((Term ||
(CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) &&
"Need a branch as terminator, except when fully unrolling with "
"unconditional latch");
if (Term && Term->isUnconditional()) {
BasicBlock *Dest = Term->getSuccessor(0);
BasicBlock *Fold = Dest->getUniquePredecessor();
if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) {
Expand Down
@@ -1,7 +1,11 @@
; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -verify-scev-maps -S | FileCheck %s

; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest.
; The outer loop gets unrolled twice, so we see 2 selects in the outer loop blocks.
; CHECK-LABEL: @foo(
; CHECK-LABEL: for.body.loopexit:
; CHECK: select
; CHECK-LABEL: for.body:
; CHECK: select
; CHECK-NOT: select
; CHECK: ret
Expand All @@ -14,7 +18,7 @@ for.body: ; preds = %for.body5, %for.bod
%xfL.addr.033 = phi i32 [ %xfL, %entry ], [ %add, %for.body5 ]
%add = add nsw i32 %xfL.addr.033, %scaleL
%shr = ashr i32 %add, 16
%cmp.i = icmp slt i32 0, %shr
%cmp.i = icmp slt i32 10, %shr
%.sroa.speculated = select i1 %cmp.i, i32 0, i32 %shr
%cmp425 = icmp slt i32 0, %.sroa.speculated
br i1 %cmp425, label %for.body5.preheader, label %for.end
Expand Down

0 comments on commit 4c11b52

Please sign in to comment.