Skip to content

Commit

Permalink
[LoopUnroll] Support loops with exiting block that is neither header nor
Browse files Browse the repository at this point in the history
latch.

Summary: Remove the limitation in LoopUnrollPass that the exiting block
must be either the header or the latch.
Reviewer: dmgreen, jdoerfert, Meinersbur, kbarton, bmahjour, etiotto,
fhahn, efriedma
Reviewed By: etiotto, fhahn, efriedma
Subscribers: efriedma, lkail, xbolva00, hiraditya, zzheng, llvm-commits
Tag: LLVM
Differential Revision: https://reviews.llvm.org/D80477
  • Loading branch information
Whitney Tsang committed May 29, 2020
1 parent b4668a2 commit 1bc73b0
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 73 deletions.
113 changes: 40 additions & 73 deletions llvm/lib/Transforms/Utils/LoopUnroll.cpp
Expand Up @@ -81,8 +81,8 @@ using namespace llvm;
// TODO: Should these be here or in LoopUnroll?
STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled");
STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)");
STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a "
"conditional latch (completely or otherwise)");
STATISTIC(NumUnrolledNotLatch, "Number of loops unrolled without a conditional "
"latch (completely or otherwise)");

static cl::opt<bool>
UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden,
Expand Down Expand Up @@ -304,48 +304,30 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
return LoopUnrollResult::Unmodified;
}

// The current loop unroll pass can unroll loops with a single latch or header
// that's a conditional branch exiting the loop.
// The current loop unroll pass can unroll loops that have
// (1) single latch; and
// (2a) latch is an exiting block; or
// (2b) latch is unconditional and there exists a single exiting block.
// FIXME: The implementation can be extended to work with more complicated
// cases, e.g. loops with multiple latches.
BasicBlock *Header = L->getHeader();
BranchInst *HeaderBI = dyn_cast<BranchInst>(Header->getTerminator());
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

// FIXME: Support loops without conditional latch and multiple exiting blocks.
if (!BI ||
(BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() ||
L->getExitingBlock() != Header))) {
BranchInst *LatchBI = dyn_cast<BranchInst>(LatchBlock->getTerminator());

// A conditional branch which exits the loop, which can be optimized to an
// unconditional branch in the unrolled loop in some cases.
BranchInst *ExitingBI = nullptr;
bool LatchIsExiting = L->isLoopExiting(LatchBlock);
if (LatchIsExiting)
ExitingBI = LatchBI;
else if (BasicBlock *ExitingBlock = L->getExitingBlock())
ExitingBI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
if (!LatchBI || !ExitingBI) {
LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional "
"branch in the latch or header.\n");
return LoopUnrollResult::Unmodified;
}

auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) {
return BI->isConditional() && BI->getSuccessor(S1) == Header &&
!L->contains(BI->getSuccessor(S2));
};

// If we have a conditional latch, it must exit the loop.
if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) &&
!CheckLatchSuccessors(1, 0)) {
LLVM_DEBUG(
dbgs() << "Can't unroll; a conditional latch must exit the loop");
return LoopUnrollResult::Unmodified;
}

auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) {
return HeaderBI && HeaderBI->isConditional() &&
L->contains(HeaderBI->getSuccessor(S1)) &&
!L->contains(HeaderBI->getSuccessor(S2));
};

// If we do not have a conditional latch, the header must exit the loop.
if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() &&
!CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) {
LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop");
"branch in latch or a single exiting block.\n");
return LoopUnrollResult::Unmodified;
}
LLVM_DEBUG(dbgs() << " Exiting Block = " << ExitingBI->getParent()->getName()
<< "\n");

if (Header->hasAddressTaken()) {
// The loop-rotate pass can be helpful to avoid this in many cases.
Expand Down Expand Up @@ -534,17 +516,10 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
SE->forgetTopmostLoop(L);
}

bool ContinueOnTrue;
bool LatchIsExiting = BI->isConditional();
BasicBlock *LoopExit = nullptr;
if (LatchIsExiting) {
ContinueOnTrue = L->contains(BI->getSuccessor(0));
LoopExit = BI->getSuccessor(ContinueOnTrue);
} else {
NumUnrolledWithHeader++;
ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0));
LoopExit = HeaderBI->getSuccessor(ContinueOnTrue);
}
if (!LatchIsExiting)
++NumUnrolledNotLatch;
bool ContinueOnTrue = L->contains(ExitingBI->getSuccessor(0));
BasicBlock *LoopExit = ExitingBI->getSuccessor(ContinueOnTrue);

// For the first iteration of the loop, we should use the precloned values for
// PHI nodes. Insert associations now.
Expand All @@ -555,21 +530,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
}

std::vector<BasicBlock *> Headers;
std::vector<BasicBlock *> HeaderSucc;
std::vector<BasicBlock *> ExitingBlocks;
std::vector<BasicBlock *> ExitingSucc;
std::vector<BasicBlock *> Latches;
Headers.push_back(Header);
Latches.push_back(LatchBlock);

if (!LatchIsExiting) {
auto *Term = cast<BranchInst>(Header->getTerminator());
if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) {
assert(L->contains(Term->getSuccessor(0)));
HeaderSucc.push_back(Term->getSuccessor(0));
} else {
assert(L->contains(Term->getSuccessor(1)));
HeaderSucc.push_back(Term->getSuccessor(1));
}
}
ExitingBlocks.push_back(ExitingBI->getParent());
ExitingSucc.push_back(ExitingBI->getSuccessor(!ContinueOnTrue));

// The current on-the-fly SSA update requires blocks to be processed in
// reverse postorder so that LastValueMap contains the correct value at each
Expand Down Expand Up @@ -660,12 +627,12 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (*BB == LatchBlock)
Latches.push_back(New);

// Keep track of the successor of the new header in the current iteration.
for (auto *Pred : predecessors(*BB))
if (Pred == Header) {
HeaderSucc.push_back(New);
break;
}
// Keep track of the exiting block and its successor block contained in
// the loop for the current iteration.
if (*BB == ExitingBlocks[0])
ExitingBlocks.push_back(New);
if (*BB == ExitingSucc[0])
ExitingSucc.push_back(New);

NewBlocks.push_back(New);
UnrolledLoopBlocks.push_back(New);
Expand Down Expand Up @@ -784,7 +751,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
if (!LatchIsExiting) {
// If the latch is not exiting, we may be able to simplify the conditional
// branches in the unrolled exiting blocks.
for (unsigned i = 0, e = Headers.size(); i != e; ++i) {
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
// The branch destination.
unsigned j = (i + 1) % e;
bool NeedConditional = true;
Expand All @@ -807,7 +774,7 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// already correct.
if (NeedConditional)
continue;
setDest(Headers[i], HeaderSucc[i], HeaderSucc[i], NeedConditional,
setDest(ExitingBlocks[i], ExitingSucc[i], ExitingSucc[i], NeedConditional,
ContinueOnTrue, false);
}

Expand All @@ -833,8 +800,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
ChildrenToUpdate.push_back(ChildBB);
}
BasicBlock *NewIDom;
BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header;
auto &TermBlocks = LatchIsExiting ? Latches : Headers;
BasicBlock *&TermBlock = ExitingBlocks[0];
auto &TermBlocks = ExitingBlocks;
if (BB == TermBlock) {
// The latch is special because we emit unconditional branches in
// some cases where the original loop contained a conditional branch.
Expand All @@ -843,8 +810,8 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI,
// must also be a latch. Specifically, the dominator is the first
// latch which ends in a conditional branch, or the last latch if
// there is no such latch.
// For loops exiting from the header, we limit the supported loops
// to have a single exiting block.
// For loops exiting from a non-latch exiting block, we limit the
// supported loops to those with a single exiting block.
NewIDom = TermBlocks.back();
for (BasicBlock *Iter : TermBlocks) {
Instruction *Term = Iter->getTerminator();
Expand Down
69 changes: 69 additions & 0 deletions llvm/test/Transforms/LoopUnroll/nonlatchcondbr.ll
@@ -0,0 +1,69 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-unroll -S | FileCheck %s
; RUN: opt < %s -passes='require<opt-remark-emit>,unroll' -S | FileCheck %s

; Test input: a loop whose only exiting block (for.body) is neither the loop
; header (for.header) nor the latch (for.body.for.body_crit_edge).
; The induction variable %i starts at 0 and the loop exits once %inc reaches 4,
; so the header runs four times. The autogenerated assertions below expect the
; body to be replicated (suffixes .1, .2, .3), with the exit branches of the
; first three copies of for.body turned into unconditional branches and the
; last copy branching on a constant false condition.
define void @foo(i32* noalias %A) {
; CHECK-LABEL: @foo(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A:%.*]], align 4
; CHECK-NEXT: call void @bar(i32 [[TMP0]])
; CHECK-NEXT: br label [[FOR_HEADER:%.*]]
; CHECK: for.header:
; CHECK-NEXT: call void @bar(i32 [[TMP0]])
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE:%.*]]
; CHECK: for.body.for.body_crit_edge:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 1
; CHECK-NEXT: [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE]])
; CHECK-NEXT: br label [[FOR_BODY_1:%.*]]
; CHECK: for.end:
; CHECK-NEXT: ret void
; CHECK: for.body.1:
; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_1:%.*]]
; CHECK: for.body.for.body_crit_edge.1:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2
; CHECK-NEXT: [[DOTPRE_1:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_1]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE_1]])
; CHECK-NEXT: br label [[FOR_BODY_2:%.*]]
; CHECK: for.body.2:
; CHECK-NEXT: br label [[FOR_BODY_FOR_BODY_CRIT_EDGE_2:%.*]]
; CHECK: for.body.for.body_crit_edge.2:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 3
; CHECK-NEXT: [[DOTPRE_2:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT_2]], align 4
; CHECK-NEXT: call void @bar(i32 [[DOTPRE_2]])
; CHECK-NEXT: br label [[FOR_BODY_3:%.*]]
; CHECK: for.body.3:
; CHECK-NEXT: br i1 false, label [[FOR_BODY_FOR_BODY_CRIT_EDGE_3:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body.for.body_crit_edge.3:
; CHECK-NEXT: [[ARRAYIDX_PHI_TRANS_INSERT_3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 4
; CHECK-NEXT: unreachable
;
; Preheader: loads A[0] and passes it to @bar before entering the loop.
entry:
%0 = load i32, i32* %A, align 4
call void @bar(i32 %0)
br label %for.header

; Loop header: ends in an unconditional branch, so it is not an exiting block.
; %1 carries the value loaded on the previous trip (or A[0] on entry).
for.header:
%1 = phi i32 [ %0, %entry ], [ %.pre, %for.body.for.body_crit_edge ]
%i = phi i64 [ 0, %entry ], [ %inc, %for.body.for.body_crit_edge ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %i
call void @bar(i32 %1)
br label %for.body

; The single exiting block: continues while %inc < 4, otherwise leaves the
; loop to for.end.
for.body:
%inc = add nsw i64 %i, 1
%cmp = icmp slt i64 %inc, 4
br i1 %cmp, label %for.body.for.body_crit_edge, label %for.end

; Latch: loads A[%inc] for the next trip, then branches unconditionally back
; to the header.
for.body.for.body_crit_edge:
%arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %A, i64 %inc
%.pre = load i32, i32* %arrayidx.phi.trans.insert, align 4
br label %for.header

; Unique loop exit.
for.end:
ret void
}

declare void @bar(i32)

0 comments on commit 1bc73b0

Please sign in to comment.