From 4c11b5268ca295b156159bed9d520fcb85fcf068 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 26 Jun 2019 09:16:57 +0000 Subject: [PATCH] [LoopUnroll] Add support for loops with exiting headers and uncond latches. This patch generalizes the UnrollLoop utility to support loops that exit from the header instead of the latch. Usually, LoopRotate would take care of most of those cases, but in some cases (e.g. -Oz), LoopRotate does not kick in. Codesize impact looks relatively neutral on ARM64 with -Oz + LTO. Program master patch diff External/S.../CFP2006/447.dealII/447.dealII 629060.00 627676.00 -0.2% External/SPEC/CINT2000/176.gcc/176.gcc 1245916.00 1244932.00 -0.1% MultiSourc...Prolangs-C/simulator/simulator 86100.00 86156.00 0.1% MultiSourc...arks/Rodinia/backprop/backprop 66212.00 66252.00 0.1% MultiSourc...chmarks/Prolangs-C++/life/life 67276.00 67312.00 0.1% MultiSourc...s/Prolangs-C/compiler/compiler 69824.00 69788.00 -0.1% MultiSourc...Prolangs-C/assembler/assembler 86672.00 86696.00 0.0% Reviewers: efriedma, vsk, paquette Reviewed By: paquette Differential Revision: https://reviews.llvm.org/D61962 llvm-svn: 364398 --- llvm/lib/Transforms/Utils/LoopUnroll.cpp | 230 +++++++++++---- .../scev-expander-reuse-unroll.ll | 6 +- .../partially-unroll-unconditional-latch.ll | 65 ++++ llvm/test/Transforms/LoopUnroll/runtime-li.ll | 7 +- llvm/test/Transforms/LoopUnroll/unloop.ll | 2 +- .../LoopUnroll/unroll-unconditional-latch.ll | 277 ++++++++++++++++++ 6 files changed, 523 insertions(+), 64 deletions(-) create mode 100644 llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll create mode 100644 llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll diff --git a/llvm/lib/Transforms/Utils/LoopUnroll.cpp b/llvm/lib/Transforms/Utils/LoopUnroll.cpp index d0908b594c931..1fc6aa4377bc3 100644 --- a/llvm/lib/Transforms/Utils/LoopUnroll.cpp +++ b/llvm/lib/Transforms/Utils/LoopUnroll.cpp @@ -44,6 +44,8 @@ using namespace llvm; // TODO: Should these 
be here or in LoopUnroll? STATISTIC(NumCompletelyUnrolled, "Number of loops completely unrolled"); STATISTIC(NumUnrolled, "Number of loops unrolled (completely or otherwise)"); +STATISTIC(NumUnrolledWithHeader, "Number of loops unrolled without a " + "conditional latch (completely or otherwise)"); static cl::opt UnrollRuntimeEpilog("unroll-runtime-epilog", cl::init(false), cl::Hidden, @@ -295,28 +297,46 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, return LoopUnrollResult::Unmodified; } - // The current loop unroll pass can only unroll loops with a single latch + // The current loop unroll pass can unroll loops with a single latch or header // that's a conditional branch exiting the loop. // FIXME: The implementation can be extended to work with more complicated // cases, e.g. loops with multiple latches. BasicBlock *Header = L->getHeader(); + BranchInst *HeaderBI = dyn_cast(Header->getTerminator()); BranchInst *BI = dyn_cast(LatchBlock->getTerminator()); - if (!BI || BI->isUnconditional()) { - // The loop-rotate pass can be helpful to avoid this in many cases. + // FIXME: Support loops without conditional latch and multiple exiting blocks. + if (!BI || + (BI->isUnconditional() && (!HeaderBI || HeaderBI->isUnconditional() || + L->getExitingBlock() != Header))) { + LLVM_DEBUG(dbgs() << " Can't unroll; loop not terminated by a conditional " + "branch in the latch or header.\n"); + return LoopUnrollResult::Unmodified; + } + + auto CheckLatchSuccessors = [&](unsigned S1, unsigned S2) { + return BI->isConditional() && BI->getSuccessor(S1) == Header && + !L->contains(BI->getSuccessor(S2)); + }; + + // If we have a conditional latch, it must exit the loop. 
+ if (BI && BI->isConditional() && !CheckLatchSuccessors(0, 1) && + !CheckLatchSuccessors(1, 0)) { LLVM_DEBUG( - dbgs() - << " Can't unroll; loop not terminated by a conditional branch.\n"); + dbgs() << "Can't unroll; a conditional latch must exit the loop"); return LoopUnrollResult::Unmodified; } - auto CheckSuccessors = [&](unsigned S1, unsigned S2) { - return BI->getSuccessor(S1) == Header && !L->contains(BI->getSuccessor(S2)); + auto CheckHeaderSuccessors = [&](unsigned S1, unsigned S2) { + return HeaderBI && HeaderBI->isConditional() && + L->contains(HeaderBI->getSuccessor(S1)) && + !L->contains(HeaderBI->getSuccessor(S2)); }; - if (!CheckSuccessors(0, 1) && !CheckSuccessors(1, 0)) { - LLVM_DEBUG(dbgs() << "Can't unroll; only loops with one conditional latch" - " exiting the loop can be unrolled\n"); + // If we do not have a conditional latch, the header must exit the loop. + if (BI && !BI->isConditional() && HeaderBI && HeaderBI->isConditional() && + !CheckHeaderSuccessors(0, 1) && !CheckHeaderSuccessors(1, 0)) { + LLVM_DEBUG(dbgs() << "Can't unroll; conditional header must exit the loop"); return LoopUnrollResult::Unmodified; } @@ -503,8 +523,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, SE->forgetTopmostLoop(L); } - bool ContinueOnTrue = L->contains(BI->getSuccessor(0)); - BasicBlock *LoopExit = BI->getSuccessor(ContinueOnTrue); + bool ContinueOnTrue; + bool LatchIsExiting = BI->isConditional(); + BasicBlock *LoopExit = nullptr; + if (LatchIsExiting) { + ContinueOnTrue = L->contains(BI->getSuccessor(0)); + LoopExit = BI->getSuccessor(ContinueOnTrue); + } else { + NumUnrolledWithHeader++; + ContinueOnTrue = L->contains(HeaderBI->getSuccessor(0)); + LoopExit = HeaderBI->getSuccessor(ContinueOnTrue); + } // For the first iteration of the loop, we should use the precloned values for // PHI nodes. Insert associations now. 
@@ -514,11 +543,23 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, OrigPHINode.push_back(cast(I)); } - std::vector Headers; - std::vector Latches; + std::vector Headers; + std::vector HeaderSucc; + std::vector Latches; Headers.push_back(Header); Latches.push_back(LatchBlock); + if (!LatchIsExiting) { + auto *Term = cast(Header->getTerminator()); + if (Term->isUnconditional() || L->contains(Term->getSuccessor(0))) { + assert(L->contains(Term->getSuccessor(0))); + HeaderSucc.push_back(Term->getSuccessor(0)); + } else { + assert(L->contains(Term->getSuccessor(1))); + HeaderSucc.push_back(Term->getSuccessor(1)); + } + } + // The current on-the-fly SSA update requires blocks to be processed in // reverse postorder so that LastValueMap contains the correct value at each // exit. @@ -608,6 +649,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, if (*BB == LatchBlock) Latches.push_back(New); + // Keep track of the successor of the new header in the current iteration. + for (auto *Pred : predecessors(*BB)) + if (Pred == Header) { + HeaderSucc.push_back(New); + break; + } + NewBlocks.push_back(New); UnrolledLoopBlocks.push_back(New); @@ -657,41 +705,11 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } } - // Now that all the basic blocks for the unrolled iterations are in place, - // set up the branches to connect them. - for (unsigned i = 0, e = Latches.size(); i != e; ++i) { - // The original branch was replicated in each unrolled iteration. - BranchInst *Term = cast(Latches[i]->getTerminator()); - - // The branch destination. - unsigned j = (i + 1) % e; - BasicBlock *Dest = Headers[j]; - bool NeedConditional = true; - - if (RuntimeTripCount && j != 0) { - NeedConditional = false; - } - - // For a complete unroll, make the last iteration end with a branch - // to the exit block. 
- if (CompletelyUnroll) { - if (j == 0) - Dest = LoopExit; - // If using trip count upper bound to completely unroll, we need to keep - // the conditional branch except the last one because the loop may exit - // after any iteration. - assert(NeedConditional && - "NeedCondition cannot be modified by both complete " - "unrolling and runtime unrolling"); - NeedConditional = - (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0)); - } else if (j != BreakoutTrip && - (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) { - // If we know the trip count or a multiple of it, we can safely use an - // unconditional branch for some iterations. - NeedConditional = false; - } - + auto setDest = [LoopExit, ContinueOnTrue](BasicBlock *Src, BasicBlock *Dest, + ArrayRef NextBlocks, + BasicBlock *CurrentHeader, + bool NeedConditional) { + auto *Term = cast(Src->getTerminator()); if (NeedConditional) { // Update the conditional branch's successor for the following // iteration. @@ -699,9 +717,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } else { // Remove phi operands at this loop exit if (Dest != LoopExit) { - BasicBlock *BB = Latches[i]; - for (BasicBlock *Succ: successors(BB)) { - if (Succ == Headers[i]) + BasicBlock *BB = Src; + for (BasicBlock *Succ : successors(BB)) { + if (Succ == CurrentHeader) continue; for (PHINode &Phi : Succ->phis()) Phi.removeIncomingValue(BB, false); @@ -711,6 +729,90 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, BranchInst::Create(Dest, Term); Term->eraseFromParent(); } + }; + + // Now that all the basic blocks for the unrolled iterations are in place, + // set up the branches to connect them. + if (LatchIsExiting) { + // Set up latches to branch to the new header in the unrolled iterations or + // the loop exit for the last latch in a fully unrolled loop. + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The branch destination. 
+ unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + bool NeedConditional = true; + + if (RuntimeTripCount && j != 0) { + NeedConditional = false; + } + + // For a complete unroll, make the last iteration end with a branch + // to the exit block. + if (CompletelyUnroll) { + if (j == 0) + Dest = LoopExit; + // If using trip count upper bound to completely unroll, we need to keep + // the conditional branch except the last one because the loop may exit + // after any iteration. + assert(NeedConditional && + "NeedCondition cannot be modified by both complete " + "unrolling and runtime unrolling"); + NeedConditional = + (ULO.PreserveCondBr && j && !(ULO.PreserveOnlyFirst && i != 0)); + } else if (j != BreakoutTrip && + (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) { + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. + NeedConditional = false; + } + + setDest(Latches[i], Dest, Headers, Headers[i], NeedConditional); + } + } else { + // Setup headers to branch to their new successors in the unrolled + // iterations. + for (unsigned i = 0, e = Headers.size(); i != e; ++i) { + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = HeaderSucc[i]; + bool NeedConditional = true; + + if (RuntimeTripCount && j != 0) + NeedConditional = false; + + if (CompletelyUnroll) + // We cannot drop the conditional branch for the last condition, as we + // may have to execute the loop body depending on the condition. + NeedConditional = j == 0 || ULO.PreserveCondBr; + else if (j != BreakoutTrip && + (ULO.TripMultiple == 0 || j % ULO.TripMultiple != 0)) + // If we know the trip count or a multiple of it, we can safely use an + // unconditional branch for some iterations. 
+ NeedConditional = false; + + setDest(Headers[i], Dest, Headers, Headers[i], NeedConditional); + } + + // Set up latches to branch to the new header in the unrolled iterations or + // the loop exit for the last latch in a fully unrolled loop. + + for (unsigned i = 0, e = Latches.size(); i != e; ++i) { + // The original branch was replicated in each unrolled iteration. + BranchInst *Term = cast(Latches[i]->getTerminator()); + + // The branch destination. + unsigned j = (i + 1) % e; + BasicBlock *Dest = Headers[j]; + + // When completely unrolling, the last latch becomes unreachable. + if (CompletelyUnroll && j == 0) + new UnreachableInst(Term->getContext(), Term); + else + // Replace the conditional branch with an unconditional one. + BranchInst::Create(Dest, Term); + + Term->eraseFromParent(); + } } // Update dominators of blocks we might reach through exits. @@ -727,7 +829,9 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, ChildrenToUpdate.push_back(ChildBB); } BasicBlock *NewIDom; - if (BB == LatchBlock) { + BasicBlock *&TermBlock = LatchIsExiting ? LatchBlock : Header; + auto &TermBlocks = LatchIsExiting ? Latches : Headers; + if (BB == TermBlock) { // The latch is special because we emit unconditional branches in // some cases where the original loop contained a conditional branch. // Since the latch is always at the bottom of the loop, if the latch @@ -735,11 +839,13 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, // must also be a latch. Specifically, the dominator is the first // latch which ends in a conditional branch, or the last latch if // there is no such latch. - NewIDom = Latches.back(); - for (BasicBlock *IterLatch : Latches) { - Instruction *Term = IterLatch->getTerminator(); + // For loops exiting from the header, we limit the supported loops + // to have a single exiting block. 
+ NewIDom = TermBlocks.back(); + for (BasicBlock *Iter : TermBlocks) { + Instruction *Term = Iter->getTerminator(); if (isa(Term) && cast(Term)->isConditional()) { - NewIDom = IterLatch; + NewIDom = Iter; break; } } @@ -756,13 +862,17 @@ LoopUnrollResult llvm::UnrollLoop(Loop *L, UnrollLoopOptions ULO, LoopInfo *LI, } assert(!DT || !UnrollVerifyDomtree || - DT->verify(DominatorTree::VerificationLevel::Fast)); + DT->verify(DominatorTree::VerificationLevel::Fast)); DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); // Merge adjacent basic blocks, if possible. for (BasicBlock *Latch : Latches) { - BranchInst *Term = cast(Latch->getTerminator()); - if (Term->isUnconditional()) { + BranchInst *Term = dyn_cast(Latch->getTerminator()); + assert((Term || + (CompletelyUnroll && !LatchIsExiting && Latch == Latches.back())) && + "Need a branch as terminator, except when fully unrolling with " + "unconditional latch"); + if (Term && Term->isUnconditional()) { BasicBlock *Dest = Term->getSuccessor(0); BasicBlock *Fold = Dest->getUniquePredecessor(); if (MergeBlockIntoPredecessor(Dest, &DTU, LI)) { diff --git a/llvm/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll b/llvm/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll index e74b004c6f8d0..d50d550327195 100644 --- a/llvm/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll +++ b/llvm/test/Analysis/ScalarEvolution/scev-expander-reuse-unroll.ll @@ -1,7 +1,11 @@ ; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-count=2 -verify-scev-maps -S | FileCheck %s ; Check SCEV expansion uses existing value when unrolling an inner loop with runtime trip count in a loop nest. +; The outer loop gets unrolled twice, so we see 2 selects in the outer loop blocks. 
; CHECK-LABEL: @foo( +; CHECK-LABEL: for.body.loopexit: +; CHECK: select +; CHECK-LABEL: for.body: ; CHECK: select ; CHECK-NOT: select ; CHECK: ret @@ -14,7 +18,7 @@ for.body: ; preds = %for.body5, %for.bod %xfL.addr.033 = phi i32 [ %xfL, %entry ], [ %add, %for.body5 ] %add = add nsw i32 %xfL.addr.033, %scaleL %shr = ashr i32 %add, 16 - %cmp.i = icmp slt i32 0, %shr + %cmp.i = icmp slt i32 10, %shr %.sroa.speculated = select i1 %cmp.i, i32 0, i32 %shr %cmp425 = icmp slt i32 0, %.sroa.speculated br i1 %cmp425, label %for.body5.preheader, label %for.end diff --git a/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll new file mode 100644 index 0000000000000..dc13642d85105 --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/partially-unroll-unconditional-latch.ll @@ -0,0 +1,65 @@ +; RUN: opt -loop-unroll -unroll-allow-partial -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s + +@table = internal unnamed_addr global [344 x i32] zeroinitializer, align 16 + +define i32 @test_partial_unroll_with_breakout_at_iter0() { +; CHECK-LABEL: define i32 @test_partial_unroll_with_breakout_at_iter0() { +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %for.header + +; CHECK-LABEL: for.header: ; preds = %for.latch.3, %entry +; CHECK-NEXT: %red = phi i32 [ 0, %entry ], [ %red.next.3, %for.latch.3 ] +; CHECK-NEXT: %iv = phi i64 [ 0, %entry ], [ %iv.next.3, %for.latch.3 ] +; CHECK-NEXT: %red.next = add nuw nsw i32 10, %red +; CHECK-NEXT: %iv.next = add nuw nsw i64 %iv, 2 +; CHECK-NEXT: %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next +; CHECK-NEXT: store i32 %red.next, i32* %ptr, align 4 +; CHECK-NEXT: br label %for.latch + +; CHECK-LABEL: for.latch: ; preds = %for.header +; CHECK-NEXT: %red.next.1 = add nuw nsw i32 10, %red.next +; CHECK-NEXT: %iv.next.1 = add nuw nsw i64 %iv.next, 2 +; CHECK-NEXT: %ptr.1 = getelementptr inbounds [344 x 
i32], [344 x i32]* @table, i64 0, i64 %iv.next.1 +; CHECK-NEXT: store i32 %red.next.1, i32* %ptr.1, align 4 +; CHECK-NEXT: br label %for.latch.1 + +; CHECK-LABEL: exit: ; preds = %for.latch.2 +; CHECK-NEXT: ret i32 0 + +; CHECK-LABEL: for.latch.1: ; preds = %for.latch +; CHECK-NEXT: %red.next.2 = add nuw nsw i32 10, %red.next.1 +; CHECK-NEXT: %iv.next.2 = add nuw nsw i64 %iv.next.1, 2 +; CHECK-NEXT: %ptr.2 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.2 +; CHECK-NEXT: store i32 %red.next.2, i32* %ptr.2, align 4 +; CHECK-NEXT: br label %for.latch.2 + +; CHECK-LABEL: for.latch.2: ; preds = %for.latch.1 +; CHECK-NEXT: %red.next.3 = add nuw nsw i32 10, %red.next.2 +; CHECK-NEXT: %iv.next.3 = add nuw nsw i64 %iv.next.2, 2 +; CHECK-NEXT: %ptr.3 = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next.3 +; CHECK-NEXT: store i32 %red.next.3, i32* %ptr.3, align 4 +; CHECK-NEXT: %exitcond.1.i.3 = icmp eq i64 %iv.next.3, 344 +; CHECK-NEXT: br i1 %exitcond.1.i.3, label %exit, label %for.latch.3 + +; CHECK-LABEL: for.latch.3: ; preds = %for.latch.2 +; CHECK-NEXT: br label %for.header +; +entry: + br label %for.header + +for.header: ; preds = %for.body28.i.for.body28.i_crit_edge, %for.body.i + %red = phi i32 [ 0, %entry ], [ %red.next, %for.latch ] + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.latch ] + %red.next = add i32 10, %red + %iv.next = add nuw nsw i64 %iv, 2 + %ptr = getelementptr inbounds [344 x i32], [344 x i32]* @table, i64 0, i64 %iv.next + store i32 %red.next, i32* %ptr, align 4 + %exitcond.1.i = icmp eq i64 %iv.next, 344 + br i1 %exitcond.1.i, label %exit, label %for.latch + +for.latch: ; preds = %for.header + br label %for.header + +exit: + ret i32 0 +} diff --git a/llvm/test/Transforms/LoopUnroll/runtime-li.ll b/llvm/test/Transforms/LoopUnroll/runtime-li.ll index 5494c8e9da7dd..a4a9b92028916 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-li.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-li.ll @@ 
-17,12 +17,15 @@ header.outer: ; preds = %latch.outer, %entry br label %header.inner header.inner: ; preds = %latch.inner, %header.outer + %tmp5 = load i64, i64* %q1, align 8 + %tmp6 = icmp eq double* %p, %arg + br i1 undef, label %exiting.inner, label %latch.outer + +exiting.inner: ; preds = %latch.inner, %header.outer br i1 undef, label %latch.inner, label %latch.outer latch.inner: ; preds = %header.inner - %tmp5 = load i64, i64* %q1, align 8 store i64 %tmp5, i64* %q2, align 8 - %tmp6 = icmp eq double* %p, %arg br label %header.inner latch.outer: ; preds = %header.inner diff --git a/llvm/test/Transforms/LoopUnroll/unloop.ll b/llvm/test/Transforms/LoopUnroll/unloop.ll index 899ccbf2b2e6b..36b79818ff397 100644 --- a/llvm/test/Transforms/LoopUnroll/unloop.ll +++ b/llvm/test/Transforms/LoopUnroll/unloop.ll @@ -387,7 +387,7 @@ for.end94: ; Check that the loop backedge is removed from the middle loop 1699, ; but not the inner loop 1676. ; CHECK: while.body1694: -; CHECK: br label %while.cond1676 +; CHECK: unreachable ; CHECK: while.end1699: ; CHECK: br label %sw.default1711 define void @removeSubloopBlocks() nounwind { diff --git a/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll b/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll new file mode 100644 index 0000000000000..da0d9ec4e8e4b --- /dev/null +++ b/llvm/test/Transforms/LoopUnroll/unroll-unconditional-latch.ll @@ -0,0 +1,277 @@ +; RUN: opt -loop-unroll -S %s -verify-loop-info -verify-dom-info -verify-loop-lcssa | FileCheck %s + +%struct.spam = type { double, double, double, double, double, double, double } + +define void @test2(i32* %arg) { +; CHECK-LABEL: void @test2 +; CHECK-NEXT: entry: +; CHECK-NEXT: br label %for.header + +; CHECK-LABEL: for.header: ; preds = %entry +; CHECK-NEXT: store i32 0, i32* %arg, align 4 +; CHECK-NEXT: br label %for.latch + +; CHECK-LABEL: for.latch: ; preds = %for.header +; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 1 +; CHECK-NEXT: 
store i32 0, i32* %ptr.1, align 4 +; CHECK-NEXT: br label %for.latch.1 + +; CHECK-LABEL: if.end.loopexit: ; preds = %for.latch.2 +; CHECK-NEXT: ret void + +; CHECK-LABEL: for.latch.1: ; preds = %for.latch +; CHECK-NEXT: %ptr.2 = getelementptr inbounds i32, i32* %arg, i64 2 +; CHECK-NEXT: store i32 0, i32* %ptr.2, align 4 +; CHECK-NEXT: br label %for.latch.2 + +; CHECK-LABEL: for.latch.2: ; preds = %for.latch.1 +; CHECK-NEXT: %ptr.3 = getelementptr inbounds i32, i32* %arg, i64 3 +; CHECK-NEXT: store i32 0, i32* %ptr.3, align 4 +; CHECK-NEXT: br i1 true, label %if.end.loopexit, label %for.latch.3 + +; CHECK-LABEL: for.latch.3: ; preds = %for.latch.2 +; CHECK-NEXT: unreachable + +entry: + br label %for.header + +for.header: ; preds = %for.latch, %entry + %indvars.iv800 = phi i64 [ 0, %entry ], [ %indvars.iv.next801, %for.latch ] + %ptr = getelementptr inbounds i32, i32* %arg, i64 %indvars.iv800 + store i32 0, i32* %ptr, align 4 + %indvars.iv.next801 = add nuw nsw i64 %indvars.iv800, 1 + %exitcond802 = icmp eq i64 %indvars.iv.next801, 4 + br i1 %exitcond802, label %if.end.loopexit, label %for.latch + +for.latch: ; preds = %for.header + br label %for.header + +if.end.loopexit: ; preds = %for.header + ret void +} + +define double @test_with_lcssa(double %arg1, double* %arg2) { +; CHECK-LABEL: define double @test_with_lcssa( +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %loop.header + +; CHECK-LABEL: loop.header: ; preds = %entry +; CHECK-NEXT: %res = fsub double %arg1, 3.000000e+00 +; CHECK-NEXT: br label %loop.latch + +; CHECK-LABEL: loop.latch: ; preds = %loop.header +; CHECK-NEXT: %ptr = getelementptr inbounds double, double* %arg2, i64 1 +; CHECK-NEXT: %lv = load double, double* %ptr, align 8 +; CHECK-NEXT: %res.1 = fsub double %lv, %res +; CHECK-NEXT: br i1 true, label %loop.exit, label %loop.latch.1 + +; CHECK-LABEL: loop.exit: ; preds = %loop.latch +; CHECK-NEXT: %res.lcssa = phi double [ %res.1, %loop.latch ] +; CHECK-NEXT: ret double %res.lcssa + +; 
CHECK-LABEL: loop.latch.1: ; preds = %loop.latch +; CHECK-NEXT: %ptr.1 = getelementptr inbounds double, double* %arg2, i64 2 +; CHECK-NEXT: unreachable + +entry: + br label %loop.header + +loop.header: ; preds = %entry, %loop.latch + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ] + %d1 = phi double [ %arg1, %entry ], [ %lv, %loop.latch ] + %d2 = phi double [ 3.0, %entry ], [ %res, %loop.latch ] + %res = fsub double %d1, %d2 + %iv.next = add nuw nsw i64 %iv, 1 + %cond = icmp eq i64 %iv.next, 2 + br i1 %cond, label %loop.exit, label %loop.latch + +loop.latch: ; preds = %bb366 + %ptr = getelementptr inbounds double, double* %arg2, i64 %iv.next + %lv = load double, double* %ptr, align 8 + br label %loop.header + +loop.exit: ; preds = %bb366 + %res.lcssa = phi double [ %res, %loop.header ] + ret double %res.lcssa +} + +; We unroll the outer loop and need to preserve LI for the inner loop. +define void @test_with_nested_loop(i32* %arg) { +; CHECK-LABEL: void @test_with_nested_loop +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %outer.header + +; CHECK-DAG: outer.header: ; preds = %entry +; CHECK-NEXT: br label %inner.body.preheader + +; CHECK-DAG: inner.body.preheader: ; preds = %outer.header +; CHECK-NEXT: br label %inner.body + +; CHECK-LABEL: inner.body: ; preds = %inner.body.preheader, %inner.body +; CHECK-NEXT: %j.iv = phi i64 [ %j.iv.next, %inner.body ], [ 0, %inner.body.preheader ] +; CHECK-NEXT: %ptr = getelementptr inbounds i32, i32* %arg, i64 %j.iv +; CHECK-NEXT: store i32 0, i32* %ptr, align 4 +; CHECK-NEXT: %j.iv.next = add nuw nsw i64 %j.iv, 1 +; CHECK-NEXT: %inner.cond = icmp eq i64 %j.iv.next, 40000 +; CHECK-NEXT: br i1 %inner.cond, label %outer.latch, label %inner.body + +; CHECK-LABEL: outer.latch: ; preds = %inner.body +; CHECK-NEXT: br label %inner.body.preheader.1 + +; CHECK-LABEL: exit: ; preds = %outer.latch.1 +; CHECK-NEXT: ret void + +; CHECK-LABEL: inner.body.preheader.1: ; preds = %outer.latch +; CHECK-NEXT: br label %inner.body.1 + +; 
CHECK-LABEL: inner.body.1: ; preds = %inner.body.1, %inner.body.preheader.1 +; CHECK-NEXT: %j.iv.1 = phi i64 [ %j.iv.next.1, %inner.body.1 ], [ 0, %inner.body.preheader.1 ] +; CHECK-NEXT: %idx.1 = add i64 1, %j.iv.1 +; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 %idx.1 +; CHECK-NEXT: store i32 0, i32* %ptr.1, align 4 +; CHECK-NEXT: %j.iv.next.1 = add nuw nsw i64 %j.iv.1, 1 +; CHECK-NEXT: %inner.cond.1 = icmp eq i64 %j.iv.next.1, 40000 +; CHECK-NEXT: br i1 %inner.cond.1, label %outer.latch.1, label %inner.body.1 + +; CHECK-LABEL: outer.latch.1: ; preds = %inner.body.1 +; CHECK-NEXT: br i1 true, label %exit, label %inner.body.preheader.2 + +; CHECK-LABEL: inner.body.preheader.2: ; preds = %outer.latch.1 +; CHECK-NEXT: br label %inner.body.2 + +; CHECK-LABEL: inner.body.2: ; preds = %inner.body.2, %inner.body.preheader.2 +; CHECK-NEXT: %j.iv.2 = phi i64 [ %j.iv.next.2, %inner.body.2 ], [ 0, %inner.body.preheader.2 ] +; CHECK-NEXT: %idx.2 = add i64 2, %j.iv.2 +; CHECK-NEXT: %ptr.2 = getelementptr inbounds i32, i32* %arg, i64 %idx.2 +; CHECK-NEXT: store i32 0, i32* %ptr.2, align 4 +; CHECK-NEXT: %j.iv.next.2 = add nuw nsw i64 %j.iv.2, 1 +; CHECK-NEXT: %inner.cond.2 = icmp eq i64 %j.iv.next.2, 40000 +; CHECK-NEXT: br i1 %inner.cond.2, label %outer.latch.2, label %inner.body.2 + +; CHECK-LABEL: outer.latch.2: ; preds = %inner.body.2 +; CHECK-NEXT: unreachable +; +entry: + br label %outer.header + +outer.header: ; preds = %outer.latch, %entry + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %outer.cond = icmp eq i64 %outer.iv, 2 + br i1 %outer.cond, label %exit, label %inner.body + +inner.body: + %j.iv = phi i64 [ 0, %outer.header ], [ %j.iv.next, %inner.body ] + %idx = add i64 %outer.iv, %j.iv + %ptr = getelementptr inbounds i32, i32* %arg, i64 %idx + store i32 0, i32* %ptr, align 4 + %j.iv.next = add nuw nsw i64 %j.iv, 1 + %inner.cond = icmp eq i64 %j.iv.next, 40000 + br i1 
%inner.cond, label %outer.latch, label %inner.body + +outer.latch: ; preds = %inner.body + br label %outer.header + +exit: ; preds = %outer.header + ret void +} + +; We unroll the inner loop and need to preserve LI for the outer loop. +define void @test_with_nested_loop_unroll_inner(i32* %arg) { +; CHECK-LABEL: define void @test_with_nested_loop_unroll_inner( +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %outer.header + +; CHECK-LABEL: outer.header: ; preds = %inner.body, %entry +; CHECK-NEXT: %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %inner.body ] +; CHECK-NEXT: %outer.iv.next = add nuw nsw i64 %outer.iv, 1 +; CHECK-NEXT: %outer.cond = icmp eq i64 %outer.iv, 40000 +; CHECK-NEXT: br i1 %outer.cond, label %exit, label %inner.body.preheader + +; CHECK-LABEL: inner.body.preheader: ; preds = %outer.header +; CHECK-NEXT: br label %inner.body + +; CHECK-LABEL: inner.body: ; preds = %inner.body.preheader +; CHECK-NEXT: %ptr = getelementptr inbounds i32, i32* %arg, i64 %outer.iv +; CHECK-NEXT: store i32 0, i32* %ptr, align 4 +; CHECK-NEXT: %idx.1 = add i64 %outer.iv, 1 +; CHECK-NEXT: %ptr.1 = getelementptr inbounds i32, i32* %arg, i64 %idx.1 +; CHECK-NEXT: store i32 0, i32* %ptr.1, align 4 +; CHECK-NEXT: br label %outer.header + +; CHECK-LABEL: exit: ; preds = %outer.header +; CHECK-NEXT: ret void +; +entry: + br label %outer.header + +outer.header: ; preds = %outer.latch, %entry + %outer.iv = phi i64 [ 0, %entry ], [ %outer.iv.next, %outer.latch ] + %outer.iv.next = add nuw nsw i64 %outer.iv, 1 + %outer.cond = icmp eq i64 %outer.iv, 40000 + br i1 %outer.cond, label %exit, label %inner.body + +inner.body: + %j.iv = phi i64 [ 0, %outer.header ], [ %j.iv.next, %inner.body ] + %idx = add i64 %outer.iv, %j.iv + %ptr = getelementptr inbounds i32, i32* %arg, i64 %idx + store i32 0, i32* %ptr, align 4 + %j.iv.next = add nuw nsw i64 %j.iv, 1 + %inner.cond = icmp eq i64 %j.iv.next, 2 + br i1 %inner.cond, label %outer.latch, label %inner.body + +outer.latch: ; preds = 
%inner.body + br label %outer.header + +exit: ; preds = %outer.header + ret void +} + + + +; Check that we do not crash for headers with non-branch instructions, e.g. +; switch. We do not unroll in those cases. +define void @test_switchinst_in_header() { +; CHECK-LABEL: define void @test_switchinst_in_header() { +; CHECK-LABEL: entry: +; CHECK-NEXT: br label %while.header + +; CHECK-LABEL: while.header: ; preds = %while.latch, %entry +; CHECK-NEXT: switch i32 undef, label %exit [ +; CHECK-NEXT: i32 11, label %while.body1 +; CHECK-NEXT: i32 5, label %while.body2 +; CHECK-NEXT: ] + +; CHECK-LABEL: while.body1: ; preds = %while.header +; CHECK-NEXT: unreachable + +; CHECK-LABEL: while.body2: ; preds = %while.header +; CHECK-NEXT: br label %while.latch + +; CHECK-LABEL: while.latch: ; preds = %while.body2 +; CHECK-NEXT: br label %while.header + +; CHECK-LABEL: exit: ; preds = %while.header +; CHECK-NEXT: ret void +; +entry: + br label %while.header + +while.header: ; preds = %while.latch, %entry + switch i32 undef, label %exit [ + i32 11, label %while.body1 + i32 5, label %while.body2 + ] + +while.body1: ; preds = %while.header + unreachable + +while.body2: ; preds = %while.header + br label %while.latch + +while.latch: ; preds = %while.body2 + br label %while.header + +exit: ; preds = %while.header + ret void +}