329 changes: 237 additions & 92 deletions llvm/lib/Transforms/Scalar/SimpleLoopUnswitch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,12 +77,12 @@ static cl::opt<int>
/// which have the exact same opcode and finds all inputs which are loop
/// invariant. For some operations these can be re-associated and unswitched out
/// of the loop entirely.
static SmallVector<Value *, 4>
static TinyPtrVector<Value *>
collectHomogenousInstGraphLoopInvariants(Loop &L, Instruction &Root,
LoopInfo &LI) {
SmallVector<Value *, 4> Invariants;
assert(!L.isLoopInvariant(&Root) &&
"Only need to walk the graph if root itself is not invariant.");
TinyPtrVector<Value *> Invariants;

// Build a worklist and recurse through operators collecting invariants.
SmallVector<Instruction *, 4> Worklist;
Expand Down Expand Up @@ -150,6 +150,26 @@ static bool areLoopExitPHIsLoopInvariant(Loop &L, BasicBlock &ExitingBB,
llvm_unreachable("Basic blocks should never be empty!");
}

/// Emit, at the end of \p BB, a conditional branch that tests the combination
/// of all loop invariant values in \p Invariants.
///
/// The invariants are folded left-to-right into a single condition: with `or`
/// when \p Direction is true and with `and` when it is false. When
/// \p Direction is true the branch goes to \p UnswitchedSucc on a true
/// condition and to \p NormalSucc otherwise; when it is false the two
/// destinations are swapped.
///
/// \p Invariants must not be empty.
static void buildPartialUnswitchConditionalBranch(BasicBlock &BB,
                                                  ArrayRef<Value *> Invariants,
                                                  bool Direction,
                                                  BasicBlock &UnswitchedSucc,
                                                  BasicBlock &NormalSucc) {
  IRBuilder<> IRB(&BB);

  // Seed the condition with the first invariant and merge in the rest.
  Value *Cond = Invariants.front();
  for (Value *Inv : Invariants.drop_front())
    Cond = Direction ? IRB.CreateOr(Cond, Inv) : IRB.CreateAnd(Cond, Inv);

  // Pick which successor sits on the `true` edge based on the direction.
  BasicBlock *TrueDest = Direction ? &UnswitchedSucc : &NormalSucc;
  BasicBlock *FalseDest = Direction ? &NormalSucc : &UnswitchedSucc;
  IRB.CreateCondBr(Cond, TrueDest, FalseDest);
}

/// Rewrite the PHI nodes in an unswitched loop exit basic block.
///
/// Requires that the loop exit and unswitched basic block are the same, and
Expand Down Expand Up @@ -239,7 +259,7 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
LLVM_DEBUG(dbgs() << " Trying to unswitch branch: " << BI << "\n");

// The loop invariant values that we want to unswitch.
SmallVector<Value *, 4> Invariants;
TinyPtrVector<Value *> Invariants;

// When true, we're fully unswitching the branch rather than just unswitching
// some input conditions to the branch.
Expand Down Expand Up @@ -336,8 +356,6 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
} else {
// Only unswitching a subset of inputs to the condition, so we will need to
// build a new branch that merges the invariant inputs.
IRBuilder<> IRB(OldPH);
Value *Cond = Invariants.front();
if (ExitDirection)
assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::Or &&
Expand All @@ -346,17 +364,8 @@ static bool unswitchTrivialBranch(Loop &L, BranchInst &BI, DominatorTree &DT,
assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::And &&
"Must have an `and` of `i1`s for the condition!");
for (Value *Invariant :
make_range(std::next(Invariants.begin()), Invariants.end()))
if (ExitDirection)
Cond = IRB.CreateOr(Cond, Invariant);
else
Cond = IRB.CreateAnd(Cond, Invariant);

BasicBlock *Succs[2];
Succs[LoopExitSuccIdx] = UnswitchedBB;
Succs[1 - LoopExitSuccIdx] = NewPH;
IRB.CreateCondBr(Cond, Succs[0], Succs[1]);
buildPartialUnswitchConditionalBranch(*OldPH, Invariants, ExitDirection,
*UnswitchedBB, *NewPH);
}

// Rewrite the relevant PHI nodes.
Expand Down Expand Up @@ -1584,16 +1593,38 @@ void visitDomSubTree(DominatorTree &DT, BasicBlock *BB, CallableT Callable) {
/// Once unswitching has been performed it runs the provided callback to report
/// the new loops and no-longer valid loops to the caller.
static bool unswitchInvariantBranch(
Loop &L, BranchInst &BI, DominatorTree &DT, LoopInfo &LI,
AssumptionCache &AC,
Loop &L, BranchInst &BI, ArrayRef<Value *> Invariants, DominatorTree &DT,
LoopInfo &LI, AssumptionCache &AC,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
assert(BI.isConditional() && "Can only unswitch a conditional branch!");
assert(L.isLoopInvariant(BI.getCondition()) &&
"Can only unswitch an invariant branch condition!");

// Constant and BBs tracking the cloned and continuing successor.
const int ClonedSucc = 0;
auto *ParentBB = BI.getParent();

// We can only unswitch conditional branches with an invariant condition or
// combining invariant conditions with an instruction.
assert(BI.isConditional() && "Can only unswitch a conditional branch!");
bool FullUnswitch = BI.getCondition() == Invariants[0];
if (FullUnswitch)
assert(Invariants.size() == 1 &&
"Cannot have other invariants with full unswitching!");
else
assert(isa<Instruction>(BI.getCondition()) &&
"Partial unswitching requires an instruction as the condition!");

// Constant and BBs tracking the cloned and continuing successor. When we are
// unswitching the entire condition, this can just be trivially chosen to
// unswitch towards `true`. However, when we are unswitching a set of
// invariants combined with `and` or `or`, the combining operation determines
// the best direction to unswitch: we want to unswitch the direction that will
// collapse the branch.
bool Direction = true;
int ClonedSucc = 0;
if (!FullUnswitch) {
if (cast<Instruction>(BI.getCondition())->getOpcode() != Instruction::Or) {
assert(cast<Instruction>(BI.getCondition())->getOpcode() == Instruction::And &&
"Only `or` and `and` instructions can combine invariants being unswitched.");
Direction = false;
ClonedSucc = 1;
}
}
auto *UnswitchedSuccBB = BI.getSuccessor(ClonedSucc);
auto *ContinueSuccBB = BI.getSuccessor(1 - ClonedSucc);

Expand Down Expand Up @@ -1651,15 +1682,17 @@ static bool unswitchInvariantBranch(
return true;
});
}
// Similarly, if the edge we *are* cloning in the unswitch (the unswitched
// edge) dominates its target, we will end up with dead nodes in the original
// loop and its exits that will need to be deleted. Here, we just retain that
// the property holds and will compute the deleted set later.
// Similarly, if we are doing full unswitching and the edge we *are* cloning in
// the unswitch (the unswitched edge) dominates its target, we will end up with
// dead nodes in the original loop and its exits that will need to be deleted.
// Here, we just record whether that property holds and will compute the
// deleted set later.
bool DeleteUnswitchedSucc =
UnswitchedSuccBB->getUniquePredecessor() ||
llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
});
FullUnswitch &&
(UnswitchedSuccBB->getUniquePredecessor() ||
llvm::all_of(predecessors(UnswitchedSuccBB), [&](BasicBlock *PredBB) {
return PredBB == ParentBB || DT.dominates(UnswitchedSuccBB, PredBB);
}));

// Split the preheader, so that we know that there is a safe place to insert
// the conditional branch. We will change the preheader to have a conditional
Expand All @@ -1680,19 +1713,32 @@ static bool unswitchInvariantBranch(
L, LoopPH, SplitBB, ExitBlocks, ParentBB, UnswitchedSuccBB,
ContinueSuccBB, SkippedLoopAndExitBlocks, VMap, DTUpdates, AC, DT, LI);

// Remove the parent as a predecessor of the unswitched successor.
UnswitchedSuccBB->removePredecessor(ParentBB, /*DontDeleteUselessPHIs*/ true);

// Now splice the branch from the original loop and use it to select between
// the two loops.
// The stitching of the branched code back together depends on whether we're
// doing full unswitching or not with the exception that we always want to
// nuke the initial terminator placed in the split block.
SplitBB->getTerminator()->eraseFromParent();
SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
BI.setSuccessor(ClonedSucc, ClonedPH);
BI.setSuccessor(1 - ClonedSucc, LoopPH);

// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
BranchInst::Create(ContinueSuccBB, ParentBB);
if (FullUnswitch) {
// Remove the parent as a predecessor of the
// unswitched successor.
UnswitchedSuccBB->removePredecessor(ParentBB,
/*DontDeleteUselessPHIs*/ true);
DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});

// Now splice the branch from the original loop and use it to select between
// the two loops.
SplitBB->getInstList().splice(SplitBB->end(), ParentBB->getInstList(), BI);
BI.setSuccessor(ClonedSucc, ClonedPH);
BI.setSuccessor(1 - ClonedSucc, LoopPH);

// Create a new unconditional branch to the continuing block (as opposed to
// the one cloned).
BranchInst::Create(ContinueSuccBB, ParentBB);
} else {
// When doing a partial unswitch, we have to do a bit more work to build up
// the branch in the split block.
buildPartialUnswitchConditionalBranch(*SplitBB, Invariants, Direction,
*ClonedPH, *LoopPH);
}

// Before we update the dominator tree, collect the dead blocks if we're going
// to end up deleting the unswitched successor.
Expand All @@ -1717,10 +1763,9 @@ static bool unswitchInvariantBranch(
}
}

// Add the remaining edges to our updates and apply them to get an up-to-date
// Add the remaining edge to our updates and apply them to get an up-to-date
// dominator tree. Note that this will cause the dead blocks above to be
// unreachable and no longer in the dominator tree.
DTUpdates.push_back({DominatorTree::Delete, ParentBB, UnswitchedSuccBB});
DTUpdates.push_back({DominatorTree::Insert, SplitBB, ClonedPH});
DT.applyUpdates(DTUpdates);

Expand All @@ -1745,6 +1790,32 @@ static bool unswitchInvariantBranch(
// verification steps.
assert(DT.verify(DominatorTree::VerificationLevel::Fast));

// Now we want to replace all the uses of the invariants within both the
// original and cloned blocks. We do this here so that we can use the now
// updated dominator tree to identify which side the users are on.
ConstantInt *UnswitchedReplacement =
Direction ? ConstantInt::getTrue(BI.getContext())
: ConstantInt::getFalse(BI.getContext());
ConstantInt *ContinueReplacement =
Direction ? ConstantInt::getFalse(BI.getContext())
: ConstantInt::getTrue(BI.getContext());
for (Value *Invariant : Invariants)
for (auto UI = Invariant->use_begin(), UE = Invariant->use_end();
UI != UE;) {
// Grab the use and walk past it so we can clobber it in the use list.
Use *U = &*UI++;
Instruction *UserI = dyn_cast<Instruction>(U->getUser());
if (!UserI)
continue;

// Replace it with the 'continue' side if in the main loop body, and the
// unswitched if in the cloned blocks.
if (DT.dominates(LoopPH, UserI->getParent()))
U->set(ContinueReplacement);
else if (DT.dominates(ClonedPH, UserI->getParent()))
U->set(UnswitchedReplacement);
}

// We can change which blocks are exit blocks of all the cloned sibling
// loops, the current loop, and any parent loops which shared exit blocks
// with the current loop. As a consequence, we need to re-form LCSSA for
Expand Down Expand Up @@ -1854,47 +1925,41 @@ computeDomSubtreeCost(DomTreeNode &N,
return Cost;
}

/// Unswitch control flow predicated on loop invariant conditions.
///
/// This first hoists all branches or switches which are trivial (IE, do not
/// require duplicating any part of the loop) out of the loop body. It then
/// looks at other loop invariant control flows and tries to unswitch those as
/// well by cloning the loop if the result is small enough.
static bool
unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
TargetTransformInfo &TTI, bool NonTrivial,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
assert(L.isRecursivelyLCSSAForm(DT, LI) &&
"Loops must be in LCSSA form before unswitching.");
static bool unswitchBestCondition(
Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
TargetTransformInfo &TTI,
function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
// Collect all invariant conditions within this loop (as opposed to an inner
// loop which would be handled when visiting that inner loop).
SmallVector<std::pair<TerminatorInst *, TinyPtrVector<Value *>>, 4>
UnswitchCandidates;
for (auto *BB : L.blocks()) {
if (LI.getLoopFor(BB) != &L)
continue;

// Must be in loop simplified form: we need a preheader and dedicated exits.
if (!L.isLoopSimplifyForm())
return false;
auto *BI = dyn_cast<BranchInst>(BB->getTerminator());
// FIXME: Handle switches here!
if (!BI || !BI->isConditional() || isa<Constant>(BI->getCondition()) ||
BI->getSuccessor(0) == BI->getSuccessor(1))
continue;

// Try trivial unswitch first before loop over other basic blocks in the loop.
if (unswitchAllTrivialConditions(L, DT, LI)) {
// If we unswitched successfully we will want to clean up the loop before
// processing it further so just mark it as unswitched and return.
UnswitchCB(/*CurrentLoopValid*/ true, {});
return true;
}
if (L.isLoopInvariant(BI->getCondition())) {
UnswitchCandidates.push_back({BI, {BI->getCondition()}});
continue;
}

// If we're not doing non-trivial unswitching, we're done. We accept a
// parameter and also check a local flag that can be used for testing and
// debugging.
if (!NonTrivial && !EnableNonTrivialUnswitch)
return false;
Instruction &CondI = *cast<Instruction>(BI->getCondition());
if (CondI.getOpcode() != Instruction::And &&
CondI.getOpcode() != Instruction::Or)
continue;

// Collect all remaining invariant branch conditions within this loop (as
// opposed to an inner loop which would be handled when visiting that inner
// loop).
SmallVector<TerminatorInst *, 4> UnswitchCandidates;
for (auto *BB : L.blocks())
if (LI.getLoopFor(BB) == &L)
if (auto *BI = dyn_cast<BranchInst>(BB->getTerminator()))
if (BI->isConditional() && L.isLoopInvariant(BI->getCondition()) &&
BI->getSuccessor(0) != BI->getSuccessor(1))
UnswitchCandidates.push_back(BI);
TinyPtrVector<Value *> Invariants =
collectHomogenousInstGraphLoopInvariants(L, CondI, LI);
if (Invariants.empty())
continue;

UnswitchCandidates.push_back({BI, std::move(Invariants)});
}

// If we didn't find any candidates, we're done.
if (UnswitchCandidates.empty())
Expand Down Expand Up @@ -1968,8 +2033,8 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
SmallDenseMap<DomTreeNode *, int, 4> DTCostMap;
// Given a terminator which might be unswitched, computes the non-duplicated
// cost for that terminator.
auto ComputeUnswitchedCost = [&](TerminatorInst *TI) {
BasicBlock &BB = *TI->getParent();
auto ComputeUnswitchedCost = [&](TerminatorInst &TI, bool FullUnswitch) {
BasicBlock &BB = *TI.getParent();
SmallPtrSet<BasicBlock *, 4> Visited;

int Cost = LoopCost;
Expand All @@ -1978,6 +2043,26 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
if (!Visited.insert(SuccBB).second)
continue;

// If this is a partial unswitch candidate, then it must be a conditional
// branch with a condition of either `or` or `and`. In that case, one of
// the successors is necessarily duplicated, so don't even try to remove
// its cost.
if (!FullUnswitch) {
auto &BI = cast<BranchInst>(TI);
if (cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::And) {
if (SuccBB == BI.getSuccessor(1))
continue;
} else {
assert(cast<Instruction>(BI.getCondition())->getOpcode() ==
Instruction::Or &&
"Only `and` and `or` conditions can result in a partial "
"unswitch!");
if (SuccBB == BI.getSuccessor(0))
continue;
}
}

// This successor's domtree will not need to be duplicated after
// unswitching if the edge to the successor dominates it (and thus the
// entire tree). This essentially means there is no other path into this
Expand All @@ -2001,13 +2086,20 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
};
TerminatorInst *BestUnswitchTI = nullptr;
int BestUnswitchCost;
for (TerminatorInst *CandidateTI : UnswitchCandidates) {
int CandidateCost = ComputeUnswitchedCost(CandidateTI);
ArrayRef<Value *> BestUnswitchInvariants;
for (auto &TerminatorAndInvariants : UnswitchCandidates) {
TerminatorInst &TI = *TerminatorAndInvariants.first;
ArrayRef<Value *> Invariants = TerminatorAndInvariants.second;
BranchInst *BI = dyn_cast<BranchInst>(&TI);
int CandidateCost =
ComputeUnswitchedCost(TI, /*FullUnswitch*/ Invariants.size() == 1 && BI &&
Invariants[0] == BI->getCondition());
LLVM_DEBUG(dbgs() << " Computed cost of " << CandidateCost
<< " for unswitch candidate: " << *CandidateTI << "\n");
<< " for unswitch candidate: " << TI << "\n");
if (!BestUnswitchTI || CandidateCost < BestUnswitchCost) {
BestUnswitchTI = CandidateTI;
BestUnswitchTI = &TI;
BestUnswitchCost = CandidateCost;
BestUnswitchInvariants = Invariants;
}
}

Expand All @@ -2017,13 +2109,66 @@ unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
return false;
}

auto *UnswitchBI = dyn_cast<BranchInst>(BestUnswitchTI);
if (!UnswitchBI) {
// FIXME: Add support for unswitching a switch here!
LLVM_DEBUG(dbgs() << "Cannot unswitch anything but a branch!\n");
return false;
}

LLVM_DEBUG(dbgs() << " Trying to unswitch non-trivial (cost = "
<< BestUnswitchCost << ") branch: " << *BestUnswitchTI
<< "\n");
return unswitchInvariantBranch(L, cast<BranchInst>(*BestUnswitchTI), DT, LI,
<< BestUnswitchCost << ") branch: " << *UnswitchBI << "\n");
return unswitchInvariantBranch(L, *UnswitchBI, BestUnswitchInvariants, DT, LI,
AC, UnswitchCB);
}

/// Unswitch control flow predicated on loop invariant conditions.
///
/// This first hoists all branches or switches which are trivial (IE, do not
/// require duplicating any part of the loop) out of the loop body. It then
/// looks at other loop invariant control flows and tries to unswitch those as
/// well by cloning the loop if the result is small enough.
///
/// \param NonTrivial  Requests non-trivial unswitching; the
///                    EnableNonTrivialUnswitch flag enables it as well.
/// \param UnswitchCB  Called with (CurrentLoopValid = true, no new loops)
///                    after a successful trivial unswitch, and forwarded to
///                    unswitchBestCondition for the non-trivial case.
/// \returns true if either the trivial or the non-trivial unswitching step
///          reported that it changed the loop, false otherwise.
static bool
unswitchLoop(Loop &L, DominatorTree &DT, LoopInfo &LI, AssumptionCache &AC,
             TargetTransformInfo &TTI, bool NonTrivial,
             function_ref<void(bool, ArrayRef<Loop *>)> UnswitchCB) {
  assert(L.isRecursivelyLCSSAForm(DT, LI) &&
         "Loops must be in LCSSA form before unswitching.");

  // Must be in loop simplified form: we need a preheader and dedicated exits.
  if (!L.isLoopSimplifyForm())
    return false;

  // Try trivial unswitch first before loop over other basic blocks in the loop.
  if (unswitchAllTrivialConditions(L, DT, LI)) {
    // If we unswitched successfully we will want to clean up the loop before
    // processing it further so just mark it as unswitched and return.
    UnswitchCB(/*CurrentLoopValid*/ true, {});
    return true;
  }

  // If we're not doing non-trivial unswitching, we're done. We accept a
  // parameter and also check a local flag that can be used for testing and
  // debugging.
  if (!NonTrivial && !EnableNonTrivialUnswitch)
    return false;

  // For non-trivial unswitching, because it often creates new loops, we rely on
  // the pass manager to iterate on the loops rather than trying to immediately
  // reach a fixed point. There is no substantial advantage to iterating
  // internally, and if any of the new loops are simplified enough to contain
  // trivial unswitching we want to prefer those.

  // Try to unswitch the best invariant condition. Its result is the final
  // answer: there are no other opportunities to unswitch after this point, so
  // no separate "changed" flag needs to be tracked.
  return unswitchBestCondition(L, DT, LI, AC, TTI, UnswitchCB);
}

PreservedAnalyses SimpleLoopUnswitchPass::run(Loop &L, LoopAnalysisManager &AM,
LoopStandardAnalysisResults &AR,
LPMUpdater &U) {
Expand Down
163 changes: 162 additions & 1 deletion llvm/test/Transforms/SimpleLoopUnswitch/nontrivial-unswitch.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2662,4 +2662,165 @@ loop_exit:
ret i32 0
; CHECK: loop_exit:
; CHECK-NEXT: ret
}
}

; Non-trivial partial loop unswitching of an invariant input to an 'or'.
;
; The branch in 'loop_begin' tests `or i1 %v1, %cond`; %cond is a function
; argument while %v1 is loaded inside the loop. The expected output branches on
; %cond alone in 'entry'. In the cloned '.us' loop %cond is replaced with
; 'true' and the branch in 'loop_begin.us' becomes an unconditional jump to
; 'loop_a.us'. In the original loop %cond is replaced with 'false' and the
; conditional branch on the 'or' is kept.
define i32 @test25(i1* %ptr, i1 %cond) {
; CHECK-LABEL: @test25(
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 %cond, label %entry.split.us, label %entry.split

loop_begin:
%v1 = load i1, i1* %ptr
%cond_or = or i1 %v1, %cond
br i1 %cond_or, label %loop_a, label %loop_b

loop_a:
call void @a()
br label %latch
; The 'loop_a' unswitched loop.
;
; CHECK: entry.split.us:
; CHECK-NEXT: br label %loop_begin.us
;
; CHECK: loop_begin.us:
; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr
; CHECK-NEXT: %[[OR_US:.*]] = or i1 %[[V1_US]], true
; CHECK-NEXT: br label %loop_a.us
;
; CHECK: loop_a.us:
; CHECK-NEXT: call void @a()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr
; CHECK-NEXT: br i1 %[[V2_US]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK: loop_exit.split.us:
; CHECK-NEXT: br label %loop_exit

loop_b:
call void @b()
br label %latch
; The original loop.
;
; CHECK: entry.split:
; CHECK-NEXT: br label %loop_begin
;
; CHECK: loop_begin:
; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr
; CHECK-NEXT: %[[OR:.*]] = or i1 %[[V1]], false
; CHECK-NEXT: br i1 %[[OR]], label %loop_a, label %loop_b
;
; CHECK: loop_a:
; CHECK-NEXT: call void @a()
; CHECK-NEXT: br label %latch
;
; CHECK: loop_b:
; CHECK-NEXT: call void @b()
; CHECK-NEXT: br label %latch

latch:
%v2 = load i1, i1* %ptr
br i1 %v2, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr
; CHECK-NEXT: br i1 %[[V2]], label %loop_begin, label %loop_exit.split

loop_exit:
ret i32 0
; CHECK: loop_exit.split:
; CHECK-NEXT: br label %loop_exit
;
; CHECK: loop_exit:
; CHECK-NEXT: ret
}

; Non-trivial partial loop unswitching of multiple invariant inputs to an `and`
; chain.
;
; The branch condition %cond_and3 is built from a chain of `and`s whose
; argument inputs are %cond1 and %cond3; %cond2 only feeds the nested `or`
; (%cond_or1) and, per the expected output, is left untouched in both loops.
; 'entry' is expected to branch on `and i1 %cond3, %cond1`. When that value is
; false, control enters the cloned '.us' loop, where %cond1 and %cond3 are
; replaced with 'false' and the branch in 'loop_begin.us' becomes an
; unconditional jump to 'loop_b.us'. When it is true, control stays in the
; original loop, where %cond1 and %cond3 are replaced with 'true' and the
; conditional branch is kept.
define i32 @test26(i1* %ptr1, i1* %ptr2, i1* %ptr3, i1 %cond1, i1 %cond2, i1 %cond3) {
; CHECK-LABEL: @test26(
entry:
br label %loop_begin
; CHECK-NEXT: entry:
; CHECK-NEXT: %[[INV_AND:.*]] = and i1 %cond3, %cond1
; CHECK-NEXT: br i1 %[[INV_AND]], label %entry.split, label %entry.split.us

loop_begin:
%v1 = load i1, i1* %ptr1
%v2 = load i1, i1* %ptr2
%cond_and1 = and i1 %v1, %cond1
%cond_or1 = or i1 %v2, %cond2
%cond_and2 = and i1 %cond_and1, %cond_or1
%cond_and3 = and i1 %cond_and2, %cond3
br i1 %cond_and3, label %loop_a, label %loop_b
; The 'loop_b' unswitched loop.
;
; CHECK: entry.split.us:
; CHECK-NEXT: br label %loop_begin.us
;
; CHECK: loop_begin.us:
; CHECK-NEXT: %[[V1_US:.*]] = load i1, i1* %ptr1
; CHECK-NEXT: %[[V2_US:.*]] = load i1, i1* %ptr2
; CHECK-NEXT: %[[AND1_US:.*]] = and i1 %[[V1_US]], false
; CHECK-NEXT: %[[OR1_US:.*]] = or i1 %[[V2_US]], %cond2
; CHECK-NEXT: %[[AND2_US:.*]] = and i1 %[[AND1_US]], %[[OR1_US]]
; CHECK-NEXT: %[[AND3_US:.*]] = and i1 %[[AND2_US]], false
; CHECK-NEXT: br label %loop_b.us
;
; CHECK: loop_b.us:
; CHECK-NEXT: call void @b()
; CHECK-NEXT: br label %latch.us
;
; CHECK: latch.us:
; CHECK-NEXT: %[[V3_US:.*]] = load i1, i1* %ptr3
; CHECK-NEXT: br i1 %[[V3_US]], label %loop_begin.us, label %loop_exit.split.us
;
; CHECK: loop_exit.split.us:
; CHECK-NEXT: br label %loop_exit

; The original loop.
;
; CHECK: entry.split:
; CHECK-NEXT: br label %loop_begin
;
; CHECK: loop_begin:
; CHECK-NEXT: %[[V1:.*]] = load i1, i1* %ptr1
; CHECK-NEXT: %[[V2:.*]] = load i1, i1* %ptr2
; CHECK-NEXT: %[[AND1:.*]] = and i1 %[[V1]], true
; CHECK-NEXT: %[[OR1:.*]] = or i1 %[[V2]], %cond2
; CHECK-NEXT: %[[AND2:.*]] = and i1 %[[AND1]], %[[OR1]]
; CHECK-NEXT: %[[AND3:.*]] = and i1 %[[AND2]], true
; CHECK-NEXT: br i1 %[[AND3]], label %loop_a, label %loop_b

loop_a:
call void @a()
br label %latch
; CHECK: loop_a:
; CHECK-NEXT: call void @a()
; CHECK-NEXT: br label %latch

loop_b:
call void @b()
br label %latch
; CHECK: loop_b:
; CHECK-NEXT: call void @b()
; CHECK-NEXT: br label %latch

latch:
%v3 = load i1, i1* %ptr3
br i1 %v3, label %loop_begin, label %loop_exit
; CHECK: latch:
; CHECK-NEXT: %[[V3:.*]] = load i1, i1* %ptr3
; CHECK-NEXT: br i1 %[[V3]], label %loop_begin, label %loop_exit.split

loop_exit:
ret i32 0
; CHECK: loop_exit.split:
; CHECK-NEXT: br label %loop_exit
;
; CHECK: loop_exit:
; CHECK-NEXT: ret
}