433 changes: 276 additions & 157 deletions llvm/lib/Target/WebAssembly/WebAssemblyFixIrreducibleControlFlow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,45 @@
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements a pass that transforms irreducible control flow
/// into reducible control flow. Irreducible control flow means multiple-entry
/// This file implements a pass that transforms irreducible control flow into
/// reducible control flow. Irreducible control flow means multiple-entry
/// loops; they appear as CFG cycles that are not recorded in MachineLoopInfo
/// due to being unnatural.
///
/// Note that LLVM has a generic pass that lowers irreducible control flow, but
/// it linearizes control flow, turning diamonds into two triangles, which is
/// both unnecessary and undesirable for WebAssembly.
///
/// TODO: The transformation implemented here handles all irreducible control
/// flow, without exponential code-size expansion, though it does so by creating
/// inefficient code in many cases. Ideally, we should add other
/// transformations, including code-duplicating cases, which can be more
/// efficient in common cases, and they can fall back to this conservative
/// implementation as needed.
/// The big picture: Ignoring natural loops (seeing them monolithically), we
/// find all the blocks which can return to themselves ("loopers"). Loopers
/// reachable from the non-loopers are loop entries: if there are 2 or more,
/// then we have irreducible control flow. We fix that as follows: a new block
/// is created that can dispatch to each of the loop entries, based on the
/// value of a label "helper" variable, and we replace direct branches to the
/// entries with assignments to the label variable and a branch to the dispatch
/// block. Then the dispatch block is the single entry in a new natural loop.
///
/// This is similar to what the Relooper [1] does, both identify looping code
/// that requires multiple entries, and resolve it in a similar way. In
/// Relooper terminology, we implement a Multiple shape in a Loop shape. Note
/// also that like the Relooper, we implement a "minimal" intervention: we only
/// use the "label" helper for the blocks we absolutely must and no others. We
/// also prioritize code size and do not perform node splitting (i.e. we don't
/// duplicate code in order to resolve irreducibility).
///
/// The difference between this code and the Relooper is that the Relooper also
/// generates ifs and loops and works in a recursive manner, knowing at each
/// point what the entries are, and recursively breaks down the problem. Here
/// we just want to resolve irreducible control flow, and we also want to use
/// as much LLVM infrastructure as possible. So we use the MachineLoopInfo to
/// identify natural loops, etc., and we start with the whole CFG and must
/// identify both the looping code and its entries.
///
/// [1] Alon Zakai. 2011. Emscripten: an LLVM-to-JavaScript compiler. In
/// Proceedings of the ACM international conference companion on Object oriented
/// programming systems languages and applications companion (SPLASH '11). ACM,
/// New York, NY, USA, 301-312. DOI=10.1145/2048147.2048224
/// http://doi.acm.org/10.1145/2048147.2048224
///
//===----------------------------------------------------------------------===//

Expand All @@ -45,142 +69,193 @@ using namespace llvm;

#define DEBUG_TYPE "wasm-fix-irreducible-control-flow"

namespace {
class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
StringRef getPassName() const override {
return "WebAssembly Fix Irreducible Control Flow";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool runOnMachineFunction(MachineFunction &MF) override;

bool VisitLoop(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop);

public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace

char WebAssemblyFixIrreducibleControlFlow::ID = 0;
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
"Removes irreducible control flow", false, false)

FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
return new WebAssemblyFixIrreducibleControlFlow();
}

namespace {

/// A utility for walking the blocks of a loop, handling a nested inner
/// loop as a monolithic conceptual block.
class MetaBlock {
MachineBasicBlock *Block;
SmallVector<MachineBasicBlock *, 2> Preds;
SmallVector<MachineBasicBlock *, 2> Succs;

class LoopFixer {
public:
explicit MetaBlock(MachineBasicBlock *MBB)
: Block(MBB), Preds(MBB->pred_begin(), MBB->pred_end()),
Succs(MBB->succ_begin(), MBB->succ_end()) {}

explicit MetaBlock(MachineLoop *Loop) : Block(Loop->getHeader()) {
Loop->getExitBlocks(Succs);
for (MachineBasicBlock *Pred : Block->predecessors())
if (!Loop->contains(Pred))
Preds.push_back(Pred);
LoopFixer(MachineFunction &MF, MachineLoopInfo &MLI, MachineLoop *Loop)
: MF(MF), MLI(MLI), Loop(Loop) {}

// Run the fixer on the given inputs. Returns whether changes were made.
bool run();

private:
MachineFunction &MF;
MachineLoopInfo &MLI;
MachineLoop *Loop;

MachineBasicBlock *Header;
SmallPtrSet<MachineBasicBlock *, 4> LoopBlocks;

using BlockSet = SmallPtrSet<MachineBasicBlock *, 4>;
DenseMap<MachineBasicBlock *, BlockSet> Reachable;

// The worklist contains pairs of recent additions, (a, b), where we just
// added a link a => b.
using BlockPair = std::pair<MachineBasicBlock *, MachineBasicBlock *>;
SmallVector<BlockPair, 4> WorkList;

// Get a canonical block to represent a block or a loop: the block, or if in
// an inner loop, the loop header, of it in an outer loop scope, we can
// ignore it. We need to call this on all blocks we work on.
MachineBasicBlock *canonicalize(MachineBasicBlock *MBB) {
MachineLoop *InnerLoop = MLI.getLoopFor(MBB);
if (InnerLoop == Loop) {
return MBB;
} else {
// This is either in an outer or an inner loop, and not in ours.
if (!LoopBlocks.count(MBB)) {
// It's in outer code, ignore it.
return nullptr;
}
assert(InnerLoop);
// It's in an inner loop, canonicalize it to the header of that loop.
return InnerLoop->getHeader();
}
}

MachineBasicBlock *getBlock() const { return Block; }

const SmallVectorImpl<MachineBasicBlock *> &predecessors() const {
return Preds;
}
const SmallVectorImpl<MachineBasicBlock *> &successors() const {
return Succs;
// For a successor we can additionally ignore it if it's a branch back to a
// natural loop top, as when we are in the scope of a loop, we just care
// about internal irreducibility, and can ignore the loop we are in. We need
// to call this on all blocks in a context where they are a successor.
MachineBasicBlock *canonicalizeSuccessor(MachineBasicBlock *MBB) {
if (Loop && MBB == Loop->getHeader()) {
// Ignore branches going to the loop's natural header.
return nullptr;
}
return canonicalize(MBB);
}

bool operator==(const MetaBlock &MBB) { return Block == MBB.Block; }
bool operator!=(const MetaBlock &MBB) { return Block != MBB.Block; }
// Potentially insert a new reachable edge, and if so, note it as further
// work.
void maybeInsert(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
assert(MBB == canonicalize(MBB));
assert(Succ);
// Succ may not be interesting as a sucessor.
Succ = canonicalizeSuccessor(Succ);
if (!Succ)
return;
if (Reachable[MBB].insert(Succ).second) {
// For there to be further work, it means that we have
// X => MBB => Succ
// for some other X, and in that case X => Succ would be a new edge for
// us to discover later. However, if we don't care about MBB as a
// successor, then we don't care about that anyhow.
if (canonicalizeSuccessor(MBB)) {
WorkList.emplace_back(MBB, Succ);
}
}
}
};

class SuccessorList final : public MetaBlock {
size_t Index;
size_t Num;
bool LoopFixer::run() {
Header = Loop ? Loop->getHeader() : &*MF.begin();

public:
explicit SuccessorList(MachineBasicBlock *MBB)
: MetaBlock(MBB), Index(0), Num(successors().size()) {}
// Identify all the blocks in this loop scope.
if (Loop) {
for (auto *MBB : Loop->getBlocks()) {
LoopBlocks.insert(MBB);
}
} else {
for (auto &MBB : MF) {
LoopBlocks.insert(&MBB);
}
}

explicit SuccessorList(MachineLoop *Loop)
: MetaBlock(Loop), Index(0), Num(successors().size()) {}
// Compute which (canonicalized) blocks each block can reach.

bool HasNext() const { return Index != Num; }
// Add all the initial work.
for (auto *MBB : LoopBlocks) {
MachineLoop *InnerLoop = MLI.getLoopFor(MBB);

MachineBasicBlock *Next() {
assert(HasNext());
return successors()[Index++];
if (InnerLoop == Loop) {
for (auto *Succ : MBB->successors()) {
maybeInsert(MBB, Succ);
}
} else {
// It can't be in an outer loop - we loop on LoopBlocks - and so it must
// be an inner loop.
assert(InnerLoop);
// Check if we are the canonical block for this loop.
if (canonicalize(MBB) != MBB) {
continue;
}
// The successors are those of the loop.
SmallVector<MachineBasicBlock *, 2> ExitBlocks;
InnerLoop->getExitBlocks(ExitBlocks);
for (auto *Succ : ExitBlocks) {
maybeInsert(MBB, Succ);
}
}
}
};

} // end anonymous namespace
// Do work until we are all done.
while (!WorkList.empty()) {
MachineBasicBlock *MBB;
MachineBasicBlock *Succ;
std::tie(MBB, Succ) = WorkList.pop_back_val();
// The worklist item is an edge we just added, so it must have valid blocks
// (and not something canonicalized to nullptr).
assert(MBB);
assert(Succ);
// The successor in that pair must also be a valid successor.
assert(MBB == canonicalizeSuccessor(MBB));
// We recently added MBB => Succ, and that means we may have enabled
// Pred => MBB => Succ. Check all the predecessors. Note that our loop here
// is correct for both a block and a block representing a loop, as the loop
// is natural and so the predecessors are all predecessors of the loop
// header, which is the block we have here.
for (auto *Pred : MBB->predecessors()) {
// Canonicalize, make sure it's relevant, and check it's not the same
// block (an update to the block itself doesn't help compute that same
// block).
Pred = canonicalize(Pred);
if (Pred && Pred != MBB) {
maybeInsert(Pred, Succ);
}
}
}

bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
MachineLoopInfo &MLI,
MachineLoop *Loop) {
MachineBasicBlock *Header = Loop ? Loop->getHeader() : &*MF.begin();
SetVector<MachineBasicBlock *> RewriteSuccs;

// DFS through Loop's body, looking for irreducible control flow. Loop is
// natural, and we stay in its body, and we treat any nested loops
// monolithically, so any cycles we encounter indicate irreducibility.
SmallPtrSet<MachineBasicBlock *, 8> OnStack;
SmallPtrSet<MachineBasicBlock *, 8> Visited;
SmallVector<SuccessorList, 4> LoopWorklist;
LoopWorklist.push_back(SuccessorList(Header));
OnStack.insert(Header);
Visited.insert(Header);
while (!LoopWorklist.empty()) {
SuccessorList &Top = LoopWorklist.back();
if (Top.HasNext()) {
MachineBasicBlock *Next = Top.Next();
if (Next == Header || (Loop && !Loop->contains(Next)))
continue;
if (LLVM_LIKELY(OnStack.insert(Next).second)) {
if (!Visited.insert(Next).second) {
OnStack.erase(Next);
continue;
}
MachineLoop *InnerLoop = MLI.getLoopFor(Next);
if (InnerLoop != Loop)
LoopWorklist.push_back(SuccessorList(InnerLoop));
else
LoopWorklist.push_back(SuccessorList(Next));
} else {
RewriteSuccs.insert(Top.getBlock());
// It's now trivial to identify the loopers.
SmallPtrSet<MachineBasicBlock *, 4> Loopers;
for (auto MBB : LoopBlocks) {
if (Reachable[MBB].count(MBB)) {
Loopers.insert(MBB);
}
}
// The header cannot be a looper. At the toplevel, LLVM does not allow the
// entry to be in a loop, and in a natural loop we should ignore the header.
assert(Loopers.count(Header) == 0);

// Find the entries, loopers reachable from non-loopers.
SmallPtrSet<MachineBasicBlock *, 4> Entries;
SmallVector<MachineBasicBlock *, 4> SortedEntries;
for (auto *Looper : Loopers) {
for (auto *Pred : Looper->predecessors()) {
Pred = canonicalize(Pred);
if (Pred && !Loopers.count(Pred)) {
Entries.insert(Looper);
SortedEntries.push_back(Looper);
break;
}
continue;
}
OnStack.erase(Top.getBlock());
LoopWorklist.pop_back();
}

// Most likely, we didn't find any irreducible control flow.
if (LLVM_LIKELY(RewriteSuccs.empty()))
// Check if we found irreducible control flow.
if (LLVM_LIKELY(Entries.size() <= 1))
return false;

LLVM_DEBUG(dbgs() << "Irreducible control flow detected!\n");

// Ok. We have irreducible control flow! Create a dispatch block which will
// contains a jump table to any block in the problematic set of blocks.
// Sort the entries to ensure a deterministic build.
llvm::sort(SortedEntries,
[&](const MachineBasicBlock *A, const MachineBasicBlock *B) {
auto ANum = A->getNumber();
auto BNum = B->getNumber();
assert(ANum != -1 && BNum != -1);
assert(ANum != BNum);
return ANum < BNum;
});

// Create a dispatch block which will contain a jump table to the entries.
MachineBasicBlock *Dispatch = MF.CreateMachineBasicBlock();
MF.insert(MF.end(), Dispatch);
MLI.changeLoopFor(Dispatch, Loop);
Expand All @@ -196,43 +271,43 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
unsigned Reg = MRI.createVirtualRegister(&WebAssembly::I32RegClass);
MIB.addReg(Reg);

// Collect all the blocks which need to have their successors rewritten,
// add the successors to the jump table, and remember their index.
// Compute the indices in the superheader, one for each bad block, and
// add them as successors.
DenseMap<MachineBasicBlock *, unsigned> Indices;
SmallVector<MachineBasicBlock *, 4> SuccWorklist(RewriteSuccs.begin(),
RewriteSuccs.end());
while (!SuccWorklist.empty()) {
MachineBasicBlock *MBB = SuccWorklist.pop_back_val();
for (auto *MBB : SortedEntries) {
auto Pair = Indices.insert(std::make_pair(MBB, 0));
if (!Pair.second)
if (!Pair.second) {
continue;
}

unsigned Index = MIB.getInstr()->getNumExplicitOperands() - 1;
LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << " has index " << Index
<< "\n");

Pair.first->second = Index;
for (auto Pred : MBB->predecessors())
RewriteSuccs.insert(Pred);

MIB.addMBB(MBB);
Dispatch->addSuccessor(MBB);
}

// Rewrite the problematic successors for every block that wants to reach the
// bad blocks. For simplicity, we just introduce a new block for every edge
// we need to rewrite. (Fancier things are possible.)

MetaBlock Meta(MBB);
for (auto *Succ : Meta.successors())
if (Succ != Header && (!Loop || Loop->contains(Succ)))
SuccWorklist.push_back(Succ);
SmallVector<MachineBasicBlock *, 4> AllPreds;
for (auto *MBB : SortedEntries) {
for (auto *Pred : MBB->predecessors()) {
if (Pred != Dispatch) {
AllPreds.push_back(Pred);
}
}
}

// Rewrite the problematic successors for every block in RewriteSuccs.
// For simplicity, we just introduce a new block for every edge we need to
// rewrite. Fancier things are possible.
for (MachineBasicBlock *MBB : RewriteSuccs) {
for (MachineBasicBlock *MBB : AllPreds) {
DenseMap<MachineBasicBlock *, MachineBasicBlock *> Map;
for (auto *Succ : MBB->successors()) {
if (!Indices.count(Succ))
if (!Entries.count(Succ)) {
continue;
}

// This is a successor we need to rewrite.
MachineBasicBlock *Split = MF.CreateMachineBasicBlock();
MF.insert(MBB->isLayoutSuccessor(Succ) ? MachineFunction::iterator(Succ)
: MF.end(),
Expand Down Expand Up @@ -266,6 +341,55 @@ bool WebAssemblyFixIrreducibleControlFlow::VisitLoop(MachineFunction &MF,
return true;
}

class WebAssemblyFixIrreducibleControlFlow final : public MachineFunctionPass {
StringRef getPassName() const override {
return "WebAssembly Fix Irreducible Control Flow";
}

void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
AU.addPreserved<MachineDominatorTree>();
AU.addRequired<MachineLoopInfo>();
AU.addPreserved<MachineLoopInfo>();
MachineFunctionPass::getAnalysisUsage(AU);
}

bool runOnMachineFunction(MachineFunction &MF) override;

bool runIteration(MachineFunction &MF, MachineLoopInfo &MLI) {
// Visit the function body, which is identified as a null loop.
if (LoopFixer(MF, MLI, nullptr).run()) {
return true;
}

// Visit all the loops.
SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
while (!Worklist.empty()) {
MachineLoop *Loop = Worklist.pop_back_val();
Worklist.append(Loop->begin(), Loop->end());
if (LoopFixer(MF, MLI, Loop).run()) {
return true;
}
}

return false;
}

public:
static char ID; // Pass identification, replacement for typeid
WebAssemblyFixIrreducibleControlFlow() : MachineFunctionPass(ID) {}
};
} // end anonymous namespace

char WebAssemblyFixIrreducibleControlFlow::ID = 0;
INITIALIZE_PASS(WebAssemblyFixIrreducibleControlFlow, DEBUG_TYPE,
"Removes irreducible control flow", false, false)

FunctionPass *llvm::createWebAssemblyFixIrreducibleControlFlow() {
return new WebAssemblyFixIrreducibleControlFlow();
}

bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
MachineFunction &MF) {
LLVM_DEBUG(dbgs() << "********** Fixing Irreducible Control Flow **********\n"
Expand All @@ -275,24 +399,19 @@ bool WebAssemblyFixIrreducibleControlFlow::runOnMachineFunction(
bool Changed = false;
auto &MLI = getAnalysis<MachineLoopInfo>();

// Visit the function body, which is identified as a null loop.
Changed |= VisitLoop(MF, MLI, nullptr);

// Visit all the loops.
SmallVector<MachineLoop *, 8> Worklist(MLI.begin(), MLI.end());
while (!Worklist.empty()) {
MachineLoop *CurLoop = Worklist.pop_back_val();
Worklist.append(CurLoop->begin(), CurLoop->end());
Changed |= VisitLoop(MF, MLI, CurLoop);
}

// If we made any changes, completely recompute everything.
if (LLVM_UNLIKELY(Changed)) {
LLVM_DEBUG(dbgs() << "Recomputing dominators and loops.\n");
// When we modify something, bail out and recompute MLI, then start again, as
// we create a new natural loop when we resolve irreducible control flow, and
// other loops may become nested in it, etc. In practice this is not an issue
// because irreducible control flow is rare, only very few cycles are needed
// here.
while (LLVM_UNLIKELY(runIteration(MF, MLI))) {
// We rewrote part of the function; recompute MLI and start again.
LLVM_DEBUG(dbgs() << "Recomputing loops.\n");
MF.getRegInfo().invalidateLiveness();
MF.RenumberBlocks();
getAnalysis<MachineDominatorTree>().runOnMachineFunction(MF);
MLI.runOnMachineFunction(MF);
Changed = true;
}

return Changed;
Expand Down
108 changes: 108 additions & 0 deletions llvm/test/CodeGen/WebAssembly/irreducible-cfg-exceptions.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers -enable-emscripten-cxx-exceptions | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

declare i32 @__gxx_personality_v0(...)

; Check an interesting case of complex control flow due to exceptions CFG rewriting.
; There should *not* be any irreducible control flow here.

; CHECK-LABEL: crashy:
; CHECK-NOT: br_table

; Function Attrs: minsize noinline optsize
define void @crashy() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
entry:
invoke void undef()
to label %invoke.cont unwind label %lpad

invoke.cont: ; preds = %entry
invoke void undef()
to label %invoke.cont4 unwind label %lpad3

invoke.cont4: ; preds = %invoke.cont
%call.i82 = invoke i8* undef()
to label %invoke.cont6 unwind label %lpad3

invoke.cont6: ; preds = %invoke.cont4
invoke void undef()
to label %invoke.cont13 unwind label %lpad12

invoke.cont13: ; preds = %invoke.cont6
br label %for.cond

for.cond: ; preds = %for.cond.backedge, %invoke.cont13
br i1 undef, label %exit2, label %land.lhs

land.lhs: ; preds = %for.cond
%call.i.i.i.i92 = invoke i32 undef()
to label %exit1 unwind label %lpad16.loopexit

exit1: ; preds = %land.lhs
br label %exit2

exit2: ; preds = %exit1, %for.cond
%call.i.i12.i.i93 = invoke i32 undef()
to label %exit3 unwind label %lpad16.loopexit

exit3: ; preds = %exit2
invoke void undef()
to label %invoke.cont23 unwind label %lpad22

invoke.cont23: ; preds = %exit3
invoke void undef()
to label %invoke.cont25 unwind label %lpad22

invoke.cont25: ; preds = %invoke.cont23
%call.i.i137 = invoke i32 undef()
to label %invoke.cont29 unwind label %lpad16.loopexit

lpad: ; preds = %entry
%0 = landingpad { i8*, i32 }
cleanup
unreachable

lpad3: ; preds = %invoke.cont4, %invoke.cont
%1 = landingpad { i8*, i32 }
cleanup
unreachable

lpad12: ; preds = %invoke.cont6
%2 = landingpad { i8*, i32 }
cleanup
resume { i8*, i32 } undef

lpad16.loopexit: ; preds = %if.then, %invoke.cont29, %invoke.cont25, %exit2, %land.lhs
%lpad.loopexit = landingpad { i8*, i32 }
cleanup
unreachable

lpad22: ; preds = %invoke.cont23, %exit3
%3 = landingpad { i8*, i32 }
cleanup
unreachable

invoke.cont29: ; preds = %invoke.cont25
invoke void undef()
to label %invoke.cont33 unwind label %lpad16.loopexit

invoke.cont33: ; preds = %invoke.cont29
br label %for.inc

for.inc: ; preds = %invoke.cont33
%cmp.i.i141 = icmp eq i8* undef, undef
br i1 %cmp.i.i141, label %if.then, label %if.end.i.i146

if.then: ; preds = %for.inc
%call.i.i148 = invoke i32 undef()
to label %for.cond.backedge unwind label %lpad16.loopexit

for.cond.backedge: ; preds = %if.end.i.i146, %if.then
br label %for.cond

if.end.i.i146: ; preds = %for.inc
call void undef()
br label %for.cond.backedge
}

63 changes: 63 additions & 0 deletions llvm/test/CodeGen/WebAssembly/irreducible-cfg-nested.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"


; Test an interesting pattern of nested irreducibility.
; Just check we resolve all the irreducibility here (if not we'd crash).

; CHECK-LABEL: tre_parse:

define void @tre_parse() {
entry:
br label %for.cond.outer

for.cond.outer: ; preds = %do.body14, %entry
br label %for.cond

for.cond: ; preds = %for.cond.backedge, %for.cond.outer
%nbranch.0 = phi i32* [ null, %for.cond.outer ], [ %call188, %for.cond.backedge ]
switch i8 undef, label %if.else [
i8 40, label %do.body14
i8 41, label %if.then63
]

do.body14: ; preds = %for.cond
br label %for.cond.outer

if.then63: ; preds = %for.cond
unreachable

if.else: ; preds = %for.cond
switch i8 undef, label %if.then84 [
i8 92, label %if.end101
i8 42, label %if.end101
]

if.then84: ; preds = %if.else
switch i8 undef, label %cleanup.thread [
i8 43, label %if.end101
i8 63, label %if.end101
i8 123, label %if.end101
]

if.end101: ; preds = %if.then84, %if.then84, %if.then84, %if.else, %if.else
unreachable

cleanup.thread: ; preds = %if.then84
%call188 = tail call i32* undef(i32* %nbranch.0)
switch i8 undef, label %for.cond.backedge [
i8 92, label %land.lhs.true208
i8 0, label %if.else252
]

land.lhs.true208: ; preds = %cleanup.thread
unreachable

for.cond.backedge: ; preds = %cleanup.thread
br label %for.cond

if.else252: ; preds = %cleanup.thread
unreachable
}
39 changes: 39 additions & 0 deletions llvm/test/CodeGen/WebAssembly/irreducible-cfg-nested2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-block-placement -wasm-disable-explicit-locals -wasm-keep-registers | FileCheck %s

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; Test an interesting pattern of nested irreducibility.
; Just check we resolve all the irreducibility here (if not we'd crash).

; CHECK-LABEL: func_2:

; Function Attrs: noinline nounwind optnone
define void @func_2() {
entry:
br i1 undef, label %lbl_937, label %if.else787

lbl_937: ; preds = %for.body978, %entry
br label %if.end965

if.else787: ; preds = %entry
br label %if.end965

if.end965: ; preds = %if.else787, %lbl_937
br label %for.cond967

for.cond967: ; preds = %for.end1035, %if.end965
br label %for.cond975

for.cond975: ; preds = %if.end984, %for.cond967
br i1 undef, label %for.body978, label %for.end1035

for.body978: ; preds = %for.cond975
br i1 undef, label %lbl_937, label %if.end984

if.end984: ; preds = %for.body978
br label %for.cond975

for.end1035: ; preds = %for.cond975
br label %for.cond967
}
129 changes: 127 additions & 2 deletions llvm/test/CodeGen/WebAssembly/irreducible-cfg.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ target triple = "wasm32-unknown-unknown"

; CHECK-LABEL: test0:
; CHECK: f64.load
; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}}
; CHECK: i32.const $[[REG:[^,]+]]=
; CHECK: br_table $[[REG]],
define void @test0(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
bb:
Expand Down Expand Up @@ -50,7 +50,7 @@ bb19:

; CHECK-LABEL: test1:
; CHECK: f64.load
; CHECK: i32.const $[[REG:[^,]+]]=, 0{{$}}
; CHECK: i32.const $[[REG:[^,]+]]=
; CHECK: br_table $[[REG]],
define void @test1(double* %arg, i32 %arg1, i32 %arg2, i32 %arg3) {
bb:
Expand Down Expand Up @@ -92,3 +92,128 @@ bb13:
bb19:
ret void
}

; A simple loop 2 blocks that are both entries.

; CHECK-LABEL: test2:
; CHECK: br_if
; CHECK: i32.const $[[REG:[^,]+]]=
; CHECK: br_table $[[REG]],
define internal i32 @test2(i32) noinline {
entry:
br label %A0

A0:
%a0a = tail call i32 @test2(i32 1)
%a0b = icmp eq i32 %a0a, 0
br i1 %a0b, label %A1, label %A2

A1:
%a1a = tail call i32 @test2(i32 2)
%a1b = icmp eq i32 %a1a, 0
br i1 %a1b, label %A1, label %A2

A2:
%a2a = tail call i32 @test2(i32 3)
%a2b = icmp eq i32 %a2a, 0
br i1 %a2b, label %A1, label %A2
}

; An interesting loop with inner loop and if-else structure too.

; CHECK-LABEL: test3:
; CHECK: br_if
define void @test3(i32 %ws) {
entry:
%ws.addr = alloca i32, align 4
store volatile i32 %ws, i32* %ws.addr, align 4
%0 = load volatile i32, i32* %ws.addr, align 4
%tobool = icmp ne i32 %0, 0
br i1 %tobool, label %if.then, label %if.end

if.then: ; preds = %entry
br label %wynn

if.end: ; preds = %entry
%1 = load volatile i32, i32* %ws.addr, align 4
%tobool1 = icmp ne i32 %1, 0
br i1 %tobool1, label %if.end9, label %if.then2

if.then2: ; preds = %if.end
br label %for.cond

for.cond: ; preds = %wynn, %if.then7, %if.then2
%2 = load volatile i32, i32* %ws.addr, align 4
%tobool3 = icmp ne i32 %2, 0
br i1 %tobool3, label %if.then4, label %if.end5

if.then4: ; preds = %for.cond
br label %if.end5

if.end5: ; preds = %if.then4, %for.cond
%3 = load volatile i32, i32* %ws.addr, align 4
%tobool6 = icmp ne i32 %3, 0
br i1 %tobool6, label %if.then7, label %if.end8

if.then7: ; preds = %if.end5
br label %for.cond

if.end8: ; preds = %if.end5
br label %wynn

wynn: ; preds = %if.end8, %if.then
br label %for.cond

if.end9: ; preds = %if.end
ret void
}

; Multi-level irreducibility, after reducing in the main scope we must then
; reduce in the inner loop that we just created.
; CHECK: br_table
; CHECK: br_table
define void @pi_next() {
entry:
br i1 undef, label %sw.bb5, label %return

sw.bb5: ; preds = %entry
br i1 undef, label %if.then.i49, label %if.else.i52

if.then.i49: ; preds = %sw.bb5
br label %for.inc197.i

if.else.i52: ; preds = %sw.bb5
br label %for.cond57.i

for.cond57.i: ; preds = %for.inc205.i, %if.else.i52
store i32 0, i32* undef, align 4
br label %for.cond65.i

for.cond65.i: ; preds = %for.inc201.i, %for.cond57.i
br i1 undef, label %for.body70.i, label %for.inc205.i

for.body70.i: ; preds = %for.cond65.i
br label %for.cond76.i

for.cond76.i: ; preds = %for.inc197.i, %for.body70.i
%0 = phi i32 [ %inc199.i, %for.inc197.i ], [ 0, %for.body70.i ]
%cmp81.i = icmp slt i32 %0, 0
br i1 %cmp81.i, label %for.body82.i, label %for.inc201.i

for.body82.i: ; preds = %for.cond76.i
br label %for.inc197.i

for.inc197.i: ; preds = %for.body82.i, %if.then.i49
%inc199.i = add nsw i32 undef, 1
br label %for.cond76.i

for.inc201.i: ; preds = %for.cond76.i
br label %for.cond65.i

for.inc205.i: ; preds = %for.cond65.i
br label %for.cond57.i

return: ; preds = %entry
ret void
}