diff --git a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h index 1ac0f053f67b5..e6dde450b7df9 100644 --- a/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/llvm/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -702,6 +702,9 @@ BasicBlock *CreateControlFlowHub( // successors void InvertBranch(BranchInst *PBI, IRBuilderBase &Builder); +// Check whether the function only has simple terminator: +// br/brcond/unreachable/ret +bool hasOnlySimpleTerminator(const Function &F); } // end namespace llvm #endif // LLVM_TRANSFORMS_UTILS_BASICBLOCKUTILS_H diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 9ad841c3c8a54..9bc3ba161c9eb 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -46,6 +46,7 @@ #include "llvm/Support/Casting.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -114,8 +115,6 @@ void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); - // This is a cluster of orthogonal Transforms - AU.addPreservedID(LowerSwitchID); FunctionPass::getAnalysisUsage(AU); AU.addRequired(); @@ -192,6 +191,8 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT, const PostDominatorTree &PDT, const UniformityInfo &UA) { + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + if (PDT.root_size() == 0 || (PDT.root_size() == 1 && !isa(PDT.getRoot()->getTerminator()))) diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index fac5695c7beaf..7d96a3478858b 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -42,6 +42,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/SSAUpdater.h" #include @@ -353,7 +354,6 @@ class StructurizeCFGLegacyPass : public RegionPass { void getAnalysisUsage(AnalysisUsage &AU) const override { if (SkipUniformRegions) AU.addRequired(); - AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addPreserved(); @@ -368,7 +368,6 @@ char StructurizeCFGLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg", "Structurize the CFG", false, false) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg", @@ -1173,6 +1172,8 @@ bool StructurizeCFG::run(Region *R, DominatorTree *DT) { this->DT = DT; Func = R->getEntry()->getParent(); + assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); + ParentRegion = R; orderNodes(); diff --git a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp index 389aab39dec34..05ff4efb7b944 100644 --- a/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/llvm/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -2076,3 +2076,13 @@ void llvm::InvertBranch(BranchInst *PBI, IRBuilderBase &Builder) { PBI->setCondition(NewCond); PBI->swapSuccessors(); } + +bool llvm::hasOnlySimpleTerminator(const Function &F) { + for (auto &BB : F) { + auto *Term = BB.getTerminator(); + if (!(isa(Term) || isa(Term) || + isa(Term))) + return false; + } + return true; +} diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index dda236167363d..11e24d0585be4 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -87,10 +87,8 @@ struct FixIrreducible : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addRequired(); - AU.addPreservedID(LowerSwitchID); AU.addPreserved(); AU.addPreserved(); } @@ -106,7 +104,6 @@ FunctionPass *llvm::createFixIrreduciblePass() { return new FixIrreducible(); } INITIALIZE_PASS_BEGIN(FixIrreducible, "fix-irreducible", "Convert irreducible control-flow into natural loops", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(FixIrreducible, "fix-irreducible", @@ -317,6 +314,8 @@ static bool FixIrreducibleImpl(Function &F, LoopInfo &LI, DominatorTree &DT) { LLVM_DEBUG(dbgs() << "===== Fix irreducible control-flow in function: " << F.getName() << "\n"); + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + bool Changed = false; SmallVector WorkList; diff --git a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp index 2b706858cbedd..e5adbe5133cf4 100644 --- a/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp +++ b/llvm/lib/Transforms/Utils/UnifyFunctionExitNodes.cpp @@ -39,8 +39,6 @@ void UnifyFunctionExitNodesLegacyPass::getAnalysisUsage( AnalysisUsage &AU) const { // We preserve the non-critical-edgeness property AU.addPreservedID(BreakCriticalEdgesID); - // This is a cluster of orthogonal Transforms - AU.addPreservedID(LowerSwitchID); } namespace { diff --git a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp index 8c781f59ff5a4..2f37f7f972cbb 100644 --- a/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp +++ b/llvm/lib/Transforms/Utils/UnifyLoopExits.cpp @@ -44,10 +44,8 @@ struct UnifyLoopExitsLegacyPass : public FunctionPass { } void getAnalysisUsage(AnalysisUsage &AU) const override { - AU.addRequiredID(LowerSwitchID); AU.addRequired(); AU.addRequired(); - AU.addPreservedID(LowerSwitchID); AU.addPreserved(); AU.addPreserved(); } @@ -65,7 +63,6 @@ FunctionPass *llvm::createUnifyLoopExitsPass() { INITIALIZE_PASS_BEGIN(UnifyLoopExitsLegacyPass, "unify-loop-exits", "Fixup each natural loop to have a single exit block", false /* Only looks at CFG */, false /* Analysis Pass */) -INITIALIZE_PASS_DEPENDENCY(LowerSwitchLegacyPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) INITIALIZE_PASS_END(UnifyLoopExitsLegacyPass, "unify-loop-exits", @@ -234,6 +231,8 @@ bool UnifyLoopExitsLegacyPass::runOnFunction(Function &F) { auto &LI = getAnalysis().getLoopInfo(); auto &DT = getAnalysis().getDomTree(); + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + return runImpl(LI, DT); } diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index b939c8d2e339d..bf2f6e983b529 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -60,8 +60,6 @@ ; GCN-O0-NEXT: Cycle Info Analysis ; GCN-O0-NEXT: Uniformity Analysis ; GCN-O0-NEXT: Unify divergent function exit nodes -; GCN-O0-NEXT: Lazy Value Information Analysis -; GCN-O0-NEXT: Lower SwitchInst's to branches ; GCN-O0-NEXT: Dominator Tree Construction ; GCN-O0-NEXT: Natural Loop Information ; GCN-O0-NEXT: Convert irreducible control-flow into natural loops @@ -251,8 +249,6 @@ ; GCN-O1-NEXT: Code sinking ; GCN-O1-NEXT: Post-Dominator Tree Construction ; GCN-O1-NEXT: Unify divergent function exit nodes -; GCN-O1-NEXT: Lazy Value Information Analysis -; GCN-O1-NEXT: Lower SwitchInst's to branches ; GCN-O1-NEXT: Dominator Tree Construction ; GCN-O1-NEXT: Natural Loop Information ; GCN-O1-NEXT: Convert irreducible control-flow into natural loops @@ -537,8 +533,6 @@ ; GCN-O1-OPTS-NEXT: Code sinking ; GCN-O1-OPTS-NEXT: Post-Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Unify divergent function exit nodes -; GCN-O1-OPTS-NEXT: Lazy Value Information Analysis -; GCN-O1-OPTS-NEXT: Lower SwitchInst's to branches ; GCN-O1-OPTS-NEXT: Dominator Tree Construction ; GCN-O1-OPTS-NEXT: Natural Loop Information ; GCN-O1-OPTS-NEXT: Convert irreducible control-flow into natural loops @@ -841,8 +835,6 @@ ; GCN-O2-NEXT: Code sinking ; GCN-O2-NEXT: Post-Dominator Tree Construction ; GCN-O2-NEXT: Unify divergent function exit nodes -; GCN-O2-NEXT: Lazy Value Information Analysis -; GCN-O2-NEXT: Lower SwitchInst's to branches ; GCN-O2-NEXT: Dominator Tree Construction ; GCN-O2-NEXT: Natural Loop Information ; GCN-O2-NEXT: Convert irreducible control-flow into natural loops @@ -1159,8 +1151,6 @@ ; GCN-O3-NEXT: Code sinking ; GCN-O3-NEXT: Post-Dominator Tree Construction ; GCN-O3-NEXT: Unify divergent function exit nodes -; GCN-O3-NEXT: Lazy Value Information Analysis -; GCN-O3-NEXT: Lower SwitchInst's to branches ; GCN-O3-NEXT: Dominator Tree Construction ; GCN-O3-NEXT: Natural Loop Information ; GCN-O3-NEXT: Convert irreducible control-flow into natural loops diff --git a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll index f58bed3b98e97..0a68820f902a1 100644 --- a/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll +++ b/llvm/test/CodeGen/AMDGPU/multi-divergent-exit-region.ll @@ -1,5 +1,5 @@ -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s -; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx600 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s +; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1100 -mattr=-wavefrontsize32,+wavefrontsize64 -S -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -simplifycfg-require-and-preserve-domtree=1 %s | FileCheck -check-prefix=IR %s ; RUN: llc -march=amdgcn -verify-machineinstrs -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck -check-prefix=GCN %s ; Add an extra verifier runs. There were some cases where invalid IR diff --git a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll index f7479c8475592..8d2a312b34632 100644 --- a/llvm/test/CodeGen/AMDGPU/multilevel-break.ll +++ b/llvm/test/CodeGen/AMDGPU/multilevel-break.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -mtriple=amdgcn-- -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s +; RUN: opt -S -mtriple=amdgcn-- -lowerswitch -structurizecfg -si-annotate-control-flow < %s | FileCheck -check-prefix=OPT %s ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s ; Ensure two if.break calls, for both the inner and outer loops diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll index 56d7fc335911e..1eef7b967f6d9 100644 --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-return-unreachable.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s -; RUN: opt -mtriple=amdgcn-amd-amdhsa -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -lowerswitch -amdgpu-unify-divergent-exit-nodes -verify -structurizecfg -verify -si-annotate-control-flow -verify -S %s -o - | FileCheck -check-prefix=IR %s ; A test with a divergent unreachable block and uniform return block. The ; compiler needs to create a regions that includes them so that diff --git a/llvm/test/Transforms/FixIrreducible/switch.ll b/llvm/test/Transforms/FixIrreducible/switch.ll index efa4310912522..57b92c08ed311 100644 --- a/llvm/test/Transforms/FixIrreducible/switch.ll +++ b/llvm/test/Transforms/FixIrreducible/switch.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -fix-irreducible -S | FileCheck %s +; RUN: opt < %s -passes='lowerswitch,fix-irreducible' -S | FileCheck %s define void @loop_1(i32 %Value, i1 %PredEntry, i1 %PredD) { ; CHECK-LABEL: @loop_1( diff --git a/llvm/test/Transforms/StructurizeCFG/switch.ll b/llvm/test/Transforms/StructurizeCFG/switch.ll index 445c0ab0c2d85..153b6d1914e67 100644 --- a/llvm/test/Transforms/StructurizeCFG/switch.ll +++ b/llvm/test/Transforms/StructurizeCFG/switch.ll @@ -1,4 +1,4 @@ -; RUN: opt -S -structurizecfg %s -o - | FileCheck %s +; RUN: opt -S -passes='lowerswitch,structurizecfg' %s -o - | FileCheck %s ; The structurizecfg pass cannot handle switch instructions, so we need to ; make sure the lower switch pass is always run before structurizecfg.