diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index fe7a287657b00..64dc8604e76ac 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -22,7 +22,6 @@ #include "AMDGPURegBankSelect.h" #include "AMDGPUTargetObjectFile.h" #include "AMDGPUTargetTransformInfo.h" -#include "AMDGPUUnifyDivergentExitNodes.h" #include "GCNIterativeScheduler.h" #include "GCNSchedStrategy.h" #include "GCNVOPDUtils.h" @@ -656,10 +655,6 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) { PM.addPass(AMDGPUPromoteKernelArgumentsPass()); return true; } - if (PassName == "amdgpu-unify-divergent-exit-nodes") { - PM.addPass(AMDGPUUnifyDivergentExitNodesPass()); - return true; - } return false; }); diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 5d59ee47ec430..5f204f5be51ae 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -19,7 +19,6 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUUnifyDivergentExitNodes.h" #include "AMDGPU.h" #include "SIDefines.h" #include "llvm/ADT/ArrayRef.h" @@ -54,33 +53,25 @@ using namespace llvm; namespace { -class AMDGPUUnifyDivergentExitNodesImpl { +class AMDGPUUnifyDivergentExitNodes : public FunctionPass { private: const TargetTransformInfo *TTI = nullptr; public: - AMDGPUUnifyDivergentExitNodesImpl() = delete; - AMDGPUUnifyDivergentExitNodesImpl(const TargetTransformInfo *TTI) - : TTI(TTI) {} + static char ID; // Pass identification, replacement for typeid + + AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) { + initializeAMDGPUUnifyDivergentExitNodesPass(*PassRegistry::getPassRegistry()); + } // We can preserve non-critical-edgeness when we unify function exit nodes + void getAnalysisUsage(AnalysisUsage &AU) const override; BasicBlock *unifyReturnBlockSet(Function &F, DomTreeUpdater &DTU, ArrayRef ReturningBlocks, StringRef Name); - bool run(Function &F, DominatorTree &DT, const PostDominatorTree &PDT, - const UniformityInfo &UA); -}; - -class AMDGPUUnifyDivergentExitNodes : public FunctionPass { -public: - static char ID; - AMDGPUUnifyDivergentExitNodes() : FunctionPass(ID) { - initializeAMDGPUUnifyDivergentExitNodesPass( - *PassRegistry::getPassRegistry()); - } - void getAnalysisUsage(AnalysisUsage &AU) const override; bool runOnFunction(Function &F) override; }; + } // end anonymous namespace char AMDGPUUnifyDivergentExitNodes::ID = 0; @@ -88,14 +79,14 @@ char AMDGPUUnifyDivergentExitNodes::ID = 0; char &llvm::AMDGPUUnifyDivergentExitNodesID = AMDGPUUnifyDivergentExitNodes::ID; INITIALIZE_PASS_BEGIN(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, - "Unify divergent function exit nodes", false, false) + "Unify divergent function exit nodes", false, false) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(UniformityInfoWrapperPass) INITIALIZE_PASS_END(AMDGPUUnifyDivergentExitNodes, DEBUG_TYPE, "Unify divergent function exit nodes", false, false) -void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const { +void AMDGPUUnifyDivergentExitNodes::getAnalysisUsage(AnalysisUsage &AU) const{ if (RequireAndPreserveDomTree) AU.addRequired(); @@ -141,7 +132,7 @@ static bool isUniformlyReached(const UniformityInfo &UA, BasicBlock &BB) { return true; } -BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( +BasicBlock *AMDGPUUnifyDivergentExitNodes::unifyReturnBlockSet( Function &F, DomTreeUpdater &DTU, ArrayRef ReturningBlocks, StringRef Name) { // Otherwise, we need to insert a new basic block into the function, add a PHI @@ -189,14 +180,21 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( return NewRetBlock; } -bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree &DT, - const PostDominatorTree &PDT, - const UniformityInfo &UA) { +bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { + DominatorTree *DT = nullptr; + if (RequireAndPreserveDomTree) + DT = &getAnalysis().getDomTree(); + + auto &PDT = getAnalysis().getPostDomTree(); if (PDT.root_size() == 0 || (PDT.root_size() == 1 && !isa(PDT.getRoot()->getTerminator()))) return false; + UniformityInfo &UA = + getAnalysis().getUniformityInfo(); + TTI = &getAnalysis().getTTI(F); + // Loop over all of the blocks in a function, tracking all of the blocks that // return. SmallVector ReturningBlocks; @@ -329,30 +327,3 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree &DT, unifyReturnBlockSet(F, DTU, ReturningBlocks, "UnifiedReturnBlock"); return true; } - -bool AMDGPUUnifyDivergentExitNodes::runOnFunction(Function &F) { - DominatorTree *DT = nullptr; - if (RequireAndPreserveDomTree) - DT = &getAnalysis().getDomTree(); - const auto &PDT = - getAnalysis().getPostDomTree(); - const auto &UA = getAnalysis().getUniformityInfo(); - const auto *TranformInfo = - &getAnalysis().getTTI(F); - return AMDGPUUnifyDivergentExitNodesImpl(TranformInfo).run(F, *DT, PDT, UA); -} - -PreservedAnalyses -AMDGPUUnifyDivergentExitNodesPass::run(Function &F, - FunctionAnalysisManager &AM) { - DominatorTree *DT = nullptr; - if (RequireAndPreserveDomTree) - DT = &AM.getResult(F); - - const auto &PDT = AM.getResult(F); - const auto &UA = AM.getResult(F); - const auto *TransformInfo = &AM.getResult(F); - return AMDGPUUnifyDivergentExitNodesImpl(TransformInfo).run(F, *DT, PDT, UA) - ? PreservedAnalyses::none() - : PreservedAnalyses::all(); -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h deleted file mode 100644 index e58925bc01d9e..0000000000000 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.h +++ /dev/null @@ -1,31 +0,0 @@ -//===- AMDGPUUnifyDivergentExitNodes.h ------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This is a variant of the UnifyFunctionExitNodes pass. Rather than ensuring -// there is at most one ret and one unreachable instruction, it ensures there is -// at most one divergent exiting block. -// -// StructurizeCFG can't deal with multi-exit regions formed by branches to -// multiple return nodes. It is not desirable to structurize regions with -// uniform branches, so unifying those to the same return block as divergent -// branches inhibits use of scalar branching. It still can't deal with the case -// where one branch goes to return, and one unreachable. Replace unreachable in -// this case with a return. -// -//===----------------------------------------------------------------------===// - -#include "AMDGPU.h" - -namespace llvm { -class AMDGPUUnifyDivergentExitNodesPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); -}; - -} // end namespace llvm diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll index 13f8eff94f86b..58d5dc20d5ac5 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll @@ -1,48 +1,36 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -mtriple=amdgcn-amd-amdhsa -p simplifycfg,amdgpu-unify-divergent-exit-nodes %s -S -o - | FileCheck %s --check-prefix=OPT -; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA +; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s define void @nested_inf_loop(i1 %0, i1 %1) { -; OPT-LABEL: @nested_inf_loop( -; OPT-NEXT: BB: -; OPT-NEXT: br label [[BB1:%.*]] -; OPT: BB1: -; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]] -; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]] -; OPT: infloop: -; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]] -; OPT: DummyReturnBlock: -; OPT-NEXT: ret void -; -; ISA-LABEL: nested_inf_loop: -; ISA-NEXT: %bb.0: ; %BB -; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ISA-NEXT: v_and_b32_e32 v1, 1, v1 -; ISA-NEXT: v_and_b32_e32 v0, 1, v0 -; ISA-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1 -; ISA-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; ISA-NEXT: s_xor_b64 s[6:7], vcc, -1 -; ISA-NEXT: s_mov_b64 s[8:9], 0 -; ISA-NEXT: .LBB0_1: ; %BB1 -; ISA: s_and_b64 s[10:11], exec, s[6:7] -; ISA-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; ISA-NEXT: s_andn2_b64 exec, exec, s[8:9] -; ISA-NEXT: s_cbranch_execnz .LBB0_1 -; ISA-NEXT: %bb.2: ; %BB2 -; ISA: s_or_b64 exec, exec, s[8:9] -; ISA-NEXT: s_mov_b64 s[8:9], 0 -; ISA-NEXT: .LBB0_3: ; %BB4 -; ISA: s_and_b64 s[10:11], exec, s[4:5] -; ISA-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] -; ISA-NEXT: s_andn2_b64 exec, exec, s[8:9] -; ISA-NEXT: s_cbranch_execnz .LBB0_3 -; ISA-NEXT: %bb.4: ; %loop.exit.guard -; ISA: s_or_b64 exec, exec, s[8:9] -; ISA-NEXT: s_mov_b64 vcc, 0 -; ISA-NEXT: s_mov_b64 s[8:9], 0 -; ISA-NEXT: s_branch .LBB0_1 -; ISA-NEXT: %bb.5: ; %DummyReturnBlock -; ISA-NEXT: s_setpc_b64 s[30:31] +; CHECK-LABEL: nested_inf_loop: +; CHECK-NEXT: %bb.0: ; %BB +; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; CHECK-NEXT: v_and_b32_e32 v1, 1, v1 +; CHECK-NEXT: v_and_b32_e32 v0, 1, v0 +; CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1 +; CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 +; CHECK-NEXT: s_xor_b64 s[6:7], vcc, -1 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: .LBB0_1: ; %BB1 +; CHECK: s_and_b64 s[10:11], exec, s[6:7] +; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execnz .LBB0_1 +; CHECK-NEXT: %bb.2: ; %BB2 +; CHECK: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: .LBB0_3: ; %BB4 +; CHECK: s_and_b64 s[10:11], exec, s[4:5] +; CHECK-NEXT: s_or_b64 s[8:9], s[10:11], s[8:9] +; CHECK-NEXT: s_andn2_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_cbranch_execnz .LBB0_3 +; CHECK-NEXT: %bb.4: ; %loop.exit.guard +; CHECK: s_or_b64 exec, exec, s[8:9] +; CHECK-NEXT: s_mov_b64 vcc, 0 +; CHECK-NEXT: s_mov_b64 s[8:9], 0 +; CHECK-NEXT: s_branch .LBB0_1 +; CHECK-NEXT: %bb.5: ; %DummyReturnBlock +; CHECK-NEXT: s_setpc_b64 s[30:31] BB: br label %BB1