diff --git a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp index 706237b906cc3..ddf9a24eb5230 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUUnifyDivergentExitNodes.cpp @@ -181,52 +181,14 @@ BasicBlock *AMDGPUUnifyDivergentExitNodesImpl::unifyReturnBlockSet( return NewRetBlock; } -static BasicBlock * -createDummyReturnBlock(Function &F, - SmallVector &ReturningBlocks) { - BasicBlock *DummyReturnBB = - BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F); - Type *RetTy = F.getReturnType(); - Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); - ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); - ReturningBlocks.push_back(DummyReturnBB); - return DummyReturnBB; -} - -/// Handle conditional branch instructions (-> 2 targets) and callbr -/// instructions with N targets. -static void handleNBranch(Function &F, BasicBlock *BB, Instruction *BI, - BasicBlock *DummyReturnBB, - std::vector &Updates) { - SmallVector Successors(successors(BB)); - - // Create a new transition block to hold the conditional branch. - BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); - - Updates.reserve(Updates.size() + 2 * Successors.size() + 2); - - // 'Successors' become successors of TransitionBB instead of BB, - // and TransitionBB becomes a single successor of BB. - Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); - for (BasicBlock *Successor : Successors) { - Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); - Updates.emplace_back(DominatorTree::Delete, BB, Successor); - } - - // Create a branch that will always branch to the transition block and - // references DummyReturnBB. - BB->getTerminator()->eraseFromParent(); - BranchInst::Create(TransitionBB, DummyReturnBB, - ConstantInt::getTrue(F.getContext()), BB); - Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); -} - bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT, const PostDominatorTree &PDT, const UniformityInfo &UA) { + assert(hasOnlySimpleTerminator(F) && "Unsupported block terminator."); + if (PDT.root_size() == 0 || (PDT.root_size() == 1 && - !isa(PDT.getRoot()->getTerminator()))) + !isa(PDT.getRoot()->getTerminator()))) return false; // Loop over all of the blocks in a function, tracking all of the blocks that @@ -260,27 +222,46 @@ bool AMDGPUUnifyDivergentExitNodesImpl::run(Function &F, DominatorTree *DT, if (HasDivergentExitBlock) UnreachableBlocks.push_back(BB); } else if (BranchInst *BI = dyn_cast(BB->getTerminator())) { - if (!DummyReturnBB) - DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks); + + ConstantInt *BoolTrue = ConstantInt::getTrue(F.getContext()); + if (DummyReturnBB == nullptr) { + DummyReturnBB = + BasicBlock::Create(F.getContext(), "DummyReturnBlock", &F); + Type *RetTy = F.getReturnType(); + Value *RetVal = RetTy->isVoidTy() ? nullptr : PoisonValue::get(RetTy); + ReturnInst::Create(F.getContext(), RetVal, DummyReturnBB); + ReturningBlocks.push_back(DummyReturnBB); + } if (BI->isUnconditional()) { BasicBlock *LoopHeaderBB = BI->getSuccessor(0); BI->eraseFromParent(); // Delete the unconditional branch. // Add a new conditional branch with a dummy edge to the return block. - BranchInst::Create(LoopHeaderBB, DummyReturnBB, - ConstantInt::getTrue(F.getContext()), BB); + BranchInst::Create(LoopHeaderBB, DummyReturnBB, BoolTrue, BB); + Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); + } else { // Conditional branch. + SmallVector Successors(successors(BB)); + + // Create a new transition block to hold the conditional branch. + BasicBlock *TransitionBB = BB->splitBasicBlock(BI, "TransitionBlock"); + + Updates.reserve(Updates.size() + 2 * Successors.size() + 2); + + // 'Successors' become successors of TransitionBB instead of BB, + // and TransitionBB becomes a single successor of BB. + Updates.emplace_back(DominatorTree::Insert, BB, TransitionBB); + for (BasicBlock *Successor : Successors) { + Updates.emplace_back(DominatorTree::Insert, TransitionBB, Successor); + Updates.emplace_back(DominatorTree::Delete, BB, Successor); + } + + // Create a branch that will always branch to the transition block and + // references DummyReturnBB. + BB->getTerminator()->eraseFromParent(); + BranchInst::Create(TransitionBB, DummyReturnBB, BoolTrue, BB); Updates.emplace_back(DominatorTree::Insert, BB, DummyReturnBB); - } else { - handleNBranch(F, BB, BI, DummyReturnBB, Updates); } Changed = true; - } else if (CallBrInst *CBI = dyn_cast(BB->getTerminator())) { - if (!DummyReturnBB) - DummyReturnBB = createDummyReturnBlock(F, ReturningBlocks); - - handleNBranch(F, BB, CBI, DummyReturnBB, Updates); - } else { - llvm_unreachable("unsupported block terminator"); } } diff --git a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp index 0a8f5ea2fdae1..5f6f66a4bc213 100644 --- a/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp +++ b/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp @@ -558,10 +558,11 @@ void StructurizeCFG::analyzeLoops(RegionNode *N) { } else { // Test for successors as back edge BasicBlock *BB = N->getNodeAs(); - if (BranchInst *Term = dyn_cast(BB->getTerminator())) - for (BasicBlock *Succ : Term->successors()) - if (Visited.count(Succ)) - Loops[Succ] = BB; + BranchInst *Term = cast(BB->getTerminator()); + + for (BasicBlock *Succ : Term->successors()) + if (Visited.count(Succ)) + Loops[Succ] = BB; } } @@ -593,7 +594,7 @@ void StructurizeCFG::gatherPredicates(RegionNode *N) { for (BasicBlock *P : predecessors(BB)) { // Ignore it if it's a branch from outside into our region entry - if (!ParentRegion->contains(P) || !dyn_cast(P->getTerminator())) + if (!ParentRegion->contains(P)) continue; Region *R = RI->getRegionFor(P); @@ -1401,17 +1402,13 @@ bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) { /// Run the transformation for each region found bool StructurizeCFG::run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI) { - // CallBr and its corresponding direct target blocks are for now ignored by - // this pass. This is not a limitation for the currently intended uses cases - // of callbr in the AMDGPU backend. - // Parent and child regions are not affected by this (current) restriction. - // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details. - if (R->isTopLevelRegion() || isa(R->getEntry()->getTerminator())) + if (R->isTopLevelRegion()) return false; this->DT = DT; this->TTI = TTI; Func = R->getEntry()->getParent(); + assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator."); ParentRegion = R; diff --git a/llvm/test/CodeGen/AMDGPU/callbr.ll b/llvm/test/CodeGen/AMDGPU/callbr.ll deleted file mode 100644 index 253a6ec100eae..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/callbr.ll +++ /dev/null @@ -1,54 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx90a < %s | FileCheck %s - -define void @callbr_inline_asm(ptr %src, ptr %dst1, ptr %dst2, i32 %c) { -; CHECK-LABEL: callbr_inline_asm: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_load_dword v0, v[0:1] -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: v_cmp_gt_i32 vcc v6, 42; s_cbranch_vccnz .LBB0_2 -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: ; %bb.1: ; %fallthrough -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_store_dword v[2:3], v0 -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] -; CHECK-NEXT: .LBB0_2: ; Inline asm indirect target -; CHECK-NEXT: ; %indirect -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: flat_store_dword v[4:5], v0 -; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) -; CHECK-NEXT: s_setpc_b64 s[30:31] - %a = load i32, ptr %src, align 4 - callbr void asm "v_cmp_gt_i32 vcc $0, 42; s_cbranch_vccnz ${1:l}", "r,!i"(i32 %c) to label %fallthrough [label %indirect] -fallthrough: - store i32 %a, ptr %dst1, align 4 - br label %ret -indirect: - store i32 %a, ptr %dst2, align 4 - br label %ret -ret: - ret void -} - -define void @callbr_self_loop(i1 %c) { -; CHECK-LABEL: callbr_self_loop: -; CHECK: ; %bb.0: -; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; CHECK-NEXT: .LBB1_1: ; %callbr -; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_branch .LBB1_1 -; CHECK-NEXT: .LBB1_2: ; Inline asm indirect target -; CHECK-NEXT: ; %callbr.target.ret -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: s_setpc_b64 s[30:31] - br label %callbr -callbr: - callbr void asm "", "!i"() to label %callbr [label %ret] -ret: - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll index 076a99ff8588f..007e3f0a6bdbc 100644 --- a/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll +++ b/llvm/test/CodeGen/AMDGPU/do-not-unify-divergent-exit-nodes-with-musttail.ll @@ -3,7 +3,6 @@ declare void @foo(ptr) declare i1 @bar(ptr) -declare i32 @bar32(ptr) define void @musttail_call_without_return_value(ptr %p) { ; CHECK-LABEL: define void @musttail_call_without_return_value( @@ -29,31 +28,6 @@ bb.1: ret void } -define void @musttail_call_without_return_value_callbr(ptr %p) { -; CHECK-LABEL: define void @musttail_call_without_return_value_callbr( -; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1 -; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]]) -; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1] -; CHECK: [[BB_0]]: -; CHECK-NEXT: musttail call void @foo(ptr [[P]]) -; CHECK-NEXT: ret void -; CHECK: [[BB_1:.*:]] -; CHECK-NEXT: ret void -; -entry: - %load = load i32, ptr %p, align 1 - callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1] - -bb.0: - musttail call void @foo(ptr %p) - ret void - -bb.1: - ret void -} - define i1 @musttail_call_with_return_value(ptr %p) { ; CHECK-LABEL: define i1 @musttail_call_with_return_value( ; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { @@ -77,28 +51,3 @@ bb.0: bb.1: ret i1 %load } - -define i32 @musttail_call_with_return_value_callbr(ptr %p) { -; CHECK-LABEL: define i32 @musttail_call_with_return_value_callbr( -; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[ENTRY:.*:]] -; CHECK-NEXT: [[LOAD:%.*]] = load i32, ptr [[P]], align 1 -; CHECK-NEXT: callbr void asm "", "r,!i"(i32 [[LOAD]]) -; CHECK-NEXT: to label %[[BB_0:.*]] [label %bb.1] -; CHECK: [[BB_0]]: -; CHECK-NEXT: [[RET:%.*]] = musttail call i32 @bar32(ptr [[P]]) -; CHECK-NEXT: ret i32 [[RET]] -; CHECK: [[BB_1:.*:]] -; CHECK-NEXT: ret i32 [[LOAD]] -; -entry: - %load = load i32, ptr %p, align 1 - callbr void asm "", "r,!i"(i32 %load) to label %bb.0 [label %bb.1] - -bb.0: - %ret = musttail call i32 @bar32(ptr %p) - ret i32 %ret - -bb.1: - ret i32 %load -} diff --git a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll index df635925b87df..3e2e43faca5aa 100644 --- a/llvm/test/CodeGen/AMDGPU/infinite-loop.ll +++ b/llvm/test/CodeGen/AMDGPU/infinite-loop.ll @@ -36,60 +36,26 @@ loop: br label %loop } -define amdgpu_kernel void @infinite_loop_callbr(ptr addrspace(1) %out) { -; SI-LABEL: infinite_loop_callbr: -; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 -; SI-NEXT: ;;#ASMSTART -; SI-NEXT: ;;#ASMEND -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_endpgm -; IR-LABEL: @infinite_loop_callbr( -; IR-NEXT: entry: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[LOOP:%.*]] [] -; IR: loop: -; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]] -; IR: TransitionBlock: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[LOOP]] [] -; IR: DummyReturnBlock: -; IR-NEXT: ret void -; -entry: - callbr void asm "", ""() to label %loop [] - -loop: - store volatile i32 999, ptr addrspace(1) %out, align 4 - callbr void asm "", ""() to label %loop [] -} - define amdgpu_kernel void @infinite_loop_ret(ptr addrspace(1) %out) { ; SI-LABEL: infinite_loop_ret: ; SI: ; %bb.0: ; %entry ; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc -; SI-NEXT: s_cbranch_execz .LBB2_3 +; SI-NEXT: s_cbranch_execz .LBB1_3 ; SI-NEXT: ; %bb.1: ; %loop.preheader ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 ; SI-NEXT: s_and_b64 vcc, exec, -1 -; SI-NEXT: .LBB2_2: ; %loop +; SI-NEXT: .LBB1_2: ; %loop ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccnz .LBB2_2 -; SI-NEXT: .LBB2_3: ; %UnifiedReturnBlock +; SI-NEXT: s_cbranch_vccnz .LBB1_2 +; SI-NEXT: .LBB1_3: ; %UnifiedReturnBlock ; SI-NEXT: s_endpgm ; IR-LABEL: @infinite_loop_ret( ; IR-NEXT: entry: @@ -115,93 +81,44 @@ return: ret void } -define amdgpu_kernel void @infinite_loop_ret_callbr(ptr addrspace(1) %out) { -; SI-LABEL: infinite_loop_ret_callbr: -; SI: ; %bb.0: ; %entry -; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: ;;#ASMSTART -; SI-NEXT: ;;#ASMEND -; SI-NEXT: ; %bb.1: ; %loop.preheader -; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: .LBB3_2: ; Inline asm indirect target -; SI-NEXT: ; %UnifiedReturnBlock -; SI-NEXT: ; Label of block must be emitted -; SI-NEXT: s_endpgm -; IR-LABEL: @infinite_loop_ret_callbr( -; IR-NEXT: entry: -; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; IR-NEXT: [[COND:%.*]] = icmp eq i32 [[TMP]], 1 -; IR-NEXT: [[COND32:%.*]] = zext i1 [[COND]] to i32 -; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND32]]) -; IR-NEXT: to label [[LOOP:%.*]] [label %UnifiedReturnBlock] -; IR: loop: -; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] -; IR: TransitionBlock: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[LOOP]] [] -; IR: UnifiedReturnBlock: -; IR-NEXT: ret void -; -entry: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - %cond = icmp eq i32 %tmp, 1 - %cond32 = zext i1 %cond to i32 - callbr void asm "", "r,!i"(i32 %cond32) to label %loop [label %return] - -loop: - store volatile i32 999, ptr addrspace(1) %out, align 4 - callbr void asm "", ""() to label %loop [] - -return: - ret void -} - define amdgpu_kernel void @infinite_loops(ptr addrspace(1) %out) { ; SI-LABEL: infinite_loops: ; SI: ; %bb.0: ; %entry ; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 ; SI-NEXT: s_mov_b64 s[2:3], -1 -; SI-NEXT: s_cbranch_scc1 .LBB4_4 +; SI-NEXT: s_cbranch_scc1 .LBB2_4 ; SI-NEXT: ; %bb.1: ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: v_mov_b32_e32 v0, 0x378 ; SI-NEXT: s_and_b64 vcc, exec, -1 -; SI-NEXT: .LBB4_2: ; %loop2 +; SI-NEXT: .LBB2_2: ; %loop2 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccnz .LBB4_2 +; SI-NEXT: s_cbranch_vccnz .LBB2_2 ; SI-NEXT: ; %bb.3: ; %Flow ; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: .LBB4_4: ; %Flow2 +; SI-NEXT: .LBB2_4: ; %Flow2 ; SI-NEXT: s_and_b64 vcc, exec, s[2:3] ; SI-NEXT: s_waitcnt lgkmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccz .LBB4_7 +; SI-NEXT: s_cbranch_vccz .LBB2_7 ; SI-NEXT: ; %bb.5: ; SI-NEXT: s_mov_b32 s3, 0xf000 ; SI-NEXT: s_mov_b32 s2, -1 ; SI-NEXT: s_waitcnt expcnt(0) ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 ; SI-NEXT: s_and_b64 vcc, exec, 0 -; SI-NEXT: .LBB4_6: ; %loop1 +; SI-NEXT: .LBB2_6: ; %loop1 ; SI-NEXT: ; =>This Inner Loop Header: Depth=1 ; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_mov_b64 vcc, vcc -; SI-NEXT: s_cbranch_vccz .LBB4_6 -; SI-NEXT: .LBB4_7: ; %DummyReturnBlock +; SI-NEXT: s_cbranch_vccz .LBB2_6 +; SI-NEXT: .LBB2_7: ; %DummyReturnBlock ; SI-NEXT: s_endpgm ; IR-LABEL: @infinite_loops( ; IR-NEXT: entry: @@ -227,78 +144,24 @@ loop2: br label %loop2 } -define amdgpu_kernel void @infinite_loops_callbr(ptr addrspace(1) %out) { -; SI-LABEL: infinite_loops_callbr: -; SI: ; %bb.0: ; %entry -; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: ;;#ASMSTART -; SI-NEXT: ;;#ASMEND -; SI-NEXT: ; %bb.1: ; %loop1 -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_endpgm -; SI-NEXT: .LBB5_2: ; Inline asm indirect target -; SI-NEXT: ; %loop2.preheader -; SI-NEXT: ; Label of block must be emitted -; SI-NEXT: s_mov_b32 s3, 0xf000 -; SI-NEXT: s_mov_b32 s2, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x378 -; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_endpgm -; IR-LABEL: @infinite_loops_callbr( -; IR-NEXT: entry: -; IR-NEXT: callbr void asm "", "r,!i"(i32 poison) -; IR-NEXT: to label [[LOOP1:%.*]] [label %loop2] -; IR: loop1: -; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[DUMMYRETURNBLOCK:%.*]] -; IR: TransitionBlock: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[LOOP1]] [] -; IR: loop2: -; IR-NEXT: store volatile i32 888, ptr addrspace(1) [[OUT]], align 4 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK1:%.*]], label [[DUMMYRETURNBLOCK]] -; IR: TransitionBlock1: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[LOOP2:%.*]] [] -; IR: DummyReturnBlock: -; IR-NEXT: ret void -; -entry: - callbr void asm "", "r,!i"(i32 poison) to label %loop1 [label %loop2] - -loop1: - store volatile i32 999, ptr addrspace(1) %out, align 4 - callbr void asm "", ""() to label %loop1 [] - -loop2: - store volatile i32 888, ptr addrspace(1) %out, align 4 - callbr void asm "", ""() to label %loop2 [] -} - define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) { ; SI-LABEL: infinite_loop_nest_ret: ; SI: ; %bb.0: ; %entry ; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 ; SI-NEXT: s_and_saveexec_b64 s[0:1], vcc -; SI-NEXT: s_cbranch_execz .LBB6_5 +; SI-NEXT: s_cbranch_execz .LBB3_5 ; SI-NEXT: ; %bb.1: ; %outer_loop.preheader ; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 ; SI-NEXT: v_cmp_ne_u32_e64 s[0:1], 3, v0 ; SI-NEXT: s_mov_b32 s7, 0xf000 ; SI-NEXT: s_mov_b32 s6, -1 ; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT: .LBB6_2: ; %outer_loop +; SI-NEXT: .LBB3_2: ; %outer_loop ; SI-NEXT: ; =>This Loop Header: Depth=1 -; SI-NEXT: ; Child Loop BB6_3 Depth 2 +; SI-NEXT: ; Child Loop BB3_3 Depth 2 ; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: .LBB6_3: ; %inner_loop -; SI-NEXT: ; Parent Loop BB6_2 Depth=1 +; SI-NEXT: .LBB3_3: ; %inner_loop +; SI-NEXT: ; Parent Loop BB3_2 Depth=1 ; SI-NEXT: ; => This Inner Loop Header: Depth=2 ; SI-NEXT: s_and_b64 s[8:9], exec, s[0:1] ; SI-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] @@ -306,13 +169,13 @@ define amdgpu_kernel void @infinite_loop_nest_ret(ptr addrspace(1) %out) { ; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; SI-NEXT: s_waitcnt vmcnt(0) ; SI-NEXT: s_andn2_b64 exec, exec, s[2:3] -; SI-NEXT: s_cbranch_execnz .LBB6_3 +; SI-NEXT: s_cbranch_execnz .LBB3_3 ; SI-NEXT: ; %bb.4: ; %loop.exit.guard -; SI-NEXT: ; in Loop: Header=BB6_2 Depth=1 +; SI-NEXT: ; in Loop: Header=BB3_2 Depth=1 ; SI-NEXT: s_or_b64 exec, exec, s[2:3] ; SI-NEXT: s_mov_b64 vcc, 0 -; SI-NEXT: s_branch .LBB6_2 -; SI-NEXT: .LBB6_5: ; %UnifiedReturnBlock +; SI-NEXT: s_branch .LBB3_2 +; SI-NEXT: .LBB3_5: ; %UnifiedReturnBlock ; SI-NEXT: s_endpgm ; IR-LABEL: @infinite_loop_nest_ret( ; IR-NEXT: entry: @@ -349,82 +212,4 @@ return: ret void } -define amdgpu_kernel void @infinite_loop_nest_ret_callbr(ptr addrspace(1) %out) { -; SI-LABEL: infinite_loop_nest_ret_callbr: -; SI: ; %bb.0: ; %entry -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 1, v0 -; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc -; SI-NEXT: ;;#ASMSTART -; SI-NEXT: ;;#ASMEND -; SI-NEXT: ; %bb.1: ; %outer_loop.preheader -; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 -; SI-NEXT: s_mov_b32 s7, 0xf000 -; SI-NEXT: s_mov_b32 s6, -1 -; SI-NEXT: v_mov_b32_e32 v0, 0x3e7 -; SI-NEXT: s_and_b64 s[0:1], exec, 0 -; SI-NEXT: s_branch .LBB7_3 -; SI-NEXT: .LBB7_2: ; %loop.exit.guard -; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1 -; SI-NEXT: s_and_b64 vcc, exec, s[2:3] -; SI-NEXT: s_cbranch_vccnz .LBB7_5 -; SI-NEXT: .LBB7_3: ; %outer_loop -; SI-NEXT: ; =>This Inner Loop Header: Depth=1 -; SI-NEXT: ;;#ASMSTART -; SI-NEXT: ;;#ASMEND -; SI-NEXT: s_waitcnt lgkmcnt(0) -; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 -; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: s_mov_b64 s[2:3], -1 -; SI-NEXT: s_mov_b64 vcc, s[0:1] -; SI-NEXT: s_cbranch_vccz .LBB7_2 -; SI-NEXT: ; %bb.4: ; %TransitionBlock.target.outer_loop -; SI-NEXT: ; in Loop: Header=BB7_3 Depth=1 -; SI-NEXT: s_mov_b64 s[2:3], 0 -; SI-NEXT: s_branch .LBB7_2 -; SI-NEXT: .LBB7_5: ; Inline asm indirect target -; SI-NEXT: ; %UnifiedReturnBlock -; SI-NEXT: ; Label of block must be emitted -; SI-NEXT: s_endpgm -; IR-LABEL: @infinite_loop_nest_ret_callbr( -; IR-NEXT: entry: -; IR-NEXT: [[TMP:%.*]] = tail call i32 @llvm.amdgcn.workitem.id.x() -; IR-NEXT: [[COND1:%.*]] = icmp ne i32 [[TMP]], 1 -; IR-NEXT: [[COND1_32:%.*]] = zext i1 [[COND1]] to i32 -; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND1_32]]) -; IR-NEXT: to label [[OUTER_LOOP:%.*]] [label %UnifiedReturnBlock] -; IR: outer_loop: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[INNER_LOOP:%.*]] [] -; IR: inner_loop: -; IR-NEXT: store volatile i32 999, ptr addrspace(1) [[OUT:%.*]], align 4 -; IR-NEXT: [[COND3:%.*]] = icmp eq i32 [[TMP]], 3 -; IR-NEXT: [[COND3_32:%.*]] = zext i1 [[COND3]] to i32 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK:%.*]], label [[UNIFIEDRETURNBLOCK:%.*]] -; IR: TransitionBlock: -; IR-NEXT: callbr void asm "", "r,!i"(i32 [[COND3_32]]) -; IR-NEXT: to label [[INNER_LOOP]] [label %outer_loop] -; IR: UnifiedReturnBlock: -; IR-NEXT: ret void -; -entry: - %tmp = tail call i32 @llvm.amdgcn.workitem.id.x() - %cond1 = icmp ne i32 %tmp, 1 ; avoid following BB optimizing away through the domination - %cond1_32 = zext i1 %cond1 to i32 - callbr void asm "", "r,!i"(i32 %cond1_32) to label %outer_loop [label %return] - -outer_loop: - ; %cond2 = icmp eq i32 %tmp, 2 - ; br i1 %cond2, label %outer_loop, label %inner_loop - callbr void asm "", ""() to label %inner_loop [] - -inner_loop: ; preds = %LeafBlock, %LeafBlock1 - store volatile i32 999, ptr addrspace(1) %out, align 4 - %cond3 = icmp eq i32 %tmp, 3 - %cond3_32 = zext i1 %cond3 to i32 - callbr void asm "", "r,!i"(i32 %cond3_32) to label %inner_loop [label %outer_loop] - -return: - ret void -} - declare i32 @llvm.amdgcn.workitem.id.x() diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll index 01bcdad3fc220..34de1e48bfb59 100644 --- a/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll +++ b/llvm/test/CodeGen/AMDGPU/si-annotate-nested-control-flows.ll @@ -3,16 +3,15 @@ ; RUN: llc -mtriple=amdgcn-amd-amdhsa %s -o - | FileCheck %s --check-prefix=ISA define void @nested_inf_loop(i1 %0, i1 %1) { -; OPT-LABEL: define void @nested_inf_loop( -; OPT-SAME: i1 [[TMP0:%.*]], i1 [[TMP1:%.*]]) { -; OPT-NEXT: [[BB:.*:]] -; OPT-NEXT: br label %[[BB1:.*]] -; OPT: [[BB1]]: -; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0]], i1 true, i1 [[TMP1]] -; OPT-NEXT: br i1 [[BRMERGE]], label %[[BB1]], label %[[INFLOOP:.*]] -; OPT: [[INFLOOP]]: -; OPT-NEXT: br i1 true, label %[[INFLOOP]], label %[[DUMMYRETURNBLOCK:.*]] -; OPT: [[DUMMYRETURNBLOCK]]: +; OPT-LABEL: @nested_inf_loop( +; OPT-NEXT: BB: +; OPT-NEXT: br label [[BB1:%.*]] +; OPT: BB1: +; OPT-NEXT: [[BRMERGE:%.*]] = select i1 [[TMP0:%.*]], i1 true, i1 [[TMP1:%.*]] +; OPT-NEXT: br i1 [[BRMERGE]], label [[BB1]], label [[INFLOOP:%.*]] +; OPT: infloop: +; OPT-NEXT: br i1 true, label [[INFLOOP]], label [[DUMMYRETURNBLOCK:%.*]] +; OPT: DummyReturnBlock: ; OPT-NEXT: ret void ; ; ISA-LABEL: nested_inf_loop: @@ -64,84 +63,3 @@ BB4: BB3: br label %BB1 } - -define void @nested_inf_loop_callbr(i32 %0, i32 %1) { -; OPT-LABEL: define void @nested_inf_loop_callbr( -; OPT-SAME: i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) { -; OPT-NEXT: [[BB:.*:]] -; OPT-NEXT: callbr void asm "", ""() -; OPT-NEXT: to label %[[BB1:.*]] [] -; OPT: [[BB1]]: -; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP0]]) -; OPT-NEXT: to label %[[BB3:.*]] [label %BB2] -; OPT: [[BB2:.*:]] -; OPT-NEXT: callbr void asm "", ""() -; OPT-NEXT: to label %[[BB4:.*]] [] -; OPT: [[BB4]]: -; OPT-NEXT: br i1 true, label %[[TRANSITIONBLOCK:.*]], label %[[DUMMYRETURNBLOCK:.*]] -; OPT: [[TRANSITIONBLOCK]]: -; OPT-NEXT: callbr void asm "", "r,!i"(i32 [[TMP1]]) -; OPT-NEXT: to label %[[BB3]] [label %BB4] -; OPT: [[BB3]]: -; OPT-NEXT: callbr void asm "", ""() -; OPT-NEXT: to label %[[BB1]] [] -; OPT: [[DUMMYRETURNBLOCK]]: -; OPT-NEXT: ret void -; -; ISA-LABEL: nested_inf_loop_callbr: -; ISA: ; %bb.0: ; %BB -; ISA-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; ISA-NEXT: ;;#ASMSTART -; ISA-NEXT: ;;#ASMEND -; ISA-NEXT: ; implicit-def: $sgpr6_sgpr7 -; ISA-NEXT: ; implicit-def: $sgpr4_sgpr5 -; ISA-NEXT: .LBB1_1: ; %BB1 -; ISA-NEXT: ; =>This Inner Loop Header: Depth=1 -; ISA-NEXT: ;;#ASMSTART -; ISA-NEXT: ;;#ASMEND -; ISA-NEXT: s_andn2_b64 s[6:7], s[6:7], exec -; ISA-NEXT: s_and_b64 s[8:9], s[4:5], exec -; ISA-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] -; ISA-NEXT: .LBB1_2: ; %BB3 -; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; ISA-NEXT: ;;#ASMSTART -; ISA-NEXT: ;;#ASMEND -; ISA-NEXT: s_andn2_b64 s[4:5], s[4:5], exec -; ISA-NEXT: s_and_b64 s[8:9], s[6:7], exec -; ISA-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] -; ISA-NEXT: s_branch .LBB1_1 -; ISA-NEXT: .LBB1_3: ; Inline asm indirect target -; ISA-NEXT: ; %BB2 -; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; ISA-NEXT: ; Label of block must be emitted -; ISA-NEXT: ;;#ASMSTART -; ISA-NEXT: ;;#ASMEND -; ISA-NEXT: s_mov_b64 s[6:7], -1 -; ISA-NEXT: s_and_saveexec_b64 s[8:9], s[4:5] -; ISA-NEXT: s_cbranch_execz .LBB1_5 -; ISA-NEXT: ; %bb.4: ; %TransitionBlock.target.BB3 -; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; ISA-NEXT: s_xor_b64 s[6:7], exec, -1 -; ISA-NEXT: .LBB1_5: ; %loop.exit.guard -; ISA-NEXT: ; in Loop: Header=BB1_1 Depth=1 -; ISA-NEXT: s_or_b64 exec, exec, s[8:9] -; ISA-NEXT: s_and_b64 vcc, exec, s[6:7] -; ISA-NEXT: s_mov_b64 s[6:7], 0 -; ISA-NEXT: s_cbranch_vccz .LBB1_2 -; ISA-NEXT: ; %bb.6: ; %DummyReturnBlock -; ISA-NEXT: s_setpc_b64 s[30:31] -BB: - callbr void asm "", ""() to label %BB1 [] - -BB1: - callbr void asm "", "r,!i"(i32 %0) to label %BB3 [label %BB2] - -BB2: - callbr void asm "", ""() to label %BB4 [] - -BB4: - callbr void asm "", "r,!i"(i32 %1) to label %BB3 [label %BB4] - -BB3: - callbr void asm "", ""() to label %BB1 [] -} diff --git a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll index 004c27971131d..4cbe682cf9f9f 100644 --- a/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll +++ b/llvm/test/CodeGen/AMDGPU/si-unify-exit-multiple-unreachables.ll @@ -1,5 +1,5 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -stop-after=amdgpu-unify-divergent-exit-nodes | FileCheck %s --check-prefix=UNIFY ; RUN: llc < %s -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 | FileCheck %s declare void @llvm.trap() @@ -70,33 +70,8 @@ define amdgpu_kernel void @kernel(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { ; CHECK-NEXT: s_mov_b64 s[2:3], -1 ; CHECK-NEXT: s_trap 2 ; CHECK-NEXT: s_branch .LBB0_4 -; UNIFY-LABEL: @kernel( -; UNIFY-NEXT: entry: -; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() -; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 -; UNIFY-NEXT: br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; UNIFY: if.then: -; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 -; UNIFY-NEXT: br i1 [[CMP1]], label [[IF_END6_SINK_SPLIT:%.*]], label [[COND_FALSE:%.*]] -; UNIFY: cond.false: -; UNIFY-NEXT: call void @llvm.trap() -; UNIFY-NEXT: unreachable -; UNIFY: if.else: -; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 -; UNIFY-NEXT: br i1 [[CMP2]], label [[IF_THEN3:%.*]], label [[IF_END6:%.*]] -; UNIFY: if.then3: -; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 -; UNIFY-NEXT: br i1 [[CMP1_I7]], label [[IF_END6_SINK_SPLIT]], label [[COND_FALSE_I8:%.*]] -; UNIFY: cond.false.i8: -; UNIFY-NEXT: call void @llvm.trap() -; UNIFY-NEXT: unreachable -; UNIFY: if.end6.sink.split: -; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] -; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 -; UNIFY-NEXT: br label [[IF_END6]] -; UNIFY: if.end6: -; UNIFY-NEXT: ret void -; + + entry: %tid = call i32 @llvm.amdgcn.workitem.id.x() %cmp = icmp eq i32 %n, 256 @@ -130,129 +105,5 @@ if.end6.sink.split: if.end6: ret void } - -define amdgpu_kernel void @kernel_callbr(i32 %a, ptr addrspace(1) %x, i32 noundef %n) { -; CHECK-LABEL: kernel_callbr: -; CHECK: ; %bb.0: ; %entry -; CHECK-NEXT: s_load_dword s1, s[8:9], 0x10 -; CHECK-NEXT: s_load_dword s0, s[8:9], 0x0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: s_cmpk_eq_i32 s1, 0x100 -; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: ; %bb.1: ; %if.then -; CHECK-NEXT: s_cmp_eq_u32 s0, 0 -; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB1_2: ; %if.end6.sink.split -; CHECK-NEXT: s_load_dwordx2 s[2:3], s[8:9], 0x8 -; CHECK-NEXT: v_lshlrev_b32_e32 v0, 2, v0 -; CHECK-NEXT: v_mov_b32_e32 v1, s0 -; CHECK-NEXT: s_waitcnt lgkmcnt(0) -; CHECK-NEXT: global_store_dword v0, v1, s[2:3] -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: .LBB1_3: ; Inline asm indirect target -; CHECK-NEXT: ; %UnifiedReturnBlock -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: s_endpgm -; CHECK-NEXT: .LBB1_4: ; Inline asm indirect target -; CHECK-NEXT: ; %if.else -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: v_cmp_gt_u32_e32 vcc, 10, v0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: ; %bb.5: ; %if.then3 -; CHECK-NEXT: s_cmp_eq_u32 s0, 0 -; CHECK-NEXT: s_cselect_b64 s[2:3], -1, 0 -; CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3] -; CHECK-NEXT: ;;#ASMSTART -; CHECK-NEXT: ;;#ASMEND -; CHECK-NEXT: s_branch .LBB1_2 -; CHECK-NEXT: .LBB1_6: ; Inline asm indirect target -; CHECK-NEXT: ; %cond.false.i8 -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: .LBB1_7: ; Inline asm indirect target -; CHECK-NEXT: ; %cond.false -; CHECK-NEXT: ; Label of block must be emitted -; CHECK-NEXT: s_trap 2 -; CHECK-NEXT: ; divergent unreachable -; CHECK-NEXT: s_branch .LBB1_3 -; UNIFY-LABEL: @kernel_callbr( -; UNIFY-NEXT: entry: -; UNIFY-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() -; UNIFY-NEXT: [[CMP:%.*]] = icmp eq i32 [[N:%.*]], 256 -; UNIFY-NEXT: [[CMP32:%.*]] = zext i1 [[CMP]] to i32 -; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP32]]) -; UNIFY-NEXT: to label [[IF_THEN:%.*]] [label %if.else] -; UNIFY: if.then: -; UNIFY-NEXT: [[CMP1:%.*]] = icmp eq i32 [[A:%.*]], 0 -; UNIFY-NEXT: [[CMP1_32:%.*]] = zext i1 [[CMP1]] to i32 -; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_32]]) -; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT:%.*]] [label %cond.false] -; UNIFY: cond.false: -; UNIFY-NEXT: call void @llvm.trap() -; UNIFY-NEXT: unreachable -; UNIFY: if.else: -; UNIFY-NEXT: [[CMP2:%.*]] = icmp ult i32 [[TID]], 10 -; UNIFY-NEXT: [[CMP2_32:%.*]] = zext i1 [[CMP2]] to i32 -; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP2_32]]) -; UNIFY-NEXT: to label [[IF_THEN3:%.*]] [label %if.end6] -; UNIFY: if.then3: -; UNIFY-NEXT: [[CMP1_I7:%.*]] = icmp eq i32 [[A]], 0 -; UNIFY-NEXT: [[CMP1_I7_32:%.*]] = zext i1 [[CMP1_I7]] to i32 -; UNIFY-NEXT: callbr void asm "", "r,!i"(i32 [[CMP1_I7_32]]) -; UNIFY-NEXT: to label [[IF_END6_SINK_SPLIT]] [label %cond.false.i8] -; UNIFY: cond.false.i8: -; UNIFY-NEXT: call void @llvm.trap() -; UNIFY-NEXT: unreachable -; UNIFY: if.end6.sink.split: -; UNIFY-NEXT: [[X1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[X:%.*]], i32 [[TID]] -; UNIFY-NEXT: store i32 [[A]], ptr addrspace(1) [[X1]], align 4 -; UNIFY-NEXT: callbr void asm "", ""() -; UNIFY-NEXT: to label [[IF_END6:%.*]] [] -; UNIFY: if.end6: -; UNIFY-NEXT: ret void -; -entry: - %tid = call i32 @llvm.amdgcn.workitem.id.x() - %cmp = icmp eq i32 %n, 256 - %cmp32 = zext i1 %cmp to i32 - callbr void asm "", "r,!i"(i32 %cmp32) to label %if.then [label %if.else] - -if.then: - %cmp1 = icmp eq i32 %a, 0 - %cmp1_32 = zext i1 %cmp1 to i32 - callbr void asm "", "r,!i"(i32 %cmp1_32) to label %if.end6.sink.split [label %cond.false] - -cond.false: - call void @llvm.trap() - unreachable - -if.else: - %cmp2 = icmp ult i32 %tid, 10 - %cmp2_32 = zext i1 %cmp2 to i32 - callbr void asm "", "r,!i"(i32 %cmp2_32) to label %if.then3 [label %if.end6] - -if.then3: - %cmp1.i7 = icmp eq i32 %a, 0 - %cmp1.i7_32 = zext i1 %cmp1.i7 to i32 - callbr void asm "", "r,!i"(i32 %cmp1.i7_32) to label %if.end6.sink.split [label %cond.false.i8] - -cond.false.i8: - call void @llvm.trap() - unreachable - -if.end6.sink.split: - %x1 = getelementptr inbounds i32, ptr addrspace(1) %x, i32 %tid - store i32 %a, ptr addrspace(1) %x1, align 4 - callbr void asm "", ""() to label %if.end6 [] - -if.end6: - ret void -} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; UNIFY: {{.*}} diff --git a/llvm/test/CodeGen/AMDGPU/update-phi.ll b/llvm/test/CodeGen/AMDGPU/update-phi.ll index 684dc1a1f0092..50666bee325e8 100644 --- a/llvm/test/CodeGen/AMDGPU/update-phi.ll +++ b/llvm/test/CodeGen/AMDGPU/update-phi.ll @@ -37,42 +37,3 @@ n28: ; preds = %.loopexit, %n28 n31: ; preds = ret void } - -define amdgpu_ps void @_amdgpu_ps_main_callbr() local_unnamed_addr #3 { -; IR-LABEL: @_amdgpu_ps_main_callbr( -; IR-NEXT: .entry: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[DOTLOOPEXIT:%.*]] [] -; IR: .loopexit: -; IR-NEXT: callbr void asm "", ""() -; IR-NEXT: to label [[N28:%.*]] [] -; IR: n28: -; IR-NEXT: [[DOT01:%.*]] = phi float [ 0.000000e+00, [[DOTLOOPEXIT]] ], [ [[N29:%.*]], [[TRANSITIONBLOCK:%.*]] ] -; IR-NEXT: [[N29]] = fadd float [[DOT01]], 1.000000e+00 -; IR-NEXT: [[N30:%.*]] = fcmp ogt float [[N29]], 4.000000e+00 -; IR-NEXT: [[N30_32:%.*]] = zext i1 [[N30]] to i32 -; IR-NEXT: br i1 true, label [[TRANSITIONBLOCK]], label [[DUMMYRETURNBLOCK:%.*]] -; IR: TransitionBlock: -; IR-NEXT: callbr void asm "", "r,!i"(i32 [[N30_32]]) -; IR-NEXT: to label [[DOTLOOPEXIT]] [label %n28] -; IR: n31: -; IR-NEXT: ret void -; IR: DummyReturnBlock: -; IR-NEXT: ret void -; -.entry: - callbr void asm "", ""() to label %.loopexit [] - -.loopexit: ; preds = %n28, %.entry - callbr void asm "", ""() to label %n28 [] - -n28: ; preds = %.loopexit, %n28 - %.01 = phi float [ 0.000000e+00, %.loopexit ], [ %n29, %n28 ] - %n29 = fadd float %.01, 1.0 - %n30 = fcmp ogt float %n29, 4.000000e+00 - %n30.32 = zext i1 %n30 to i32 - callbr void asm "", "r,!i"(i32 %n30.32) to label %.loopexit [label %n28] - -n31: ; preds = - ret void -} diff --git a/llvm/test/Transforms/StructurizeCFG/callbr.ll b/llvm/test/Transforms/StructurizeCFG/callbr.ll deleted file mode 100644 index 42f95194980d4..0000000000000 --- a/llvm/test/Transforms/StructurizeCFG/callbr.ll +++ /dev/null @@ -1,235 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 -; RUN: opt -S -passes=structurizecfg %s -o - | FileCheck %s - -; Structurize as usual, but don't tear callbr and its destination blocks apart. -; -; Note: currently, callbr blocks and their corresponding target blocks -; themselves are not handled by the structurizer.* If the CFG turns out to be -; unstructured at the end, the CFG lowering (si-annotate-control-flow) will -; detect this. For the currently intended use cases of callbr in the context of -; the AMDGPU backend, this is not a limitation (cf. -; https://discourse.llvm.org/t/rfc-add-callbr-intrinsic-support/86087). -; -; Note 2: while callbr and its targets remain untouched, everything else is -; handled as usual, even if it is nested in a callbr region. -; -; *FIXME: this will be fixed in the future. Callbr can be handled as follows: -; Input IR: -; ``` -; define void @foo_callbr() { -; callbr void asm "", "!i"() to label %fallthrough [label %indirect, ...] -; fallthrough: -; br label %exit -; indirect: -; br label %exit -; ... -; exit: -; ret void -; } -; ``` -; -; Output IR: -; ``` -; define void @foo_callbr() { -; callbr void asm "", "!i"() -; to label %fallthrough [label %fake.indirect, label %fake.indirect1, label %fake.indirect2, ...] -; fake.indirect: ; preds = %0 -; br label %Flow -; fake.indirect1: ; preds = %0 -; br label %Flow -; fake.indirect2: ; preds = %0 -; br label %Flow -; ... -; Flow: ; preds = %fallthrough, %fake.indirect[0-N] -; %1 = phi i1 [ false, %fallthrough ], [ true, %fake.indirect ], [ false, %fake.indirect[1-N] ] -; br i1 %1, label %indirect, label %Flow1 -; Flow1: ; preds = %Flow, %indirect -; %2 = phi i1 [ false, %Flow], [ true, %fake.indirect1 ], [ false, %indirect ] -; br i1 %2, label %indirect1, label %Flow2 -; Flow2: ; preds = %Flow, %indirect1 -; %2 = phi i1 [ false, %Flow], [ true, %fake.indirect2 ], [ false, %indirect1 ] -; br i1 %2, label %indirect2, label %Flow3 -; ... -; fallthrough: ; preds = %0 -; br label %Flow -; indirect: ; preds = %Flow -; br label %Flow1 -; indirect1: ; preds = %Flow1 -; br label %Flow2 -; indirect2: : preds = %Flow2 -; br label %Flow3 -; ... -; exit: ; preds = %indirectN, %FlowN -; ret void -; } -; ``` -; -; Output IR as ASCII-art: -; %0 -; --------------------- -; | | | | -; v v v v -; f f.i f.i1 f.i2 -; | | | | -; v v v v -; --------------------- -; %Flow -; | \ -; | %indirect -; | / -; %Flow1 -; | \ -; | %indirect1 -; | / -; %Flow2 -; | \ -; | %indirect2 -; | / -; %exit -; - -; Only callbr, nothing to do. -define void @callbr_simple() { -; CHECK-LABEL: define void @callbr_simple() { -; CHECK-NEXT: [[CALLBR:.*:]] -; CHECK-NEXT: callbr void asm "", "!i"() -; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect] -; CHECK: [[INDIRECT]]: -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[INDIRECT1:.*:]] -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; -callbr: - callbr void asm "", "!i"() to label %fallthrough [label %indirect] -fallthrough: - br label %exit -indirect: - br label %exit -exit: - ret void -} - -; Callbr nested in non-callbr: non-callbr is transformed -define void @callbr_in_non_callbr(i1 %c) { -; CHECK-LABEL: define void @callbr_in_non_callbr( -; CHECK-SAME: i1 [[C:%.*]]) { -; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true -; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW:.*]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[NOCALLBR]] ], [ true, [[TMP0:%.*]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[EXIT:.*]] -; CHECK: [[CALLBR]]: -; CHECK-NEXT: callbr void asm "", "!i"() -; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect] -; CHECK: [[INDIRECT]]: -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[INDIRECT1:.*:]] -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[NOCALLBR]]: -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; - br i1 %c, label %callbr, label %nocallbr -callbr: - callbr void asm "", "!i"() to label %fallthrough [label %indirect] -fallthrough: - br label %exit -indirect: - br label %exit -nocallbr: - br label %exit -exit: - ret void -} - -; Callbr parent of non-callbr: non-callbr is transformed -define void @non_callbr_in_callbr(i1 %c) { -; CHECK-LABEL: define void @non_callbr_in_callbr( -; CHECK-SAME: i1 [[C:%.*]]) { -; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true -; CHECK-NEXT: callbr void asm "", "!i"() -; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect] -; CHECK: [[INDIRECT]]: -; CHECK-NEXT: br i1 [[C_INV]], label %[[FALLTHROUGH2:.*]], label %[[FLOW:.*]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FALLTHROUGH2]] ], [ true, %[[INDIRECT]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[FALLTHROUGH1:.*]], label %[[FLOW1:.*]] -; CHECK: [[FALLTHROUGH1]]: -; CHECK-NEXT: br label %[[FLOW1]] -; CHECK: [[FALLTHROUGH2]]: -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[INDIRECT1:.*:]] -; CHECK-NEXT: br label %[[EXIT:.*]] -; CHECK: [[FLOW1]]: -; CHECK-NEXT: br label %[[EXIT]] -; CHECK: [[EXIT]]: -; CHECK-NEXT: ret void -; - callbr void asm "", "!i"() to label %fallthrough [label %indirect] -fallthrough: - br i1 %c, label %fallthrough1, label %fallthrough2 -fallthrough1: - br label %exit -fallthrough2: - br label %exit -indirect: - br label %exit -exit: - ret void -} - -; Callbr surrounded by non-callbr: all three regular branches are handled -; correctly -define void @callbr_nested_in_non_callbr(i1 %c, i1 %d, i1 %e, i1 %f) { -; CHECK-LABEL: define void @callbr_nested_in_non_callbr( -; CHECK-SAME: i1 [[C:%.*]], i1 [[D:%.*]], i1 [[E:%.*]], i1 [[F:%.*]]) { -; CHECK-NEXT: [[C_INV:%.*]] = xor i1 [[C]], true -; CHECK-NEXT: br i1 [[C_INV]], label %[[NOCALLBR:.*]], label %[[FLOW3:.*]] -; CHECK: [[FLOW3]]: -; CHECK-NEXT: [[TMP1:%.*]] = phi i1 [ false, %[[FLOW:.*]] ], [ true, [[TMP0:%.*]] ] -; CHECK-NEXT: br i1 [[TMP1]], label %[[CALLBR:.*]], label %[[RET:.*]] -; CHECK: [[CALLBR]]: -; CHECK-NEXT: callbr void asm "", "!i"() -; CHECK-NEXT: to label %[[INDIRECT:.*]] [label %indirect] -; CHECK: [[INDIRECT]]: -; CHECK-NEXT: br i1 [[D]], label %[[FALLTHROUGH1:.*]], label %[[FLOW2:.*]] -; CHECK: [[FALLTHROUGH1]]: -; CHECK-NEXT: br label %[[FLOW2]] -; CHECK: [[INDIRECT2:.*:]] -; CHECK-NEXT: br i1 [[E]], label %[[INDIRECT1:.*]], label %[[FLOW1:.*]] -; CHECK: [[INDIRECT1]]: -; CHECK-NEXT: br label %[[FLOW1]] -; CHECK: [[NOCALLBR]]: -; CHECK-NEXT: br i1 [[F]], label %[[NOCALLBR1:.*]], label %[[FLOW]] -; CHECK: [[NOCALLBR1]]: -; CHECK-NEXT: br label %[[FLOW]] -; CHECK: [[FLOW]]: -; CHECK-NEXT: br label %[[FLOW3]] -; CHECK: [[FLOW1]]: -; CHECK-NEXT: br label %[[RET]] -; CHECK: [[FLOW2]]: -; CHECK-NEXT: br label %[[RET]] -; CHECK: [[RET]]: -; CHECK-NEXT: ret void -; - br i1 %c, label %callbr, label %nocallbr -callbr: - callbr void asm "", "!i"() to label %fallthrough [label %indirect] -fallthrough: - br i1 %d, label %fallthrough1, label %ret -fallthrough1: - br label %ret -indirect: - br i1 %e, label %indirect1, label %ret -indirect1: - br label %ret -nocallbr: - br i1 %f, label %nocallbr1, label %ret -nocallbr1: - br label %ret -ret: - ret void -}