From 0e517e118df4ffb38064f9421da3dd3c5703c2c2 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Thu, 4 Dec 2025 04:45:21 -0600 Subject: [PATCH 1/2] [AMDGPU][GlobalISel] Fix / workaround amdgcn.kill/.unreachable lowering cf. https://github.com/llvm/llvm-project/pull/133907#issuecomment-3611354688 --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 25 ++++++++++++++----- llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll | 22 +++++++++++++--- 2 files changed, 37 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index d9a7e898076b3..7fff97f8336a0 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3088,18 +3088,31 @@ bool IRTranslator::translateCallBr(const User &U, return false; // Retrieve successors. - SmallPtrSet Dests = {I.getDefaultDest()}; + SmallPtrSet Dests; + Dests.insert(I.getDefaultDest()); MachineBasicBlock *Return = &getMBB(*I.getDefaultDest()); // Update successor info. addSuccessorWithProb(CallBrMBB, Return, BranchProbability::getOne()); - // TODO: For most of the cases where there is an intrinsic callbr, we're - // having exactly one indirect target, which will be unreachable. As soon as - // this changes, we might need to enhance - // Target->setIsInlineAsmBrIndirectTarget or add something similar for - // intrinsic indirect branches. + + // Add indirect targets as successors. For intrinsic callbr, these represent + // implicit control flow (e.g., the "kill" path for amdgcn.kill). We mark them + // with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as + // valid successors, even though they're not from inline asm. + for (BasicBlock *Dest : I.getIndirectDests()) { + MachineBasicBlock *Target = &getMBB(*Dest); + Target->setIsInlineAsmBrIndirectTarget(); + Target->setLabelMustBeEmitted(); + // Don't add duplicate machine successors. + if (Dests.insert(Dest).second) + addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + } + CallBrMBB->normalizeSuccProbs(); + // Drop into default successor. + MIRBuilder.buildBr(*Return); + return true; } diff --git a/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll index ff1e23836fed2..8ac31b3c70ed7 100644 --- a/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll +++ b/llvm/test/CodeGen/AMDGPU/callbr-intrinsics.ll @@ -32,14 +32,21 @@ define void @test_kill(ptr %src, ptr %dst, i1 %c) { ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc ; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7] -; GISEL-NEXT: s_cbranch_scc0 .LBB0_2 +; GISEL-NEXT: s_cbranch_scc0 .LBB0_4 ; GISEL-NEXT: ; %bb.1: ; GISEL-NEXT: s_and_b64 exec, exec, s[4:5] +; GISEL-NEXT: ; %bb.2: ; %cont ; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GISEL-NEXT: flat_store_dword v[2:3], v0 ; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] -; GISEL-NEXT: .LBB0_2: +; GISEL-NEXT: .LBB0_3: ; Inline asm indirect target +; GISEL-NEXT: ; %kill +; GISEL-NEXT: ; Label of block must be emitted +; GISEL-NEXT: ; divergent unreachable +; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB0_4: ; GISEL-NEXT: s_mov_b64 exec, 0 ; GISEL-NEXT: s_endpgm %a = load i32, ptr %src, align 4 @@ -81,14 +88,21 @@ define void @test_kill_block_order(ptr %src, ptr %dst, i1 %c) { ; GISEL-NEXT: s_mov_b64 s[4:5], exec ; GISEL-NEXT: s_andn2_b64 s[6:7], exec, vcc ; GISEL-NEXT: s_andn2_b64 s[4:5], s[4:5], s[6:7] -; GISEL-NEXT: s_cbranch_scc0 .LBB1_2 +; GISEL-NEXT: s_cbranch_scc0 .LBB1_4 ; GISEL-NEXT: ; %bb.1: ; GISEL-NEXT: s_and_b64 exec, exec, s[4:5] +; GISEL-NEXT: ; %bb.2: ; %cont ; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GISEL-NEXT: flat_store_dword v[2:3], v0 ; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] -; GISEL-NEXT: .LBB1_2: +; GISEL-NEXT: .LBB1_3: ; Inline asm indirect target +; GISEL-NEXT: ; %kill +; GISEL-NEXT: ; Label of block must be emitted +; GISEL-NEXT: ; divergent unreachable +; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_setpc_b64 s[30:31] +; GISEL-NEXT: .LBB1_4: ; GISEL-NEXT: s_mov_b64 exec, 0 ; GISEL-NEXT: s_endpgm %a = load i32, ptr %src, align 4 From dd3b2a4fb1c03d70438eda253258a06787b09376 Mon Sep 17 00:00:00 2001 From: Robert Imschweiler Date: Thu, 4 Dec 2025 05:15:07 -0600 Subject: [PATCH 2/2] implement feedback --- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp index 7fff97f8336a0..d87a2311d5739 100644 --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3088,8 +3088,7 @@ bool IRTranslator::translateCallBr(const User &U, return false; // Retrieve successors. - SmallPtrSet Dests; - Dests.insert(I.getDefaultDest()); + SmallPtrSet Dests = {I.getDefaultDest()}; MachineBasicBlock *Return = &getMBB(*I.getDefaultDest()); // Update successor info. @@ -3100,12 +3099,12 @@ bool IRTranslator::translateCallBr(const User &U, // with setIsInlineAsmBrIndirectTarget so the machine verifier accepts them as // valid successors, even though they're not from inline asm. for (BasicBlock *Dest : I.getIndirectDests()) { - MachineBasicBlock *Target = &getMBB(*Dest); - Target->setIsInlineAsmBrIndirectTarget(); - Target->setLabelMustBeEmitted(); + MachineBasicBlock &Target = getMBB(*Dest); + Target.setIsInlineAsmBrIndirectTarget(); + Target.setLabelMustBeEmitted(); // Don't add duplicate machine successors. if (Dests.insert(Dest).second) - addSuccessorWithProb(CallBrMBB, Target, BranchProbability::getZero()); + addSuccessorWithProb(CallBrMBB, &Target, BranchProbability::getZero()); } CallBrMBB->normalizeSuccProbs();