From 80aa06dd4fdf5cd6c7dca3641270bd0e067b76c3 Mon Sep 17 00:00:00 2001
From: Prasoon Mishra
Date: Mon, 3 Nov 2025 09:24:24 +0000
Subject: [PATCH] [AMDGPU] Fix CFG invalidation detection in insertSimulatedTrap

When SIMULATED_TRAP is at the end of a block with no successors,
insertSimulatedTrap incorrectly returns the original MBB despite
adding HaltLoopBB to the CFG.

EmitInstrWithCustomInserter detects CFG changes by comparing the
returned MBB with the original. When they match, it assumes no
modification occurred and skips MachineLoopInfo invalidation.
This causes stale loop information in subsequent passes.

Fix: Return HaltLoopBB to properly signal the CFG modification.
---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index a7333e3373f38..9b05d99e265e6 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1963,6 +1963,10 @@ MachineBasicBlock *SIInstrInfo::insertSimulatedTrap(MachineRegisterInfo &MRI,
     BuildMI(MBB, MI, DL, get(AMDGPU::S_CBRANCH_EXECNZ)).addMBB(TrapBB);
     MF->push_back(TrapBB);
     MBB.addSuccessor(TrapBB);
+  } else {
+    // Since we're adding HaltLoopBB and modifying the CFG, we must return a
+    // different block to signal the change.
+    ContBB = HaltLoopBB;
   }
 
   // Start with a `s_trap 2`, if we're in PRIV=1 and we need the workaround this