diff --git a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp index 484861dcaac07..362ef140a28f8 100644 --- a/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp +++ b/llvm/lib/Target/AMDGPU/SIMemoryLegalizer.cpp @@ -360,11 +360,13 @@ class SICacheControl { /// between memory instructions to enforce the order they become visible as /// observed by other memory instructions executing in memory scope \p Scope. /// \p IsCrossAddrSpaceOrdering indicates if the memory ordering is between - /// address spaces. Returns true iff any instructions inserted. + /// address spaces. If \p AtomicsOnly is true, only insert waits for counters + /// that are used by atomic instructions. + /// Returns true iff any instructions inserted. virtual bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const = 0; + AtomicOrdering Order, bool AtomicsOnly) const = 0; /// Inserts any necessary instructions at position \p Pos relative to /// instruction \p MI to ensure any subsequent memory instructions of this @@ -437,7 +439,7 @@ class SIGfx6CacheControl : public SICacheControl { bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const override; + AtomicOrdering Order, bool AtomicsOnly) const override; bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -484,7 +486,7 @@ class SIGfx90ACacheControl : public SIGfx7CacheControl { bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const override; + AtomicOrdering Order, bool AtomicsOnly) const override; bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -572,7 +574,7 @@ class SIGfx10CacheControl : public SIGfx7CacheControl { bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const override; + AtomicOrdering Order, bool AtomicsOnly) const override; bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, @@ -629,7 +631,7 @@ class SIGfx12CacheControl : public SIGfx11CacheControl { bool insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const override; + AtomicOrdering Order, bool AtomicsOnly) const override; bool insertAcquire(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, Position Pos) const override; @@ -1120,7 +1122,8 @@ bool SIGfx6CacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); return Changed; } @@ -1140,7 +1143,8 @@ bool SIGfx6CacheControl::insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, Position Pos, - AtomicOrdering Order) const { + AtomicOrdering Order, + bool AtomicsOnly) const { bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); @@ -1294,7 +1298,8 @@ bool SIGfx6CacheControl::insertRelease(MachineBasicBlock::iterator &MI, bool IsCrossAddrSpaceOrdering, Position Pos) const { return insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, - IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release); + IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release, + /*AtomicsOnly=*/false); } bool SIGfx7CacheControl::insertAcquire(MachineBasicBlock::iterator &MI, @@ -1447,7 +1452,8 @@ bool SIGfx90ACacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); return Changed; } @@ -1467,8 +1473,8 @@ bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, - Position Pos, - AtomicOrdering Order) const { + Position Pos, AtomicOrdering Order, + bool AtomicsOnly) const { if (ST.isTgSplitEnabled()) { // In threadgroup split mode the waves of a work-group can be executing on // different CUs. Therefore need to wait for global or GDS memory operations @@ -1488,7 +1494,8 @@ bool SIGfx90ACacheControl::insertWait(MachineBasicBlock::iterator &MI, AddrSpace &= ~SIAtomicAddrSpace::LDS; } return SIGfx7CacheControl::insertWait(MI, Scope, AddrSpace, Op, - IsCrossAddrSpaceOrdering, Pos, Order); + IsCrossAddrSpaceOrdering, Pos, Order, + AtomicsOnly); } bool SIGfx90ACacheControl::insertAcquire(MachineBasicBlock::iterator &MI, @@ -1747,7 +1754,8 @@ bool SIGfx940CacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); return Changed; } @@ -1904,7 +1912,8 @@ bool SIGfx940CacheControl::insertRelease(MachineBasicBlock::iterator &MI, // Ensure the necessary S_WAITCNT needed by any "BUFFER_WBL2" as well as other // S_WAITCNT needed. Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, - IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release); + IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release, + /*AtomicsOnly=*/false); return Changed; } @@ -1984,7 +1993,8 @@ bool SIGfx10CacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); return Changed; } @@ -2007,7 +2017,8 @@ bool SIGfx10CacheControl::insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, - Position Pos, AtomicOrdering Order) const { + Position Pos, AtomicOrdering Order, + bool AtomicsOnly) const { bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); @@ -2281,7 +2292,8 @@ bool SIGfx11CacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); return Changed; } @@ -2354,7 +2366,8 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI, SIAtomicScope Scope, SIAtomicAddrSpace AddrSpace, SIMemOp Op, bool IsCrossAddrSpaceOrdering, - Position Pos, AtomicOrdering Order) const { + Position Pos, AtomicOrdering Order, + bool AtomicsOnly) const { bool Changed = false; MachineBasicBlock &MBB = *MI->getParent(); @@ -2444,7 +2457,7 @@ bool SIGfx12CacheControl::insertWait(MachineBasicBlock::iterator &MI, // // This also applies to fences. Fences cannot pair with an instruction // tracked with bvh/samplecnt as we don't have any atomics that do that. - if (Order != AtomicOrdering::Acquire && ST.hasImageInsts()) { + if (!AtomicsOnly && ST.hasImageInsts()) { BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_BVHCNT_soft)).addImm(0); BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_WAIT_SAMPLECNT_soft)).addImm(0); } @@ -2587,7 +2600,8 @@ bool SIGfx12CacheControl::insertRelease(MachineBasicBlock::iterator &MI, // complete, whether we inserted a WB or not. If we inserted a WB (storecnt), // we of course need to wait for that as well. Changed |= insertWait(MI, Scope, AddrSpace, SIMemOp::LOAD | SIMemOp::STORE, - IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release); + IsCrossAddrSpaceOrdering, Pos, AtomicOrdering::Release, + /*AtomicsOnly=*/false); return Changed; } @@ -2624,7 +2638,8 @@ bool SIGfx12CacheControl::enableVolatileAndOrNonTemporal( // observable outside the program, so no need to cause a waitcnt for LDS // address space operations. Changed |= insertWait(MI, SIAtomicScope::SYSTEM, AddrSpace, Op, false, - Position::AFTER, AtomicOrdering::Unordered); + Position::AFTER, AtomicOrdering::Unordered, + /*AtomicsOnly=*/false); } return Changed; @@ -2748,13 +2763,15 @@ bool SIMemoryLegalizer::expandLoad(const SIMemOpInfo &MOI, Changed |= CC->insertWait(MI, MOI.getScope(), MOI.getOrderingAddrSpace(), SIMemOp::LOAD | SIMemOp::STORE, MOI.getIsCrossAddressSpaceOrdering(), - Position::BEFORE, Order); + Position::BEFORE, Order, /*AtomicsOnly=*/false); if (Order == AtomicOrdering::Acquire || Order == AtomicOrdering::SequentiallyConsistent) { - Changed |= CC->insertWait( - MI, MOI.getScope(), MOI.getInstrAddrSpace(), SIMemOp::LOAD, - MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order); + // The wait below only needs to wait on the prior atomic. + Changed |= + CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(), + SIMemOp::LOAD, MOI.getIsCrossAddressSpaceOrdering(), + Position::AFTER, Order, /*AtomicsOnly=*/true); Changed |= CC->insertAcquire(MI, MOI.getScope(), MOI.getOrderingAddrSpace(), Position::AFTER); @@ -2830,9 +2847,11 @@ bool SIMemoryLegalizer::expandAtomicFence(const SIMemOpInfo &MOI, if (MOI.isAtomic()) { const AtomicOrdering Order = MOI.getOrdering(); if (Order == AtomicOrdering::Acquire) { - Changed |= CC->insertWait( - MI, MOI.getScope(), OrderingAddrSpace, SIMemOp::LOAD | SIMemOp::STORE, - MOI.getIsCrossAddressSpaceOrdering(), Position::BEFORE, Order); + // Acquire fences only need to wait on the previous atomic they pair with. + Changed |= CC->insertWait(MI, MOI.getScope(), OrderingAddrSpace, + SIMemOp::LOAD | SIMemOp::STORE, + MOI.getIsCrossAddressSpaceOrdering(), + Position::BEFORE, Order, /*AtomicsOnly=*/true); } if (Order == AtomicOrdering::Release || @@ -2897,10 +2916,12 @@ bool SIMemoryLegalizer::expandAtomicCmpxchgOrRmw(const SIMemOpInfo &MOI, Order == AtomicOrdering::SequentiallyConsistent || MOI.getFailureOrdering() == AtomicOrdering::Acquire || MOI.getFailureOrdering() == AtomicOrdering::SequentiallyConsistent) { - Changed |= CC->insertWait( - MI, MOI.getScope(), MOI.getInstrAddrSpace(), - isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE, - MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, Order); + // Only wait on the previous atomic. + Changed |= + CC->insertWait(MI, MOI.getScope(), MOI.getInstrAddrSpace(), + isAtomicRet(*MI) ? SIMemOp::LOAD : SIMemOp::STORE, + MOI.getIsCrossAddressSpaceOrdering(), Position::AFTER, + Order, /*AtomicsOnly=*/true); Changed |= CC->insertAcquire(MI, MOI.getScope(), MOI.getOrderingAddrSpace(), Position::AFTER); diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll index 4caaad646378d..9e2906cf85432 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll @@ -795,8 +795,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -816,8 +814,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2942,8 +2938,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -2964,8 +2958,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3196,8 +3188,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -3218,8 +3208,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9001,8 +8989,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9027,8 +9013,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9349,8 +9333,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9375,8 +9357,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9677,8 +9657,6 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9699,8 +9677,6 @@ define amdgpu_kernel void @flat_agent_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10335,8 +10311,6 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10361,8 +10335,6 @@ define amdgpu_kernel void @flat_agent_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10683,8 +10655,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10709,8 +10679,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11031,8 +10999,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11057,8 +11023,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11379,8 +11343,6 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11405,8 +11367,6 @@ define amdgpu_kernel void @flat_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12071,8 +12031,6 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12097,8 +12055,6 @@ define amdgpu_kernel void @flat_agent_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12419,8 +12375,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12445,8 +12399,6 @@ define amdgpu_kernel void @flat_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12767,8 +12719,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12793,8 +12743,6 @@ define amdgpu_kernel void @flat_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -13632,8 +13580,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -13654,8 +13600,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -15789,8 +15733,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -15812,8 +15754,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -16054,8 +15994,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -16077,8 +16015,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -21829,8 +21765,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -21856,8 +21790,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22188,8 +22120,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22215,8 +22145,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22527,8 +22455,6 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22550,8 +22476,6 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23207,8 +23131,6 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23234,8 +23156,6 @@ define amdgpu_kernel void @flat_agent_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23566,8 +23486,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23593,8 +23511,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23925,8 +23841,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23952,8 +23866,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24284,8 +24196,6 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -24311,8 +24221,6 @@ define amdgpu_kernel void @flat_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24998,8 +24906,6 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25025,8 +24931,6 @@ define amdgpu_kernel void @flat_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25357,8 +25261,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25384,8 +25286,6 @@ define amdgpu_kernel void @flat_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25716,8 +25616,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25743,8 +25641,6 @@ define amdgpu_kernel void @flat_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll index 9ea9f1125c726..27283beb4b877 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-cluster.ll @@ -795,8 +795,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -816,8 +814,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2937,8 +2933,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -2959,8 +2953,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3190,8 +3182,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -3212,8 +3202,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -8982,8 +8970,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9008,8 +8994,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9329,8 +9313,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9355,8 +9337,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9656,8 +9636,6 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9678,8 +9656,6 @@ define amdgpu_kernel void @flat_cluster_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10314,8 +10290,6 @@ define amdgpu_kernel void @flat_cluster_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10340,8 +10314,6 @@ define amdgpu_kernel void @flat_cluster_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10661,8 +10633,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10687,8 +10657,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11008,8 +10976,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11034,8 +11000,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11355,8 +11319,6 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11381,8 +11343,6 @@ define amdgpu_kernel void @flat_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12045,8 +12005,6 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12071,8 +12029,6 @@ define amdgpu_kernel void @flat_cluster_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12392,8 +12348,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12418,8 +12372,6 @@ define amdgpu_kernel void @flat_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12739,8 +12691,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12765,8 +12715,6 @@ define amdgpu_kernel void @flat_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -13603,8 +13551,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -13625,8 +13571,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -15755,8 +15699,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -15778,8 +15720,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -16019,8 +15959,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -16042,8 +15980,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -21781,8 +21717,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -21808,8 +21742,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22139,8 +22071,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22166,8 +22096,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22477,8 +22405,6 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22500,8 +22426,6 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23157,8 +23081,6 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23184,8 +23106,6 @@ define amdgpu_kernel void @flat_cluster_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23515,8 +23435,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23542,8 +23460,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23873,8 +23789,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23900,8 +23814,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24231,8 +24143,6 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -24258,8 +24168,6 @@ define amdgpu_kernel void @flat_cluster_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24943,8 +24851,6 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -24970,8 +24876,6 @@ define amdgpu_kernel void @flat_cluster_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25301,8 +25205,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25328,8 +25230,6 @@ define amdgpu_kernel void @flat_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25659,8 +25559,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25686,8 +25584,6 @@ define amdgpu_kernel void @flat_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll index 265c8c409f2d6..74065146fd385 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll @@ -799,8 +799,6 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -820,8 +818,6 @@ define amdgpu_kernel void @flat_system_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -2979,8 +2975,6 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -3002,8 +2996,6 @@ define amdgpu_kernel void @flat_system_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -3239,8 +3231,6 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -3262,8 +3252,6 @@ define amdgpu_kernel void @flat_system_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9126,8 +9114,6 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9153,8 +9139,6 @@ define amdgpu_kernel void @flat_system_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9480,8 +9464,6 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9507,8 +9489,6 @@ define amdgpu_kernel void @flat_system_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -9811,8 +9791,6 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9833,8 +9811,6 @@ define amdgpu_kernel void @flat_system_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10476,8 +10452,6 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10503,8 +10477,6 @@ define amdgpu_kernel void @flat_system_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -10830,8 +10802,6 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10857,8 +10827,6 @@ define amdgpu_kernel void @flat_system_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11184,8 +11152,6 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11211,8 +11177,6 @@ define amdgpu_kernel void @flat_system_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -11538,8 +11502,6 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11565,8 +11527,6 @@ define amdgpu_kernel void @flat_system_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12242,8 +12202,6 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12269,8 +12227,6 @@ define amdgpu_kernel void @flat_system_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12596,8 +12552,6 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12623,8 +12577,6 @@ define amdgpu_kernel void @flat_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -12950,8 +12902,6 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -12977,8 +12927,6 @@ define amdgpu_kernel void @flat_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -13820,8 +13768,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -13842,8 +13788,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -16010,8 +15954,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -16034,8 +15976,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -16281,8 +16221,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -16305,8 +16243,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22138,8 +22074,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22166,8 +22100,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22503,8 +22435,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22531,8 +22461,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -22845,8 +22773,6 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22868,8 +22794,6 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-CU-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23532,8 +23456,6 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23560,8 +23482,6 @@ define amdgpu_kernel void @flat_system_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -23897,8 +23817,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -23925,8 +23843,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24262,8 +24178,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -24290,8 +24204,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -24627,8 +24539,6 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -24655,8 +24565,6 @@ define amdgpu_kernel void @flat_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25353,8 +25261,6 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25381,8 +25287,6 @@ define amdgpu_kernel void @flat_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -25718,8 +25622,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -25746,8 +25648,6 @@ define amdgpu_kernel void @flat_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 @@ -26083,8 +25983,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -26111,8 +26009,6 @@ define amdgpu_kernel void @flat_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll index e7f348e9f7d2f..3826953a8e2ab 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll @@ -786,8 +786,6 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -2837,8 +2835,6 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -3077,8 +3073,6 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -7431,8 +7425,6 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -7765,8 +7757,6 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -8083,8 +8073,6 @@ define amdgpu_kernel void @flat_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -8727,8 +8715,6 @@ define amdgpu_kernel void @flat_workgroup_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9061,8 +9047,6 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9395,8 +9379,6 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -9729,8 +9711,6 @@ define amdgpu_kernel void @flat_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10395,8 +10375,6 @@ define amdgpu_kernel void @flat_workgroup_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -10729,8 +10707,6 @@ define amdgpu_kernel void @flat_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11063,8 +11039,6 @@ define amdgpu_kernel void @flat_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -11879,8 +11853,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -13853,8 +13825,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -14082,8 +14052,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_swap_b32 v2, v[0:1], v2 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -19388,8 +19356,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -19711,8 +19677,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -20024,8 +19988,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 ; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -20654,8 +20616,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -20977,8 +20937,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -21300,8 +21258,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -21623,8 +21579,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22267,8 +22221,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22590,8 +22542,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 @@ -22913,8 +22863,6 @@ define amdgpu_kernel void @flat_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: flat_atomic_cmpswap_b32 v2, v[0:1], v[2:3] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0 diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll index 5c2d8eb4f5ec0..493f985c84701 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll @@ -804,8 +804,6 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -822,8 +820,6 @@ define amdgpu_kernel void @global_agent_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2984,8 +2980,6 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3003,8 +2997,6 @@ define amdgpu_kernel void @global_agent_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3229,8 +3221,6 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3248,8 +3238,6 @@ define amdgpu_kernel void @global_agent_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8606,8 +8594,6 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8629,8 +8615,6 @@ define amdgpu_kernel void @global_agent_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8923,8 +8907,6 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8946,8 +8928,6 @@ define amdgpu_kernel void @global_agent_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9219,8 +9199,6 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9238,8 +9216,6 @@ define amdgpu_kernel void @global_agent_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9814,8 +9790,6 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9837,8 +9811,6 @@ define amdgpu_kernel void @global_agent_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10131,8 +10103,6 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10154,8 +10124,6 @@ define amdgpu_kernel void @global_agent_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10448,8 +10416,6 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10471,8 +10437,6 @@ define amdgpu_kernel void @global_agent_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10765,8 +10729,6 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10788,8 +10750,6 @@ define amdgpu_kernel void @global_agent_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11395,8 +11355,6 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11418,8 +11376,6 @@ define amdgpu_kernel void @global_agent_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11712,8 +11668,6 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11735,8 +11689,6 @@ define amdgpu_kernel void @global_agent_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12029,8 +11981,6 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12052,8 +12002,6 @@ define amdgpu_kernel void @global_agent_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12878,8 +12826,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12896,8 +12842,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15058,8 +15002,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15077,8 +15019,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15303,8 +15243,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15322,8 +15260,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20384,8 +20320,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20407,8 +20341,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20701,8 +20633,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20724,8 +20654,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20997,8 +20925,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21016,8 +20942,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21592,8 +21516,6 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21615,8 +21537,6 @@ define amdgpu_kernel void @global_agent_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21909,8 +21829,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21932,8 +21850,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22226,8 +22142,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22249,8 +22163,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22543,8 +22455,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22566,8 +22476,6 @@ define amdgpu_kernel void @global_agent_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23173,8 +23081,6 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23196,8 +23102,6 @@ define amdgpu_kernel void @global_agent_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23490,8 +23394,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23513,8 +23415,6 @@ define amdgpu_kernel void @global_agent_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23807,8 +23707,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23830,8 +23728,6 @@ define amdgpu_kernel void @global_agent_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll index 7c0a2ad5cdb78..40257df684990 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-cluster.ll @@ -804,8 +804,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -822,8 +820,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2979,8 +2975,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2998,8 +2992,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3223,8 +3215,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3242,8 +3232,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8587,8 +8575,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8610,8 +8596,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8903,8 +8887,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8926,8 +8908,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9198,8 +9178,6 @@ define amdgpu_kernel void @global_cluster_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9217,8 +9195,6 @@ define amdgpu_kernel void @global_cluster_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9793,8 +9769,6 @@ define amdgpu_kernel void @global_cluster_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9816,8 +9790,6 @@ define amdgpu_kernel void @global_cluster_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10109,8 +10081,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10132,8 +10102,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10425,8 +10393,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10448,8 +10414,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10741,8 +10705,6 @@ define amdgpu_kernel void @global_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10764,8 +10726,6 @@ define amdgpu_kernel void @global_cluster_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11369,8 +11329,6 @@ define amdgpu_kernel void @global_cluster_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11392,8 +11350,6 @@ define amdgpu_kernel void @global_cluster_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11685,8 +11641,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11708,8 +11662,6 @@ define amdgpu_kernel void @global_cluster_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12001,8 +11953,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12024,8 +11974,6 @@ define amdgpu_kernel void @global_cluster_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12849,8 +12797,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12867,8 +12813,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15024,8 +14968,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15043,8 +14985,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15268,8 +15208,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -15287,8 +15225,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20337,8 +20273,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20360,8 +20294,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20653,8 +20585,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20676,8 +20606,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20948,8 +20876,6 @@ define amdgpu_kernel void @global_cluster_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20967,8 +20893,6 @@ define amdgpu_kernel void @global_cluster_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21543,8 +21467,6 @@ define amdgpu_kernel void @global_cluster_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21566,8 +21488,6 @@ define amdgpu_kernel void @global_cluster_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21859,8 +21779,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21882,8 +21800,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22175,8 +22091,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22198,8 +22112,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22491,8 +22403,6 @@ define amdgpu_kernel void @global_cluster_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22514,8 +22424,6 @@ define amdgpu_kernel void @global_cluster_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23119,8 +23027,6 @@ define amdgpu_kernel void @global_cluster_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23142,8 +23048,6 @@ define amdgpu_kernel void @global_cluster_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23435,8 +23339,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23458,8 +23360,6 @@ define amdgpu_kernel void @global_cluster_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23751,8 +23651,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_DEV ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -23774,8 +23672,6 @@ define amdgpu_kernel void @global_cluster_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_DEV -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_DEV ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll index e7880a81800fd..ee5a8bf742fe7 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll @@ -808,8 +808,6 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -826,8 +824,6 @@ define amdgpu_kernel void @global_system_seq_cst_load( ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3021,8 +3017,6 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3041,8 +3035,6 @@ define amdgpu_kernel void @global_system_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3272,8 +3264,6 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3292,8 +3282,6 @@ define amdgpu_kernel void @global_system_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7235,8 +7223,6 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7259,8 +7245,6 @@ define amdgpu_kernel void @global_system_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7558,8 +7542,6 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7582,8 +7564,6 @@ define amdgpu_kernel void @global_system_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7857,8 +7837,6 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -7876,8 +7854,6 @@ define amdgpu_kernel void @global_system_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8459,8 +8435,6 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8483,8 +8457,6 @@ define amdgpu_kernel void @global_system_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8782,8 +8754,6 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8806,8 +8776,6 @@ define amdgpu_kernel void @global_system_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9105,8 +9073,6 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9129,8 +9095,6 @@ define amdgpu_kernel void @global_system_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9428,8 +9392,6 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9452,8 +9414,6 @@ define amdgpu_kernel void @global_system_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10070,8 +10030,6 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10094,8 +10052,6 @@ define amdgpu_kernel void @global_system_relese_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10393,8 +10349,6 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10417,8 +10371,6 @@ define amdgpu_kernel void @global_system_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10716,8 +10668,6 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10740,8 +10690,6 @@ define amdgpu_kernel void @global_system_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11570,8 +11518,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11588,8 +11534,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_load( ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: s_wait_kmcnt 0x0 ; GFX12-CU-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -13783,8 +13727,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -13803,8 +13745,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14034,8 +13974,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14054,8 +13992,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_ret_atomicrmw( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19493,8 +19429,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19517,8 +19451,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19816,8 +19748,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19840,8 +19770,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_monotonic_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20115,8 +20043,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20134,8 +20060,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-CU-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20717,8 +20641,6 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20741,8 +20663,6 @@ define amdgpu_kernel void @global_system_one_as_release_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21040,8 +20960,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21064,8 +20982,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21363,8 +21279,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21387,8 +21301,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21686,8 +21598,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21710,8 +21620,6 @@ define amdgpu_kernel void @global_system_one_as_monotonic_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22328,8 +22236,6 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22352,8 +22258,6 @@ define amdgpu_kernel void @global_system_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22651,8 +22555,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22675,8 +22577,6 @@ define amdgpu_kernel void @global_system_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22974,8 +22874,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SYS ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22998,8 +22896,6 @@ define amdgpu_kernel void @global_system_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: s_wait_storecnt 0x0 ; GFX12-CU-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SYS -; GFX12-CU-NEXT: s_wait_bvhcnt 0x0 -; GFX12-CU-NEXT: s_wait_samplecnt 0x0 ; GFX12-CU-NEXT: s_wait_loadcnt 0x0 ; GFX12-CU-NEXT: global_inv scope:SCOPE_SYS ; GFX12-CU-NEXT: global_store_b32 v0, v1, s[0:1] diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll index 885edec03c2b6..b755c5d615dda 100644 --- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll +++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll @@ -780,8 +780,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -2827,8 +2825,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -3050,8 +3046,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8037,8 +8031,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8332,8 +8324,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_monotonic_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -8610,8 +8600,6 @@ define amdgpu_kernel void @global_workgroup_monotonic_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9175,8 +9163,6 @@ define amdgpu_kernel void @global_workgroup_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9470,8 +9456,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -9765,8 +9749,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10060,8 +10042,6 @@ define amdgpu_kernel void @global_workgroup_monotonic_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10648,8 +10628,6 @@ define amdgpu_kernel void @global_workgroup_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -10943,8 +10921,6 @@ define amdgpu_kernel void @global_workgroup_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -11238,8 +11214,6 @@ define amdgpu_kernel void @global_workgroup_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -12052,8 +12026,6 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_load( ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: s_wait_kmcnt 0x0 ; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14051,8 +14023,6 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -14266,8 +14236,6 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_ret_atomicrmw( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_swap_b32 v1, v0, v1, s[0:1] th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19149,8 +19117,6 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_monotonic_ret_cmpxchg ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19436,8 +19402,6 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_monotonic_ret_cmpxchg ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -19713,8 +19677,6 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_acquire_ret_cmpxchg ; GFX12-WGP-NEXT: ; kill: def $vgpr1 killed $vgpr1 def $vgpr1_vgpr2 killed $exec ; GFX12-WGP-NEXT: v_mov_b32_e32 v2, v3 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20271,8 +20233,6 @@ define amdgpu_kernel void @global_workgroup_one_as_release_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20558,8 +20518,6 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -20845,8 +20803,6 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_acquire_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21132,8 +21088,6 @@ define amdgpu_kernel void @global_workgroup_one_as_monotonic_seq_cst_ret_cmpxchg ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21704,8 +21658,6 @@ define amdgpu_kernel void @global_workgroup_one_as_release_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -21991,8 +21943,6 @@ define amdgpu_kernel void @global_workgroup_one_as_acq_rel_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] @@ -22278,8 +22228,6 @@ define amdgpu_kernel void @global_workgroup_one_as_seq_cst_seq_cst_ret_cmpxchg( ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: s_wait_storecnt 0x0 ; GFX12-WGP-NEXT: global_atomic_cmpswap_b32 v1, v0, v[1:2], s[0:1] offset:16 th:TH_ATOMIC_RETURN scope:SCOPE_SE -; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0 -; GFX12-WGP-NEXT: s_wait_samplecnt 0x0 ; GFX12-WGP-NEXT: s_wait_loadcnt 0x0 ; GFX12-WGP-NEXT: global_inv scope:SCOPE_SE ; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]