Expand Up
@@ -1140,6 +1140,7 @@ define amdgpu_kernel void @local_workgroup_release_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -1298,6 +1299,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -1752,6 +1754,7 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -1923,6 +1926,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -2097,6 +2101,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -2486,6 +2491,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -2692,6 +2698,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -3270,6 +3277,7 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -3481,6 +3489,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -3695,6 +3704,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -4301,6 +4311,7 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -4515,6 +4526,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -4729,6 +4741,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -4943,6 +4956,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -5157,6 +5171,7 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -5371,6 +5386,7 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -5585,6 +5601,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -5799,6 +5816,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -6480,6 +6498,7 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -6723,6 +6742,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -6967,6 +6987,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -7663,6 +7684,7 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -7907,6 +7929,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -8151,6 +8174,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -8395,6 +8419,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -8639,6 +8664,7 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -8883,6 +8909,7 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -9127,6 +9154,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Expand Up
@@ -9371,6 +9399,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down