30 changes: 30 additions & 0 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-global.ll
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -209,6 +210,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -281,6 +283,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -421,6 +424,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -492,6 +496,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -564,6 +569,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -739,6 +745,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-WGP-LABEL: agent_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -747,6 +754,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -835,6 +843,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -844,6 +853,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -933,6 +943,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -942,6 +953,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1113,6 +1125,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1121,6 +1134,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX12-CU-LABEL: agent_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1209,6 +1223,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1218,6 +1233,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1307,6 +1323,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1316,6 +1333,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1491,6 +1509,7 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-WGP-LABEL: system_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1499,6 +1518,7 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1591,6 +1611,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1600,6 +1621,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1693,6 +1715,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1702,6 +1725,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1877,6 +1901,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
;
; GFX12-WGP-LABEL: system_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1885,6 +1910,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
;
; GFX12-CU-LABEL: system_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1977,6 +2003,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -1986,6 +2013,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
;
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -2079,6 +2107,7 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2088,6 +2117,7 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
;
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down
18 changes: 0 additions & 18 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-fence-mmra-local.ll
Original file line number Diff line number Diff line change
Expand Up @@ -137,12 +137,10 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") release, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -205,12 +203,10 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") acq_rel, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -273,12 +269,10 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: workgroup_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("workgroup") seq_cst, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -637,12 +631,10 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-WGP-LABEL: agent_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") release, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -705,12 +697,10 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") acq_rel, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -773,12 +763,10 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence syncscope("agent") seq_cst, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -1137,12 +1125,10 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-WGP-LABEL: system_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence release, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -1205,12 +1191,10 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence acq_rel, !mmra !{!"amdgpu-as", !"local"}
Expand Down Expand Up @@ -1273,12 +1257,10 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: s_wait_dscnt 0x0
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: s_wait_dscnt 0x0
; GFX12-CU-NEXT: s_endpgm
entry:
fence seq_cst, !mmra !{!"amdgpu-as", !"local"}
Expand Down
30 changes: 30 additions & 0 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-fence.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1065,6 +1065,7 @@ define amdgpu_kernel void @workgroup_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1144,6 +1145,7 @@ define amdgpu_kernel void @workgroup_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1224,6 +1226,7 @@ define amdgpu_kernel void @workgroup_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1365,6 +1368,7 @@ define amdgpu_kernel void @workgroup_one_as_release_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1436,6 +1440,7 @@ define amdgpu_kernel void @workgroup_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1508,6 +1513,7 @@ define amdgpu_kernel void @workgroup_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: workgroup_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -1683,6 +1689,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-WGP-LABEL: agent_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -1691,6 +1698,7 @@ define amdgpu_kernel void @agent_release_fence() {
;
; GFX12-CU-LABEL: agent_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1779,6 +1787,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -1788,6 +1797,7 @@ define amdgpu_kernel void @agent_acq_rel_fence() {
;
; GFX12-CU-LABEL: agent_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1877,6 +1887,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -1886,6 +1897,7 @@ define amdgpu_kernel void @agent_seq_cst_fence() {
;
; GFX12-CU-LABEL: agent_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2057,6 +2069,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2065,6 +2078,7 @@ define amdgpu_kernel void @agent_one_as_release_fence() {
;
; GFX12-CU-LABEL: agent_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -2153,6 +2167,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2162,6 +2177,7 @@ define amdgpu_kernel void @agent_one_as_acq_rel_fence() {
;
; GFX12-CU-LABEL: agent_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -2251,6 +2267,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: agent_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_DEV
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2260,6 +2277,7 @@ define amdgpu_kernel void @agent_one_as_seq_cst_fence() {
;
; GFX12-CU-LABEL: agent_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_DEV
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -2435,6 +2453,7 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-WGP-LABEL: system_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -2443,6 +2462,7 @@ define amdgpu_kernel void @system_release_fence() {
;
; GFX12-CU-LABEL: system_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2535,6 +2555,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -2544,6 +2565,7 @@ define amdgpu_kernel void @system_acq_rel_fence() {
;
; GFX12-CU-LABEL: system_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2637,6 +2659,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand All @@ -2646,6 +2669,7 @@ define amdgpu_kernel void @system_seq_cst_fence() {
;
; GFX12-CU-LABEL: system_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2821,6 +2845,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
;
; GFX12-WGP-LABEL: system_one_as_release_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2829,6 +2854,7 @@ define amdgpu_kernel void @system_one_as_release_fence() {
;
; GFX12-CU-LABEL: system_one_as_release_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -2921,6 +2947,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
;
; GFX12-WGP-LABEL: system_one_as_acq_rel_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -2930,6 +2957,7 @@ define amdgpu_kernel void @system_one_as_acq_rel_fence() {
;
; GFX12-CU-LABEL: system_one_as_acq_rel_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -3023,6 +3051,7 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
;
; GFX12-WGP-LABEL: system_one_as_seq_cst_fence:
; GFX12-WGP: ; %bb.0: ; %entry
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SYS
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand All @@ -3032,6 +3061,7 @@ define amdgpu_kernel void @system_one_as_seq_cst_fence() {
;
; GFX12-CU-LABEL: system_one_as_seq_cst_fence:
; GFX12-CU: ; %bb.0: ; %entry
; GFX12-CU-NEXT: global_wb scope:SCOPE_SYS
; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
; GFX12-CU-NEXT: s_wait_samplecnt 0x0
; GFX12-CU-NEXT: s_wait_loadcnt 0x0
Expand Down
468 changes: 292 additions & 176 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-agent.ll

Large diffs are not rendered by default.

468 changes: 292 additions & 176 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-system.ll

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-volatile.ll
Original file line number Diff line number Diff line change
Expand Up @@ -901,7 +901,7 @@ define amdgpu_kernel void @flat_volatile_workgroup_acquire_load(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] th:TH_LOAD_NT
; GFX12-WGP-NEXT: flat_load_b32 v2, v[0:1] scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
Expand Down Expand Up @@ -1018,11 +1018,12 @@ define amdgpu_kernel void @flat_volatile_workgroup_release_store(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s3
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2
; GFX12-WGP-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: flat_volatile_workgroup_release_store:
Expand Down
222 changes: 138 additions & 84 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-flat-workgroup.ll

Large diffs are not rendered by default.

462 changes: 288 additions & 174 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-global-agent.ll

Large diffs are not rendered by default.

438 changes: 272 additions & 166 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-global-system.ll

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-global-volatile.ll
Original file line number Diff line number Diff line change
Expand Up @@ -837,7 +837,7 @@ define amdgpu_kernel void @global_volatile_workgroup_acquire_load(
; GFX12-WGP-NEXT: s_load_b64 s[2:3], s[0:1], 0x0
; GFX12-WGP-NEXT: s_load_b64 s[0:1], s[0:1], 0x8
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] th:TH_LOAD_NT
; GFX12-WGP-NEXT: global_load_b32 v1, v0, s[2:3] scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
Expand Down Expand Up @@ -972,11 +972,12 @@ define amdgpu_kernel void @global_volatile_workgroup_release_store(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, 0
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX12-WGP-NEXT: global_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
; GFX12-WGP-NEXT: s_endpgm
;
; GFX12-CU-LABEL: global_volatile_workgroup_release_store:
Expand Down
234 changes: 146 additions & 88 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-global-workgroup.ll

Large diffs are not rendered by default.

29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-local-agent.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ define amdgpu_kernel void @local_agent_release_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1298,6 +1299,7 @@ define amdgpu_kernel void @local_agent_seq_cst_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1752,6 +1754,7 @@ define amdgpu_kernel void @local_agent_release_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1923,6 +1926,7 @@ define amdgpu_kernel void @local_agent_acq_rel_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2097,6 +2101,7 @@ define amdgpu_kernel void @local_agent_seq_cst_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2486,6 +2491,7 @@ define amdgpu_kernel void @local_agent_acq_rel_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2692,6 +2698,7 @@ define amdgpu_kernel void @local_agent_seq_cst_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3270,6 +3277,7 @@ define amdgpu_kernel void @local_agent_release_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3481,6 +3489,7 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3695,6 +3704,7 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4301,6 +4311,7 @@ define amdgpu_kernel void @local_agent_release_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4515,6 +4526,7 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4729,6 +4741,7 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4943,6 +4956,7 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5157,6 +5171,7 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5371,6 +5386,7 @@ define amdgpu_kernel void @local_agent_release_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5585,6 +5601,7 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5799,6 +5816,7 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6480,6 +6498,7 @@ define amdgpu_kernel void @local_agent_release_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6723,6 +6742,7 @@ define amdgpu_kernel void @local_agent_acq_rel_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6967,6 +6987,7 @@ define amdgpu_kernel void @local_agent_seq_cst_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7663,6 +7684,7 @@ define amdgpu_kernel void @local_agent_release_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7907,6 +7929,7 @@ define amdgpu_kernel void @local_agent_acq_rel_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8151,6 +8174,7 @@ define amdgpu_kernel void @local_agent_seq_cst_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8395,6 +8419,7 @@ define amdgpu_kernel void @local_agent_monotonic_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8639,6 +8664,7 @@ define amdgpu_kernel void @local_agent_acquire_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8883,6 +8909,7 @@ define amdgpu_kernel void @local_agent_release_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9127,6 +9154,7 @@ define amdgpu_kernel void @local_agent_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9371,6 +9399,7 @@ define amdgpu_kernel void @local_agent_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-local-system.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ define amdgpu_kernel void @local_system_release_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1298,6 +1299,7 @@ define amdgpu_kernel void @local_system_seq_cst_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1752,6 +1754,7 @@ define amdgpu_kernel void @local_system_release_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1923,6 +1926,7 @@ define amdgpu_kernel void @local_system_acq_rel_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2097,6 +2101,7 @@ define amdgpu_kernel void @local_system_seq_cst_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2486,6 +2491,7 @@ define amdgpu_kernel void @local_system_acq_rel_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2692,6 +2698,7 @@ define amdgpu_kernel void @local_system_seq_cst_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3270,6 +3277,7 @@ define amdgpu_kernel void @local_system_release_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3481,6 +3489,7 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3695,6 +3704,7 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4301,6 +4311,7 @@ define amdgpu_kernel void @local_system_release_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4515,6 +4526,7 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4729,6 +4741,7 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4943,6 +4956,7 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5157,6 +5171,7 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5371,6 +5386,7 @@ define amdgpu_kernel void @local_system_release_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5585,6 +5601,7 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5799,6 +5816,7 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6480,6 +6498,7 @@ define amdgpu_kernel void @local_system_release_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6723,6 +6742,7 @@ define amdgpu_kernel void @local_system_acq_rel_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6967,6 +6987,7 @@ define amdgpu_kernel void @local_system_seq_cst_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7663,6 +7684,7 @@ define amdgpu_kernel void @local_system_release_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7907,6 +7929,7 @@ define amdgpu_kernel void @local_system_acq_rel_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8151,6 +8174,7 @@ define amdgpu_kernel void @local_system_seq_cst_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8395,6 +8419,7 @@ define amdgpu_kernel void @local_system_monotonic_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8639,6 +8664,7 @@ define amdgpu_kernel void @local_system_acquire_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8883,6 +8909,7 @@ define amdgpu_kernel void @local_system_release_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9127,6 +9154,7 @@ define amdgpu_kernel void @local_system_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9371,6 +9399,7 @@ define amdgpu_kernel void @local_system_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -834,6 +834,7 @@ define amdgpu_kernel void @local_volatile_workgroup_release_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AMDGPU/memory-legalizer-local-workgroup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,7 @@ define amdgpu_kernel void @local_workgroup_release_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1298,6 +1299,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_store(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1752,6 +1754,7 @@ define amdgpu_kernel void @local_workgroup_release_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -1923,6 +1926,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2097,6 +2101,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2486,6 +2491,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -2692,6 +2698,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_ret_atomicrmw(
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3270,6 +3277,7 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3481,6 +3489,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -3695,6 +3704,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4301,6 +4311,7 @@ define amdgpu_kernel void @local_workgroup_release_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4515,6 +4526,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4729,6 +4741,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -4943,6 +4956,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5157,6 +5171,7 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5371,6 +5386,7 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5585,6 +5601,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -5799,6 +5816,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s1
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s0
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6480,6 +6498,7 @@ define amdgpu_kernel void @local_workgroup_release_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6723,6 +6742,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -6967,6 +6987,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_monotonic_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7663,6 +7684,7 @@ define amdgpu_kernel void @local_workgroup_release_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -7907,6 +7929,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8151,6 +8174,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_acquire_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8395,6 +8419,7 @@ define amdgpu_kernel void @local_workgroup_monotonic_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8639,6 +8664,7 @@ define amdgpu_kernel void @local_workgroup_acquire_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -8883,6 +8909,7 @@ define amdgpu_kernel void @local_workgroup_release_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9127,6 +9154,7 @@ define amdgpu_kernel void @local_workgroup_acq_rel_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down Expand Up @@ -9371,6 +9399,7 @@ define amdgpu_kernel void @local_workgroup_seq_cst_seq_cst_ret_cmpxchg(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s2
; GFX12-WGP-NEXT: v_mov_b32_e32 v2, s1
; GFX12-WGP-NEXT: global_wb scope:SCOPE_SE
; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
; GFX12-WGP-NEXT: s_wait_storecnt 0x0
Expand Down