Expand Up
@@ -16,9 +16,9 @@ declare double @llvm.amdgcn.raw.buffer.atomic.fmax.f64(double, <4 x i32>, i32, i
define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64 (<4 x i32 > inreg %rsrc , double %data , i32 %vindex ) {
; SI-LABEL: raw_buffer_atomic_min_noret_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; SI-NEXT: s_load_dword s6, s[2:3 ], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; SI-NEXT: s_load_dword s6, s[0:1 ], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -28,9 +28,9 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
;
; GFX7-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; GFX7-NEXT: s_load_dword s6, s[2:3 ], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; GFX7-NEXT: s_load_dword s6, s[0:1 ], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -41,22 +41,22 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
; GFX10-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
; GFX10-NEXT: s_load_dword s8, s[2:3 ], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3 ], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
; GFX10-NEXT: s_load_dword s8, s[0:1 ], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1 ], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_min_noret_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_clause 0x2
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0x34
; GFX1030-NEXT: s_load_dword s6, s[2:3 ], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0x34
; GFX1030-NEXT: s_load_dword s6, s[0:1 ], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -66,9 +66,9 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
;
; G_SI-LABEL: raw_buffer_atomic_min_noret_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; G_SI-NEXT: s_load_dword s6, s[2:3 ], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; G_SI-NEXT: s_load_dword s6, s[0:1 ], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s4
; G_SI-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -78,9 +78,9 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
;
; G_GFX7-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; G_GFX7-NEXT: s_load_dword s6, s[2:3 ], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; G_GFX7-NEXT: s_load_dword s6, s[0:1 ], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -91,22 +91,22 @@ define amdgpu_kernel void @raw_buffer_atomic_min_noret_f64(<4 x i32> inreg %rsrc
; G_GFX10-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_clause 0x2
; G_GFX10-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[2:3 ], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3 ], 0x24
; G_GFX10-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[0:1 ], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1 ], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s0
; G_GFX10-NEXT: v_mov_b32_e32 v1, s1
; G_GFX10-NEXT: v_mov_b32_e32 v0, s2
; G_GFX10-NEXT: v_mov_b32_e32 v1, s3
; G_GFX10-NEXT: v_mov_b32_e32 v2, s8
; G_GFX10-NEXT: buffer_atomic_fmin_x2 v[0:1], v2, s[4:7], 0 offen
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_min_noret_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_clause 0x2
; G_GFX1030-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0x34
; G_GFX1030-NEXT: s_load_dword s6, s[2:3 ], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
; G_GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0x34
; G_GFX1030-NEXT: s_load_dword s6, s[0:1 ], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand Down
Expand Up
@@ -253,9 +253,9 @@ main_body:
define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64 (<4 x i32 > inreg %rsrc , double %data , i32 %vindex ) {
; SI-LABEL: raw_buffer_atomic_max_noret_f64:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; SI-NEXT: s_load_dword s6, s[2:3 ], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; SI-NEXT: s_load_dword s6, s[0:1 ], 0xf
; SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s4
; SI-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -265,9 +265,9 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
;
; GFX7-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; GFX7-NEXT: s_load_dword s6, s[2:3 ], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; GFX7-NEXT: s_load_dword s6, s[0:1 ], 0xf
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
; GFX7-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -278,22 +278,22 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
; GFX10-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_clause 0x2
; GFX10-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
; GFX10-NEXT: s_load_dword s8, s[2:3 ], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3 ], 0x24
; GFX10-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
; GFX10-NEXT: s_load_dword s8, s[0:1 ], 0x3c
; GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1 ], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s0
; GFX10-NEXT: v_mov_b32_e32 v1, s1
; GFX10-NEXT: v_mov_b32_e32 v0, s2
; GFX10-NEXT: v_mov_b32_e32 v1, s3
; GFX10-NEXT: v_mov_b32_e32 v2, s8
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_noret_f64:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_clause 0x2
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0x34
; GFX1030-NEXT: s_load_dword s6, s[2:3 ], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
; GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0x34
; GFX1030-NEXT: s_load_dword s6, s[0:1 ], 0x3c
; GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -303,9 +303,9 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
;
; G_SI-LABEL: raw_buffer_atomic_max_noret_f64:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; G_SI-NEXT: s_load_dword s6, s[2:3 ], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; G_SI-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; G_SI-NEXT: s_load_dword s6, s[0:1 ], 0xf
; G_SI-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s4
; G_SI-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -315,9 +315,9 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
;
; G_GFX7-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0xd
; G_GFX7-NEXT: s_load_dword s6, s[2:3 ], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x9
; G_GFX7-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0xd
; G_GFX7-NEXT: s_load_dword s6, s[0:1 ], 0xf
; G_GFX7-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x9
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
; G_GFX7-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -328,22 +328,22 @@ define amdgpu_kernel void @raw_buffer_atomic_max_noret_f64(<4 x i32> inreg %rsrc
; G_GFX10-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_clause 0x2
; G_GFX10-NEXT: s_load_dwordx2 s[0:1 ], s[2:3 ], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[2:3 ], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[2:3 ], 0x24
; G_GFX10-NEXT: s_load_dwordx2 s[2:3 ], s[0:1 ], 0x34
; G_GFX10-NEXT: s_load_dword s8, s[0:1 ], 0x3c
; G_GFX10-NEXT: s_load_dwordx4 s[4:7], s[0:1 ], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s0
; G_GFX10-NEXT: v_mov_b32_e32 v1, s1
; G_GFX10-NEXT: v_mov_b32_e32 v0, s2
; G_GFX10-NEXT: v_mov_b32_e32 v1, s3
; G_GFX10-NEXT: v_mov_b32_e32 v2, s8
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7], 0 offen
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_noret_f64:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_clause 0x2
; G_GFX1030-NEXT: s_load_dwordx2 s[4:5], s[2:3 ], 0x34
; G_GFX1030-NEXT: s_load_dword s6, s[2:3 ], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[2:3 ], 0x24
; G_GFX1030-NEXT: s_load_dwordx2 s[4:5], s[0:1 ], 0x34
; G_GFX1030-NEXT: s_load_dword s6, s[0:1 ], 0x3c
; G_GFX1030-NEXT: s_load_dwordx4 s[0:3], s[0:1 ], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand Down
Expand Up
@@ -424,7 +424,7 @@ main_body:
define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc (<4 x i32 > inreg %rsrc , double %data , i32 %vindex , ptr addrspace (3 ) %out ) {
; SI-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; SI: ; %bb.0: ; %main_body
; SI-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x9
; SI-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x9
; SI-NEXT: s_mov_b32 m0, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s4
Expand All
@@ -438,7 +438,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
;
; GFX7-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX7: ; %bb.0: ; %main_body
; GFX7-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x9
; GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x9
; GFX7-NEXT: s_mov_b32 m0, -1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: v_mov_b32_e32 v0, s4
Expand All
@@ -452,20 +452,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
;
; GFX10-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_load_dwordx8 s[0:7 ], s[2:3 ], 0x24
; GFX10-NEXT: s_load_dwordx8 s[4:11 ], s[0:1 ], 0x24
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: v_mov_b32_e32 v0, s4
; GFX10-NEXT: v_mov_b32_e32 v1, s5
; GFX10-NEXT: v_mov_b32_e32 v2, s6
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3 ], 4 offen glc slc
; GFX10-NEXT: v_mov_b32_e32 v2, s7
; GFX10-NEXT: v_mov_b32_e32 v0, s8
; GFX10-NEXT: v_mov_b32_e32 v1, s9
; GFX10-NEXT: v_mov_b32_e32 v2, s10
; GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7 ], 4 offen glc slc
; GFX10-NEXT: v_mov_b32_e32 v2, s11
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ds_write_b64 v2, v[0:1]
; GFX10-NEXT: s_endpgm
;
; GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; GFX1030: ; %bb.0: ; %main_body
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x24
; GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x24
; GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; GFX1030-NEXT: v_mov_b32_e32 v0, s4
; GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand All
@@ -478,7 +478,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
;
; G_SI-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_SI: ; %bb.0: ; %main_body
; G_SI-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x9
; G_SI-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x9
; G_SI-NEXT: s_mov_b32 m0, -1
; G_SI-NEXT: s_waitcnt lgkmcnt(0)
; G_SI-NEXT: v_mov_b32_e32 v0, s4
Expand All
@@ -492,7 +492,7 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
;
; G_GFX7-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX7: ; %bb.0: ; %main_body
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x9
; G_GFX7-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x9
; G_GFX7-NEXT: s_mov_b32 m0, -1
; G_GFX7-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX7-NEXT: v_mov_b32_e32 v0, s4
Expand All
@@ -506,20 +506,20 @@ define amdgpu_kernel void @raw_buffer_atomic_max_rtn_f64_off4_slc(<4 x i32> inre
;
; G_GFX10-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX10: ; %bb.0: ; %main_body
; G_GFX10-NEXT: s_load_dwordx8 s[0:7 ], s[2:3 ], 0x24
; G_GFX10-NEXT: s_load_dwordx8 s[4:11 ], s[0:1 ], 0x24
; G_GFX10-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX10-NEXT: v_mov_b32_e32 v0, s4
; G_GFX10-NEXT: v_mov_b32_e32 v1, s5
; G_GFX10-NEXT: v_mov_b32_e32 v2, s6
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[0:3 ], 4 offen glc slc
; G_GFX10-NEXT: v_mov_b32_e32 v2, s7
; G_GFX10-NEXT: v_mov_b32_e32 v0, s8
; G_GFX10-NEXT: v_mov_b32_e32 v1, s9
; G_GFX10-NEXT: v_mov_b32_e32 v2, s10
; G_GFX10-NEXT: buffer_atomic_fmax_x2 v[0:1], v2, s[4:7 ], 4 offen glc slc
; G_GFX10-NEXT: v_mov_b32_e32 v2, s11
; G_GFX10-NEXT: s_waitcnt vmcnt(0)
; G_GFX10-NEXT: ds_write_b64 v2, v[0:1]
; G_GFX10-NEXT: s_endpgm
;
; G_GFX1030-LABEL: raw_buffer_atomic_max_rtn_f64_off4_slc:
; G_GFX1030: ; %bb.0: ; %main_body
; G_GFX1030-NEXT: s_load_dwordx8 s[0:7], s[2:3 ], 0x24
; G_GFX1030-NEXT: s_load_dwordx8 s[0:7], s[0:1 ], 0x24
; G_GFX1030-NEXT: s_waitcnt lgkmcnt(0)
; G_GFX1030-NEXT: v_mov_b32_e32 v0, s4
; G_GFX1030-NEXT: v_mov_b32_e32 v1, s5
Expand Down