@@ -309,21 +309,6 @@ define void @zero_init_foo() {
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: zero_init_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: s_mov_b32 s2, s0
; GCN-NEXT: s_mov_b32 s3, s0
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:48
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:32
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:16
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%alloca = alloca [32 x i16], align 2, addrspace(5)
call void @llvm.memset.p5.i64(ptr addrspace(5) align 2 dereferenceable(64) %alloca, i8 0, i64 64, i1 false)
ret void
@@ -462,21 +447,6 @@ define amdgpu_kernel void @store_load_sindex_kernel(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_sindex_kernel:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
; GCN-NEXT: v_mov_b32_e32 v0, 15
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_lshl_b32 s1, s0, 2
; GCN-NEXT: s_and_b32 s0, s0, 15
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_add_u32 s1, 4, s1
; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_add_u32 s0, 4, s0
; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -607,19 +577,6 @@ define amdgpu_ps void @store_load_sindex_foo(i32 inreg %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s1 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_sindex_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_lshl_b32 s1, s0, 2
; GCN-NEXT: s_and_b32 s0, s0, 15
; GCN-NEXT: s_lshl_b32 s0, s0, 2
; GCN-NEXT: s_add_u32 s1, 4, s1
; GCN-NEXT: v_mov_b32_e32 v0, 15
; GCN-NEXT: scratch_store_dword off, v0, s1 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_add_u32 s0, 4, s0
; GCN-NEXT: scratch_load_dword v0, off, s0 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -740,16 +697,6 @@ define amdgpu_kernel void @store_load_vindex_kernel() {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_vindex_kernel:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: scratch_store_dword v0, v1, off offset:4 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_sub_u32_e32 v0, 4, v0
; GCN-NEXT: scratch_load_dword v0, v0, off offset:124 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i2 = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -860,18 +807,6 @@ define void @store_load_vindex_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: scratch_store_dword v1, v2, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: scratch_load_dword v0, v0, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [32 x float], align 4, addrspace(5)
%i7 = getelementptr inbounds [32 x float], ptr addrspace(5) %i, i32 0, i32 %idx
@@ -947,13 +882,6 @@ define void @private_ptr_foo(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_store_b32 v0, v1, off
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: private_ptr_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v1, 0x41200000
; GCN-NEXT: scratch_store_dword v0, v1, off offset:4
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(5) %arg, i32 1
store float 1.000000e+01, ptr addrspace(5) %gep, align 4
ret void
@@ -1296,23 +1224,6 @@ define void @zero_init_small_offset_foo() {
; GFX11-PAL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:304
; GFX11-PAL-NEXT: s_waitcnt_vscnt null, 0x0
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: zero_init_small_offset_foo:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v0, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_mov_b32 s1, s0
; GCN-NEXT: s_mov_b32 s2, s0
; GCN-NEXT: s_mov_b32 s3, s0
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[0:1]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[2:3]
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:256
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:272
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:288
; GCN-NEXT: scratch_store_dwordx4 off, v[0:3], s32 offset:304
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
%padding = alloca [64 x i32], align 4, addrspace(5)
%alloca = alloca [32 x i16], align 2, addrspace(5)
%pad_gep = getelementptr inbounds [64 x i32], ptr addrspace(5) %padding, i32 0, i32 undef
@@ -1970,20 +1881,6 @@ define void @store_load_vindex_small_offset_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, s32 offset:256 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_small_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: scratch_store_dword v1, v2, s32 offset:256 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:256 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [64 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -3048,22 +2945,6 @@ define void @store_load_vindex_large_offset_foo(i32 %idx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v1, vcc_lo glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_vindex_large_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: scratch_load_dword v1, off, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_lshlrev_b32_e32 v1, 2, v0
; GCN-NEXT: v_and_b32_e32 v0, v0, v2
; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GCN-NEXT: scratch_store_dword v1, v2, vcc_hi sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 2, v0
; GCN-NEXT: s_add_u32 vcc_hi, s32, 0x4000
; GCN-NEXT: scratch_load_dword v0, v0, vcc_hi sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%padding = alloca [4096 x i32], align 4, addrspace(5)
%i = alloca [32 x float], align 4, addrspace(5)
@@ -3350,19 +3231,6 @@ define void @store_load_large_imm_offset_foo() {
; GFX11-PAL-NEXT: scratch_load_b32 v0, off, s0 offset:3712 glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_large_imm_offset_foo:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 13
; GCN-NEXT: scratch_store_dword off, v0, s32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, 0x3000
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: scratch_store_dword v0, v1, s32 offset:3712 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dword v0, v0, s32 offset:3712 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
%i = alloca [4096 x i32], align 4, addrspace(5)
%i1 = getelementptr inbounds [4096 x i32], ptr addrspace(5) %i, i32 0, i32 undef
@@ -3498,17 +3366,6 @@ define amdgpu_kernel void @store_load_vidx_sidx_offset(i32 %sidx) {
; GFX11-PAL-NEXT: scratch_load_b32 v0, v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_endpgm
; GCN-LABEL: store_load_vidx_sidx_offset:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_load_dword s0, s[0:1], 0x24
; GCN-NEXT: v_mov_b32_e32 v1, 15
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_lshl_u32 v0, s0, v0, 2
; GCN-NEXT: scratch_store_dword v0, v1, off offset:1028 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dword v0, v0, off offset:1028 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
bb:
%alloca = alloca [32 x i32], align 4, addrspace(5)
%vidx = tail call i32 @llvm.amdgcn.workitem.id.x()
@@ -3599,16 +3456,6 @@ define void @store_load_i64_aligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_i64_aligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, ptr addrspace(5) %arg, align 8
%load = load volatile i64, ptr addrspace(5) %arg, align 8
@@ -3694,16 +3541,6 @@ define void @store_load_i64_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b64 v[0:1], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_i64_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 15
; GCN-NEXT: v_mov_b32_e32 v3, 0
; GCN-NEXT: scratch_store_dwordx2 v0, v[2:3], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx2 v[0:1], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile i64 15, ptr addrspace(5) %arg, align 1
%load = load volatile i64, ptr addrspace(5) %arg, align 1
@@ -3796,17 +3633,6 @@ define void @store_load_v3i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b96 v[0:2], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_v3i32_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 1
; GCN-NEXT: v_mov_b32_e32 v3, 2
; GCN-NEXT: v_mov_b32_e32 v4, 3
; GCN-NEXT: scratch_store_dwordx3 v0, v[2:4], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx3 v[0:2], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <3 x i32> <i32 1, i32 2, i32 3>, ptr addrspace(5) %arg, align 1
%load = load volatile <3 x i32>, ptr addrspace(5) %arg, align 1
@@ -3904,18 +3730,6 @@ define void @store_load_v4i32_unaligned(ptr addrspace(5) nocapture %arg) {
; GFX11-PAL-NEXT: scratch_load_b128 v[0:3], v0, off glc dlc
; GFX11-PAL-NEXT: s_waitcnt vmcnt(0)
; GFX11-PAL-NEXT: s_setpc_b64 s[30:31]
; GCN-LABEL: store_load_v4i32_unaligned:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, 1
; GCN-NEXT: v_mov_b32_e32 v3, 2
; GCN-NEXT: v_mov_b32_e32 v4, 3
; GCN-NEXT: v_mov_b32_e32 v5, 4
; GCN-NEXT: scratch_store_dwordx4 v0, v[2:5], off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: scratch_load_dwordx4 v[0:3], v0, off sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
bb:
store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %arg, align 1
%load = load volatile <4 x i32>, ptr addrspace(5) %arg, align 1