324 changes: 154 additions & 170 deletions llvm/test/CodeGen/AMDGPU/indirect-call.ll

Large diffs are not rendered by default.

28 changes: 5 additions & 23 deletions llvm/test/CodeGen/AMDGPU/kernel-vgpr-spill-mubuf-with-voffset.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,30 +13,17 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_addc_u32 flat_scratch_hi, s13, 0
; CHECK-NEXT: s_add_u32 s0, s0, s17
; CHECK-NEXT: s_addc_u32 s1, s1, 0
; CHECK-NEXT: ; implicit-def: $vgpr3
; CHECK-NEXT: v_writelane_b32 v3, s16, 0
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s12, s33, 0x100200
; CHECK-NEXT: buffer_store_dword v3, off, s[0:3], s12 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: v_writelane_b32 v40, s16, 0
; CHECK-NEXT: s_mov_b32 s13, s15
; CHECK-NEXT: s_mov_b32 s12, s14
; CHECK-NEXT: v_readlane_b32 s14, v3, 0
; CHECK-NEXT: v_readlane_b32 s14, v40, 0
; CHECK-NEXT: s_mov_b64 s[16:17], s[8:9]
; CHECK-NEXT: v_mov_b32_e32 v3, v2
; CHECK-NEXT: v_mov_b32_e32 v2, v1
; CHECK-NEXT: v_mov_b32_e32 v1, v0
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s8 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_load_dword s8, s[16:17], 0x0
; CHECK-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v0, s8, 1
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s8, s33, 0x100200
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s8 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: v_writelane_b32 v40, s8, 1
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; def vgpr10
; CHECK-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -69,14 +56,9 @@ define amdgpu_kernel void @test_kernel(i32 %val) #0 {
; CHECK-NEXT: s_mov_b64 s[2:3], s[22:23]
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: s_add_i32 s4, s33, 0x100200
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: s_add_i32 s4, s33, 0x100100
; CHECK-NEXT: buffer_load_dword v10, off, s[0:3], s4 ; 4-byte Folded Reload
; CHECK-NEXT: s_waitcnt vmcnt(1)
; CHECK-NEXT: v_readlane_b32 s4, v0, 1
; CHECK-NEXT: v_readlane_b32 s4, v40, 1
; CHECK-NEXT: s_mov_b32 s5, 0
; CHECK-NEXT: s_cmp_eq_u32 s4, s5
; CHECK-NEXT: v_mov_b32_e32 v0, 0x4000
Expand Down
296 changes: 148 additions & 148 deletions llvm/test/CodeGen/AMDGPU/load-constant-i16.ll

Large diffs are not rendered by default.

29 changes: 12 additions & 17 deletions llvm/test/CodeGen/AMDGPU/mubuf-legalize-operands.ll
Original file line number Diff line number Diff line change
Expand Up @@ -231,10 +231,10 @@ entry:
; W64-O0-DAG: s_mov_b64 [[SAVEEXEC:s\[[0-9]+:[0-9]+\]]], exec

; W64-O0: [[LOOPBB0:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:44 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
Expand All @@ -251,7 +251,7 @@ entry:
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:4 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 ; 4-byte Folded Reload
; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
Expand All @@ -264,18 +264,16 @@ entry:
; W64-O0: s_cbranch_execz [[TERMBB:.LBB[0-9]+_[0-9]+]]

; W64-O0: ; %bb.{{[0-9]+}}: ; %bb1
; W64-O0: buffer_load_dword
; W64-O0: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0-DAG: buffer_store_dword {{v[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF:[0-9]+]] ; 4-byte Folded Spill
; W64-O0-DAG: s_mov_b64 s[[[SAVEEXEC0:[0-9]+]]:[[SAVEEXEC1:[0-9]+]]], exec
; W64-O0: v_writelane_b32 [[VSAVEEXEC:v[0-9]+]], s[[SAVEEXEC0]], [[SAVEEXEC_IDX0:[0-9]+]]
; W64-O0: v_writelane_b32 [[VSAVEEXEC]], s[[SAVEEXEC1]], [[SAVEEXEC_IDX1:[0-9]+]]
; W64-O0: buffer_store_dword [[VSAVEEXEC]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Spill

; W64-O0: [[LOOPBB1:.LBB[0-9]+_[0-9]+]]: ; =>This Inner Loop Header: Depth=1
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC0:[0-9]+]], off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC1:[0-9]+]], off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC2:[0-9]+]], off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
; W64-O0: buffer_load_dword v[[VRSRC3:[0-9]+]], off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0-DAG: v_readfirstlane_b32 s[[S0:[0-9]+]], v[[VRSRC0]]
; W64-O0-DAG: v_readfirstlane_b32 s[[SRSRCTMP1:[0-9]+]], v[[VRSRC1]]
Expand All @@ -292,19 +290,16 @@ entry:
; W64-O0-DAG: s_mov_b32 s[[S2:[0-9]+]], s[[SRSRCTMP2]]
; W64-O0-DAG: s_mov_b32 s[[S3:[0-9]+]], s[[SRSRCTMP3]]
; W64-O0: s_and_saveexec_b64 [[SAVE:s\[[0-9]+:[0-9]+\]]], [[AND]]
; W64-O0: buffer_store_dword
; W64-O0: buffer_load_dword [[IDX:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s32 offset:[[IDX_OFF]] ; 4-byte Folded Reload
; W64-O0: buffer_load_dword
; W64-O0: buffer_load_format_x [[RES:v[0-9]+]], [[IDX]], s[[[S0]]:[[S3]]], {{.*}} idxen
; W64-O0: s_waitcnt vmcnt(0)
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP:[0-9]+]] ; 4-byte Folded Spill
; W64-O0: s_xor_b64 exec, exec, [[SAVE]]
; W64-O0-NEXT: s_cbranch_execnz [[LOOPBB1]]

; W64-O0: buffer_load_dword [[RES:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF_TMP]] ; 4-byte Folded Reload
; W64-O0: buffer_load_dword [[VSAVEEXEC1:v[0-9]+]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} ; 4-byte Folded Reload
; W64-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC1]], [[SAVEEXEC_IDX0]]
; W64-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC1]], [[SAVEEXEC_IDX1]]
; W64-O0: v_readlane_b32 s[[SAVEEXEC0:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX0]]
; W64-O0: v_readlane_b32 s[[SAVEEXEC1:[0-9]+]], [[VSAVEEXEC]], [[SAVEEXEC_IDX1]]
; W64-O0: s_mov_b64 exec, s[[[SAVEEXEC0]]:[[SAVEEXEC1]]]
; W64-O0: buffer_store_dword [[RES]], off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:[[RES_OFF]] ; 4-byte Folded Spill

Expand Down
47 changes: 23 additions & 24 deletions llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
Original file line number Diff line number Diff line change
Expand Up @@ -191,49 +191,48 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_mov_b32 s4, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: ; implicit-def: $vgpr42
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: s_addk_i32 s32, 0x800
; GFX9-NEXT: v_writelane_b32 v42, s30, 0
; GFX9-NEXT: v_writelane_b32 v42, s31, 1
; GFX9-NEXT: v_writelane_b32 v42, s34, 2
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
; GFX9-NEXT: v_writelane_b32 v44, s4, 0
; GFX9-NEXT: v_writelane_b32 v42, s36, 3
; GFX9-NEXT: v_writelane_b32 v40, s36, 3
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
; GFX9-NEXT: v_writelane_b32 v42, s37, 4
; GFX9-NEXT: v_writelane_b32 v40, s37, 4
; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_mov_b32_e32 v40, v1
; GFX9-NEXT: v_mov_b32_e32 v41, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40
; GFX9-NEXT: v_mov_b32_e32 v41, v1
; GFX9-NEXT: v_mov_b32_e32 v42, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41
; GFX9-NEXT: s_mov_b32 s34, s15
; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v40
; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_mad_u32_u24 v40, v41, v40, v43
; GFX9-NEXT: v_mad_u32_u24 v41, v42, v41, v43
; GFX9-NEXT: s_mov_b32 s15, s34
; GFX9-NEXT: v_mov_b32_e32 v0, v40
; GFX9-NEXT: v_mov_b32_e32 v0, v41
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_add_u32_e32 v0, v40, v43
; GFX9-NEXT: v_add_u32_e32 v0, v41, v43
; GFX9-NEXT: s_mov_b32 s15, s34
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s37, v42, 4
; GFX9-NEXT: v_readlane_b32 s36, v42, 3
; GFX9-NEXT: v_readlane_b32 s34, v42, 2
; GFX9-NEXT: v_readlane_b32 s31, v42, 1
; GFX9-NEXT: v_readlane_b32 s30, v42, 0
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s37, v40, 4
; GFX9-NEXT: v_readlane_b32 s36, v40, 3
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s4, v44, 0
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v44, off, s[0:3], s33 offset:16 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_addk_i32 s32, 0xf800
Expand Down
68 changes: 24 additions & 44 deletions llvm/test/CodeGen/AMDGPU/need-fp-from-vgpr-spills.ll
Original file line number Diff line number Diff line change
Expand Up @@ -27,19 +27,15 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-LABEL: csr_vgpr_spill_fp_callee:
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s14, s33
; CHECK-NEXT: s_mov_b32 s6, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
; CHECK-NEXT: s_or_saveexec_b64 s[12:13], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[12:13]
; CHECK-NEXT: v_writelane_b32 v1, s30, 0
; CHECK-NEXT: v_writelane_b32 v1, s31, 1
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
Expand All @@ -48,21 +44,17 @@ define internal fastcc void @csr_vgpr_spill_fp_callee() #0 {
; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_or_saveexec_b64 s[12:13], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[12:13]
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; clobber csr v40
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readlane_b32 s31, v0, 1
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
; CHECK-NEXT: v_readlane_b32 s31, v1, 1
; CHECK-NEXT: v_readlane_b32 s30, v1, 0
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s14
; CHECK-NEXT: s_mov_b32 s33, s6
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
bb:
Expand Down Expand Up @@ -99,22 +91,21 @@ define internal fastcc void @csr_vgpr_spill_fp_tailcall_callee() #0 {
; CHECK: ; %bb.0: ; %bb
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: v_writelane_b32 v0, s33, 0
; CHECK-NEXT: v_writelane_b32 v1, s33, 0
; CHECK-NEXT: ;;#ASMSTART
; CHECK-NEXT: ; clobber csr v40
; CHECK-NEXT: ;;#ASMEND
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, callee_has_fp@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, callee_has_fp@rel32@hi+12
; CHECK-NEXT: v_readlane_b32 s33, v0, 0
; CHECK-NEXT: v_readlane_b32 s33, v1, 0
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; CHECK-NEXT: s_xor_saveexec_b64 s[8:9], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[8:9]
; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_setpc_b64 s[4:5]
bb:
call void asm sideeffect "; clobber csr v40", "~{v40}"()
Expand Down Expand Up @@ -161,13 +152,12 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp_tail_call:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s12, s33
; CHECK-NEXT: s_mov_b32 s6, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: ; implicit-def: $vgpr1
; CHECK-NEXT: v_writelane_b32 v1, s30, 0
; CHECK-NEXT: v_writelane_b32 v1, s31, 1
; CHECK-NEXT: s_getpc_b64 s[4:5]
Expand All @@ -184,7 +174,7 @@ define hidden i32 @caller_save_vgpr_spill_fp_tail_call() #0 {
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s12
; CHECK-NEXT: s_mov_b32 s33, s6
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
Expand All @@ -196,19 +186,14 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 {
; CHECK-LABEL: caller_save_vgpr_spill_fp:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s13, s33
; CHECK-NEXT: s_mov_b32 s7, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v2, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
; CHECK-NEXT: s_or_saveexec_b64 s[14:15], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[14:15]
; CHECK-NEXT: v_writelane_b32 v2, s30, 0
; CHECK-NEXT: v_writelane_b32 v2, s31, 1
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, caller_save_vgpr_spill_fp_tail_call@rel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, caller_save_vgpr_spill_fp_tail_call@rel32@hi+12
Expand All @@ -217,18 +202,13 @@ define hidden i32 @caller_save_vgpr_spill_fp() #0 {
; CHECK-NEXT: s_mov_b64 s[0:1], s[8:9]
; CHECK-NEXT: s_mov_b64 s[2:3], s[10:11]
; CHECK-NEXT: s_swappc_b64 s[30:31], s[4:5]
; CHECK-NEXT: s_or_saveexec_b64 s[14:15], -1
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[14:15]
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readlane_b32 s31, v1, 1
; CHECK-NEXT: v_readlane_b32 s30, v1, 0
; CHECK-NEXT: v_readlane_b32 s31, v2, 1
; CHECK-NEXT: v_readlane_b32 s30, v2, 0
; CHECK-NEXT: s_xor_saveexec_b64 s[4:5], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v1, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v2, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[4:5]
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
; CHECK-NEXT: s_mov_b32 s33, s13
; CHECK-NEXT: s_mov_b32 s33, s7
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: s_setpc_b64 s[30:31]
entry:
Expand Down
38 changes: 12 additions & 26 deletions llvm/test/CodeGen/AMDGPU/no-source-locations-in-prologue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,14 @@ define hidden void @_ZL3barv() #0 !dbg !1644 {
; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CHECK-NEXT: s_mov_b32 s16, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_xor_saveexec_b64 s[18:19], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, -1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
; CHECK-NEXT: v_writelane_b32 v40, s34, 0
; CHECK-NEXT: v_writelane_b32 v40, s35, 1
; CHECK-NEXT: v_writelane_b32 v40, s16, 2
; CHECK-NEXT: v_writelane_b32 v41, s16, 0
; CHECK-NEXT: s_add_i32 s32, s32, 0x400
; CHECK-NEXT: ; implicit-def: $vgpr0
; CHECK-NEXT: v_writelane_b32 v0, s30, 0
; CHECK-NEXT: v_writelane_b32 v0, s31, 1
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: buffer_store_dword v0, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: .loc 0 31 3 prologue_end ; lane-info.cpp:31:3
; CHECK-NEXT: s_getpc_b64 s[16:17]
Expand All @@ -43,20 +36,13 @@ define hidden void @_ZL3barv() #0 !dbg !1644 {
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
; CHECK-NEXT: s_swappc_b64 s[30:31], s[16:17]
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: s_or_saveexec_b64 s[34:35], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[34:35]
; CHECK-NEXT: .loc 0 32 1 ; lane-info.cpp:32:1
; CHECK-NEXT: s_waitcnt vmcnt(0)
; CHECK-NEXT: v_readlane_b32 s31, v0, 1
; CHECK-NEXT: v_readlane_b32 s30, v0, 0
; CHECK-NEXT: v_readlane_b32 s34, v40, 0
; CHECK-NEXT: v_readlane_b32 s35, v40, 1
; CHECK-NEXT: v_readlane_b32 s4, v40, 2
; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v41, 0
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: .loc 0 32 1 epilogue_begin is_stmt 0 ; lane-info.cpp:32:1
; CHECK-NEXT: s_add_i32 s32, s32, 0xfffffc00
Expand Down
1,053 changes: 502 additions & 551 deletions llvm/test/CodeGen/AMDGPU/partial-sgpr-to-vgpr-spills.ll

Large diffs are not rendered by default.

389 changes: 17 additions & 372 deletions llvm/test/CodeGen/AMDGPU/scc-clobbered-sgpr-to-vmem-spill.ll

Large diffs are not rendered by default.

26 changes: 8 additions & 18 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-dead-frame-in-dbg-value.mir
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck -check-prefix=SGPR_SPILL %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs --start-before=si-lower-sgpr-spills --stop-after=prologepilog -o - %s | FileCheck -check-prefix=PEI %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s

# After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, replace the dead frame index in the DBG_VALUE instruction with reg 0.
# Otherwise, the test would crash during PEI while trying to replace the dead frame index.
Expand Down Expand Up @@ -41,21 +39,13 @@ machineFunctionInfo:
workGroupIDX: { reg: '$sgpr8' }
privateSegmentWaveByteOffset: { reg: '$sgpr9' }
body: |
; SGPR_SPILL-LABEL: name: test
; SGPR_SPILL: bb.0:
; SGPR_SPILL: [[VGPR:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; SGPR_SPILL: [[VGPR]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 0, [[VGPR]]
; SGPR_SPILL: DBG_VALUE $noreg, 0
; SGPR_SPILL: bb.1:
; SGPR_SPILL: $sgpr10 = V_READLANE_B32 [[VGPR]], 0
; SGPR_SPILL: S_ENDPGM 0
; PEI-LABEL: name: test
; PEI: bb.0:
; PEI: renamable $[[VGPR:vgpr[0-9]+]] = IMPLICIT_DEF
; PEI: renamable $[[VGPR]] = V_WRITELANE_B32 killed $sgpr10, 0, killed $[[VGPR]]
; PEI: bb.1:
; PEI: $sgpr10 = V_READLANE_B32 killed $[[VGPR]], 0
; PEI: S_ENDPGM 0
; CHECK-LABEL: name: test
; CHECK: bb.0:
; CHECK: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 0, $vgpr0
; CHECK: DBG_VALUE $noreg, 0
; CHECK: bb.1:
; CHECK: $sgpr10 = V_READLANE_B32 $vgpr0, 0
; CHECK: S_ENDPGM 0
bb.0:
renamable $sgpr10 = IMPLICIT_DEF
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=true -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s

# After handling the SGPR spill to VGPR in SILowerSGPRSpills pass, we replace the dead frame index in the DBG_VALUE instruction with reg 0.
# Skip looking for frame indices in the debug value instruction for incoming arguments passed via stack. The test would crash otherwise.
Expand Down Expand Up @@ -45,7 +45,7 @@ machineFunctionInfo:
body: |
; CHECK-LABEL: name: test
; CHECK: bb.0:
; CHECK: DBG_VALUE
; CHECK: DBG_VALUE $noreg, 0
bb.0:
renamable $sgpr10 = IMPLICIT_DEF
SI_SPILL_S32_SAVE killed $sgpr10, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
Expand Down
290 changes: 144 additions & 146 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-no-vgprs.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -mtriple=amdgcn-amd-amdhsa -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

; This test was originally written when SGPRs are spilled directly to physical VGPRs and
; stressed a case when there wasn't enough VGPRs to accommodate all spills.
; When we started spilling them into virtual VGPR lanes, we always succeed in doing so.
; The regalloc pass later takes care of allocating VGPRs to these virtual registers.
; The first 64 SGPR spills can go to a VGPR, but there isn't a second
; so some spills must be to memory. The last 16 element spill runs out of lanes at the 15th element.

define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %out, i32 %in) #1 {
; GCN-LABEL: partial_no_vgprs_last_sgpr_spill:
Expand All @@ -25,179 +23,179 @@ define amdgpu_kernel void @partial_no_vgprs_last_sgpr_spill(ptr addrspace(1) %ou
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: v_writelane_b32 v0, s8, 0
; GCN-NEXT: v_writelane_b32 v0, s9, 1
; GCN-NEXT: v_writelane_b32 v0, s10, 2
; GCN-NEXT: v_writelane_b32 v0, s11, 3
; GCN-NEXT: v_writelane_b32 v0, s12, 4
; GCN-NEXT: v_writelane_b32 v0, s13, 5
; GCN-NEXT: v_writelane_b32 v0, s14, 6
; GCN-NEXT: v_writelane_b32 v0, s15, 7
; GCN-NEXT: v_writelane_b32 v0, s16, 8
; GCN-NEXT: v_writelane_b32 v0, s17, 9
; GCN-NEXT: v_writelane_b32 v0, s18, 10
; GCN-NEXT: v_writelane_b32 v0, s19, 11
; GCN-NEXT: v_writelane_b32 v0, s20, 12
; GCN-NEXT: v_writelane_b32 v0, s21, 13
; GCN-NEXT: v_writelane_b32 v0, s22, 14
; GCN-NEXT: v_writelane_b32 v0, s23, 15
; GCN-NEXT: v_writelane_b32 v23, s8, 0
; GCN-NEXT: v_writelane_b32 v23, s9, 1
; GCN-NEXT: v_writelane_b32 v23, s10, 2
; GCN-NEXT: v_writelane_b32 v23, s11, 3
; GCN-NEXT: v_writelane_b32 v23, s12, 4
; GCN-NEXT: v_writelane_b32 v23, s13, 5
; GCN-NEXT: v_writelane_b32 v23, s14, 6
; GCN-NEXT: v_writelane_b32 v23, s15, 7
; GCN-NEXT: v_writelane_b32 v23, s16, 8
; GCN-NEXT: v_writelane_b32 v23, s17, 9
; GCN-NEXT: v_writelane_b32 v23, s18, 10
; GCN-NEXT: v_writelane_b32 v23, s19, 11
; GCN-NEXT: v_writelane_b32 v23, s20, 12
; GCN-NEXT: v_writelane_b32 v23, s21, 13
; GCN-NEXT: v_writelane_b32 v23, s22, 14
; GCN-NEXT: v_writelane_b32 v23, s23, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v0, s8, 16
; GCN-NEXT: v_writelane_b32 v0, s9, 17
; GCN-NEXT: v_writelane_b32 v0, s10, 18
; GCN-NEXT: v_writelane_b32 v0, s11, 19
; GCN-NEXT: v_writelane_b32 v0, s12, 20
; GCN-NEXT: v_writelane_b32 v0, s13, 21
; GCN-NEXT: v_writelane_b32 v0, s14, 22
; GCN-NEXT: v_writelane_b32 v0, s15, 23
; GCN-NEXT: v_writelane_b32 v0, s16, 24
; GCN-NEXT: v_writelane_b32 v0, s17, 25
; GCN-NEXT: v_writelane_b32 v0, s18, 26
; GCN-NEXT: v_writelane_b32 v0, s19, 27
; GCN-NEXT: v_writelane_b32 v0, s20, 28
; GCN-NEXT: v_writelane_b32 v0, s21, 29
; GCN-NEXT: v_writelane_b32 v0, s22, 30
; GCN-NEXT: v_writelane_b32 v0, s23, 31
; GCN-NEXT: v_writelane_b32 v23, s8, 16
; GCN-NEXT: v_writelane_b32 v23, s9, 17
; GCN-NEXT: v_writelane_b32 v23, s10, 18
; GCN-NEXT: v_writelane_b32 v23, s11, 19
; GCN-NEXT: v_writelane_b32 v23, s12, 20
; GCN-NEXT: v_writelane_b32 v23, s13, 21
; GCN-NEXT: v_writelane_b32 v23, s14, 22
; GCN-NEXT: v_writelane_b32 v23, s15, 23
; GCN-NEXT: v_writelane_b32 v23, s16, 24
; GCN-NEXT: v_writelane_b32 v23, s17, 25
; GCN-NEXT: v_writelane_b32 v23, s18, 26
; GCN-NEXT: v_writelane_b32 v23, s19, 27
; GCN-NEXT: v_writelane_b32 v23, s20, 28
; GCN-NEXT: v_writelane_b32 v23, s21, 29
; GCN-NEXT: v_writelane_b32 v23, s22, 30
; GCN-NEXT: v_writelane_b32 v23, s23, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v0, s8, 32
; GCN-NEXT: v_writelane_b32 v0, s9, 33
; GCN-NEXT: v_writelane_b32 v0, s10, 34
; GCN-NEXT: v_writelane_b32 v0, s11, 35
; GCN-NEXT: v_writelane_b32 v0, s12, 36
; GCN-NEXT: v_writelane_b32 v0, s13, 37
; GCN-NEXT: v_writelane_b32 v0, s14, 38
; GCN-NEXT: v_writelane_b32 v0, s15, 39
; GCN-NEXT: v_writelane_b32 v0, s16, 40
; GCN-NEXT: v_writelane_b32 v0, s17, 41
; GCN-NEXT: v_writelane_b32 v0, s18, 42
; GCN-NEXT: v_writelane_b32 v0, s19, 43
; GCN-NEXT: v_writelane_b32 v0, s20, 44
; GCN-NEXT: v_writelane_b32 v0, s21, 45
; GCN-NEXT: v_writelane_b32 v0, s22, 46
; GCN-NEXT: v_writelane_b32 v0, s23, 47
; GCN-NEXT: v_writelane_b32 v23, s8, 32
; GCN-NEXT: v_writelane_b32 v23, s9, 33
; GCN-NEXT: v_writelane_b32 v23, s10, 34
; GCN-NEXT: v_writelane_b32 v23, s11, 35
; GCN-NEXT: v_writelane_b32 v23, s12, 36
; GCN-NEXT: v_writelane_b32 v23, s13, 37
; GCN-NEXT: v_writelane_b32 v23, s14, 38
; GCN-NEXT: v_writelane_b32 v23, s15, 39
; GCN-NEXT: v_writelane_b32 v23, s16, 40
; GCN-NEXT: v_writelane_b32 v23, s17, 41
; GCN-NEXT: v_writelane_b32 v23, s18, 42
; GCN-NEXT: v_writelane_b32 v23, s19, 43
; GCN-NEXT: v_writelane_b32 v23, s20, 44
; GCN-NEXT: v_writelane_b32 v23, s21, 45
; GCN-NEXT: v_writelane_b32 v23, s22, 46
; GCN-NEXT: v_writelane_b32 v23, s23, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[8:23]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_writelane_b32 v0, s8, 48
; GCN-NEXT: v_writelane_b32 v0, s9, 49
; GCN-NEXT: v_writelane_b32 v0, s10, 50
; GCN-NEXT: v_writelane_b32 v0, s11, 51
; GCN-NEXT: v_writelane_b32 v0, s12, 52
; GCN-NEXT: v_writelane_b32 v0, s13, 53
; GCN-NEXT: v_writelane_b32 v0, s14, 54
; GCN-NEXT: v_writelane_b32 v0, s15, 55
; GCN-NEXT: v_writelane_b32 v0, s16, 56
; GCN-NEXT: v_writelane_b32 v0, s17, 57
; GCN-NEXT: v_writelane_b32 v0, s18, 58
; GCN-NEXT: v_writelane_b32 v0, s19, 59
; GCN-NEXT: v_writelane_b32 v0, s20, 60
; GCN-NEXT: v_writelane_b32 v0, s21, 61
; GCN-NEXT: v_writelane_b32 v0, s22, 62
; GCN-NEXT: v_writelane_b32 v0, s23, 63
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: v_writelane_b32 v23, s8, 48
; GCN-NEXT: v_writelane_b32 v23, s9, 49
; GCN-NEXT: v_writelane_b32 v23, s10, 50
; GCN-NEXT: v_writelane_b32 v23, s11, 51
; GCN-NEXT: v_writelane_b32 v23, s12, 52
; GCN-NEXT: v_writelane_b32 v23, s13, 53
; GCN-NEXT: v_writelane_b32 v23, s14, 54
; GCN-NEXT: v_writelane_b32 v23, s15, 55
; GCN-NEXT: v_writelane_b32 v23, s16, 56
; GCN-NEXT: v_writelane_b32 v23, s17, 57
; GCN-NEXT: v_writelane_b32 v23, s18, 58
; GCN-NEXT: v_writelane_b32 v23, s19, 59
; GCN-NEXT: v_writelane_b32 v23, s20, 60
; GCN-NEXT: v_writelane_b32 v23, s21, 61
; GCN-NEXT: v_writelane_b32 v23, s22, 62
; GCN-NEXT: v_writelane_b32 v23, s23, 63
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; def s[6:7]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_mov_b64 s[8:9], exec
; GCN-NEXT: s_mov_b64 exec, 3
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: v_writelane_b32 v0, s6, 0
; GCN-NEXT: v_writelane_b32 v0, s7, 1
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b64 exec, s[8:9]
; GCN-NEXT: s_mov_b32 s5, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_cmp_lg_u32 s4, s5
; GCN-NEXT: s_cbranch_scc1 .LBB0_2
; GCN-NEXT: ; %bb.1: ; %bb0
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: s_or_saveexec_b64 s[24:25], -1
; GCN-NEXT: buffer_load_dword v1, off, s[0:3], 0 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[24:25]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s4, v1, 0
; GCN-NEXT: v_readlane_b32 s5, v1, 1
; GCN-NEXT: v_readlane_b32 s6, v1, 2
; GCN-NEXT: v_readlane_b32 s7, v1, 3
; GCN-NEXT: v_readlane_b32 s8, v1, 4
; GCN-NEXT: v_readlane_b32 s9, v1, 5
; GCN-NEXT: v_readlane_b32 s10, v1, 6
; GCN-NEXT: v_readlane_b32 s11, v1, 7
; GCN-NEXT: v_readlane_b32 s12, v1, 8
; GCN-NEXT: v_readlane_b32 s13, v1, 9
; GCN-NEXT: v_readlane_b32 s14, v1, 10
; GCN-NEXT: v_readlane_b32 s15, v1, 11
; GCN-NEXT: v_readlane_b32 s16, v1, 12
; GCN-NEXT: v_readlane_b32 s17, v1, 13
; GCN-NEXT: v_readlane_b32 s18, v1, 14
; GCN-NEXT: v_readlane_b32 s19, v1, 15
; GCN-NEXT: v_readlane_b32 s4, v23, 0
; GCN-NEXT: v_readlane_b32 s5, v23, 1
; GCN-NEXT: v_readlane_b32 s6, v23, 2
; GCN-NEXT: v_readlane_b32 s7, v23, 3
; GCN-NEXT: v_readlane_b32 s8, v23, 4
; GCN-NEXT: v_readlane_b32 s9, v23, 5
; GCN-NEXT: v_readlane_b32 s10, v23, 6
; GCN-NEXT: v_readlane_b32 s11, v23, 7
; GCN-NEXT: v_readlane_b32 s12, v23, 8
; GCN-NEXT: v_readlane_b32 s13, v23, 9
; GCN-NEXT: v_readlane_b32 s14, v23, 10
; GCN-NEXT: v_readlane_b32 s15, v23, 11
; GCN-NEXT: v_readlane_b32 s16, v23, 12
; GCN-NEXT: v_readlane_b32 s17, v23, 13
; GCN-NEXT: v_readlane_b32 s18, v23, 14
; GCN-NEXT: v_readlane_b32 s19, v23, 15
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s4, v1, 16
; GCN-NEXT: v_readlane_b32 s5, v1, 17
; GCN-NEXT: v_readlane_b32 s6, v1, 18
; GCN-NEXT: v_readlane_b32 s7, v1, 19
; GCN-NEXT: v_readlane_b32 s8, v1, 20
; GCN-NEXT: v_readlane_b32 s9, v1, 21
; GCN-NEXT: v_readlane_b32 s10, v1, 22
; GCN-NEXT: v_readlane_b32 s11, v1, 23
; GCN-NEXT: v_readlane_b32 s12, v1, 24
; GCN-NEXT: v_readlane_b32 s13, v1, 25
; GCN-NEXT: v_readlane_b32 s14, v1, 26
; GCN-NEXT: v_readlane_b32 s15, v1, 27
; GCN-NEXT: v_readlane_b32 s16, v1, 28
; GCN-NEXT: v_readlane_b32 s17, v1, 29
; GCN-NEXT: v_readlane_b32 s18, v1, 30
; GCN-NEXT: v_readlane_b32 s19, v1, 31
; GCN-NEXT: v_readlane_b32 s4, v23, 16
; GCN-NEXT: v_readlane_b32 s5, v23, 17
; GCN-NEXT: v_readlane_b32 s6, v23, 18
; GCN-NEXT: v_readlane_b32 s7, v23, 19
; GCN-NEXT: v_readlane_b32 s8, v23, 20
; GCN-NEXT: v_readlane_b32 s9, v23, 21
; GCN-NEXT: v_readlane_b32 s10, v23, 22
; GCN-NEXT: v_readlane_b32 s11, v23, 23
; GCN-NEXT: v_readlane_b32 s12, v23, 24
; GCN-NEXT: v_readlane_b32 s13, v23, 25
; GCN-NEXT: v_readlane_b32 s14, v23, 26
; GCN-NEXT: v_readlane_b32 s15, v23, 27
; GCN-NEXT: v_readlane_b32 s16, v23, 28
; GCN-NEXT: v_readlane_b32 s17, v23, 29
; GCN-NEXT: v_readlane_b32 s18, v23, 30
; GCN-NEXT: v_readlane_b32 s19, v23, 31
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s4, v1, 32
; GCN-NEXT: v_readlane_b32 s5, v1, 33
; GCN-NEXT: v_readlane_b32 s6, v1, 34
; GCN-NEXT: v_readlane_b32 s7, v1, 35
; GCN-NEXT: v_readlane_b32 s8, v1, 36
; GCN-NEXT: v_readlane_b32 s9, v1, 37
; GCN-NEXT: v_readlane_b32 s10, v1, 38
; GCN-NEXT: v_readlane_b32 s11, v1, 39
; GCN-NEXT: v_readlane_b32 s12, v1, 40
; GCN-NEXT: v_readlane_b32 s13, v1, 41
; GCN-NEXT: v_readlane_b32 s14, v1, 42
; GCN-NEXT: v_readlane_b32 s15, v1, 43
; GCN-NEXT: v_readlane_b32 s16, v1, 44
; GCN-NEXT: v_readlane_b32 s17, v1, 45
; GCN-NEXT: v_readlane_b32 s18, v1, 46
; GCN-NEXT: v_readlane_b32 s19, v1, 47
; GCN-NEXT: v_readlane_b32 s4, v23, 32
; GCN-NEXT: v_readlane_b32 s5, v23, 33
; GCN-NEXT: v_readlane_b32 s6, v23, 34
; GCN-NEXT: v_readlane_b32 s7, v23, 35
; GCN-NEXT: v_readlane_b32 s8, v23, 36
; GCN-NEXT: v_readlane_b32 s9, v23, 37
; GCN-NEXT: v_readlane_b32 s10, v23, 38
; GCN-NEXT: v_readlane_b32 s11, v23, 39
; GCN-NEXT: v_readlane_b32 s12, v23, 40
; GCN-NEXT: v_readlane_b32 s13, v23, 41
; GCN-NEXT: v_readlane_b32 s14, v23, 42
; GCN-NEXT: v_readlane_b32 s15, v23, 43
; GCN-NEXT: v_readlane_b32 s16, v23, 44
; GCN-NEXT: v_readlane_b32 s17, v23, 45
; GCN-NEXT: v_readlane_b32 s18, v23, 46
; GCN-NEXT: v_readlane_b32 s19, v23, 47
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[4:19]
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s8, v1, 48
; GCN-NEXT: v_readlane_b32 s9, v1, 49
; GCN-NEXT: v_readlane_b32 s10, v1, 50
; GCN-NEXT: v_readlane_b32 s11, v1, 51
; GCN-NEXT: v_readlane_b32 s12, v1, 52
; GCN-NEXT: v_readlane_b32 s13, v1, 53
; GCN-NEXT: v_readlane_b32 s14, v1, 54
; GCN-NEXT: v_readlane_b32 s15, v1, 55
; GCN-NEXT: v_readlane_b32 s16, v1, 56
; GCN-NEXT: v_readlane_b32 s17, v1, 57
; GCN-NEXT: v_readlane_b32 s18, v1, 58
; GCN-NEXT: v_readlane_b32 s19, v1, 59
; GCN-NEXT: v_readlane_b32 s20, v1, 60
; GCN-NEXT: v_readlane_b32 s21, v1, 61
; GCN-NEXT: v_readlane_b32 s22, v1, 62
; GCN-NEXT: v_readlane_b32 s23, v1, 63
; GCN-NEXT: v_readlane_b32 s8, v23, 48
; GCN-NEXT: v_readlane_b32 s9, v23, 49
; GCN-NEXT: v_readlane_b32 s10, v23, 50
; GCN-NEXT: v_readlane_b32 s11, v23, 51
; GCN-NEXT: v_readlane_b32 s12, v23, 52
; GCN-NEXT: v_readlane_b32 s13, v23, 53
; GCN-NEXT: v_readlane_b32 s14, v23, 54
; GCN-NEXT: v_readlane_b32 s15, v23, 55
; GCN-NEXT: v_readlane_b32 s16, v23, 56
; GCN-NEXT: v_readlane_b32 s17, v23, 57
; GCN-NEXT: v_readlane_b32 s18, v23, 58
; GCN-NEXT: v_readlane_b32 s19, v23, 59
; GCN-NEXT: v_readlane_b32 s20, v23, 60
; GCN-NEXT: v_readlane_b32 s21, v23, 61
; GCN-NEXT: v_readlane_b32 s22, v23, 62
; GCN-NEXT: v_readlane_b32 s23, v23, 63
; GCN-NEXT: s_mov_b64 s[6:7], exec
; GCN-NEXT: s_mov_b64 exec, 3
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_readlane_b32 s4, v0, 0
; GCN-NEXT: v_readlane_b32 s5, v0, 1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s[8:23]
; GCN-NEXT: ;;#ASMEND
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-partially-undef.mir
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,10 @@ body: |
liveins: $sgpr4
; CHECK-LABEL: name: sgpr_spill_s64_undef_high32
; CHECK: liveins: $sgpr4
; CHECK: liveins: $sgpr4, $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
...
Expand All @@ -46,11 +45,10 @@ body: |
liveins: $sgpr5
; CHECK-LABEL: name: sgpr_spill_s64_undef_low32
; CHECK: liveins: $sgpr5
; CHECK: liveins: $sgpr5, $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5, implicit $sgpr4_sgpr5
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5
SI_SPILL_S64_SAVE renamable $sgpr4_sgpr5, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
...
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,16 @@ define amdgpu_kernel void @kernel() {
; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; GCN-NEXT: s_mov_b32 s38, -1
; GCN-NEXT: ; implicit-def: $vgpr3
; GCN-NEXT: s_mov_b32 s39, 0xe00000
; GCN-NEXT: v_writelane_b32 v3, s4, 0
; GCN-NEXT: v_writelane_b32 v40, s4, 0
; GCN-NEXT: s_add_u32 s36, s36, s11
; GCN-NEXT: v_writelane_b32 v3, s5, 1
; GCN-NEXT: v_writelane_b32 v40, s5, 1
; GCN-NEXT: s_addc_u32 s37, s37, 0
; GCN-NEXT: s_mov_b64 s[4:5], s[0:1]
; GCN-NEXT: v_readlane_b32 s0, v3, 0
; GCN-NEXT: v_readlane_b32 s0, v40, 0
; GCN-NEXT: s_mov_b32 s13, s9
; GCN-NEXT: s_mov_b32 s12, s8
; GCN-NEXT: v_readlane_b32 s1, v3, 1
; GCN-NEXT: v_readlane_b32 s1, v40, 1
; GCN-NEXT: s_add_u32 s8, s0, 36
; GCN-NEXT: s_addc_u32 s9, s1, 0
; GCN-NEXT: s_getpc_b64 s[0:1]
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -start-before=si-lower-sgpr-spills -stop-after=prologepilog -o - %s | FileCheck %s
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -amdgpu-spill-sgpr-to-vgpr=false -verify-machineinstrs -run-pass=si-lower-sgpr-spills,prologepilog -o - %s | FileCheck %s

# Check that we allocate 2 emergency stack slots if we're spilling
# SGPRs to memory and potentially have an offset larger than fits in
Expand Down Expand Up @@ -29,7 +29,7 @@ body: |
; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr2
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 $sgpr10, 0, undef $vgpr2
; CHECK-NEXT: $vgpr2 = V_WRITELANE_B32 killed $sgpr10, 0, undef $vgpr2
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
; CHECK-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr2
Expand Down
177 changes: 80 additions & 97 deletions llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions llvm/test/CodeGen/AMDGPU/si-spill-sgpr-stack.ll
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,16 @@
; ALL: s_mov_b32 s[[HI:[0-9]+]], 0xe80000

; Make sure we are handling hazards correctly.
; SGPR: v_mov_b32_e32 v0, vcc_lo
; SGPR-NEXT: s_or_saveexec_b64 [[EXEC_COPY:s\[[0-9]+:[0-9]+\]]], -1
; SGPR-NEXT: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4 ; 4-byte Folded Reload
; SGPR-NEXT: s_mov_b64 exec, [[EXEC_COPY]]
; SGPR: buffer_load_dword [[VHI:v[0-9]+]], off, s[{{[0-9]+:[0-9]+}}], 0 offset:4
; SGPR-NEXT: s_waitcnt vmcnt(0)
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 0
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 1
; SGPR-NEXT: v_readlane_b32 s{{[0-9]+}}, [[VHI]], 2
; SGPR-NEXT: v_readlane_b32 s[[HI:[0-9]+]], [[VHI]], 3
; SGPR-NEXT: s_nop 4
; SGPR-NEXT: buffer_load_dword [[VHI]], off, s[96:99], 0
; SGPR-NEXT: s_waitcnt vmcnt(0)
; SGPR-NEXT: s_mov_b64 exec, s[4:5]
; SGPR-NEXT: s_nop 1
; SGPR-NEXT: buffer_store_dword v0, off, s[0:3], 0

; ALL: s_endpgm
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/sibling-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,15 @@ entry:
; GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12

; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1


; GCN: s_swappc_b64

; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload

; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
Expand Down
18 changes: 8 additions & 10 deletions llvm/test/CodeGen/AMDGPU/spill-csr-frame-ptr-reg-copy.ll
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,20 @@

; GCN-LABEL: {{^}}spill_csr_s5_copy:
; GCN: s_mov_b32 [[FP_SCRATCH_COPY:s[0-9]+]], s33
; GCN: s_xor_saveexec_b64
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec
; GCN: v_writelane_b32 v40, [[FP_SCRATCH_COPY]], 2
; GCN: v_writelane_b32 v41, [[FP_SCRATCH_COPY]], 0
; GCN: s_swappc_b64

; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 9
; GCN: buffer_store_dword [[K]], off, s[0:3], s33{{$}}

; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v40, 2
; GCN: s_xor_saveexec_b64
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GCN: v_readlane_b32 [[FP_SCRATCH_COPY:s[0-9]+]], v41, 0
; GCN: s_or_saveexec_b64
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GCN: s_mov_b64 exec
; GCN: s_mov_b32 s33, [[FP_SCRATCH_COPY]]
; GCN: s_setpc_b64
Expand Down
19 changes: 10 additions & 9 deletions llvm/test/CodeGen/AMDGPU/spill-offset-calculation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -117,16 +117,16 @@ define void @test_sgpr_offset_function_scavenge_fail_func() #2 {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s32 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_add_i32 s10, s32, 0x40100
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s10 ; 4-byte Folded Spill
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: s_add_i32 s10, s32, 0x40100
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s10 ; 4-byte Folded Reload
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004
; MUBUF-NEXT: buffer_load_dword v0, v1, s[0:3], s32 offen ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -199,15 +199,16 @@ define amdgpu_kernel void @test_sgpr_offset_function_scavenge_fail_kernel() #3 {
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], 0 offset:8 glc
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: s_mov_b32 s10, 0x40100
; MUBUF-NEXT: buffer_store_dword v0, off, s[0:3], s10 ; 4-byte Folded Spill
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004
; MUBUF-NEXT: buffer_store_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Spill
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
; MUBUF-NEXT: buffer_load_dword v0, off, s[0:3], s10 ; 4-byte Folded Reload
; MUBUF-NEXT: v_mov_b32_e32 v1, 0x1004
; MUBUF-NEXT: buffer_load_dword v0, v1, s[0:3], 0 offen ; 4-byte Folded Reload
; MUBUF-NEXT: s_waitcnt vmcnt(0)
; MUBUF-NEXT: ;;#ASMSTART
; MUBUF-NEXT: ;;#ASMEND
Expand Down Expand Up @@ -638,5 +639,5 @@ entry:

attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-sgpr"="17" "amdgpu-num-vgpr"="8" }
attributes #2 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
attributes #3 = { nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" }
attributes #2 = { nounwind "amdgpu-num-sgpr"="14" "amdgpu-num-vgpr"="8" }
attributes #3 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
36 changes: 17 additions & 19 deletions llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -mcpu=gfx900 -start-before=si-lower-sgpr-spills -stop-after=prologepilog -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-lower-sgpr-spills,prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

# Make sure the initial first $sgpr1 = COPY $sgpr2 copy is not deleted
# by the copy propagation after lowering the spill.
Expand All @@ -26,12 +26,11 @@ body: |
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: dead renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $sgpr8 = COPY killed renamable $sgpr1
; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
Expand Down Expand Up @@ -64,11 +63,10 @@ body: |
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
; GCN-NEXT: renamable $vgpr0 = IMPLICIT_DEF
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, killed $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: renamable $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: dead renamable $vgpr0 = V_WRITELANE_B32 $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr0, 0, $vgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr1, 1, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr2, 2, $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr3, 3, $vgpr0, implicit killed $sgpr0_sgpr1_sgpr2_sgpr3
; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
Expand All @@ -95,12 +93,12 @@ body: |
; GCN-LABEL: name: spill_vgpr128_use_subreg
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
; GCN-NEXT: renamable $vgpr8 = COPY $vgpr2, implicit $exec
; GCN-NEXT: renamable $vgpr8 = COPY killed renamable $vgpr1
; GCN-NEXT: S_ENDPGM 0, implicit $vgpr8
renamable $vgpr1 = COPY $vgpr2
SI_SPILL_V128_SAVE renamable $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
Expand All @@ -125,11 +123,11 @@ body: |
; GCN-LABEL: name: spill_vgpr128_use_kill
; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
; GCN-NEXT: {{ $}}
; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 4, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 8, addrspace 5)
; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
; GCN-NEXT: S_ENDPGM 0
renamable $vgpr1 = COPY $vgpr2
SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
Expand Down
478 changes: 258 additions & 220 deletions llvm/test/CodeGen/AMDGPU/spill-scavenge-offset.ll

Large diffs are not rendered by default.

5 changes: 2 additions & 3 deletions llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,9 @@ body: |
bb.0:
liveins: $sgpr50
; CHECK-LABEL: name: spill_csr_sgpr_argument
; CHECK: liveins: $sgpr50
; CHECK: liveins: $sgpr50, $vgpr0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; CHECK-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr50, 0, [[V_WRITELANE_B32_]]
; CHECK-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr50, 0, $vgpr0
; CHECK-NEXT: S_NOP 0, implicit $sgpr50
; CHECK-NEXT: $sgpr50 = S_MOV_B32 0
S_NOP 0, implicit $sgpr50
Expand Down
45 changes: 27 additions & 18 deletions llvm/test/CodeGen/AMDGPU/spill-sgpr-stack-no-sgpr.ll
Original file line number Diff line number Diff line change
@@ -1,50 +1,59 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=-wavefrontsize32,+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; The test was originally written to spill an SGPR to scratch without having spare SGPRs available to save exec.
; This scenario no longer exists when we enabled SGPR spill into virtual VGPRs.
; Spill an SGPR to scratch without having spare SGPRs available to save exec

define amdgpu_kernel void @test() #1 {
; GFX10-LABEL: test:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s14, -1
; GFX10-NEXT: s_mov_b32 s15, 0x31e16000
; GFX10-NEXT: s_add_u32 s12, s12, s1
; GFX10-NEXT: s_addc_u32 s13, s13, 0
; GFX10-NEXT: s_mov_b32 s8, SCRATCH_RSRC_DWORD0
; GFX10-NEXT: s_mov_b32 s9, SCRATCH_RSRC_DWORD1
; GFX10-NEXT: s_mov_b32 s10, -1
; GFX10-NEXT: s_mov_b32 s11, 0x31e16000
; GFX10-NEXT: s_add_u32 s8, s8, s1
; GFX10-NEXT: s_addc_u32 s9, s9, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s[0:7]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s[8:12]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NOT: s_not_b64 exec, exec
; GFX10-NEXT: ; implicit-def: $vgpr0
; GFX10-NEXT: s_not_b64 exec, exec
; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0
; GFX10-NEXT: v_writelane_b32 v0, s8, 0
; GFX10-NEXT: v_writelane_b32 v0, s9, 1
; GFX10-NEXT: v_writelane_b32 v0, s10, 2
; GFX10-NEXT: v_writelane_b32 v0, s11, 3
; GFX10-NEXT: v_writelane_b32 v0, s12, 4
; GFX10-NEXT: s_or_saveexec_b64 s[14:15], -1
; GFX10-NEXT: buffer_store_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b64 exec, s[14:15]
; GFX10-NEXT: s_not_b64 exec, exec
; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_not_b64 exec, exec
; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_not_b64 exec, exec
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s[0:7]
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_or_saveexec_b64 s[14:15], -1
; GFX10-NEXT: buffer_load_dword v0, off, s[12:15], 0 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b64 exec, s[14:15]
; GFX10-NEXT: s_mov_b64 s[6:7], exec
; GFX10-NEXT: s_mov_b64 exec, 31
; GFX10-NEXT: buffer_store_dword v0, off, s[8:11], 0
; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_readlane_b32 s0, v0, 0
; GFX10-NEXT: v_readlane_b32 s1, v0, 1
; GFX10-NEXT: v_readlane_b32 s2, v0, 2
; GFX10-NEXT: v_readlane_b32 s3, v0, 3
; GFX10-NEXT: v_readlane_b32 s4, v0, 4
; GFX10-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b64 exec, s[6:7]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s[0:4]
; GFX10-NEXT: ;;#ASMEND
Expand All @@ -58,4 +67,4 @@ define amdgpu_kernel void @test() #1 {
}

attributes #0 = { nounwind }
attributes #1 = { nounwind "amdgpu-num-sgpr"="18" "amdgpu-num-vgpr"="8" }
attributes #1 = { nounwind "amdgpu-num-sgpr"="16" "amdgpu-num-vgpr"="8" }
320 changes: 0 additions & 320 deletions llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir

This file was deleted.

85 changes: 0 additions & 85 deletions llvm/test/CodeGen/AMDGPU/spill-vgpr-to-agpr-update-regscavenger.ll

This file was deleted.

9 changes: 4 additions & 5 deletions llvm/test/CodeGen/AMDGPU/spill-writelane-vgprs.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,16 @@ define void @sgpr_spill_writelane() {
; GCN-LABEL: sgpr_spill_writelane:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: ; implicit-def: $vgpr0
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v0, s35, 0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: v_readlane_b32 s35, v0, 0
; GCN-NEXT: s_xor_saveexec_b64 s[6:7], -1
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
call void asm sideeffect "", "~{s35}"()
Expand Down
29 changes: 16 additions & 13 deletions llvm/test/CodeGen/AMDGPU/spill192.mir
Original file line number Diff line number Diff line change
Expand Up @@ -32,29 +32,32 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr192
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr9, 5, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9
bb.0:
S_NOP 0, implicit-def %0:sgpr_192
Expand Down
33 changes: 18 additions & 15 deletions llvm/test/CodeGen/AMDGPU/spill224.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,31 +30,34 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr224
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr10, 6, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10
bb.0:
S_NOP 0, implicit-def %0:sgpr_224
Expand Down
41 changes: 22 additions & 19 deletions llvm/test/CodeGen/AMDGPU/spill288.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,35 +30,38 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr288
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr12, 8, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr12, 8, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12
bb.0:
S_NOP 0, implicit-def %0:sgpr_288
Expand Down
45 changes: 24 additions & 21 deletions llvm/test/CodeGen/AMDGPU/spill320.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,37 +30,40 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr320
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr13, 9, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr13, 9, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13
bb.0:
S_NOP 0, implicit-def %0:sgpr_320
Expand Down
49 changes: 26 additions & 23 deletions llvm/test/CodeGen/AMDGPU/spill352.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,39 +30,42 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr352
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr13, 9, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr14, 10, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 9, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr14, 10, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9
; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 10
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14
bb.0:
S_NOP 0, implicit-def %0:sgpr_352
Expand Down
53 changes: 28 additions & 25 deletions llvm/test/CodeGen/AMDGPU/spill384.mir
Original file line number Diff line number Diff line change
Expand Up @@ -30,41 +30,44 @@ body: |
; EXPANDED-LABEL: name: spill_restore_sgpr384
; EXPANDED: bb.0:
; EXPANDED-NEXT: successors: %bb.1(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 0, implicit-def renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; EXPANDED-NEXT: [[V_WRITELANE_B32_:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr4, 0, [[V_WRITELANE_B32_]], implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr5, 1, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr6, 2, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr7, 3, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr8, 4, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr9, 5, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr10, 6, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr11, 7, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr12, 8, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr13, 9, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 $sgpr14, 10, [[V_WRITELANE_B32_1]], implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: [[V_WRITELANE_B32_1:%[0-9]+]]:vgpr_32 = V_WRITELANE_B32 killed $sgpr15, 11, [[V_WRITELANE_B32_1]], implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr4, 0, $vgpr0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr5, 1, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr6, 2, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr7, 3, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr8, 4, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr9, 5, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr10, 6, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr11, 7, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr12, 8, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr13, 9, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 $sgpr14, 10, $vgpr0, implicit $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $vgpr0 = V_WRITELANE_B32 killed $sgpr15, 11, $vgpr0, implicit killed $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.1:
; EXPANDED-NEXT: successors: %bb.2(0x80000000)
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: S_NOP 1
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: bb.2:
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 9
; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 10
; EXPANDED-NEXT: $sgpr15 = V_READLANE_B32 [[V_WRITELANE_B32_1]], 11
; EXPANDED-NEXT: liveins: $vgpr0
; EXPANDED-NEXT: {{ $}}
; EXPANDED-NEXT: $sgpr4 = V_READLANE_B32 $vgpr0, 0, implicit-def $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
; EXPANDED-NEXT: $sgpr5 = V_READLANE_B32 $vgpr0, 1
; EXPANDED-NEXT: $sgpr6 = V_READLANE_B32 $vgpr0, 2
; EXPANDED-NEXT: $sgpr7 = V_READLANE_B32 $vgpr0, 3
; EXPANDED-NEXT: $sgpr8 = V_READLANE_B32 $vgpr0, 4
; EXPANDED-NEXT: $sgpr9 = V_READLANE_B32 $vgpr0, 5
; EXPANDED-NEXT: $sgpr10 = V_READLANE_B32 $vgpr0, 6
; EXPANDED-NEXT: $sgpr11 = V_READLANE_B32 $vgpr0, 7
; EXPANDED-NEXT: $sgpr12 = V_READLANE_B32 $vgpr0, 8
; EXPANDED-NEXT: $sgpr13 = V_READLANE_B32 $vgpr0, 9
; EXPANDED-NEXT: $sgpr14 = V_READLANE_B32 $vgpr0, 10
; EXPANDED-NEXT: $sgpr15 = V_READLANE_B32 $vgpr0, 11
; EXPANDED-NEXT: S_NOP 0, implicit killed renamable $sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15
bb.0:
S_NOP 0, implicit-def %0:sgpr_384
Expand Down
3 changes: 1 addition & 2 deletions llvm/test/CodeGen/AMDGPU/tail-call-amdgpu-gfx.ll
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,8 @@ define amdgpu_gfx float @caller(float %arg0) {
; GCN-NEXT: s_xor_saveexec_b64 s[34:35], -1
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[34:35]
; GCN-NEXT: ; implicit-def: $vgpr1
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v1, s4, 0
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_writelane_b32 v1, s30, 1
; GCN-NEXT: v_add_f32_e32 v0, 1.0, v0
; GCN-NEXT: s_mov_b32 s4, 2.0
Expand Down
Loading