57 changes: 32 additions & 25 deletions llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: v_mov_b32_e32 v32, v12
; GFX9: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[16:23], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9-NEXT: v_writelane_b32 v44, s30, 0
; GFX9: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]

; GFX9: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
Expand All @@ -53,14 +53,14 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND

; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[16:23], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: s_getpc_b64 s[16:17]
; GFX10-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX10: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17]

; GFX10: buffer_load_dword v43, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4
Expand Down Expand Up @@ -100,14 +100,14 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: v_mov_b32_e32 v40, v12

; GFX9: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[4:7] dmask:0x1

; GFX9: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
Expand All @@ -127,22 +127,29 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill


; GFX10: image_gather4_c_b_cl v[0:3], v[12:19], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10: s_getpc_b64 s[16:17]
; GFX10-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX10-NEXT: s_mov_b32 s37, s36
; GFX10-NEXT: s_mov_b32 s38, s36
; GFX10-NEXT: s_mov_b32 s39, s36
; GFX10-NEXT: s_mov_b32 s40, s36
; GFX10-NEXT: s_mov_b32 s41, s36
; GFX10-NEXT: s_mov_b32 s42, s36
; GFX10-NEXT: s_mov_b32 s43, s36
; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:19], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: v_writelane_b32 v45, s30, 8
; GFX10-NEXT: v_mov_b32_e32 v40, v16
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v41, v15
; GFX10-NEXT: v_mov_b32_e32 v42, v14
; GFX10-NEXT: v_mov_b32_e32 v43, v13
; GFX10-NEXT: v_writelane_b32 v45, s31, 9
; GFX10-NEXT: v_mov_b32_e32 v44, v12
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D

; GFX10: buffer_load_dword v44, off, s[0:3], s33
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1076,7 +1076,7 @@ declare void @external_void_func_void() #1
; GCN-NEXT: s_waitcnt_vscnt

; GFX1064-NEXT: s_or_saveexec_b64 [[COPY_EXEC0:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]]], -1{{$}}
; GFX1032-NEXT: s_or_saveexec_b32 [[COPY_EXEC0:s[0-9]+]], -1{{$}}
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt_depctr 0xffe3
; GFX1064-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
Expand Down
106 changes: 100 additions & 6 deletions llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,18 @@
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -47,10 +55,18 @@
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
Expand Down Expand Up @@ -96,6 +112,16 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -111,6 +137,16 @@ body: |
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -139,6 +175,16 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -154,6 +200,16 @@ body: |
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -183,6 +239,16 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -199,6 +265,16 @@ body: |
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -235,13 +311,31 @@ body: |

# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
name: fake_stack_arginfo
machineFunctionInfo:
argumentInfo:
Expand Down