255 changes: 113 additions & 142 deletions llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,61 @@
define hidden void @widget() {
; GCN-LABEL: widget:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccz BB0_3
; GCN-NEXT: ; %bb.1: ; %bb4
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz BB0_4
; GCN-NEXT: ; %bb.2: ; %bb7
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: s_branch BB0_7
; GCN-NEXT: BB0_3: ; %bb2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz BB0_6
; GCN-NEXT: BB0_4: ; %bb9
; GCN-NEXT: s_getpc_b64 s[16:17]
; GCN-NEXT: s_add_u32 s16, s16, wibble@rel32@lo+4
; GCN-NEXT: s_addc_u32 s17, s17, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execnz BB0_7
; GCN-NEXT: ; %bb.5: ; %bb9.bb12_crit_edge
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: BB0_6: ; %bb12
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 21, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: s_cbranch_vccz BB0_3
; GCN-NEXT: ; %bb.1: ; %bb4
; GCN-NEXT: v_cmp_ne_u32_e32 vcc, 9, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz BB0_4
; GCN-NEXT: ; %bb.2: ; %bb7
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, wibble@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: s_branch BB0_7
; GCN-NEXT: BB0_3: ; %bb2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 21, v0
; GCN-NEXT: s_and_b64 vcc, exec, vcc
; GCN-NEXT: s_cbranch_vccnz BB0_6
; GCN-NEXT: BB0_4: ; %bb9
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, wibble@rel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, wibble@rel32@hi+12
; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GCN-NEXT: v_cmp_lt_f32_e32 vcc, 0, v0
; GCN-NEXT: s_and_saveexec_b64 s[4:5], vcc
; GCN-NEXT: s_cbranch_execnz BB0_7
; GCN-NEXT: ; %bb.5: ; %bb9.bb12_crit_edge
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: BB0_6: ; %bb12
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_store_dword v[0:1], v2
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[6:7]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[4:5]
; SI-OPT-LABEL: @widget(
; SI-OPT-NEXT: bb:
; SI-OPT-NEXT: [[TMP:%.*]] = load i32, i32 addrspace(1)* null, align 16
Expand Down Expand Up @@ -186,124 +186,95 @@ define hidden void @blam() {
; GCN-LABEL: blam:
; GCN: ; %bb.0: ; %bb
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_or_saveexec_b64 s[16:17], -1
; GCN-NEXT: buffer_store_dword v44, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[16:17]
; GCN-NEXT: v_writelane_b32 v44, s33, 15
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v43, s33, 4
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v44, s34, 0
; GCN-NEXT: v_writelane_b32 v44, s35, 1
; GCN-NEXT: v_writelane_b32 v44, s36, 2
; GCN-NEXT: v_writelane_b32 v44, s38, 3
; GCN-NEXT: v_writelane_b32 v44, s39, 4
; GCN-NEXT: v_writelane_b32 v44, s40, 5
; GCN-NEXT: v_writelane_b32 v44, s41, 6
; GCN-NEXT: v_writelane_b32 v44, s42, 7
; GCN-NEXT: v_writelane_b32 v44, s43, 8
; GCN-NEXT: v_writelane_b32 v44, s44, 9
; GCN-NEXT: v_writelane_b32 v44, s45, 10
; GCN-NEXT: v_writelane_b32 v44, s46, 11
; GCN-NEXT: v_writelane_b32 v44, s47, 12
; GCN-NEXT: v_writelane_b32 v44, s48, 13
; GCN-NEXT: v_writelane_b32 v44, s49, 14
; GCN-NEXT: v_mov_b32_e32 v40, v31
; GCN-NEXT: s_mov_b32 s34, s14
; GCN-NEXT: s_mov_b32 s35, s13
; GCN-NEXT: s_mov_b32 s36, s12
; GCN-NEXT: s_mov_b64 s[38:39], s[10:11]
; GCN-NEXT: s_mov_b64 s[40:41], s[8:9]
; GCN-NEXT: s_mov_b64 s[42:43], s[6:7]
; GCN-NEXT: s_mov_b64 s[44:45], s[4:5]
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v43, s34, 0
; GCN-NEXT: v_writelane_b32 v43, s35, 1
; GCN-NEXT: v_writelane_b32 v43, s36, 2
; GCN-NEXT: v_writelane_b32 v43, s37, 3
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: v_and_b32_e32 v2, 0x3ff, v40
; GCN-NEXT: flat_load_dword v41, v[0:1]
; GCN-NEXT: v_mov_b32_e32 v43, 0
; GCN-NEXT: s_getpc_b64 s[48:49]
; GCN-NEXT: s_add_u32 s48, s48, spam@rel32@lo+4
; GCN-NEXT: s_addc_u32 s49, s49, spam@rel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v42, 2, v2
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: flat_load_dword v40, v[1:2]
; GCN-NEXT: v_mov_b32_e32 v42, 0
; GCN-NEXT: s_getpc_b64 s[36:37]
; GCN-NEXT: s_add_u32 s36, s36, spam@rel32@lo+4
; GCN-NEXT: s_addc_u32 s37, s37, spam@rel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v41, 2, v0
; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GCN-NEXT: v_cmp_eq_f32_e64 s[46:47], 0, v41
; GCN-NEXT: v_cmp_eq_f32_e64 s[34:35], 0, v40
; GCN-NEXT: s_branch BB1_3
; GCN-NEXT: BB1_1: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: BB1_1: ; %bb10
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[6:7]
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: BB1_2: ; %bb18
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: BB1_2: ; %bb18
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: BB1_3: ; %bb2
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB1_4 Depth 2
; GCN-NEXT: BB1_3: ; %bb2
; GCN-NEXT: ; =>This Loop Header: Depth=1
; GCN-NEXT: ; Child Loop BB1_4 Depth 2
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: BB1_4: ; %bb2
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: flat_load_dword v0, v[42:43]
; GCN-NEXT: BB1_4: ; %bb2
; GCN-NEXT: ; Parent Loop BB1_3 Depth=1
; GCN-NEXT: ; => This Inner Loop Header: Depth=2
; GCN-NEXT: flat_load_dword v0, v[41:42]
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT: s_waitcnt vmcnt(1)
; GCN-NEXT: v_cmp_gt_i32_e32 vcc, 3, v0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execz BB1_6
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: ; %bb.5: ; %bb8
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GCN-NEXT: s_or_b64 s[6:7], vcc, s[6:7]
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GCN-NEXT: s_cbranch_execnz BB1_4
; GCN-NEXT: s_branch BB1_1
; GCN-NEXT: BB1_6: ; %bb6
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: BB1_6: ; %bb6
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 3, v0
; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
; GCN-NEXT: s_cbranch_execnz BB1_4
; GCN-NEXT: ; %bb.7: ; %bb11
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_mov_b64 s[4:5], s[44:45]
; GCN-NEXT: s_mov_b64 s[6:7], s[42:43]
; GCN-NEXT: s_mov_b64 s[8:9], s[40:41]
; GCN-NEXT: s_mov_b64 s[10:11], s[38:39]
; GCN-NEXT: s_mov_b32 s12, s36
; GCN-NEXT: s_mov_b32 s13, s35
; GCN-NEXT: s_mov_b32 s14, s34
; GCN-NEXT: v_mov_b32_e32 v31, v40
; GCN-NEXT: s_swappc_b64 s[30:31], s[48:49]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execnz BB1_4
; GCN-NEXT: ; %bb.8: ; %bb14
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_saveexec_b64 s[4:5], s[46:47]
; GCN-NEXT: s_cbranch_execnz BB1_10
; GCN-NEXT: ; %bb.9: ; %bb16
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: BB1_10: ; %bb17
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], 0
; GCN-NEXT: s_branch BB1_2

; GCN-NEXT: ; %bb.7: ; %bb11
; GCN-NEXT: ; in Loop: Header=BB1_4 Depth=2
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GCN-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0
; GCN-NEXT: s_mov_b64 s[4:5], 0
; GCN-NEXT: s_mov_b64 s[6:7], 0
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: s_cbranch_execnz BB1_4
; GCN-NEXT: ; %bb.8: ; %bb14
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
; GCN-NEXT: s_and_saveexec_b64 s[4:5], s[34:35]
; GCN-NEXT: s_cbranch_execnz BB1_10
; GCN-NEXT: ; %bb.9: ; %bb16
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, 0x7fc00000
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT: BB1_10: ; %bb17
; GCN-NEXT: ; in Loop: Header=BB1_3 Depth=1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], 0
; GCN-NEXT: s_branch BB1_2
bb:
%tmp = load float, float* null, align 16
br label %bb2
Expand Down
57 changes: 25 additions & 32 deletions llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll
Original file line number Diff line number Diff line change
Expand Up @@ -21,14 +21,14 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: v_mov_b32_e32 v32, v12
; GFX9: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[16:23], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9-NEXT: v_writelane_b32 v44, s30, 0
; GFX9: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]

; GFX9: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
Expand All @@ -53,14 +53,14 @@ define <4 x float> @non_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10: ;;#ASMSTART
; GFX10-NEXT: ;;#ASMEND

; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[16:23], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10: image_gather4_c_b_cl v[40:43], v[32:39], s[4:11], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_getpc_b64 s[16:17]
; GFX10-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX10: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]

; GFX10: buffer_load_dword v43, off, s[0:3], s33
; GFX10-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4
Expand Down Expand Up @@ -100,14 +100,14 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX9-NEXT: v_mov_b32_e32 v40, v12

; GFX9: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[4:7] dmask:0x1
; GFX9-NEXT: s_getpc_b64 s[16:17]
; GFX9-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX9-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX9: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX9-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX9-NEXT: image_gather4_c_b_cl v[0:3], v[40:47], s[36:43], s[4:7] dmask:0x1

; GFX9: buffer_load_dword v44, off, s[0:3], s33 ; 4-byte Folded Reload
Expand All @@ -127,29 +127,22 @@ define <4 x float> @call_preserved_vgpr_tuple8(<8 x i32> %rsrc, <4 x i32> %samp,
; GFX10-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v44, off, s[0:3], s33 ; 4-byte Folded Spill

; GFX10: s_getpc_b64 s[16:17]
; GFX10-NEXT: s_add_u32 s16, s16, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s17, s17, extern_func@gotpcrel32@hi+12
; GFX10-NEXT: s_mov_b32 s37, s36
; GFX10-NEXT: s_mov_b32 s38, s36
; GFX10-NEXT: s_mov_b32 s39, s36
; GFX10-NEXT: s_mov_b32 s40, s36
; GFX10-NEXT: s_mov_b32 s41, s36
; GFX10-NEXT: s_mov_b32 s42, s36
; GFX10-NEXT: s_mov_b32 s43, s36
; GFX10-NEXT: s_load_dwordx2 s[16:17], s[16:17], 0x0
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], v[12:19], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: v_writelane_b32 v45, s30, 8

; GFX10: image_gather4_c_b_cl v[0:3], v[12:19], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_getpc_b64 s[4:5]
; GFX10-NEXT: s_add_u32 s4, s4, extern_func@gotpcrel32@lo+4
; GFX10-NEXT: s_addc_u32 s5, s5, extern_func@gotpcrel32@hi+12
; GFX10-NEXT: v_mov_b32_e32 v40, v16
; GFX10-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x0
; GFX10-NEXT: v_mov_b32_e32 v41, v15
; GFX10-NEXT: v_mov_b32_e32 v42, v14
; GFX10-NEXT: v_mov_b32_e32 v43, v13
; GFX10-NEXT: v_writelane_b32 v45, s31, 9
; GFX10-NEXT: v_mov_b32_e32 v44, v12
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dwordx4 v[0:1], v[0:3], off
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GFX10-NEXT: image_gather4_c_b_cl v[0:3], [v44, v43, v42, v41, v40], s[36:43], s[4:7] dmask:0x1 dim:SQ_RSRC_IMG_2D

; GFX10: buffer_load_dword v44, off, s[0:3], s33
Expand Down
106 changes: 6 additions & 100 deletions llvm/test/CodeGen/MIR/AMDGPU/machine-function-info-no-ir.mir
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,10 @@
# FULL-NEXT: stackPtrOffsetReg: '$sgpr13'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr6' }
# FULL-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: workItemIDX: { reg: '$vgpr0' }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -55,18 +47,10 @@
# SIMPLE-NEXT: stackPtrOffsetReg: '$sgpr13'
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: kernargSegmentPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr6' }
# SIMPLE-NEXT: privateSegmentWaveByteOffset: { reg: '$sgpr7' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr0' }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:
name: kernel0
Expand Down Expand Up @@ -112,16 +96,6 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -137,16 +111,6 @@ body: |
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -175,16 +139,6 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -200,16 +154,6 @@ body: |
# SIMPLE-NEXT: maxKernArgAlign: 1
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -239,16 +183,6 @@ body: |
# FULL-NEXT: stackPtrOffsetReg: '$sp_reg'
# FULL-NEXT: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: mode:
# FULL-NEXT: ieee: true
# FULL-NEXT: dx10-clamp: true
Expand All @@ -265,16 +199,6 @@ body: |
# SIMPLE-NEXT: isEntryFunction: true
# SIMPLE-NEXT: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr31', mask: 1047552 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: occupancy: 10
# SIMPLE-NEXT: body:

Expand Down Expand Up @@ -311,31 +235,13 @@ body: |

# FULL: argumentInfo:
# FULL-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# FULL-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# FULL-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# FULL-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# FULL-NEXT: flatScratchInit: { offset: 4 }
# FULL-NEXT: workGroupIDX: { reg: '$sgpr12' }
# FULL-NEXT: workGroupIDY: { reg: '$sgpr13' }
# FULL-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# FULL-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# FULL-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# FULL-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# FULL-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }

# SIMPLE: argumentInfo:
# SIMPLE-NEXT: privateSegmentBuffer: { reg: '$sgpr0_sgpr1_sgpr2_sgpr3' }
# SIMPLE-NEXT: dispatchPtr: { reg: '$sgpr4_sgpr5' }
# SIMPLE-NEXT: queuePtr: { reg: '$sgpr6_sgpr7' }
# SIMPLE-NEXT: dispatchID: { reg: '$sgpr10_sgpr11' }
# SIMPLE-NEXT: flatScratchInit: { offset: 4 }
# SIMPLE-NEXT: workGroupIDX: { reg: '$sgpr12' }
# SIMPLE-NEXT: workGroupIDY: { reg: '$sgpr13' }
# SIMPLE-NEXT: workGroupIDZ: { reg: '$sgpr14' }
# SIMPLE-NEXT: implicitArgPtr: { reg: '$sgpr8_sgpr9' }
# SIMPLE-NEXT: workItemIDX: { reg: '$vgpr31', mask: 1023 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
# SIMPLE-NEXT: workItemIDZ: { reg: '$vgpr31', mask: 1072693248 }
# SIMPLE-NEXT: workItemIDY: { reg: '$vgpr0', mask: 65280 }
name: fake_stack_arginfo
machineFunctionInfo:
argumentInfo:
Expand Down