440 changes: 220 additions & 220 deletions llvm/test/CodeGen/AMDGPU/amdgpu-simplify-libcall-pow-codegen.ll

Large diffs are not rendered by default.

898 changes: 449 additions & 449 deletions llvm/test/CodeGen/AMDGPU/bf16.ll

Large diffs are not rendered by default.

12 changes: 6 additions & 6 deletions llvm/test/CodeGen/AMDGPU/callee-frame-setup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -278,11 +278,11 @@ define void @callee_with_stack_no_fp_elim_csr_vgpr() #1 {
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v0
; GCN: v_writelane_b32 v1
; MUBUF: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:4
; FLATSCR: scratch_store_dword off, v{{[0-9]+}}, s33 offset:4
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v0
; GCN: v_writelane_b32 v1

; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
Expand Down Expand Up @@ -320,19 +320,19 @@ define void @last_lane_vgpr_for_fp_csr() #1 {
; MUBUF-NEXT: buffer_store_dword [[CSR_VGPR:v[0-9]+]], off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; FLATSCR-NEXT: scratch_store_dword off, [[CSR_VGPR:v[0-9]+]], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, [[COPY_EXEC0]]
; GCN-COUNT-61: v_writelane_b32 v0,
; GCN-COUNT-61: v_writelane_b32 v1,
; MUBUF: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; FLATSCR: scratch_store_dword off, v41, s33 ; 4-byte Folded Spill
; GCN: v_writelane_b32 v0,
; GCN: v_writelane_b32 v1,
; MUBUF: buffer_store_dword
; FLATSCR: scratch_store_dword
; GCN: ;;#ASMSTART
; GCN: v_writelane_b32 v0,
; GCN: v_writelane_b32 v1,
; MUBUF: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; FLATSCR: scratch_load_dword v41, off, s33 ; 4-byte Folded Reload
; MUBUF: s_addk_i32 s32, 0x400
; FLATSCR: s_add_i32 s32, s32, 16
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v0
; GCN-COUNT-64: v_readlane_b32 s{{[0-9]+}}, v1
; GCN-NEXT: s_xor_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; MUBUF-NEXT: buffer_load_dword [[CSR_VGPR]], off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; FLATSCR-NEXT: scratch_load_dword [[CSR_VGPR]], off, s33 offset:8 ; 4-byte Folded Reload
Expand Down
80 changes: 40 additions & 40 deletions llvm/test/CodeGen/AMDGPU/dwarf-multi-register-use-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -19,37 +19,37 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_mov_b32 s16, s33
; CHECK-NEXT: s_mov_b32 s33, s32
; CHECK-NEXT: s_or_saveexec_b64 s[18:19], -1
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; CHECK-NEXT: s_mov_b64 exec, s[18:19]
; CHECK-NEXT: v_writelane_b32 v40, s16, 16
; CHECK-NEXT: v_writelane_b32 v40, s30, 0
; CHECK-NEXT: v_writelane_b32 v40, s31, 1
; CHECK-NEXT: v_writelane_b32 v40, s34, 2
; CHECK-NEXT: v_writelane_b32 v40, s35, 3
; CHECK-NEXT: v_writelane_b32 v40, s36, 4
; CHECK-NEXT: v_writelane_b32 v40, s37, 5
; CHECK-NEXT: v_writelane_b32 v40, s38, 6
; CHECK-NEXT: v_writelane_b32 v40, s39, 7
; CHECK-NEXT: v_writelane_b32 v40, s40, 8
; CHECK-NEXT: v_writelane_b32 v40, s41, 9
; CHECK-NEXT: v_writelane_b32 v40, s42, 10
; CHECK-NEXT: v_writelane_b32 v40, s43, 11
; CHECK-NEXT: v_writelane_b32 v40, s44, 12
; CHECK-NEXT: v_writelane_b32 v41, s16, 16
; CHECK-NEXT: v_writelane_b32 v41, s30, 0
; CHECK-NEXT: v_writelane_b32 v41, s31, 1
; CHECK-NEXT: v_writelane_b32 v41, s34, 2
; CHECK-NEXT: v_writelane_b32 v41, s35, 3
; CHECK-NEXT: v_writelane_b32 v41, s36, 4
; CHECK-NEXT: v_writelane_b32 v41, s37, 5
; CHECK-NEXT: v_writelane_b32 v41, s38, 6
; CHECK-NEXT: v_writelane_b32 v41, s39, 7
; CHECK-NEXT: v_writelane_b32 v41, s40, 8
; CHECK-NEXT: v_writelane_b32 v41, s41, 9
; CHECK-NEXT: v_writelane_b32 v41, s42, 10
; CHECK-NEXT: v_writelane_b32 v41, s43, 11
; CHECK-NEXT: v_writelane_b32 v41, s44, 12
; CHECK-NEXT: s_addk_i32 s32, 0x400
; CHECK-NEXT: v_writelane_b32 v40, s45, 13
; CHECK-NEXT: v_writelane_b32 v40, s46, 14
; CHECK-NEXT: v_writelane_b32 v41, s45, 13
; CHECK-NEXT: v_writelane_b32 v41, s46, 14
; CHECK-NEXT: s_mov_b64 s[40:41], s[4:5]
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- undef
; CHECK-NEXT: .Ltmp0:
; CHECK-NEXT: .loc 1 49 9 prologue_end ; dummy:49:9
; CHECK-NEXT: s_getpc_b64 s[4:5]
; CHECK-NEXT: s_add_u32 s4, s4, __kmpc_alloc_shared@gotpcrel32@lo+4
; CHECK-NEXT: s_addc_u32 s5, s5, __kmpc_alloc_shared@gotpcrel32@hi+12
; CHECK-NEXT: v_writelane_b32 v40, s47, 15
; CHECK-NEXT: v_writelane_b32 v41, s47, 15
; CHECK-NEXT: s_load_dwordx2 s[46:47], s[4:5], 0x0
; CHECK-NEXT: s_mov_b64 s[4:5], s[40:41]
; CHECK-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v41, v31
; CHECK-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; CHECK-NEXT: v_mov_b32_e32 v40, v31
; CHECK-NEXT: s_mov_b64 s[34:35], s[6:7]
; CHECK-NEXT: s_mov_b32 s42, s15
; CHECK-NEXT: s_mov_b32 s43, s14
Expand All @@ -67,33 +67,33 @@ define weak_odr void @test(i32 %0) !dbg !34 {
; CHECK-NEXT: s_mov_b32 s13, s44
; CHECK-NEXT: s_mov_b32 s14, s43
; CHECK-NEXT: s_mov_b32 s15, s42
; CHECK-NEXT: v_mov_b32_e32 v31, v41
; CHECK-NEXT: v_mov_b32_e32 v31, v40
; CHECK-NEXT: s_swappc_b64 s[30:31], s[46:47]
; CHECK-NEXT: .Ltmp1:
; CHECK-NEXT: ;DEBUG_VALUE: dummy:dummy <- [$vgpr0_vgpr1+0]
; CHECK-NEXT: .loc 1 0 9 is_stmt 0 ; dummy:0:9
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; CHECK-NEXT: v_mov_b32_e32 v2, 0
; CHECK-NEXT: flat_store_dword v[0:1], v2
; CHECK-NEXT: v_readlane_b32 s47, v40, 15
; CHECK-NEXT: v_readlane_b32 s46, v40, 14
; CHECK-NEXT: v_readlane_b32 s45, v40, 13
; CHECK-NEXT: v_readlane_b32 s44, v40, 12
; CHECK-NEXT: v_readlane_b32 s43, v40, 11
; CHECK-NEXT: v_readlane_b32 s42, v40, 10
; CHECK-NEXT: v_readlane_b32 s41, v40, 9
; CHECK-NEXT: v_readlane_b32 s40, v40, 8
; CHECK-NEXT: v_readlane_b32 s39, v40, 7
; CHECK-NEXT: v_readlane_b32 s38, v40, 6
; CHECK-NEXT: v_readlane_b32 s37, v40, 5
; CHECK-NEXT: v_readlane_b32 s36, v40, 4
; CHECK-NEXT: v_readlane_b32 s35, v40, 3
; CHECK-NEXT: v_readlane_b32 s34, v40, 2
; CHECK-NEXT: v_readlane_b32 s31, v40, 1
; CHECK-NEXT: v_readlane_b32 s30, v40, 0
; CHECK-NEXT: v_readlane_b32 s4, v40, 16
; CHECK-NEXT: v_readlane_b32 s47, v41, 15
; CHECK-NEXT: v_readlane_b32 s46, v41, 14
; CHECK-NEXT: v_readlane_b32 s45, v41, 13
; CHECK-NEXT: v_readlane_b32 s44, v41, 12
; CHECK-NEXT: v_readlane_b32 s43, v41, 11
; CHECK-NEXT: v_readlane_b32 s42, v41, 10
; CHECK-NEXT: v_readlane_b32 s41, v41, 9
; CHECK-NEXT: v_readlane_b32 s40, v41, 8
; CHECK-NEXT: v_readlane_b32 s39, v41, 7
; CHECK-NEXT: v_readlane_b32 s38, v41, 6
; CHECK-NEXT: v_readlane_b32 s37, v41, 5
; CHECK-NEXT: v_readlane_b32 s36, v41, 4
; CHECK-NEXT: v_readlane_b32 s35, v41, 3
; CHECK-NEXT: v_readlane_b32 s34, v41, 2
; CHECK-NEXT: v_readlane_b32 s31, v41, 1
; CHECK-NEXT: v_readlane_b32 s30, v41, 0
; CHECK-NEXT: v_readlane_b32 s4, v41, 16
; CHECK-NEXT: s_or_saveexec_b64 s[6:7], -1
; CHECK-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
; CHECK-NEXT: s_addk_i32 s32, 0xfc00
; CHECK-NEXT: s_mov_b32 s33, s4
Expand Down
1,026 changes: 513 additions & 513 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-argument-types.ll

Large diffs are not rendered by default.

156 changes: 78 additions & 78 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-preserved-registers.ll
Original file line number Diff line number Diff line change
Expand Up @@ -322,30 +322,30 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1)
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v41, s34, 2
; GFX9-NEXT: v_writelane_b32 v41, s30, 0
; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v41, s31, 1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_mov_b32_e32 v41, v31
; GFX9-NEXT: v_mov_b32_e32 v40, v31
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: v_mov_b32_e32 v31, v41
; GFX9-NEXT: v_mov_b32_e32 v31, v40
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v31
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v41, 1
; GFX9-NEXT: v_readlane_b32 s30, v41, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 2
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
Expand All @@ -358,31 +358,31 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1)
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 2
; GFX10-NEXT: v_writelane_b32 v41, s34, 2
; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: v_writelane_b32 v40, s30, 0
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: v_writelane_b32 v41, s30, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_mov_b32_e32 v41, v31
; GFX10-NEXT: v_writelane_b32 v40, s31, 1
; GFX10-NEXT: v_mov_b32_e32 v40, v31
; GFX10-NEXT: v_writelane_b32 v41, s31, 1
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: v_mov_b32_e32 v31, v41
; GFX10-NEXT: v_mov_b32_e32 v31, v40
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v31
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v40, 1
; GFX10-NEXT: v_readlane_b32 s30, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 2
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v41, 1
; GFX10-NEXT: v_readlane_b32 s30, v41, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 2
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
Expand All @@ -396,31 +396,31 @@ define amdgpu_gfx void @test_call_void_func_void_mayclobber_v31(ptr addrspace(1)
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 2
; GFX11-NEXT: v_writelane_b32 v41, s0, 2
; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT: v_writelane_b32 v40, s30, 0
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: v_writelane_b32 v41, s30, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def v31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_mov_b32_e32 v41, v31
; GFX11-NEXT: v_writelane_b32 v40, s31, 1
; GFX11-NEXT: v_mov_b32_e32 v40, v31
; GFX11-NEXT: v_writelane_b32 v41, s31, 1
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v31, v41
; GFX11-NEXT: v_mov_b32_e32 v31, v40
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v31
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v40, 1
; GFX11-NEXT: v_readlane_b32 s30, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 2
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v41, 1
; GFX11-NEXT: v_readlane_b32 s30, v41, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 2
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
Expand Down Expand Up @@ -1184,38 +1184,38 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX9-NEXT: s_mov_b32 s34, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: v_writelane_b32 v40, s34, 3
; GFX9-NEXT: v_writelane_b32 v40, s4, 0
; GFX9-NEXT: v_writelane_b32 v40, s30, 1
; GFX9-NEXT: v_writelane_b32 v41, s34, 3
; GFX9-NEXT: v_writelane_b32 v41, s4, 0
; GFX9-NEXT: v_writelane_b32 v41, s30, 1
; GFX9-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX9-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX9-NEXT: s_addk_i32 s32, 0x400
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v40, s31, 2
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_writelane_b32 v41, s31, 2
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def s40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_mov_b32 s4, s40
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; def v32
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: v_mov_b32_e32 v41, v32
; GFX9-NEXT: v_mov_b32_e32 v40, v32
; GFX9-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s4
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use v41
; GFX9-NEXT: ; use v40
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v40, 2
; GFX9-NEXT: v_readlane_b32 s30, v40, 1
; GFX9-NEXT: v_readlane_b32 s4, v40, 0
; GFX9-NEXT: v_readlane_b32 s34, v40, 3
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s31, v41, 2
; GFX9-NEXT: v_readlane_b32 s30, v41, 1
; GFX9-NEXT: v_readlane_b32 s4, v41, 0
; GFX9-NEXT: v_readlane_b32 s34, v41, 3
; GFX9-NEXT: s_or_saveexec_b64 s[36:37], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[36:37]
; GFX9-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-NEXT: s_mov_b32 s33, s34
Expand All @@ -1228,39 +1228,39 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX10-NEXT: s_mov_b32 s34, s33
; GFX10-NEXT: s_mov_b32 s33, s32
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: v_writelane_b32 v40, s34, 3
; GFX10-NEXT: v_writelane_b32 v41, s34, 3
; GFX10-NEXT: s_mov_b32 s35, external_void_func_void@abs32@hi
; GFX10-NEXT: s_mov_b32 s34, external_void_func_void@abs32@lo
; GFX10-NEXT: s_addk_i32 s32, 0x200
; GFX10-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: v_writelane_b32 v40, s4, 0
; GFX10-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX10-NEXT: v_writelane_b32 v41, s4, 0
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def s40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: s_mov_b32 s4, s40
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; def v32
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: v_mov_b32_e32 v41, v32
; GFX10-NEXT: v_writelane_b32 v40, s30, 1
; GFX10-NEXT: v_writelane_b32 v40, s31, 2
; GFX10-NEXT: v_mov_b32_e32 v40, v32
; GFX10-NEXT: v_writelane_b32 v41, s30, 1
; GFX10-NEXT: v_writelane_b32 v41, s31, 2
; GFX10-NEXT: s_swappc_b64 s[30:31], s[34:35]
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use s4
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: ;;#ASMSTART
; GFX10-NEXT: ; use v41
; GFX10-NEXT: ; use v40
; GFX10-NEXT: ;;#ASMEND
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v40, 2
; GFX10-NEXT: v_readlane_b32 s30, v40, 1
; GFX10-NEXT: v_readlane_b32 s4, v40, 0
; GFX10-NEXT: v_readlane_b32 s34, v40, 3
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX10-NEXT: v_readlane_b32 s31, v41, 2
; GFX10-NEXT: v_readlane_b32 s30, v41, 1
; GFX10-NEXT: v_readlane_b32 s4, v41, 0
; GFX10-NEXT: v_readlane_b32 s34, v41, 3
; GFX10-NEXT: s_or_saveexec_b32 s35, -1
; GFX10-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX10-NEXT: s_waitcnt_depctr 0xffe3
; GFX10-NEXT: s_mov_b32 exec_lo, s35
; GFX10-NEXT: s_addk_i32 s32, 0xfe00
Expand All @@ -1274,38 +1274,38 @@ define amdgpu_gfx void @callee_saved_sgpr_vgpr_kernel() #1 {
; GFX11-NEXT: s_mov_b32 s0, s33
; GFX11-NEXT: s_mov_b32 s33, s32
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_store_b32 off, v40, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT: scratch_store_b32 off, v41, s33 offset:4 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: v_writelane_b32 v40, s0, 3
; GFX11-NEXT: v_writelane_b32 v41, s0, 3
; GFX11-NEXT: s_mov_b32 s1, external_void_func_void@abs32@hi
; GFX11-NEXT: s_mov_b32 s0, external_void_func_void@abs32@lo
; GFX11-NEXT: s_add_i32 s32, s32, 16
; GFX11-NEXT: scratch_store_b32 off, v41, s33 ; 4-byte Folded Spill
; GFX11-NEXT: v_writelane_b32 v40, s4, 0
; GFX11-NEXT: scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
; GFX11-NEXT: v_writelane_b32 v41, s4, 0
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def s40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_mov_b32 s4, s40
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; def v32
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_mov_b32_e32 v41, v32
; GFX11-NEXT: v_writelane_b32 v40, s30, 1
; GFX11-NEXT: v_writelane_b32 v40, s31, 2
; GFX11-NEXT: v_mov_b32_e32 v40, v32
; GFX11-NEXT: v_writelane_b32 v41, s30, 1
; GFX11-NEXT: v_writelane_b32 v41, s31, 2
; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use s4
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ; use v41
; GFX11-NEXT: ; use v40
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: scratch_load_b32 v41, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v40, 2
; GFX11-NEXT: v_readlane_b32 s30, v40, 1
; GFX11-NEXT: v_readlane_b32 s4, v40, 0
; GFX11-NEXT: v_readlane_b32 s0, v40, 3
; GFX11-NEXT: scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
; GFX11-NEXT: v_readlane_b32 s31, v41, 2
; GFX11-NEXT: v_readlane_b32 s30, v41, 1
; GFX11-NEXT: v_readlane_b32 s4, v41, 0
; GFX11-NEXT: v_readlane_b32 s0, v41, 3
; GFX11-NEXT: s_or_saveexec_b32 s1, -1
; GFX11-NEXT: scratch_load_b32 v40, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT: scratch_load_b32 v41, off, s33 offset:4 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s1
; GFX11-NEXT: s_add_i32 s32, s32, -16
; GFX11-NEXT: s_mov_b32 s33, s0
Expand Down
474 changes: 230 additions & 244 deletions llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll

Large diffs are not rendered by default.

806 changes: 398 additions & 408 deletions llvm/test/CodeGen/AMDGPU/global_atomics_i32_system.ll

Large diffs are not rendered by default.

1,052 changes: 516 additions & 536 deletions llvm/test/CodeGen/AMDGPU/global_atomics_i64_system.ll

Large diffs are not rendered by default.

556 changes: 278 additions & 278 deletions llvm/test/CodeGen/AMDGPU/identical-subrange-spill-infloop.ll

Large diffs are not rendered by default.

284 changes: 142 additions & 142 deletions llvm/test/CodeGen/AMDGPU/indirect-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1042,92 +1042,92 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GCN-NEXT: s_mov_b32 s10, s33
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v40, s30, 0
; GCN-NEXT: v_writelane_b32 v40, s31, 1
; GCN-NEXT: v_writelane_b32 v40, s34, 2
; GCN-NEXT: v_writelane_b32 v40, s35, 3
; GCN-NEXT: v_writelane_b32 v40, s36, 4
; GCN-NEXT: v_writelane_b32 v40, s37, 5
; GCN-NEXT: v_writelane_b32 v40, s38, 6
; GCN-NEXT: v_writelane_b32 v40, s39, 7
; GCN-NEXT: v_writelane_b32 v40, s40, 8
; GCN-NEXT: v_writelane_b32 v40, s41, 9
; GCN-NEXT: v_writelane_b32 v40, s42, 10
; GCN-NEXT: v_writelane_b32 v40, s43, 11
; GCN-NEXT: v_writelane_b32 v40, s44, 12
; GCN-NEXT: v_writelane_b32 v40, s45, 13
; GCN-NEXT: v_writelane_b32 v40, s46, 14
; GCN-NEXT: v_writelane_b32 v40, s47, 15
; GCN-NEXT: v_writelane_b32 v40, s48, 16
; GCN-NEXT: v_writelane_b32 v40, s49, 17
; GCN-NEXT: v_writelane_b32 v40, s50, 18
; GCN-NEXT: v_writelane_b32 v40, s51, 19
; GCN-NEXT: v_writelane_b32 v40, s52, 20
; GCN-NEXT: v_writelane_b32 v40, s53, 21
; GCN-NEXT: v_writelane_b32 v40, s54, 22
; GCN-NEXT: v_writelane_b32 v40, s55, 23
; GCN-NEXT: v_writelane_b32 v40, s56, 24
; GCN-NEXT: v_writelane_b32 v40, s57, 25
; GCN-NEXT: v_writelane_b32 v40, s58, 26
; GCN-NEXT: v_writelane_b32 v40, s59, 27
; GCN-NEXT: v_writelane_b32 v40, s60, 28
; GCN-NEXT: v_writelane_b32 v40, s61, 29
; GCN-NEXT: v_writelane_b32 v40, s62, 30
; GCN-NEXT: v_writelane_b32 v40, s63, 31
; GCN-NEXT: v_mov_b32_e32 v41, v0
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v41, s30, 0
; GCN-NEXT: v_writelane_b32 v41, s31, 1
; GCN-NEXT: v_writelane_b32 v41, s34, 2
; GCN-NEXT: v_writelane_b32 v41, s35, 3
; GCN-NEXT: v_writelane_b32 v41, s36, 4
; GCN-NEXT: v_writelane_b32 v41, s37, 5
; GCN-NEXT: v_writelane_b32 v41, s38, 6
; GCN-NEXT: v_writelane_b32 v41, s39, 7
; GCN-NEXT: v_writelane_b32 v41, s40, 8
; GCN-NEXT: v_writelane_b32 v41, s41, 9
; GCN-NEXT: v_writelane_b32 v41, s42, 10
; GCN-NEXT: v_writelane_b32 v41, s43, 11
; GCN-NEXT: v_writelane_b32 v41, s44, 12
; GCN-NEXT: v_writelane_b32 v41, s45, 13
; GCN-NEXT: v_writelane_b32 v41, s46, 14
; GCN-NEXT: v_writelane_b32 v41, s47, 15
; GCN-NEXT: v_writelane_b32 v41, s48, 16
; GCN-NEXT: v_writelane_b32 v41, s49, 17
; GCN-NEXT: v_writelane_b32 v41, s50, 18
; GCN-NEXT: v_writelane_b32 v41, s51, 19
; GCN-NEXT: v_writelane_b32 v41, s52, 20
; GCN-NEXT: v_writelane_b32 v41, s53, 21
; GCN-NEXT: v_writelane_b32 v41, s54, 22
; GCN-NEXT: v_writelane_b32 v41, s55, 23
; GCN-NEXT: v_writelane_b32 v41, s56, 24
; GCN-NEXT: v_writelane_b32 v41, s57, 25
; GCN-NEXT: v_writelane_b32 v41, s58, 26
; GCN-NEXT: v_writelane_b32 v41, s59, 27
; GCN-NEXT: v_writelane_b32 v41, s60, 28
; GCN-NEXT: v_writelane_b32 v41, s61, 29
; GCN-NEXT: v_writelane_b32 v41, s62, 30
; GCN-NEXT: v_writelane_b32 v41, s63, 31
; GCN-NEXT: v_mov_b32_e32 v40, v0
; GCN-NEXT: s_mov_b64 s[4:5], exec
; GCN-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GCN-NEXT: v_readfirstlane_b32 s6, v1
; GCN-NEXT: v_readfirstlane_b32 s7, v2
; GCN-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
; GCN-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GCN-NEXT: v_mov_b32_e32 v0, v41
; GCN-NEXT: v_mov_b32_e32 v0, v40
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ; implicit-def: $vgpr1_vgpr2
; GCN-NEXT: s_xor_b64 exec, exec, s[8:9]
; GCN-NEXT: s_cbranch_execnz .LBB7_1
; GCN-NEXT: ; %bb.2:
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, v41
; GCN-NEXT: v_readlane_b32 s63, v40, 31
; GCN-NEXT: v_readlane_b32 s62, v40, 30
; GCN-NEXT: v_readlane_b32 s61, v40, 29
; GCN-NEXT: v_readlane_b32 s60, v40, 28
; GCN-NEXT: v_readlane_b32 s59, v40, 27
; GCN-NEXT: v_readlane_b32 s58, v40, 26
; GCN-NEXT: v_readlane_b32 s57, v40, 25
; GCN-NEXT: v_readlane_b32 s56, v40, 24
; GCN-NEXT: v_readlane_b32 s55, v40, 23
; GCN-NEXT: v_readlane_b32 s54, v40, 22
; GCN-NEXT: v_readlane_b32 s53, v40, 21
; GCN-NEXT: v_readlane_b32 s52, v40, 20
; GCN-NEXT: v_readlane_b32 s51, v40, 19
; GCN-NEXT: v_readlane_b32 s50, v40, 18
; GCN-NEXT: v_readlane_b32 s49, v40, 17
; GCN-NEXT: v_readlane_b32 s48, v40, 16
; GCN-NEXT: v_readlane_b32 s47, v40, 15
; GCN-NEXT: v_readlane_b32 s46, v40, 14
; GCN-NEXT: v_readlane_b32 s45, v40, 13
; GCN-NEXT: v_readlane_b32 s44, v40, 12
; GCN-NEXT: v_readlane_b32 s43, v40, 11
; GCN-NEXT: v_readlane_b32 s42, v40, 10
; GCN-NEXT: v_readlane_b32 s41, v40, 9
; GCN-NEXT: v_readlane_b32 s40, v40, 8
; GCN-NEXT: v_readlane_b32 s39, v40, 7
; GCN-NEXT: v_readlane_b32 s38, v40, 6
; GCN-NEXT: v_readlane_b32 s37, v40, 5
; GCN-NEXT: v_readlane_b32 s36, v40, 4
; GCN-NEXT: v_readlane_b32 s35, v40, 3
; GCN-NEXT: v_readlane_b32 s34, v40, 2
; GCN-NEXT: v_readlane_b32 s31, v40, 1
; GCN-NEXT: v_readlane_b32 s30, v40, 0
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: v_mov_b32_e32 v0, v40
; GCN-NEXT: v_readlane_b32 s63, v41, 31
; GCN-NEXT: v_readlane_b32 s62, v41, 30
; GCN-NEXT: v_readlane_b32 s61, v41, 29
; GCN-NEXT: v_readlane_b32 s60, v41, 28
; GCN-NEXT: v_readlane_b32 s59, v41, 27
; GCN-NEXT: v_readlane_b32 s58, v41, 26
; GCN-NEXT: v_readlane_b32 s57, v41, 25
; GCN-NEXT: v_readlane_b32 s56, v41, 24
; GCN-NEXT: v_readlane_b32 s55, v41, 23
; GCN-NEXT: v_readlane_b32 s54, v41, 22
; GCN-NEXT: v_readlane_b32 s53, v41, 21
; GCN-NEXT: v_readlane_b32 s52, v41, 20
; GCN-NEXT: v_readlane_b32 s51, v41, 19
; GCN-NEXT: v_readlane_b32 s50, v41, 18
; GCN-NEXT: v_readlane_b32 s49, v41, 17
; GCN-NEXT: v_readlane_b32 s48, v41, 16
; GCN-NEXT: v_readlane_b32 s47, v41, 15
; GCN-NEXT: v_readlane_b32 s46, v41, 14
; GCN-NEXT: v_readlane_b32 s45, v41, 13
; GCN-NEXT: v_readlane_b32 s44, v41, 12
; GCN-NEXT: v_readlane_b32 s43, v41, 11
; GCN-NEXT: v_readlane_b32 s42, v41, 10
; GCN-NEXT: v_readlane_b32 s41, v41, 9
; GCN-NEXT: v_readlane_b32 s40, v41, 8
; GCN-NEXT: v_readlane_b32 s39, v41, 7
; GCN-NEXT: v_readlane_b32 s38, v41, 6
; GCN-NEXT: v_readlane_b32 s37, v41, 5
; GCN-NEXT: v_readlane_b32 s36, v41, 4
; GCN-NEXT: v_readlane_b32 s35, v41, 3
; GCN-NEXT: v_readlane_b32 s34, v41, 2
; GCN-NEXT: v_readlane_b32 s31, v41, 1
; GCN-NEXT: v_readlane_b32 s30, v41, 0
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-NEXT: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: s_mov_b32 s33, s10
Expand All @@ -1140,92 +1140,92 @@ define i32 @test_indirect_call_vgpr_ptr_arg_and_reuse(i32 %i, ptr %fptr) {
; GISEL-NEXT: s_mov_b32 s10, s33
; GISEL-NEXT: s_mov_b32 s33, s32
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_addk_i32 s32, 0x400
; GISEL-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: v_writelane_b32 v40, s30, 0
; GISEL-NEXT: v_writelane_b32 v40, s31, 1
; GISEL-NEXT: v_writelane_b32 v40, s34, 2
; GISEL-NEXT: v_writelane_b32 v40, s35, 3
; GISEL-NEXT: v_writelane_b32 v40, s36, 4
; GISEL-NEXT: v_writelane_b32 v40, s37, 5
; GISEL-NEXT: v_writelane_b32 v40, s38, 6
; GISEL-NEXT: v_writelane_b32 v40, s39, 7
; GISEL-NEXT: v_writelane_b32 v40, s40, 8
; GISEL-NEXT: v_writelane_b32 v40, s41, 9
; GISEL-NEXT: v_writelane_b32 v40, s42, 10
; GISEL-NEXT: v_writelane_b32 v40, s43, 11
; GISEL-NEXT: v_writelane_b32 v40, s44, 12
; GISEL-NEXT: v_writelane_b32 v40, s45, 13
; GISEL-NEXT: v_writelane_b32 v40, s46, 14
; GISEL-NEXT: v_writelane_b32 v40, s47, 15
; GISEL-NEXT: v_writelane_b32 v40, s48, 16
; GISEL-NEXT: v_writelane_b32 v40, s49, 17
; GISEL-NEXT: v_writelane_b32 v40, s50, 18
; GISEL-NEXT: v_writelane_b32 v40, s51, 19
; GISEL-NEXT: v_writelane_b32 v40, s52, 20
; GISEL-NEXT: v_writelane_b32 v40, s53, 21
; GISEL-NEXT: v_writelane_b32 v40, s54, 22
; GISEL-NEXT: v_writelane_b32 v40, s55, 23
; GISEL-NEXT: v_writelane_b32 v40, s56, 24
; GISEL-NEXT: v_writelane_b32 v40, s57, 25
; GISEL-NEXT: v_writelane_b32 v40, s58, 26
; GISEL-NEXT: v_writelane_b32 v40, s59, 27
; GISEL-NEXT: v_writelane_b32 v40, s60, 28
; GISEL-NEXT: v_writelane_b32 v40, s61, 29
; GISEL-NEXT: v_writelane_b32 v40, s62, 30
; GISEL-NEXT: v_writelane_b32 v40, s63, 31
; GISEL-NEXT: v_mov_b32_e32 v41, v0
; GISEL-NEXT: buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GISEL-NEXT: v_writelane_b32 v41, s30, 0
; GISEL-NEXT: v_writelane_b32 v41, s31, 1
; GISEL-NEXT: v_writelane_b32 v41, s34, 2
; GISEL-NEXT: v_writelane_b32 v41, s35, 3
; GISEL-NEXT: v_writelane_b32 v41, s36, 4
; GISEL-NEXT: v_writelane_b32 v41, s37, 5
; GISEL-NEXT: v_writelane_b32 v41, s38, 6
; GISEL-NEXT: v_writelane_b32 v41, s39, 7
; GISEL-NEXT: v_writelane_b32 v41, s40, 8
; GISEL-NEXT: v_writelane_b32 v41, s41, 9
; GISEL-NEXT: v_writelane_b32 v41, s42, 10
; GISEL-NEXT: v_writelane_b32 v41, s43, 11
; GISEL-NEXT: v_writelane_b32 v41, s44, 12
; GISEL-NEXT: v_writelane_b32 v41, s45, 13
; GISEL-NEXT: v_writelane_b32 v41, s46, 14
; GISEL-NEXT: v_writelane_b32 v41, s47, 15
; GISEL-NEXT: v_writelane_b32 v41, s48, 16
; GISEL-NEXT: v_writelane_b32 v41, s49, 17
; GISEL-NEXT: v_writelane_b32 v41, s50, 18
; GISEL-NEXT: v_writelane_b32 v41, s51, 19
; GISEL-NEXT: v_writelane_b32 v41, s52, 20
; GISEL-NEXT: v_writelane_b32 v41, s53, 21
; GISEL-NEXT: v_writelane_b32 v41, s54, 22
; GISEL-NEXT: v_writelane_b32 v41, s55, 23
; GISEL-NEXT: v_writelane_b32 v41, s56, 24
; GISEL-NEXT: v_writelane_b32 v41, s57, 25
; GISEL-NEXT: v_writelane_b32 v41, s58, 26
; GISEL-NEXT: v_writelane_b32 v41, s59, 27
; GISEL-NEXT: v_writelane_b32 v41, s60, 28
; GISEL-NEXT: v_writelane_b32 v41, s61, 29
; GISEL-NEXT: v_writelane_b32 v41, s62, 30
; GISEL-NEXT: v_writelane_b32 v41, s63, 31
; GISEL-NEXT: v_mov_b32_e32 v40, v0
; GISEL-NEXT: s_mov_b64 s[4:5], exec
; GISEL-NEXT: .LBB7_1: ; =>This Inner Loop Header: Depth=1
; GISEL-NEXT: v_readfirstlane_b32 s6, v1
; GISEL-NEXT: v_readfirstlane_b32 s7, v2
; GISEL-NEXT: v_cmp_eq_u64_e32 vcc, s[6:7], v[1:2]
; GISEL-NEXT: s_and_saveexec_b64 s[8:9], vcc
; GISEL-NEXT: v_mov_b32_e32 v0, v41
; GISEL-NEXT: v_mov_b32_e32 v0, v40
; GISEL-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GISEL-NEXT: ; implicit-def: $vgpr1
; GISEL-NEXT: s_xor_b64 exec, exec, s[8:9]
; GISEL-NEXT: s_cbranch_execnz .LBB7_1
; GISEL-NEXT: ; %bb.2:
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: v_mov_b32_e32 v0, v41
; GISEL-NEXT: v_readlane_b32 s63, v40, 31
; GISEL-NEXT: v_readlane_b32 s62, v40, 30
; GISEL-NEXT: v_readlane_b32 s61, v40, 29
; GISEL-NEXT: v_readlane_b32 s60, v40, 28
; GISEL-NEXT: v_readlane_b32 s59, v40, 27
; GISEL-NEXT: v_readlane_b32 s58, v40, 26
; GISEL-NEXT: v_readlane_b32 s57, v40, 25
; GISEL-NEXT: v_readlane_b32 s56, v40, 24
; GISEL-NEXT: v_readlane_b32 s55, v40, 23
; GISEL-NEXT: v_readlane_b32 s54, v40, 22
; GISEL-NEXT: v_readlane_b32 s53, v40, 21
; GISEL-NEXT: v_readlane_b32 s52, v40, 20
; GISEL-NEXT: v_readlane_b32 s51, v40, 19
; GISEL-NEXT: v_readlane_b32 s50, v40, 18
; GISEL-NEXT: v_readlane_b32 s49, v40, 17
; GISEL-NEXT: v_readlane_b32 s48, v40, 16
; GISEL-NEXT: v_readlane_b32 s47, v40, 15
; GISEL-NEXT: v_readlane_b32 s46, v40, 14
; GISEL-NEXT: v_readlane_b32 s45, v40, 13
; GISEL-NEXT: v_readlane_b32 s44, v40, 12
; GISEL-NEXT: v_readlane_b32 s43, v40, 11
; GISEL-NEXT: v_readlane_b32 s42, v40, 10
; GISEL-NEXT: v_readlane_b32 s41, v40, 9
; GISEL-NEXT: v_readlane_b32 s40, v40, 8
; GISEL-NEXT: v_readlane_b32 s39, v40, 7
; GISEL-NEXT: v_readlane_b32 s38, v40, 6
; GISEL-NEXT: v_readlane_b32 s37, v40, 5
; GISEL-NEXT: v_readlane_b32 s36, v40, 4
; GISEL-NEXT: v_readlane_b32 s35, v40, 3
; GISEL-NEXT: v_readlane_b32 s34, v40, 2
; GISEL-NEXT: v_readlane_b32 s31, v40, 1
; GISEL-NEXT: v_readlane_b32 s30, v40, 0
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: v_mov_b32_e32 v0, v40
; GISEL-NEXT: v_readlane_b32 s63, v41, 31
; GISEL-NEXT: v_readlane_b32 s62, v41, 30
; GISEL-NEXT: v_readlane_b32 s61, v41, 29
; GISEL-NEXT: v_readlane_b32 s60, v41, 28
; GISEL-NEXT: v_readlane_b32 s59, v41, 27
; GISEL-NEXT: v_readlane_b32 s58, v41, 26
; GISEL-NEXT: v_readlane_b32 s57, v41, 25
; GISEL-NEXT: v_readlane_b32 s56, v41, 24
; GISEL-NEXT: v_readlane_b32 s55, v41, 23
; GISEL-NEXT: v_readlane_b32 s54, v41, 22
; GISEL-NEXT: v_readlane_b32 s53, v41, 21
; GISEL-NEXT: v_readlane_b32 s52, v41, 20
; GISEL-NEXT: v_readlane_b32 s51, v41, 19
; GISEL-NEXT: v_readlane_b32 s50, v41, 18
; GISEL-NEXT: v_readlane_b32 s49, v41, 17
; GISEL-NEXT: v_readlane_b32 s48, v41, 16
; GISEL-NEXT: v_readlane_b32 s47, v41, 15
; GISEL-NEXT: v_readlane_b32 s46, v41, 14
; GISEL-NEXT: v_readlane_b32 s45, v41, 13
; GISEL-NEXT: v_readlane_b32 s44, v41, 12
; GISEL-NEXT: v_readlane_b32 s43, v41, 11
; GISEL-NEXT: v_readlane_b32 s42, v41, 10
; GISEL-NEXT: v_readlane_b32 s41, v41, 9
; GISEL-NEXT: v_readlane_b32 s40, v41, 8
; GISEL-NEXT: v_readlane_b32 s39, v41, 7
; GISEL-NEXT: v_readlane_b32 s38, v41, 6
; GISEL-NEXT: v_readlane_b32 s37, v41, 5
; GISEL-NEXT: v_readlane_b32 s36, v41, 4
; GISEL-NEXT: v_readlane_b32 s35, v41, 3
; GISEL-NEXT: v_readlane_b32 s34, v41, 2
; GISEL-NEXT: v_readlane_b32 s31, v41, 1
; GISEL-NEXT: v_readlane_b32 s30, v41, 0
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GISEL-NEXT: s_or_saveexec_b64 s[4:5], -1
; GISEL-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GISEL-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GISEL-NEXT: s_mov_b64 exec, s[4:5]
; GISEL-NEXT: s_addk_i32 s32, 0xfc00
; GISEL-NEXT: s_mov_b32 s33, s10
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ define fastcc i32 @foo() {
; CHECK-NEXT: $sgpr16 = S_MOV_B32 $sgpr33
; CHECK-NEXT: $sgpr33 = S_MOV_B32 $sgpr32
; CHECK-NEXT: $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr17
; CHECK-NEXT: $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
Expand All @@ -26,8 +26,8 @@ define fastcc i32 @foo() {
; CHECK-NEXT: BUFFER_GL0_INV implicit $exec
; CHECK-NEXT: BUFFER_GL1_INV implicit $exec
; CHECK-NEXT: renamable $sgpr16_sgpr17 = S_LOAD_DWORDX2_IMM killed renamable $sgpr16_sgpr17, 0, 0 :: (dereferenceable invariant load (s64) from got, addrspace 4)
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, $vgpr40
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, $vgpr40
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr30, 0, killed $vgpr40
; CHECK-NEXT: $vgpr40 = V_WRITELANE_B32 killed $sgpr31, 1, killed $vgpr40
; CHECK-NEXT: S_WAITCNT 49279
; CHECK-NEXT: dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, @bar, csr_amdgpu, implicit killed $sgpr4_sgpr5, implicit killed $sgpr6_sgpr7, implicit killed $sgpr8_sgpr9, implicit killed $sgpr10_sgpr11, implicit killed $sgpr12, implicit killed $sgpr13, implicit killed $sgpr14, implicit killed $sgpr15, implicit killed $vgpr31, implicit $sgpr0_sgpr1_sgpr2_sgpr3
; CHECK-NEXT: $vcc_lo = S_MOV_B32 $exec_lo
Expand All @@ -43,7 +43,7 @@ define fastcc i32 @foo() {
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $sgpr31 = V_READLANE_B32 $vgpr40, 1
; CHECK-NEXT: $sgpr30 = V_READLANE_B32 $vgpr40, 0
; CHECK-NEXT: $sgpr4 = V_READLANE_B32 $vgpr40, 2
; CHECK-NEXT: $sgpr4 = V_READLANE_B32 killed $vgpr40, 2
; CHECK-NEXT: $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
; CHECK-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
; CHECK-NEXT: $exec_lo = S_MOV_B32 killed $sgpr5
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/ipra.ll
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,11 @@ define amdgpu_kernel void @kernel_call() #0 {
; GCN-LABEL: {{^}}func_regular_call:
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_load_dword v9
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN-NOT: buffer_load
; GCN-NOT: readlane
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8

; GCN: ; NumSgprs: 34
; GCN: ; NumVgprs: 10
Expand All @@ -72,9 +72,9 @@ define void @func_tail_call() #1 {
}

; GCN-LABEL: {{^}}func_call_tail_call:
; GCN: flat_load_dword v9
; GCN: flat_load_dword v8
; GCN: s_swappc_b64
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v9
; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v8
; GCN: s_setpc_b64

; GCN: ; NumSgprs: 34
Expand Down
54 changes: 27 additions & 27 deletions llvm/test/CodeGen/AMDGPU/mul24-pass-ordering.ll
Original file line number Diff line number Diff line change
Expand Up @@ -191,47 +191,47 @@ define void @slsr1_1(i32 %b.arg, i32 %s.arg) #0 {
; GFX9-NEXT: s_mov_b32 s4, s33
; GFX9-NEXT: s_mov_b32 s33, s32
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: v_writelane_b32 v40, s4, 5
; GFX9-NEXT: v_writelane_b32 v40, s30, 0
; GFX9-NEXT: v_writelane_b32 v40, s31, 1
; GFX9-NEXT: v_writelane_b32 v43, s4, 5
; GFX9-NEXT: v_writelane_b32 v43, s30, 0
; GFX9-NEXT: v_writelane_b32 v43, s31, 1
; GFX9-NEXT: s_addk_i32 s32, 0x800
; GFX9-NEXT: v_writelane_b32 v40, s34, 2
; GFX9-NEXT: v_writelane_b32 v40, s36, 3
; GFX9-NEXT: v_writelane_b32 v43, s34, 2
; GFX9-NEXT: v_writelane_b32 v43, s36, 3
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: s_add_u32 s4, s4, foo@gotpcrel32@lo+4
; GFX9-NEXT: s_addc_u32 s5, s5, foo@gotpcrel32@hi+12
; GFX9-NEXT: v_writelane_b32 v40, s37, 4
; GFX9-NEXT: v_writelane_b32 v43, s37, 4
; GFX9-NEXT: s_load_dwordx2 s[36:37], s[4:5], 0x0
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v43, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_mov_b32_e32 v41, v1
; GFX9-NEXT: v_mov_b32_e32 v42, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v42, v41
; GFX9-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT: v_mov_b32_e32 v40, v1
; GFX9-NEXT: v_mov_b32_e32 v41, v0
; GFX9-NEXT: v_mul_u32_u24_e32 v0, v41, v40
; GFX9-NEXT: s_mov_b32 s34, s15
; GFX9-NEXT: v_and_b32_e32 v43, 0xffffff, v41
; GFX9-NEXT: v_and_b32_e32 v42, 0xffffff, v40
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_mad_u32_u24 v41, v42, v41, v43
; GFX9-NEXT: v_mad_u32_u24 v40, v41, v40, v42
; GFX9-NEXT: s_mov_b32 s15, s34
; GFX9-NEXT: v_mov_b32_e32 v0, v41
; GFX9-NEXT: v_mov_b32_e32 v0, v40
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: v_add_u32_e32 v0, v41, v43
; GFX9-NEXT: v_add_u32_e32 v0, v40, v42
; GFX9-NEXT: s_mov_b32 s15, s34
; GFX9-NEXT: s_swappc_b64 s[30:31], s[36:37]
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s37, v40, 4
; GFX9-NEXT: v_readlane_b32 s36, v40, 3
; GFX9-NEXT: v_readlane_b32 s34, v40, 2
; GFX9-NEXT: v_readlane_b32 s31, v40, 1
; GFX9-NEXT: v_readlane_b32 s30, v40, 0
; GFX9-NEXT: v_readlane_b32 s4, v40, 5
; GFX9-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX9-NEXT: v_readlane_b32 s37, v43, 4
; GFX9-NEXT: v_readlane_b32 s36, v43, 3
; GFX9-NEXT: v_readlane_b32 s34, v43, 2
; GFX9-NEXT: v_readlane_b32 s31, v43, 1
; GFX9-NEXT: v_readlane_b32 s30, v43, 0
; GFX9-NEXT: v_readlane_b32 s4, v43, 5
; GFX9-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v43, off, s[0:3], s33 offset:12 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[6:7]
; GFX9-NEXT: s_addk_i32 s32, 0xf800
; GFX9-NEXT: s_mov_b32 s33, s4
Expand Down
52 changes: 26 additions & 26 deletions llvm/test/CodeGen/AMDGPU/s-getpc-b64-remat.ll
Original file line number Diff line number Diff line change
Expand Up @@ -8,23 +8,23 @@ define void @test_remat_s_getpc_b64() {
; GFX9: ; %bb.0: ; %entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: buffer_store_dword v2, off, s[0:3], s32 ; 4-byte Folded Spill
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s30, 0
; GFX9-NEXT: v_writelane_b32 v2, s30, 0
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: v_writelane_b32 v0, s31, 1
; GFX9-NEXT: v_writelane_b32 v2, s31, 1
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_getpc_b64 s[4:5]
; GFX9-NEXT: v_mov_b32_e32 v1, s4
; GFX9-NEXT: v_mov_b32_e32 v2, s5
; GFX9-NEXT: global_store_dwordx2 v[1:2], v[1:2], off
; GFX9-NEXT: v_readlane_b32 s31, v0, 1
; GFX9-NEXT: v_readlane_b32 s30, v0, 0
; GFX9-NEXT: v_mov_b32_e32 v0, s4
; GFX9-NEXT: v_mov_b32_e32 v1, s5
; GFX9-NEXT: global_store_dwordx2 v[0:1], v[0:1], off
; GFX9-NEXT: v_readlane_b32 s31, v2, 1
; GFX9-NEXT: v_readlane_b32 s30, v2, 0
; GFX9-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GFX9-NEXT: buffer_load_dword v0, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
; GFX9-NEXT: s_mov_b64 exec, s[4:5]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -33,23 +33,23 @@ define void @test_remat_s_getpc_b64() {
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX11-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_writelane_b32 v0, s30, 0
; GFX11-NEXT: v_writelane_b32 v2, s30, 0
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: v_writelane_b32 v0, s31, 1
; GFX11-NEXT: v_writelane_b32 v2, s31, 1
; GFX11-NEXT: ;;#ASMSTART
; GFX11-NEXT: ;;#ASMEND
; GFX11-NEXT: s_getpc_b64 s[0:1]
; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX11-NEXT: v_readlane_b32 s31, v0, 1
; GFX11-NEXT: v_readlane_b32 s30, v0, 0
; GFX11-NEXT: global_store_b64 v[1:2], v[1:2], off
; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX11-NEXT: v_readlane_b32 s31, v2, 1
; GFX11-NEXT: v_readlane_b32 s30, v2, 0
; GFX11-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX11-NEXT: s_xor_saveexec_b32 s0, -1
; GFX11-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
Expand All @@ -62,26 +62,26 @@ define void @test_remat_s_getpc_b64() {
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
; GFX12-NEXT: scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: v_writelane_b32 v0, s30, 0
; GFX12-NEXT: v_writelane_b32 v2, s30, 0
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: v_writelane_b32 v0, s31, 1
; GFX12-NEXT: v_writelane_b32 v2, s31, 1
; GFX12-NEXT: ;;#ASMSTART
; GFX12-NEXT: ;;#ASMEND
; GFX12-NEXT: s_getpc_b64 s[0:1]
; GFX12-NEXT: s_sext_i32_i16 s1, s1
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_readlane_b32 s31, v0, 1
; GFX12-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0
; GFX12-NEXT: v_readlane_b32 s30, v0, 0
; GFX12-NEXT: global_store_b64 v[1:2], v[1:2], off
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
; GFX12-NEXT: v_readlane_b32 s31, v2, 1
; GFX12-NEXT: v_readlane_b32 s30, v2, 0
; GFX12-NEXT: global_store_b64 v[0:1], v[0:1], off
; GFX12-NEXT: s_xor_saveexec_b32 s0, -1
; GFX12-NEXT: scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
; GFX12-NEXT: scratch_load_b32 v2, off, s32 ; 4-byte Folded Reload
; GFX12-NEXT: s_mov_b32 exec_lo, s0
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: s_setpc_b64 s[30:31]
Expand Down
314 changes: 157 additions & 157 deletions llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions llvm/test/CodeGen/AMDGPU/sgpr-spills-split-regalloc.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1183,7 +1183,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_store_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Spill
Expand Down Expand Up @@ -1297,11 +1297,11 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN-NEXT: buffer_store_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v255, off, s[0:3], s32 ; 4-byte Folded Spill
; GCN-NEXT: v_writelane_b32 v4, s34, 0
; GCN-NEXT: v_writelane_b32 v4, s35, 1
; GCN-NEXT: v_writelane_b32 v4, s36, 2
; GCN-NEXT: v_writelane_b32 v4, s37, 3
; GCN-NEXT: v_mov_b32_e32 v5, v3
; GCN-NEXT: v_writelane_b32 v5, s34, 0
; GCN-NEXT: v_writelane_b32 v5, s35, 1
; GCN-NEXT: v_writelane_b32 v5, s36, 2
; GCN-NEXT: v_writelane_b32 v5, s37, 3
; GCN-NEXT: v_mov_b32_e32 v4, v3
; GCN-NEXT: v_mov_b32_e32 v3, v1
; GCN-NEXT: ; implicit-def: $sgpr4
; GCN-NEXT: ; implicit-def: $sgpr4
Expand All @@ -1310,30 +1310,30 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN-NEXT: ; implicit-def: $sgpr4
; GCN-NEXT: ; implicit-def: $sgpr4
; GCN-NEXT: ; kill: def $vgpr2 killed $vgpr2 def $vgpr2_vgpr3 killed $exec
; GCN-NEXT: v_mov_b32_e32 v3, v5
; GCN-NEXT: v_mov_b32_e32 v3, v4
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: ; implicit-def: $sgpr4_sgpr5
; GCN-NEXT: flat_load_dwordx4 v[5:8], v[2:3]
; GCN-NEXT: flat_load_dwordx4 v[6:9], v[2:3]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Spill
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_store_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Spill
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v6, off, s[0:3], s32 offset:452 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v7, off, s[0:3], s32 offset:456 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v8, off, s[0:3], s32 offset:460 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v9, off, s[0:3], s32 offset:464 ; 4-byte Folded Reload
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: flat_store_dwordx4 v[0:1], v[5:8]
; GCN-NEXT: v_readlane_b32 s37, v4, 3
; GCN-NEXT: v_readlane_b32 s36, v4, 2
; GCN-NEXT: v_readlane_b32 s35, v4, 1
; GCN-NEXT: v_readlane_b32 s34, v4, 0
; GCN-NEXT: flat_store_dwordx4 v[0:1], v[6:9]
; GCN-NEXT: v_readlane_b32 s37, v5, 3
; GCN-NEXT: v_readlane_b32 s36, v5, 2
; GCN-NEXT: v_readlane_b32 s35, v5, 1
; GCN-NEXT: v_readlane_b32 s34, v5, 0
; GCN-NEXT: buffer_load_dword v255, off, s[0:3], s32 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v254, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v253, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
Expand Down Expand Up @@ -1447,7 +1447,7 @@ define void @spill_sgpr_no_free_vgpr(ptr addrspace(1) %out, ptr addrspace(1) %in
; GCN-NEXT: buffer_load_dword v41, off, s[0:3], s32 offset:440 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 offset:444 ; 4-byte Folded Reload
; GCN-NEXT: s_xor_saveexec_b64 s[4:5], -1
; GCN-NEXT: buffer_load_dword v4, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: buffer_load_dword v5, off, s[0:3], s32 offset:448 ; 4-byte Folded Reload
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/sibling-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,15 @@ entry:
; GCN-DAG: s_addc_u32 s5, s5, i32_fastcc_i32_i32@gotpcrel32@hi+12

; GCN-DAG: v_writelane_b32 [[CSRV]], s30, 0
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-DAG: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GCN-DAG: v_writelane_b32 [[CSRV]], s31, 1


; GCN: s_swappc_b64

; GCN-DAG: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GCN-DAG: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload

; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, sibling_call_i32_fastcc_i32_i32@rel32@lo+4
Expand Down
4 changes: 2 additions & 2 deletions llvm/test/CodeGen/AMDGPU/spill-sgpr-csr-live-ins.mir
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@ body: |
bb.0:
liveins: $sgpr50
; CHECK-LABEL: name: spill_csr_sgpr_argument
; CHECK: liveins: $sgpr50, $vgpr0
; CHECK: liveins: $sgpr50, $vgpr63
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr0
; CHECK-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR $sgpr50, 0, $vgpr63
; CHECK-NEXT: S_NOP 0, implicit $sgpr50
; CHECK-NEXT: $sgpr50 = S_MOV_B32 0
S_NOP 0, implicit $sgpr50
Expand Down
66 changes: 33 additions & 33 deletions llvm/test/CodeGen/AMDGPU/spill-sgpr-to-virtual-vgpr.mir
Original file line number Diff line number Diff line change
Expand Up @@ -53,41 +53,41 @@ body: |
bb.0:
liveins: $sgpr30_sgpr31, $sgpr10, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-LABEL: name: sgpr_spill_lane_crossover
; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr0, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN: liveins: $sgpr10, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $vgpr63, $sgpr30_sgpr31, $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71, $sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79, $sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87, $sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
; GCN-NEXT: {{ $}}
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr72, 8, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr73, 9, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr74, 10, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr75, 11, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr76, 12, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr77, 13, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 14, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 15, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 16, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 17, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 18, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 19, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 20, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 21, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 22, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 23, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr88, 24, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr89, 25, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr90, 26, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr91, 27, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr92, 28, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr93, 29, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr0
; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr0
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr64, 0, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr65, 1, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr66, 2, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr67, 3, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr68, 4, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr69, 5, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr70, 6, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr71, 7, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr72, 8, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr73, 9, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr74, 10, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr75, 11, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr76, 12, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr77, 13, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr78, 14, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr79, 15, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr80, 16, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr81, 17, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr82, 18, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr83, 19, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr84, 20, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr85, 21, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr86, 22, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr87, 23, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr88, 24, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr89, 25, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr90, 26, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr91, 27, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr92, 28, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr93, 29, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr94, 30, $vgpr63
; GCN-NEXT: $vgpr63 = SI_SPILL_S32_TO_VGPR killed $sgpr95, 31, $vgpr63
; GCN-NEXT: S_NOP 0
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR killed $sgpr10, 0, [[DEF]]
; GCN-NEXT: [[DEF:%[0-9]+]]:vgpr_32 = SI_SPILL_S32_TO_VGPR $sgpr64, 1, [[DEF]], implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ define void @spill_more_than_wavesize_csr_sgprs() {
}

; CHECK-LABEL: {{^}}spill_more_than_wavesize_csr_sgprs_with_stack_object:
; CHECK-DAG: v_writelane_b32 v0, s98, 63
; CHECK-DAG: v_writelane_b32 v1, s99, 0
; CHECK-DAG: v_writelane_b32 v1, s98, 63
; CHECK-DAG: v_writelane_b32 v2, s99, 0
; CHECK-NOT: dummy
; CHECK-DAG: v_readlane_b32 s99, v1, 0
; CHECK-DAG: v_readlane_b32 s98, v0, 63
; CHECK-DAG: v_readlane_b32 s99, v2, 0
; CHECK-DAG: v_readlane_b32 s98, v1, 63
define void @spill_more_than_wavesize_csr_sgprs_with_stack_object() {
%alloca = alloca i32, align 4, addrspace(5)
store volatile i32 0, ptr addrspace(5) %alloca
Expand Down
28 changes: 14 additions & 14 deletions llvm/test/CodeGen/AMDGPU/stacksave_stackrestore.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1585,17 +1585,17 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s24, s33
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s16, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Spill
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s16
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0x1200
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr33 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s30, 0
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s31, 1
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $vgpr32 : SGPR spill to VGPR lane
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s30, 0
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s31, 1
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s16, s32
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s16, 0
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 0
; WAVE32-WWM-PREALLOC-NEXT: s_lshr_b32 s16, s16, 5
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v33, s16, 1
; WAVE32-WWM-PREALLOC-NEXT: v_writelane_b32 v32, s16, 1
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v0, 42
; WAVE32-WWM-PREALLOC-NEXT: buffer_store_dword v0, off, s[0:3], s33
; WAVE32-WWM-PREALLOC-NEXT: s_waitcnt_vscnt null, 0x0
Expand Down Expand Up @@ -1673,18 +1673,18 @@ define void @func_stacksave_stackrestore_call_with_stack_objects() {
; WAVE32-WWM-PREALLOC-NEXT: ; implicit-def: $sgpr18
; WAVE32-WWM-PREALLOC-NEXT: v_mov_b32_e32 v30, s18
; WAVE32-WWM-PREALLOC-NEXT: s_swappc_b64 s[30:31], s[16:17]
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s5, v33, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v33, 0
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s5, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s4, v32, 0
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMSTART
; WAVE32-WWM-PREALLOC-NEXT: ; use s5
; WAVE32-WWM-PREALLOC-NEXT: ;;#ASMEND
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s32, s4
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v32, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v32, 0
; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr33
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s31, v33, 1
; WAVE32-WWM-PREALLOC-NEXT: v_readlane_b32 s30, v33, 0
; WAVE32-WWM-PREALLOC-NEXT: ; kill: killed $vgpr32
; WAVE32-WWM-PREALLOC-NEXT: s_xor_saveexec_b32 s4, -1
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v33, off, s[0:3], s33 offset:128 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: buffer_load_dword v32, off, s[0:3], s33 offset:132 ; 4-byte Folded Reload
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 exec_lo, s4
; WAVE32-WWM-PREALLOC-NEXT: s_add_i32 s32, s32, 0xffffee00
; WAVE32-WWM-PREALLOC-NEXT: s_mov_b32 s33, s24
Expand Down
136 changes: 68 additions & 68 deletions llvm/test/CodeGen/AMDGPU/strictfp_f16_abi_promote.ll
Original file line number Diff line number Diff line change
Expand Up @@ -252,31 +252,31 @@ define void @outgoing_f16_return(ptr %ptr) #0 {
; GFX7-NEXT: s_mov_b32 s16, s33
; GFX7-NEXT: s_mov_b32 s33, s32
; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[18:19]
; GFX7-NEXT: v_writelane_b32 v40, s16, 2
; GFX7-NEXT: v_writelane_b32 v40, s30, 0
; GFX7-NEXT: v_writelane_b32 v42, s16, 2
; GFX7-NEXT: v_writelane_b32 v42, s30, 0
; GFX7-NEXT: s_mov_b32 s17, f16_result@abs32@hi
; GFX7-NEXT: s_mov_b32 s16, f16_result@abs32@lo
; GFX7-NEXT: s_addk_i32 s32, 0x400
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v40, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v42, v1
; GFX7-NEXT: v_mov_b32_e32 v41, v0
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v42, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v41, v1
; GFX7-NEXT: v_mov_b32_e32 v40, v0
; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_readlane_b32 s31, v40, 1
; GFX7-NEXT: v_readlane_b32 s30, v40, 0
; GFX7-NEXT: v_readlane_b32 s4, v40, 2
; GFX7-NEXT: v_readlane_b32 s31, v42, 1
; GFX7-NEXT: v_readlane_b32 s30, v42, 0
; GFX7-NEXT: v_readlane_b32 s4, v42, 2
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: flat_store_short v[41:42], v0
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: flat_store_short v[40:41], v0
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_addk_i32 s32, 0xfc00
; GFX7-NEXT: s_mov_b32 s33, s4
Expand All @@ -294,37 +294,37 @@ define void @outgoing_v2f16_return(ptr %ptr) #0 {
; GFX7-NEXT: s_mov_b32 s16, s33
; GFX7-NEXT: s_mov_b32 s33, s32
; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[18:19]
; GFX7-NEXT: v_writelane_b32 v40, s16, 2
; GFX7-NEXT: v_writelane_b32 v40, s30, 0
; GFX7-NEXT: v_writelane_b32 v42, s16, 2
; GFX7-NEXT: v_writelane_b32 v42, s30, 0
; GFX7-NEXT: s_mov_b32 s17, v2f16_result@abs32@hi
; GFX7-NEXT: s_mov_b32 s16, v2f16_result@abs32@lo
; GFX7-NEXT: s_addk_i32 s32, 0x400
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v40, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v42, v1
; GFX7-NEXT: v_mov_b32_e32 v41, v0
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v42, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v41, v1
; GFX7-NEXT: v_mov_b32_e32 v40, v0
; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_readlane_b32 s31, v40, 1
; GFX7-NEXT: v_readlane_b32 s30, v40, 0
; GFX7-NEXT: v_readlane_b32 s31, v42, 1
; GFX7-NEXT: v_readlane_b32 s30, v42, 0
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0
; GFX7-NEXT: v_readlane_b32 s4, v40, 2
; GFX7-NEXT: v_readlane_b32 s4, v42, 2
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: flat_store_dword v[41:42], v0
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: flat_store_dword v[40:41], v0
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_addk_i32 s32, 0xfc00
; GFX7-NEXT: s_mov_b32 s33, s4
Expand All @@ -342,18 +342,18 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
; GFX7-NEXT: s_mov_b32 s16, s33
; GFX7-NEXT: s_mov_b32 s33, s32
; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[18:19]
; GFX7-NEXT: v_writelane_b32 v40, s16, 2
; GFX7-NEXT: v_writelane_b32 v40, s30, 0
; GFX7-NEXT: v_writelane_b32 v42, s16, 2
; GFX7-NEXT: v_writelane_b32 v42, s30, 0
; GFX7-NEXT: s_mov_b32 s17, v4f16_result@abs32@hi
; GFX7-NEXT: s_mov_b32 s16, v4f16_result@abs32@lo
; GFX7-NEXT: s_addk_i32 s32, 0x400
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v40, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v42, v1
; GFX7-NEXT: v_mov_b32_e32 v41, v0
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v42, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v41, v1
; GFX7-NEXT: v_mov_b32_e32 v40, v0
; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
Expand All @@ -375,17 +375,17 @@ define void @outgoing_v4f16_return(ptr %ptr) #0 {
; GFX7-NEXT: v_or_b32_e32 v4, v0, v1
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT: v_or_b32_e32 v2, v2, v0
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 4, v41
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v42, vcc
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 4, v40
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v41, vcc
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: flat_store_dword v[41:42], v4
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: v_readlane_b32 s31, v40, 1
; GFX7-NEXT: v_readlane_b32 s30, v40, 0
; GFX7-NEXT: v_readlane_b32 s4, v40, 2
; GFX7-NEXT: flat_store_dword v[40:41], v4
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: v_readlane_b32 s31, v42, 1
; GFX7-NEXT: v_readlane_b32 s30, v42, 0
; GFX7-NEXT: v_readlane_b32 s4, v42, 2
; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_addk_i32 s32, 0xfc00
; GFX7-NEXT: s_mov_b32 s33, s4
Expand All @@ -403,18 +403,18 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 {
; GFX7-NEXT: s_mov_b32 s16, s33
; GFX7-NEXT: s_mov_b32 s33, s32
; GFX7-NEXT: s_or_saveexec_b64 s[18:19], -1
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GFX7-NEXT: s_mov_b64 exec, s[18:19]
; GFX7-NEXT: v_writelane_b32 v40, s16, 2
; GFX7-NEXT: v_writelane_b32 v40, s30, 0
; GFX7-NEXT: v_writelane_b32 v42, s16, 2
; GFX7-NEXT: v_writelane_b32 v42, s30, 0
; GFX7-NEXT: s_mov_b32 s17, v8f16_result@abs32@hi
; GFX7-NEXT: s_mov_b32 s16, v8f16_result@abs32@lo
; GFX7-NEXT: s_addk_i32 s32, 0x400
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v40, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v42, v1
; GFX7-NEXT: v_mov_b32_e32 v41, v0
; GFX7-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GFX7-NEXT: buffer_store_dword v41, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX7-NEXT: v_writelane_b32 v42, s31, 1
; GFX7-NEXT: v_mov_b32_e32 v41, v1
; GFX7-NEXT: v_mov_b32_e32 v40, v0
; GFX7-NEXT: s_swappc_b64 s[30:31], s[16:17]
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
Expand Down Expand Up @@ -456,23 +456,23 @@ define void @outgoing_v8f16_return(ptr %ptr) #0 {
; GFX7-NEXT: v_or_b32_e32 v5, v1, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v0, 16, v3
; GFX7-NEXT: v_or_b32_e32 v3, v4, v0
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 12, v41
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v42, vcc
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 12, v40
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v41, vcc
; GFX7-NEXT: flat_store_dword v[0:1], v3
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 8, v41
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v42, vcc
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 8, v40
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v41, vcc
; GFX7-NEXT: flat_store_dword v[0:1], v5
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 4, v41
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v42, vcc
; GFX7-NEXT: v_add_i32_e32 v0, vcc, 4, v40
; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v41, vcc
; GFX7-NEXT: flat_store_dword v[0:1], v2
; GFX7-NEXT: flat_store_dword v[41:42], v8
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: v_readlane_b32 s31, v40, 1
; GFX7-NEXT: v_readlane_b32 s30, v40, 0
; GFX7-NEXT: v_readlane_b32 s4, v40, 2
; GFX7-NEXT: flat_store_dword v[40:41], v8
; GFX7-NEXT: buffer_load_dword v41, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:4 ; 4-byte Folded Reload
; GFX7-NEXT: v_readlane_b32 s31, v42, 1
; GFX7-NEXT: v_readlane_b32 s30, v42, 0
; GFX7-NEXT: v_readlane_b32 s4, v42, 2
; GFX7-NEXT: s_or_saveexec_b64 s[6:7], -1
; GFX7-NEXT: buffer_load_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: buffer_load_dword v42, off, s[0:3], s33 offset:8 ; 4-byte Folded Reload
; GFX7-NEXT: s_mov_b64 exec, s[6:7]
; GFX7-NEXT: s_addk_i32 s32, 0xfc00
; GFX7-NEXT: s_mov_b32 s33, s4
Expand Down
250 changes: 125 additions & 125 deletions llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll

Large diffs are not rendered by default.

1,169 changes: 726 additions & 443 deletions llvm/test/CodeGen/AMDGPU/vgpr-large-tuple-alloc-error.ll

Large diffs are not rendered by default.

264 changes: 132 additions & 132 deletions llvm/test/CodeGen/AMDGPU/vgpr-tuple-allocation.ll

Large diffs are not rendered by default.

476 changes: 238 additions & 238 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll

Large diffs are not rendered by default.

59 changes: 0 additions & 59 deletions llvm/test/CodeGen/Generic/bb-profile-dump.ll

This file was deleted.

102 changes: 102 additions & 0 deletions llvm/test/CodeGen/RISCV/tls-models.ll
Original file line number Diff line number Diff line change
@@ -1,10 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -relocation-model=pic < %s \
; RUN: | FileCheck -check-prefix=RV32-PIC %s
; RUN: llc -mtriple=riscv32 -relocation-model=pic < %s -enable-tlsdesc \
; RUN: | FileCheck -check-prefix=RV32-PIC-TLSDESC %s
; RUN: llc -mtriple=riscv64 -relocation-model=pic < %s \
; RUN: | FileCheck -check-prefix=RV64-PIC %s
; RUN: llc -mtriple=riscv64 -relocation-model=pic -enable-tlsdesc < %s \
; RUN: | FileCheck -check-prefix=RV64-PIC-TLSDESC %s
; RUN: llc -mtriple=riscv32 < %s | FileCheck -check-prefix=RV32-NOPIC %s
; RUN: llc -mtriple=riscv32 < %s -enable-tlsdesc | FileCheck -check-prefix=RV32-NOPIC-TLSDESC %s
; RUN: llc -mtriple=riscv64 < %s | FileCheck -check-prefix=RV64-NOPIC %s
; RUN: llc -mtriple=riscv64 < %s -enable-tlsdesc | FileCheck -check-prefix=RV64-NOPIC-TLSDESC %s

; Check that TLS symbols are lowered correctly based on the specified
; model. Make sure they're external to avoid them all being optimised to Local
Expand Down Expand Up @@ -58,6 +64,42 @@ define ptr @f1() nounwind {
; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV64-NOPIC-NEXT: add a0, a0, tp
; RV64-NOPIC-NEXT: ret
;
; RV32-PIC-TLSDESC-LABEL: f1:
; RV32-PIC-TLSDESC: # %bb.0: # %entry
; RV32-PIC-TLSDESC-NEXT: .Ltlsdesc_hi0:
; RV32-PIC-TLSDESC-NEXT: auipc a0, %tlsdesc_hi(unspecified)
; RV32-PIC-TLSDESC-NEXT: lw a1, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a0)
; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0)
; RV32-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0)
; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp
; RV32-PIC-TLSDESC-NEXT: ret
;
; RV64-PIC-TLSDESC-LABEL: f1:
; RV64-PIC-TLSDESC: # %bb.0: # %entry
; RV64-PIC-TLSDESC-NEXT: .Ltlsdesc_hi0:
; RV64-PIC-TLSDESC-NEXT: auipc a0, %tlsdesc_hi(unspecified)
; RV64-PIC-TLSDESC-NEXT: ld a1, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a0)
; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0)
; RV64-PIC-TLSDESC-NEXT: jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0)
; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp
; RV64-PIC-TLSDESC-NEXT: ret
;
; RV32-NOPIC-TLSDESC-LABEL: f1:
; RV32-NOPIC-TLSDESC: # %bb.0: # %entry
; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi0:
; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(unspecified)
; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp
; RV32-NOPIC-TLSDESC-NEXT: ret
;
; RV64-NOPIC-TLSDESC-LABEL: f1:
; RV64-NOPIC-TLSDESC: # %bb.0: # %entry
; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi0:
; RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(unspecified)
; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp
; RV64-NOPIC-TLSDESC-NEXT: ret
entry:
ret ptr @unspecified
}
Expand Down Expand Up @@ -144,6 +186,38 @@ define ptr @f3() nounwind {
; RV64-NOPIC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; RV64-NOPIC-NEXT: add a0, a0, tp
; RV64-NOPIC-NEXT: ret
;
; RV32-PIC-TLSDESC-LABEL: f3:
; RV32-PIC-TLSDESC: # %bb.0: # %entry
; RV32-PIC-TLSDESC-NEXT: .Lpcrel_hi0:
; RV32-PIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
; RV32-PIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp
; RV32-PIC-TLSDESC-NEXT: ret
;
; RV64-PIC-TLSDESC-LABEL: f3:
; RV64-PIC-TLSDESC: # %bb.0: # %entry
; RV64-PIC-TLSDESC-NEXT: .Lpcrel_hi0:
; RV64-PIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
; RV64-PIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi0)(a0)
; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp
; RV64-PIC-TLSDESC-NEXT: ret
;
; RV32-NOPIC-TLSDESC-LABEL: f3:
; RV32-NOPIC-TLSDESC: # %bb.0: # %entry
; RV32-NOPIC-TLSDESC-NEXT: .Lpcrel_hi2:
; RV32-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
; RV32-NOPIC-TLSDESC-NEXT: lw a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp
; RV32-NOPIC-TLSDESC-NEXT: ret
;
; RV64-NOPIC-TLSDESC-LABEL: f3:
; RV64-NOPIC-TLSDESC: # %bb.0: # %entry
; RV64-NOPIC-TLSDESC-NEXT: .Lpcrel_hi2:
; RV64-NOPIC-TLSDESC-NEXT: auipc a0, %tls_ie_pcrel_hi(ie)
; RV64-NOPIC-TLSDESC-NEXT: ld a0, %pcrel_lo(.Lpcrel_hi2)(a0)
; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp
; RV64-NOPIC-TLSDESC-NEXT: ret
entry:
ret ptr @ie
}
Expand Down Expand Up @@ -179,6 +253,34 @@ define ptr @f4() nounwind {
; RV64-NOPIC-NEXT: add a0, a0, tp, %tprel_add(le)
; RV64-NOPIC-NEXT: addi a0, a0, %tprel_lo(le)
; RV64-NOPIC-NEXT: ret
;
; RV32-PIC-TLSDESC-LABEL: f4:
; RV32-PIC-TLSDESC: # %bb.0: # %entry
; RV32-PIC-TLSDESC-NEXT: lui a0, %tprel_hi(le)
; RV32-PIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le)
; RV32-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le)
; RV32-PIC-TLSDESC-NEXT: ret
;
; RV64-PIC-TLSDESC-LABEL: f4:
; RV64-PIC-TLSDESC: # %bb.0: # %entry
; RV64-PIC-TLSDESC-NEXT: lui a0, %tprel_hi(le)
; RV64-PIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le)
; RV64-PIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le)
; RV64-PIC-TLSDESC-NEXT: ret
;
; RV32-NOPIC-TLSDESC-LABEL: f4:
; RV32-NOPIC-TLSDESC: # %bb.0: # %entry
; RV32-NOPIC-TLSDESC-NEXT: lui a0, %tprel_hi(le)
; RV32-NOPIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le)
; RV32-NOPIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le)
; RV32-NOPIC-TLSDESC-NEXT: ret
;
; RV64-NOPIC-TLSDESC-LABEL: f4:
; RV64-NOPIC-TLSDESC: # %bb.0: # %entry
; RV64-NOPIC-TLSDESC-NEXT: lui a0, %tprel_hi(le)
; RV64-NOPIC-TLSDESC-NEXT: add a0, a0, tp, %tprel_add(le)
; RV64-NOPIC-TLSDESC-NEXT: addi a0, a0, %tprel_lo(le)
; RV64-NOPIC-TLSDESC-NEXT: ret
entry:
ret ptr @le
}
1 change: 1 addition & 0 deletions llvm/test/DebugInfo/salvage-limit-expr-size.ll
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
; RUN: opt %s -passes=dce -S | FileCheck %s
; RUN: opt %s -passes=dce -S --try-experimental-debuginfo-iterators | FileCheck %s

;; Tests that a DIExpression will only be salvaged up to a certain length, and
;; will produce an undef value if an expression would need to exceed that length.
Expand Down
27 changes: 27 additions & 0 deletions llvm/test/MC/LoongArch/Relocations/align-non-executable.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
## A label difference separated by an alignment directive, when the
## referenced symbols are in a non-executable section with instructions,
## should generate ADD/SUB relocations.
## https://github.com/llvm/llvm-project/pull/76552

# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s \
# RUN: | llvm-readobj -r - | FileCheck --check-prefixes=CHECK,RELAX %s
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s \
# RUN: | llvm-readobj -r - | FileCheck %s

## The section is flagged "a" only (no "x"), yet it holds an instruction
## followed by an alignment directive between the two labels.
.section ".dummy", "a"
.L1:
la.pcrel $t0, sym
.p2align 3
.L2:
## The label difference under test: it spans the alignment directive above.
.dword .L2 - .L1

## The R_LARCH_RELAX and ADD64/SUB64 entries are expected only in the +relax
## run; the -relax run expects just the two PCALA relocations.
# CHECK: Relocations [
# CHECK-NEXT: Section ({{.*}}) .rela.dummy {
# CHECK-NEXT: 0x0 R_LARCH_PCALA_HI20 sym 0x0
# RELAX-NEXT: 0x0 R_LARCH_RELAX - 0x0
# CHECK-NEXT: 0x4 R_LARCH_PCALA_LO12 sym 0x0
# RELAX-NEXT: 0x4 R_LARCH_RELAX - 0x0
# RELAX-NEXT: 0x8 R_LARCH_ADD64 .L2 0x0
# RELAX-NEXT: 0x8 R_LARCH_SUB64 .L1 0x0
# CHECK-NEXT: }
# CHECK-NEXT: ]
15 changes: 13 additions & 2 deletions llvm/test/MC/LoongArch/Relocations/relax-addsub.s
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,23 @@

# RELAX: Relocations [
# RELAX-NEXT: Section ({{.*}}) .rela.text {
# RELAX-NEXT: 0x4 R_LARCH_ALIGN {{.*}} 0x4
# RELAX-NEXT: 0x10 R_LARCH_PCALA_HI20 .L1 0x0
# RELAX-NEXT: 0x10 R_LARCH_RELAX - 0x0
# RELAX-NEXT: 0x14 R_LARCH_PCALA_LO12 .L1 0x0
# RELAX-NEXT: 0x14 R_LARCH_RELAX - 0x0
# RELAX-NEXT: }
# RELAX-NEXT: Section ({{.*}}) .rela.data {
# RELAX-NEXT: 0x10 R_LARCH_ADD8 .L3 0x0
# RELAX-NEXT: 0x10 R_LARCH_SUB8 .L2 0x0
# RELAX-NEXT: 0x11 R_LARCH_ADD16 .L3 0x0
# RELAX-NEXT: 0x11 R_LARCH_SUB16 .L2 0x0
# RELAX-NEXT: 0x13 R_LARCH_ADD32 .L3 0x0
# RELAX-NEXT: 0x13 R_LARCH_SUB32 .L2 0x0
# RELAX-NEXT: 0x17 R_LARCH_ADD64 .L3 0x0
# RELAX-NEXT: 0x17 R_LARCH_SUB64 .L2 0x0
# RELAX-NEXT: 0x1F R_LARCH_ADD_ULEB128 .L3 0x0
# RELAX-NEXT: 0x1F R_LARCH_SUB_ULEB128 .L2 0x0
# RELAX-NEXT: 0x20 R_LARCH_ADD8 .L4 0x0
# RELAX-NEXT: 0x20 R_LARCH_SUB8 .L3 0x0
# RELAX-NEXT: 0x21 R_LARCH_ADD16 .L4 0x0
Expand All @@ -57,7 +68,7 @@

# RELAX: Hex dump of section '.data':
# RELAX-NEXT: 0x00000000 04040004 00000004 00000000 00000004
# RELAX-NEXT: 0x00000010 0c0c000c 0000000c 00000000 0000000c
# RELAX-NEXT: 0x00000010 00000000 00000000 00000000 00000000
# RELAX-NEXT: 0x00000020 00000000 00000000 00000000 00000000
# RELAX-NEXT: 0x00000030 00000000 00000000 00000000 000000

Expand All @@ -78,7 +89,7 @@
.word .L2 - .L1
.dword .L2 - .L1
.uleb128 .L2 - .L1
## TODO Handle alignment directive.
## With relaxation, emit relocs because the .align makes the diff variable.
.byte .L3 - .L2
.short .L3 - .L2
.word .L3 - .L2
Expand Down
79 changes: 79 additions & 0 deletions llvm/test/MC/LoongArch/Relocations/relax-align.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
## Test nop insertion with R_LARCH_ALIGN for relaxation.

# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=-relax %s -o %t
# RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=INSTR
# RUN: llvm-readobj -r %t | FileCheck %s --check-prefix=RELOC
# RUN: llvm-mc --filetype=obj --triple=loongarch64 --mattr=+relax %s -o %t.r
# RUN: llvm-objdump -d %t.r | FileCheck %s --check-prefixes=INSTR,RELAX-INSTR
# RUN: llvm-readobj -r %t.r | FileCheck %s --check-prefixes=RELOC,RELAX-RELOC

.text
break 0
# INSTR: break 0

## Do not emit R_LARCH_ALIGN if the requested alignment is less than or equal
## to the minimum code alignment (i.e. 4 bytes).
.p2align 2
.p2align 1
.p2align 0

## Do not emit instructions if the maximum number of padding bytes is less
## than the minimum nop size.
.p2align 4, , 2

## Do not emit R_LARCH_ALIGN if the alignment directive specifies a padding
## value. This matches the behavior of the GNU assembler.
break 1
.p2align 4, 1
# INSTR-NEXT: break 1
# INSTR-COUNT-2: 01 01 01 01

break 2
.p2align 4, 1, 12
# INSTR-NEXT: break 2
# INSTR-COUNT-3: 01 01 01 01

break 3
.p2align 4
# INSTR-NEXT: break 3
# INSTR-COUNT-3: nop

break 4
.p2align 5
.p2align 4
# INSTR-NEXT: break 4
# INSTR-COUNT-3: nop
# RELAX-INSTR-COUNT-7: nop

break 5
.p2align 4, , 11
# INSTR-NEXT: break 5
# RELAX-INSTR-COUNT-3: nop

break 6
## The third parameter (a max skip of 12) is not encoded in the R_LARCH_ALIGN
## addend, unlike the max skip of 11 above.
.p2align 4, , 12
# INSTR-NEXT: break 6
# INSTR-NEXT: nop
# INSTR-NEXT: nop
# RELAX-INSTR-NEXT: nop

ret
# INSTR-NEXT: ret

## Check that the alignment symbol used here differs from the one used in .text.
.section .text2, "ax"
.p2align 4
break 7

# RELOC: Relocations [
# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text {
# RELAX-RELOC-NEXT: 0x24 R_LARCH_ALIGN .Lla-relax-align0 0x4
# RELAX-RELOC-NEXT: 0x34 R_LARCH_ALIGN .Lla-relax-align0 0x5
# RELAX-RELOC-NEXT: 0x50 R_LARCH_ALIGN .Lla-relax-align0 0x4
# RELAX-RELOC-NEXT: 0x60 R_LARCH_ALIGN .Lla-relax-align0 0xB04
# RELAX-RELOC-NEXT: 0x70 R_LARCH_ALIGN .Lla-relax-align0 0x4
# RELAX-RELOC-NEXT: }
# RELAX-RELOC-NEXT: Section ({{.*}}) .rela.text2 {
# RELAX-RELOC-NEXT: 0x0 R_LARCH_ALIGN .Lla-relax-align1 0x4
# RELAX-RELOC-NEXT: }
# RELOC-NEXT: ]
21 changes: 21 additions & 0 deletions llvm/test/MC/RISCV/relocations.s
Original file line number Diff line number Diff line change
Expand Up @@ -176,3 +176,24 @@ bgeu a0, a1, foo
# RELOC: R_RISCV_JAL
# INSTR: bgeu a0, a1, foo
# FIXUP: fixup A - offset: 0, value: foo, kind: fixup_riscv_branch

# TLSDESC access sequence: the lo12 and call modifiers refer back to the
# label placed on the auipc that carries the hi20 part.
.L5:
auipc a0, %tlsdesc_hi(a_symbol)
# RELOC: R_RISCV_TLSDESC_HI20
# INSTR: auipc a0, %tlsdesc_hi(a_symbol)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_hi(a_symbol), kind: fixup_riscv_tlsdesc_hi20

lw a1, %tlsdesc_load_lo(.L5)(a0)
# RELOC: R_RISCV_TLSDESC_LOAD_LO12
# INSTR: lw a1, %tlsdesc_load_lo(.L5)(a0)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_load_lo(.L5), kind: fixup_riscv_tlsdesc_load_lo12

addi a0, a0, %tlsdesc_add_lo(.L5)
# RELOC: R_RISCV_TLSDESC_ADD_LO12
# INSTR: addi a0, a0, %tlsdesc_add_lo(.L5)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_add_lo(.L5), kind: fixup_riscv_tlsdesc_add_lo12

jalr t0, 0(a1), %tlsdesc_call(.L5)
# RELOC: R_RISCV_TLSDESC_CALL
# INSTR: jalr t0, 0(a1), %tlsdesc_call(.L5)
# FIXUP: fixup A - offset: 0, value: %tlsdesc_call(.L5), kind: fixup_riscv_tlsdesc_call
50 changes: 50 additions & 0 deletions llvm/test/MC/RISCV/tlsdesc.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# RUN: llvm-mc -filetype=obj -triple riscv32 < %s --defsym RV32=1 | llvm-objdump -dr -M no-aliases - | FileCheck %s --check-prefixes=INST,RV32
# RUN: llvm-mc -filetype=obj -triple riscv64 < %s | llvm-objdump -dr -M no-aliases - | FileCheck %s --check-prefixes=INST,RV64

# RUN: not llvm-mc -triple riscv32 < %s --defsym RV32=1 --defsym ERR=1 2>&1 | FileCheck %s --check-prefixes=ERR
# RUN: not llvm-mc -triple riscv64 < %s --defsym ERR=1 2>&1 | FileCheck %s --check-prefixes=ERR

## Check that the TLSDESC modifiers (%tlsdesc_hi, %tlsdesc_load_lo,
## %tlsdesc_add_lo, %tlsdesc_call) assemble to the matching R_RISCV_TLSDESC_*
## relocations on riscv32 and riscv64, and that misuse is diagnosed.

start: # @start
# %bb.0: # %entry
.Ltlsdesc_hi0:
auipc a0, %tlsdesc_hi(a-4)
# INST: auipc a0, 0x0
# INST-NEXT: R_RISCV_TLSDESC_HI20 a-0x4
auipc a0, %tlsdesc_hi(unspecified)
# INST-NEXT: auipc a0, 0x0
# INST-NEXT: R_RISCV_TLSDESC_HI20 unspecified
## The descriptor load uses lw on riscv32 and ld on riscv64; both carry the
## same relocation type.
.ifdef RV32
lw a1, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a0)
# RV32: lw a1, 0x0(a0)
# RV32-NEXT: R_RISCV_TLSDESC_LOAD_LO12 .Ltlsdesc_hi0
.else
ld a1, %tlsdesc_load_lo(.Ltlsdesc_hi0)(a0)
# RV64: ld a1, 0x0(a0)
# RV64-NEXT: R_RISCV_TLSDESC_LOAD_LO12 .Ltlsdesc_hi0
.endif
addi a0, a0, %tlsdesc_add_lo(.Ltlsdesc_hi0)
# INST: addi a0, a0, 0x0
# INST-NEXT: R_RISCV_TLSDESC_ADD_LO12 .Ltlsdesc_hi0
jalr t0, 0(a1), %tlsdesc_call(.Ltlsdesc_hi0)
# INST-NEXT: jalr t0, 0x0(a1)
# INST-NEXT: R_RISCV_TLSDESC_CALL .Ltlsdesc_hi0
add a0, a0, tp
# INST-NEXT: add a0, a0, tp
ret

## Check invalid usage
## Each line below pairs a TLSDESC modifier with an instruction or operand form
## it is not valid for; the expected diagnostic is on the same line.
.ifdef ERR
auipc x1, %tlsdesc_call(foo) # ERR: :[[#@LINE]]:12: error: operand must be a symbol with a %pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or an integer in the range
auipc x1, %tlsdesc_call(1234) # ERR: :[[#@LINE]]:12: error: operand must be a symbol with a %pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or an integer in the range
auipc a0, %tlsdesc_hi(a+b) # ERR: :[[#@LINE]]:12: error: operand must be a symbol with a %pcrel_hi/%got_pcrel_hi/%tls_ie_pcrel_hi/%tls_gd_pcrel_hi modifier or an integer in the range

lw a0, t0, %tlsdesc_load_lo(a_symbol) # ERR: :[[#@LINE]]:15: error: invalid operand for instruction
lw a0, t0, %tlsdesc_load_lo(a_symbol)(a4) # ERR: :[[#@LINE]]:15: error: invalid operand for instruction

addi a0, t0, %tlsdesc_add_lo(a_symbol)(a4) # ERR: :[[#@LINE]]:41: error: invalid operand for instruction
addi a0, %tlsdesc_add_lo(a_symbol) # ERR: :[[#@LINE]]:11: error: invalid operand for instruction
addi x1, %tlsdesc_load_lo(a_symbol)(a0) # ERR: :[[#@LINE]]:11: error: invalid operand for instruction

jalr x5, 0(a1), %tlsdesc_hi(a_symbol) # ERR: :[[#@LINE]]:18: error: operand must be a symbol with %tlsdesc_call modifier
jalr x1, 0(a1), %tlsdesc_call(a_symbol) # ERR: :[[#@LINE]]:13: error: the output operand must be t0/x5 when using %tlsdesc_call modifier
.endif
Loading