42 changes: 21 additions & 21 deletions llvm/test/CodeGen/AMDGPU/stack-realign.ll
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,17 @@ define void @needs_align16_default_stack_align(i32 %idx) #0 {
}

; GCN-LABEL: {{^}}needs_align16_stack_align4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x3c0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffffc00

; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
; GCN: s_add_u32 s32, s32, 0x2800{{$}}
; GCN: s_addk_i32 s32, 0x2800{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen

; GCN: s_sub_u32 s32, s32, 0x2800
; GCN: s_addk_i32 s32, 0xd800

; GCN: ; ScratchSize: 160
define void @needs_align16_stack_align4(i32 %idx) #2 {
Expand All @@ -53,17 +53,17 @@ define void @needs_align16_stack_align4(i32 %idx) #2 {
}

; GCN-LABEL: {{^}}needs_align32:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0x7c0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xfffff800

; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: v_or_b32_e32 v{{[0-9]+}}, 12
; GCN: s_add_u32 s32, s32, 0x3000{{$}}
; GCN: s_addk_i32 s32, 0x3000{{$}}
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen

; GCN: s_sub_u32 s32, s32, 0x3000
; GCN: s_addk_i32 s32, 0xd000

; GCN: ; ScratchSize: 192
define void @needs_align32(i32 %idx) #0 {
Expand All @@ -74,12 +74,12 @@ define void @needs_align32(i32 %idx) #0 {
}

; GCN-LABEL: {{^}}force_realign4:
; GCN: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
; GCN: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xc0{{$}}
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffffff00
; GCN: s_add_u32 s32, s32, 0xd00{{$}}
; GCN: s_addk_i32 s32, 0xd00{{$}}

; GCN: buffer_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[0:3], 0 offen
; GCN: s_sub_u32 s32, s32, 0xd00
; GCN: s_addk_i32 s32, 0xf300

; GCN: ; ScratchSize: 52
define void @force_realign4(i32 %idx) #1 {
Expand Down Expand Up @@ -125,12 +125,12 @@ define amdgpu_kernel void @kernel_call_align4_from_5() {

; GCN-LABEL: {{^}}default_realign_align128:
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0x1fc0
; GCN-NEXT: s_add_i32 s33, s32, 0x1fc0
; GCN-NEXT: s_and_b32 s33, s33, 0xffffe000
; GCN-NEXT: s_add_u32 s32, s32, 0x4000
; GCN-NEXT: s_addk_i32 s32, 0x4000
; GCN-NOT: s33
; GCN: buffer_store_dword v0, off, s[0:3], s33{{$}}
; GCN: s_sub_u32 s32, s32, 0x4000
; GCN: s_addk_i32 s32, 0xc000
; GCN: s_mov_b32 s33, [[FP_COPY]]
define void @default_realign_align128(i32 %idx) #0 {
%alloca.align = alloca i32, align 128, addrspace(5)
Expand Down Expand Up @@ -159,19 +159,19 @@ define void @func_call_align1024_bp_gets_vgpr_spill(<32 x i32> %a, i32 %b) #0 {
; GCN: buffer_store_dword [[VGPR_REG:v[0-9]+]], off, s[0:3], s32 offset:1028 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 [[VGPR_REG]], s33, 2
; GCN-DAG: s_add_u32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
; GCN-DAG: s_add_i32 [[SCRATCH_REG:s[0-9]+]], s32, 0xffc0
; GCN-DAG: v_writelane_b32 [[VGPR_REG]], s34, 3
; GCN: s_and_b32 s33, [[SCRATCH_REG]], 0xffff0000
; GCN: s_mov_b32 s34, s32
; GCN: v_mov_b32_e32 v32, 0
; GCN: buffer_store_dword v32, off, s[0:3], s33 offset:1024
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: buffer_load_dword v{{[0-9]+}}, off, s[0:3], s34
; GCN-DAG: s_add_u32 s32, s32, 0x30000
; GCN-DAG: s_add_i32 s32, s32, 0x30000
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s32
; GCN: s_swappc_b64 s[30:31], s[4:5]

; GCN: s_sub_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0xfffd0000
; GCN-NEXT: v_readlane_b32 s33, [[VGPR_REG]], 2
; GCN-NEXT: v_readlane_b32 s34, [[VGPR_REG]], 3
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
Expand All @@ -193,17 +193,17 @@ define i32 @needs_align1024_stack_args_used_inside_loop(%struct.Data addrspace(5

; GCN-LABEL: needs_align1024_stack_args_used_inside_loop:
; GCN: s_mov_b32 [[FP_COPY:s[0-9]+]], s33
; GCN-NEXT: s_add_u32 s33, s32, 0xffc0
; GCN-NEXT: s_add_i32 s33, s32, 0xffc0
; GCN-NEXT: s_mov_b32 [[BP_COPY:s[0-9]+]], s34
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: s_and_b32 s33, s33, 0xffff0000
; GCN-NEXT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GCN-NEXT: v_lshrrev_b32_e64 [[VGPR_REG:v[0-9]+]], 6, s34
; GCN: s_add_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0x30000
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s33 offset:1024
; GCN: buffer_load_dword v{{[0-9]+}}, [[VGPR_REG]], s[0:3], 0 offen
; GCN: v_add_u32_e32 [[VGPR_REG]], vcc, 4, [[VGPR_REG]]
; GCN: s_sub_u32 s32, s32, 0x30000
; GCN: s_add_i32 s32, s32, 0xfffd0000
; GCN-NEXT: s_mov_b32 s33, [[FP_COPY]]
; GCN-NEXT: s_mov_b32 s34, [[BP_COPY]]
; GCN-NEXT: s_setpc_b64 s[30:31]
Expand Down Expand Up @@ -290,16 +290,16 @@ define void @spill_bp_to_memory_scratch_reg_needed_mubuf_offset(<32 x i32> %a, i

; GCN-LABEL: spill_bp_to_memory_scratch_reg_needed_mubuf_offset
; GCN: s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT: s_add_u32 s6, s32, 0x42100
; GCN-NEXT: s_add_i32 s6, s32, 0x42100
; GCN-NEXT: buffer_store_dword v39, off, s[0:3], s6 ; 4-byte Folded Spill
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_mov_b32_e32 v0, s33
; GCN-NOT: v_mov_b32_e32 v0, 0x1088
; GCN-NEXT: s_add_u32 s6, s32, 0x42200
; GCN-NEXT: s_add_i32 s6, s32, 0x42200
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
; GCN-NEXT: v_mov_b32_e32 v0, s34
; GCN-NOT: v_mov_b32_e32 v0, 0x108c
; GCN-NEXT: s_add_u32 s6, s32, 0x42300
; GCN-NEXT: s_add_i32 s6, s32, 0x42300
; GCN-NEXT: s_mov_b32 s34, s32
; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s6 ; 4-byte Folded Spill
%local_val = alloca i32, align 128, addrspace(5)
Expand Down
6 changes: 3 additions & 3 deletions llvm/test/CodeGen/AMDGPU/unstructured-cfg-def-use-issue.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ define hidden void @widget() {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0x400
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: v_mov_b32_e32 v1, 0
; GCN-NEXT: flat_load_dword v0, v[0:1]
Expand Down Expand Up @@ -53,7 +53,7 @@ define hidden void @widget() {
; GCN-NEXT: BB0_7: ; %UnifiedReturnBlock
; GCN-NEXT: v_readlane_b32 s4, v40, 0
; GCN-NEXT: v_readlane_b32 s5, v40, 1
; GCN-NEXT: s_sub_u32 s32, s32, 0x400
; GCN-NEXT: s_addk_i32 s32, 0xfc00
; GCN-NEXT: v_readlane_b32 s33, v40, 2
; GCN-NEXT: s_or_saveexec_b64 s[6:7], -1
; GCN-NEXT: buffer_load_dword v40, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down Expand Up @@ -191,7 +191,7 @@ define hidden void @blam() {
; GCN-NEXT: s_mov_b64 exec, s[4:5]
; GCN-NEXT: v_writelane_b32 v43, s33, 4
; GCN-NEXT: s_mov_b32 s33, s32
; GCN-NEXT: s_add_u32 s32, s32, 0x800
; GCN-NEXT: s_addk_i32 s32, 0x800
; GCN-NEXT: buffer_store_dword v40, off, s[0:3], s33 offset:8 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v41, off, s[0:3], s33 offset:4 ; 4-byte Folded Spill
; GCN-NEXT: buffer_store_dword v42, off, s[0:3], s33 ; 4-byte Folded Spill
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AMDGPU/wave32.ll
Original file line number Diff line number Diff line change
Expand Up @@ -1129,8 +1129,8 @@ declare void @external_void_func_void() #1

; GCN-NEXT: v_writelane_b32 v40, s33, 2
; GCN: s_mov_b32 s33, s32
; GFX1064: s_add_u32 s32, s32, 0x400
; GFX1032: s_add_u32 s32, s32, 0x200
; GFX1064: s_addk_i32 s32, 0x400
; GFX1032: s_addk_i32 s32, 0x200


; GCN-DAG: v_writelane_b32 v40, s30, 0
Expand All @@ -1140,8 +1140,8 @@ declare void @external_void_func_void() #1
; GCN-DAG: v_readlane_b32 s5, v40, 1


; GFX1064: s_sub_u32 s32, s32, 0x400
; GFX1032: s_sub_u32 s32, s32, 0x200
; GFX1064: s_addk_i32 s32, 0xfc00
; GFX1032: s_addk_i32 s32, 0xfe00
; GCN: v_readlane_b32 s33, v40, 2
; GFX1064: s_or_saveexec_b64 [[COPY_EXEC1:s\[[0-9]+:[0-9]+\]]], -1{{$}}
; GFX1032: s_or_saveexec_b32 [[COPY_EXEC1:s[0-9]]], -1{{$}}
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AMDGPU/wwm-reserved-spill.ll
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_writelane_b32 v3, s33, 7
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-O0-NEXT: s_addk_i32 s32, 0x400
; GFX9-O0-NEXT: v_writelane_b32 v3, s30, 0
; GFX9-O0-NEXT: v_writelane_b32 v3, s31, 1
; GFX9-O0-NEXT: v_writelane_b32 v3, s8, 2
Expand Down Expand Up @@ -395,7 +395,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O0-NEXT: buffer_store_dword v0, off, s[4:7], s8 offset:4
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-O0-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-O0-NEXT: v_readlane_b32 s33, v3, 7
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O0-NEXT: buffer_load_dword v3, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
Expand All @@ -414,7 +414,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O3-NEXT: s_mov_b32 s14, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x400
; GFX9-O3-NEXT: s_addk_i32 s32, 0x400
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX9-O3-NEXT: v_mov_b32_e32 v2, s8
; GFX9-O3-NEXT: s_not_b64 exec, exec
Expand All @@ -431,7 +431,7 @@ define amdgpu_gfx void @strict_wwm_call(<4 x i32> inreg %tmp14, i32 inreg %arg)
; GFX9-O3-NEXT: s_mov_b64 exec, s[8:9]
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v1
; GFX9-O3-NEXT: buffer_store_dword v0, off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x400
; GFX9-O3-NEXT: s_addk_i32 s32, 0xfc00
; GFX9-O3-NEXT: s_mov_b32 s33, s14
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O3-NEXT: buffer_load_dword v2, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down Expand Up @@ -555,7 +555,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O0-NEXT: v_writelane_b32 v11, s33, 9
; GFX9-O0-NEXT: s_mov_b32 s33, s32
; GFX9-O0-NEXT: s_add_u32 s32, s32, 0xc00
; GFX9-O0-NEXT: s_addk_i32 s32, 0xc00
; GFX9-O0-NEXT: v_writelane_b32 v11, s30, 0
; GFX9-O0-NEXT: v_writelane_b32 v11, s31, 1
; GFX9-O0-NEXT: v_writelane_b32 v11, s9, 2
Expand Down Expand Up @@ -621,7 +621,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O0-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O0-NEXT: s_mov_b32 s8, 0
; GFX9-O0-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], s8 offset:4
; GFX9-O0-NEXT: s_sub_u32 s32, s32, 0xc00
; GFX9-O0-NEXT: s_addk_i32 s32, 0xf400
; GFX9-O0-NEXT: v_readlane_b32 s33, v11, 9
; GFX9-O0-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O0-NEXT: buffer_load_dword v11, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
Expand Down Expand Up @@ -663,7 +663,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O3-NEXT: s_mov_b64 exec, s[10:11]
; GFX9-O3-NEXT: s_mov_b32 s14, s33
; GFX9-O3-NEXT: s_mov_b32 s33, s32
; GFX9-O3-NEXT: s_add_u32 s32, s32, 0x800
; GFX9-O3-NEXT: s_addk_i32 s32, 0x800
; GFX9-O3-NEXT: s_mov_b64 s[10:11], s[30:31]
; GFX9-O3-NEXT: v_mov_b32_e32 v6, s8
; GFX9-O3-NEXT: v_mov_b32_e32 v7, s9
Expand All @@ -688,7 +688,7 @@ define amdgpu_gfx void @strict_wwm_call_i64(<4 x i32> inreg %tmp14, i64 inreg %a
; GFX9-O3-NEXT: v_mov_b32_e32 v0, v2
; GFX9-O3-NEXT: v_mov_b32_e32 v1, v3
; GFX9-O3-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 offset:4
; GFX9-O3-NEXT: s_sub_u32 s32, s32, 0x800
; GFX9-O3-NEXT: s_addk_i32 s32, 0xf800
; GFX9-O3-NEXT: s_mov_b32 s33, s14
; GFX9-O3-NEXT: s_or_saveexec_b64 s[4:5], -1
; GFX9-O3-NEXT: buffer_load_dword v6, off, s[0:3], s32 ; 4-byte Folded Reload
Expand Down