Skip to content

Commit

Permalink
[AMDGPU] Move frame pointer from s34 to s33
Browse files Browse the repository at this point in the history
Remove the gap left between the stack pointer (s32) and frame pointer
(s34) now that the scratch wave offset is no longer a part of the
calling convention ABI.

Update llvm/docs/AMDGPUUsage.rst to reflect the change.

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D75657
  • Loading branch information
slinder1 committed Mar 19, 2020
1 parent 60b1967 commit 0e9368c
Show file tree
Hide file tree
Showing 20 changed files with 188 additions and 188 deletions.
4 changes: 2 additions & 2 deletions llvm/docs/AMDGPUUsage.rst
Expand Up @@ -6067,7 +6067,7 @@ Frame Pointer
+++++++++++++

If the kernel needs a frame pointer for the reasons defined in
``SIFrameLowering`` then SGPR34 is used and is always set to ``0`` in the
``SIFrameLowering`` then SGPR33 is used and is always set to ``0`` in the
kernel prolog. If a frame pointer is not required then all uses of the frame
pointer are replaced with immediate ``0`` offsets.

Expand Down Expand Up @@ -8897,7 +8897,7 @@ registers and some in memory.
The following is not part of the AMDGPU function calling convention but
describes how the AMDGPU implements function calls:

1. SGPR34 is used as a frame pointer (FP) if necessary. Like the SP it is an
1. SGPR33 is used as a frame pointer (FP) if necessary. Like the SP it is an
unswizzled scratch address. It is only needed if runtime-sized ``alloca``
instructions are used, or for the reasons defined in ``SIFrameLowering``.
2. Runtime stack alignment is not currently supported.
Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Expand Up @@ -1950,7 +1950,7 @@ static void reservePrivateMemoryRegs(const TargetMachine &TM,
// finalized, because it does not rely on the known stack size, only
// properties like whether variable sized objects are present.
if (ST.getFrameLowering()->hasFP(MF)) {
Info.setFrameOffsetReg(AMDGPU::SGPR34);
Info.setFrameOffsetReg(AMDGPU::SGPR33);
}
}

Expand Down
2 changes: 1 addition & 1 deletion llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp
Expand Up @@ -81,7 +81,7 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const MachineFunction &MF)
ScratchRSrcReg = AMDGPU::SGPR0_SGPR1_SGPR2_SGPR3;

// TODO: Pick a high register, and shift down, similar to a kernel.
FrameOffsetReg = AMDGPU::SGPR34;
FrameOffsetReg = AMDGPU::SGPR33;
StackPtrOffsetReg = AMDGPU::SGPR32;

ArgInfo.PrivateSegmentBuffer =
Expand Down
18 changes: 9 additions & 9 deletions llvm/test/CodeGen/AMDGPU/call-graph-register-usage.ll
Expand Up @@ -13,14 +13,14 @@ define void @use_vcc() #1 {
}

; GCN-LABEL: {{^}}indirect_use_vcc:
; GCN: v_writelane_b32 v32, s34, 2
; GCN: v_writelane_b32 v32, s33, 2
; GCN: v_writelane_b32 v32, s30, 0
; GCN: v_writelane_b32 v32, s31, 1
; GCN: s_swappc_b64
; GCN: v_readlane_b32 s4, v32, 0
; GCN: v_readlane_b32 s5, v32, 1
; GCN: v_readlane_b32 s34, v32, 2
; GCN: ; NumSgprs: 37
; GCN: v_readlane_b32 s33, v32, 2
; GCN: ; NumSgprs: 36
; GCN: ; NumVgprs: 33
define void @indirect_use_vcc() #1 {
call void @use_vcc()
Expand All @@ -29,8 +29,8 @@ define void @indirect_use_vcc() #1 {

; GCN-LABEL: {{^}}indirect_2level_use_vcc_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 39
; VI-NOBUG: ; NumSgprs: 41
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_vcc_kernel(i32 addrspace(1)* %out) #0 {
Expand All @@ -48,8 +48,8 @@ define void @use_flat_scratch() #1 {
}

; GCN-LABEL: {{^}}indirect_use_flat_scratch:
; CI: ; NumSgprs: 39
; VI: ; NumSgprs: 41
; CI: ; NumSgprs: 38
; VI: ; NumSgprs: 40
; GCN: ; NumVgprs: 33
define void @indirect_use_flat_scratch() #1 {
call void @use_flat_scratch()
Expand All @@ -58,8 +58,8 @@ define void @indirect_use_flat_scratch() #1 {

; GCN-LABEL: {{^}}indirect_2level_use_flat_scratch_kernel:
; GCN: is_dynamic_callstack = 0
; CI: ; NumSgprs: 39
; VI-NOBUG: ; NumSgprs: 41
; CI: ; NumSgprs: 38
; VI-NOBUG: ; NumSgprs: 40
; VI-BUG: ; NumSgprs: 96
; GCN: ; NumVgprs: 33
define amdgpu_kernel void @indirect_2level_use_flat_scratch_kernel(i32 addrspace(1)* %out) #0 {
Expand Down
22 changes: 11 additions & 11 deletions llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
Expand Up @@ -23,9 +23,9 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_

; GCN-LABEL: {{^}}test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: buffer_store_dword
; GCN: v_writelane_b32 v32, s34, 4
; GCN: v_writelane_b32 v32, s36, 0
; GCN: v_writelane_b32 v32, s37, 1
; GCN: v_writelane_b32 v32, s33, 4
; GCN: v_writelane_b32 v32, s34, 0
; GCN: v_writelane_b32 v32, s35, 1
; GCN: v_writelane_b32 v32, s30, 2
; GCN: v_writelane_b32 v32, s31, 3

Expand All @@ -35,10 +35,10 @@ define amdgpu_kernel void @test_kernel_call_external_void_func_void_clobber_s30_
; GCN-NEXT: s_swappc_b64
; GCN-DAG: v_readlane_b32 s4, v32, 2
; GCN-DAG: v_readlane_b32 s5, v32, 3
; GCN: v_readlane_b32 s37, v32, 1
; GCN: v_readlane_b32 s36, v32, 0
; GCN: v_readlane_b32 s35, v32, 1
; GCN: v_readlane_b32 s34, v32, 0

; GCN: v_readlane_b32 s34, v32, 4
; GCN: v_readlane_b32 s33, v32, 4
; GCN: buffer_load_dword
; GCN: s_setpc_b64
define void @test_func_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void() #0 {
Expand All @@ -50,14 +50,14 @@ define void @test_func_call_external_void_func_void_clobber_s30_s31_call_externa

; GCN-LABEL: {{^}}test_func_call_external_void_funcx2:
; GCN: buffer_store_dword v32
; GCN: v_writelane_b32 v32, s34, 4
; GCN: v_writelane_b32 v32, s33, 4

; GCN: s_mov_b32 s34, s32
; GCN: s_mov_b32 s33, s32
; GCN: s_add_u32 s32, s32, 0x400
; GCN: s_swappc_b64
; GCN-NEXT: s_swappc_b64

; GCN: v_readlane_b32 s34, v32, 4
; GCN: v_readlane_b32 s33, v32, 4
; GCN: buffer_load_dword v32,
define void @test_func_call_external_void_funcx2() #0 {
call void @external_void_func_void()
Expand Down Expand Up @@ -125,6 +125,8 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
ret void
}

; FIXME: What is the expected behavior for reserved registers here?

; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, external_void_func_void@rel32@lo+4
Expand All @@ -146,8 +148,6 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(
ret void
}

; FIXME: What is the expected behavior for reserved registers here?

; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s34: {{.*}}
; GCN-NOT: s34

Expand Down

0 comments on commit 0e9368c

Please sign in to comment.