Skip to content

Commit

Permalink
AMDGPU: Don't fix emergency stack slot at offset 0
Browse files Browse the repository at this point in the history
Fixing the emergency stack slot at offset 0 forced the caller to be aware
of the reserved slot, which is an ugly ABI feature.

Partially reverts r295877. The original reasons for doing this are
mostly fixed. Alloca is now in a non-0 address space, so it should be
OK to have 0 as a valid pointer. Since we treat the absolute address
as the pointer value, this part only really needed to apply to
kernels.

Since r357093, we avoid the need to increment/decrement the offset
register in more cases, and since r354816 the scavenger can fail
without spilling, so it's less critical that we try to avoid an offset
that fits in the MUBUF offset.

Restrict to callable functions for now to split this into 2 steps to
limit the number of test updates and in case anything breaks.

llvm-svn: 362665
  • Loading branch information
arsenm committed Jun 5, 2019
1 parent c72fbe5 commit 34c8b83
Show file tree
Hide file tree
Showing 19 changed files with 496 additions and 442 deletions.
27 changes: 11 additions & 16 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,22 +773,17 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
!AllSGPRSpilledToVGPRs || !allStackObjectsAreDead(MFI)) {
assert(RS && "RegScavenger required if spilling");

// We force this to be at offset 0 so no user object ever has 0 as an
// address, so we may use 0 as an invalid pointer value. This is because
// LLVM assumes 0 is an invalid pointer in address space 0. Because alloca
// is required to be address space 0, we are forced to accept this for
// now. Ideally we could have the stack in another address space with 0 as a
// valid pointer, and -1 as the null value.
//
// This will also waste additional space when user stack objects require > 4
// byte alignment.
//
// The main cost here is losing the offset for addressing modes. However
// this also ensures we shouldn't need a register for the offset when
// emergency scavenging.
int ScavengeFI = MFI.CreateFixedObject(
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
if (FuncInfo->isEntryFunction()) {
int ScavengeFI = MFI.CreateFixedObject(
TRI.getSpillSize(AMDGPU::SGPR_32RegClass), 0, false);
RS->addScavengingFrameIndex(ScavengeFI);
} else {
int ScavengeFI = MFI.CreateStackObject(
TRI.getSpillSize(AMDGPU::SGPR_32RegClass),
TRI.getSpillAlignment(AMDGPU::SGPR_32RegClass),
false);
RS->addScavengingFrameIndex(ScavengeFI);
}
}
}

Expand Down
10 changes: 0 additions & 10 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1940,12 +1940,6 @@ SDValue SITargetLowering::LowerFormalArguments(
bool IsKernel = AMDGPU::isKernel(CallConv);
bool IsEntryFunc = AMDGPU::isEntryFunctionCC(CallConv);

if (!IsEntryFunc) {
// 4 bytes are reserved at offset 0 for the emergency stack slot. Skip over
// this when allocating argument fixed offsets.
CCInfo.AllocateStack(4, 4);
}

if (IsShader) {
processShaderInputArgs(Splits, CallConv, Ins, Skipped, FType, Info);

Expand Down Expand Up @@ -2551,7 +2545,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
"unsupported call from graphics shader of function ");
}

// The first 4 bytes are reserved for the callee's emergency stack slot.
if (IsTailCall) {
IsTailCall = isEligibleForTailCallOptimization(
Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG);
Expand All @@ -2578,9 +2571,6 @@ SDValue SITargetLowering::LowerCall(CallLoweringInfo &CLI,
CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, IsVarArg);

// The first 4 bytes are reserved for the callee's emergency stack slot.
CCInfo.AllocateStack(4, 4);

CCInfo.AnalyzeCallOperands(Outs, AssignFn);

// Get a count of how many bytes are to be pushed on the stack.
Expand Down
132 changes: 66 additions & 66 deletions llvm/test/CodeGen/AMDGPU/byval-frame-setup.ll
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@
%struct.ByValStruct = type { [4 x i32] }

; GCN-LABEL: {{^}}void_func_byval_struct:
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:4{{$}}
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:20{{$}}
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:20{{$}}
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
; GCN-NOT: s32
define hidden void @void_func_byval_struct(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 4 %arg1) #1 {
entry:
Expand All @@ -34,16 +34,16 @@ entry:
; GCN-DAG: buffer_store_dword v33
; GCN-NOT: v_writelane_b32 v{{[0-9]+}}, s32
; GCN-DAG: v_writelane_b32
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD0:v[0-9]+]], vcc, 1, [[LOAD0]]
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[ADD0]], off, s[0:3], s5{{$}}

; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:20{{$}}
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:16{{$}}
; GCN-DAG: v_add_{{[iu]}}32_e32 [[ADD1:v[0-9]+]], vcc, 2, [[LOAD1]]

; GCN: s_swappc_b64

; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:20{{$}}
; GCN: buffer_store_dword [[ADD1]], off, s[0:3], s5 offset:16{{$}}

; GCN: v_readlane_b32
; GCN-NOT: v_readlane_b32 s32
Expand Down Expand Up @@ -74,31 +74,31 @@ entry:
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5{{$}}
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:16

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12

; GCN-NOT: s_add_u32 s32, s32, 0x800


; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36
; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:20
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28

; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
Expand Down Expand Up @@ -144,20 +144,20 @@ entry:
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:4{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:32
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28


; GCN: s_swappc_b64
Expand All @@ -182,14 +182,14 @@ entry:
}

; GCN-LABEL: {{^}}void_func_byval_struct_align8:
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32 offset:8{{$}}
; GCN: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s32{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-NOT: s32

; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:24{{$}}
; GCN: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s32 offset:16{{$}}
; GCN-NOT: s32
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:24{{$}}
; GCN: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:16{{$}}
; GCN-NOT: s32
define hidden void @void_func_byval_struct_align8(%struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg0, %struct.ByValStruct addrspace(5)* byval noalias nocapture align 8 %arg1) #1 {
entry:
Expand Down Expand Up @@ -222,20 +222,20 @@ entry:
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s33 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s33 offset:20

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN-DAG: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s33 offset:24
; GCN-DAG: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s33 offset:28
; GCN-DAG: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s33 offset:32
; GCN-DAG: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s33 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28


; GCN: s_swappc_b64
Expand Down Expand Up @@ -267,30 +267,30 @@ entry:
; GCN-DAG: v_mov_b32_e32 [[NINE:v[0-9]+]], 9
; GCN-DAG: v_mov_b32_e32 [[THIRTEEN:v[0-9]+]], 13

; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:24
; GCN-DAG: buffer_store_dword [[NINE]], off, s[0:3], s5{{$}}
; GCN-DAG: buffer_store_dword [[THIRTEEN]], off, s[0:3], s5 offset:16

; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:12
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:20
; GCN-DAG: buffer_load_dword [[LOAD0:v[0-9]+]], off, s[0:3], s5{{$}}
; GCN-DAG: buffer_load_dword [[LOAD1:v[0-9]+]], off, s[0:3], s5 offset:4
; GCN-DAG: buffer_load_dword [[LOAD2:v[0-9]+]], off, s[0:3], s5 offset:8
; GCN-DAG: buffer_load_dword [[LOAD3:v[0-9]+]], off, s[0:3], s5 offset:12

; GCN-NOT: s_add_u32 s32, s32, 0x800

; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32 offset:8{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:12
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:20

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:32
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:36

; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:28
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:32
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:36
; GCN-DAG: buffer_store_dword [[LOAD0]], off, s[0:3], s32{{$}}
; GCN-DAG: buffer_store_dword [[LOAD1]], off, s[0:3], s32 offset:4
; GCN-DAG: buffer_store_dword [[LOAD2]], off, s[0:3], s32 offset:8
; GCN-DAG: buffer_store_dword [[LOAD3]], off, s[0:3], s32 offset:12

; GCN: buffer_load_dword [[LOAD4:v[0-9]+]], off, s[0:3], s5 offset:16
; GCN: buffer_load_dword [[LOAD5:v[0-9]+]], off, s[0:3], s5 offset:20
; GCN: buffer_load_dword [[LOAD6:v[0-9]+]], off, s[0:3], s5 offset:24
; GCN: buffer_load_dword [[LOAD7:v[0-9]+]], off, s[0:3], s5 offset:28
; GCN: s_waitcnt vmcnt(0)
; GCN-DAG: buffer_store_dword [[LOAD4]], off, s[0:3], s32 offset:16
; GCN-DAG: buffer_store_dword [[LOAD5]], off, s[0:3], s32 offset:20
; GCN-DAG: buffer_store_dword [[LOAD6]], off, s[0:3], s32 offset:24
; GCN-DAG: buffer_store_dword [[LOAD7]], off, s[0:3], s32 offset:28

; GCN: s_swappc_b64
; GCN-NOT: v_readlane_b32 s32
Expand Down

0 comments on commit 34c8b83

Please sign in to comment.