Skip to content

Commit 6e8105c

Browse files
committed
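In emitEpilogue, change the register argument passed to emitCSRSpillRestores in the FPSaved case so that chain functions are handled the same way as in the non-FPSaved case, i.e. with an offset rather than the frame pointer register. Remove the prior hardcoding of s33 and do not restore FP for chain functions.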
1 parent 76189d6 commit 6e8105c

File tree

3 files changed

+11
-6
lines changed

3 files changed

+11
-6
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1408,11 +1408,12 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
14081408
LiveUnits.addReg(FramePtrRegScratchCopy);
14091409
}
14101410

1411-
emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
1411+
emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits,
1412+
FuncInfo->isChainFunction() ? Register() : FramePtrReg,
14121413
FramePtrRegScratchCopy);
14131414
}
14141415

1415-
if (FPSaved) {
1416+
if (FPSaved && !FuncInfo->isChainFunction()) {
14161417
// Insert the copy to restore FP.
14171418
Register SrcReg = SGPRForFPSaveRestoreCopy ? SGPRForFPSaveRestoreCopy
14181419
: FramePtrRegScratchCopy;

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,6 @@ SIMachineFunctionInfo::SIMachineFunctionInfo(const Function &F,
9898
// set one up. For now, we can use s32 to match what amdgpu_gfx functions
9999
// would use if called, but this can be revisited.
100100
// FIXME: Only reserve this if we actually need it.
101-
FrameOffsetReg = AMDGPU::SGPR33;
102101
StackPtrOffsetReg = AMDGPU::SGPR32;
103102

104103
ScratchRSrcReg = AMDGPU::SGPR48_SGPR49_SGPR50_SGPR51;

llvm/test/CodeGen/AMDGPU/amdgpu-cs-chain-frame-pointer.ll

Lines changed: 8 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -50,10 +50,15 @@ define amdgpu_cs_chain void @indirect(ptr %callee) {
5050
; CHECK-NEXT: s_mov_b64 s[4:5], 0
5151
; CHECK-NEXT: v_readlane_b32 s3, v41, 0
5252
; CHECK-NEXT: s_xor_saveexec_b64 s[6:7], -1
53-
; CHECK-NEXT: scratch_load_dword v40, off, s33 ; 4-byte Folded Reload
54-
; CHECK-NEXT: scratch_load_dword v41, off, s33 offset:4 ; 4-byte Folded Reload
53+
; CHECK-NEXT: scratch_load_dword v40, off, off ; 4-byte Folded Reload
54+
; CHECK-NEXT: scratch_load_dword v41, off, off offset:4 ; 4-byte Folded Reload
5555
; CHECK-NEXT: s_mov_b64 exec, s[6:7]
56-
; CHECK-NEXT: s_mov_b32 s33, s3
56+
; CHECK-NEXT: s_waitcnt vmcnt(0)
57+
; CHECK-NEXT: v_readlane_b32 s3, v41, 0
58+
; CHECK-NEXT: s_xor_saveexec_b64 s[8:9], -1
59+
; CHECK-NEXT: scratch_load_dword v40, off, off ; 4-byte Folded Reload
60+
; CHECK-NEXT: scratch_load_dword v41, off, off offset:4 ; 4-byte Folded Reload
61+
; CHECK-NEXT: s_mov_b64 exec, s[8:9]
5762
; CHECK-NEXT: s_mov_b64 exec, 0
5863
; CHECK-NEXT: s_setpc_b64 s[4:5]
5964
call void @indirect(ptr null)

0 commit comments

Comments
 (0)