Skip to content

Commit 8d1dbf2

Browse files
epilk, slinder1, RamNalamothu
committed
[AMDGPU] Implement -amdgpu-spill-cfi-saved-regs
These spills need special CFI anyway, so implementing them directly where CFI is emitted avoids the need to invent a mechanism to track them from ISel. Co-authored-by: Scott Linder <scott.linder@amd.com> Co-authored-by: Venkata Ramanaiah Nalamothu <VenkataRamanaiah.Nalamothu@amd.com>
1 parent 5b29c7f commit 8d1dbf2

File tree

8 files changed

+2623
-14
lines changed

8 files changed

+2623
-14
lines changed

llvm/lib/Target/AMDGPU/SIFrameLowering.cpp

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -663,12 +663,21 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
663663
}
664664

665665
// Note SGPRSpill stack IDs should only be used for SGPR spilling to VGPRs, not
666-
// memory. They should have been removed by now.
667-
static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
666+
// memory. They should have been removed by now, except CFI Saved Reg spills.
667+
static bool allStackObjectsAreDead(const MachineFunction &MF) {
668+
const MachineFrameInfo &MFI = MF.getFrameInfo();
669+
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
668670
for (int I = MFI.getObjectIndexBegin(), E = MFI.getObjectIndexEnd();
669671
I != E; ++I) {
670-
if (!MFI.isDeadObjectIndex(I))
672+
if (!MFI.isDeadObjectIndex(I)) {
673+
// determineCalleeSaves() might have added the SGPRSpill stack IDs for
674+
// CFI saves into scratch VGPR; ignore them.
675+
if (MFI.getStackID(I) == TargetStackID::SGPRSpill &&
676+
FuncInfo->checkIndexInPrologEpilogSGPRSpills(I)) {
677+
continue;
678+
}
671679
return false;
680+
}
672681
}
673682

674683
return true;
@@ -688,8 +697,8 @@ Register SIFrameLowering::getEntryFunctionReservedScratchRsrcReg(
688697

689698
Register ScratchRsrcReg = MFI->getScratchRSrcReg();
690699

691-
if (!ScratchRsrcReg || (!MRI.isPhysRegUsed(ScratchRsrcReg) &&
692-
allStackObjectsAreDead(MF.getFrameInfo())))
700+
if (!ScratchRsrcReg ||
701+
(!MRI.isPhysRegUsed(ScratchRsrcReg) && allStackObjectsAreDead(MF)))
693702
return Register();
694703

695704
if (ST.hasSGPRInitBug() ||
@@ -916,7 +925,7 @@ void SIFrameLowering::emitEntryFunctionPrologue(MachineFunction &MF,
916925
bool NeedsFlatScratchInit =
917926
MFI->getUserSGPRInfo().hasFlatScratchInit() &&
918927
(MRI.isPhysRegUsed(AMDGPU::FLAT_SCR) || FrameInfo.hasCalls() ||
919-
(!allStackObjectsAreDead(FrameInfo) && ST.enableFlatScratch()));
928+
(!allStackObjectsAreDead(MF) && ST.enableFlatScratch()));
920929

921930
if ((NeedsFlatScratchInit || ScratchRsrcReg) &&
922931
PreloadedScratchWaveOffsetReg && !ST.flatScratchIsArchitected()) {
@@ -1309,6 +1318,11 @@ void SIFrameLowering::emitCSRSpillStores(MachineFunction &MF,
13091318
LiveUnits.addReg(Reg);
13101319
}
13111320
}
1321+
1322+
// Remove the spill entry created for EXEC. It is needed only for CFISaves in
1323+
// the prologue.
1324+
if (TRI.isCFISavedRegsSpillEnabled())
1325+
FuncInfo->removePrologEpilogSGPRSpillEntry(TRI.getExec());
13121326
}
13131327

13141328
void SIFrameLowering::emitCSRSpillRestores(
@@ -1790,14 +1804,14 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
17901804
// can. Any remaining SGPR spills will go to memory, so move them back to the
17911805
// default stack.
17921806
bool HaveSGPRToVMemSpill =
1793-
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ true);
1807+
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ true);
17941808
assert(allSGPRSpillsAreDead(MF) &&
17951809
"SGPR spill should have been removed in SILowerSGPRSpills");
17961810

17971811
// FIXME: The other checks should be redundant with allStackObjectsAreDead,
17981812
// but currently hasNonSpillStackObjects is set only from source
17991813
// allocas. Stack temps produced from legalization are not counted currently.
1800-
if (!allStackObjectsAreDead(MFI)) {
1814+
if (!allStackObjectsAreDead(MF)) {
18011815
assert(RS && "RegScavenger required if spilling");
18021816

18031817
// Add an emergency spill slot
@@ -1897,6 +1911,18 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
18971911
MFI->setSGPRForEXECCopy(AMDGPU::NoRegister);
18981912
}
18991913

1914+
if (TRI->isCFISavedRegsSpillEnabled()) {
1915+
Register Exec = TRI->getExec();
1916+
assert(!MFI->hasPrologEpilogSGPRSpillEntry(Exec) &&
1917+
"Re-reserving spill slot for EXEC");
1918+
// FIXME: Machine Copy Propagation currently optimizes away the EXEC copy to
1919+
// the scratch as we emit it only in the prolog. This optimization should
1920+
// not happen for frame-related instructions. Until this is fixed, ignore
1921+
// copy to scratch SGPR.
1922+
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, Exec, RC,
1923+
/*IncludeScratchCopy=*/false);
1924+
}
1925+
19001926
// hasFP only knows about stack objects that already exist. We're now
19011927
// determining the stack slots that will be created, so we have to predict
19021928
// them. Stack objects force FP usage with calls.
@@ -1906,8 +1932,7 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
19061932
//
19071933
// FIXME: Is this really hasReservedCallFrame?
19081934
const bool WillHaveFP =
1909-
FrameInfo.hasCalls() &&
1910-
(SavedVGPRs.any() || !allStackObjectsAreDead(FrameInfo));
1935+
FrameInfo.hasCalls() && (SavedVGPRs.any() || !allStackObjectsAreDead(MF));
19111936

19121937
if (WillHaveFP || hasFP(MF)) {
19131938
Register FramePtrReg = MFI->getFrameOffsetReg();

llvm/lib/Target/AMDGPU/SIFrameLowering.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,13 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
114114
public:
115115
bool requiresStackPointerReference(const MachineFunction &MF) const;
116116

117+
/// If '-amdgpu-spill-cfi-saved-regs' is enabled, emit RA/EXEC spills to
118+
/// a free VGPR (lanes) or memory and corresponding CFI rules.
119+
void emitCFISavedRegSpills(MachineFunction &MF, MachineBasicBlock &MBB,
120+
MachineBasicBlock::iterator MBBI,
121+
LiveRegUnits &LiveRegs,
122+
bool emitSpillsToMem) const;
123+
117124
/// Create a CFI index for CFIInst and build a MachineInstr around it.
118125
MachineInstr *
119126
buildCFI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,

llvm/lib/Target/AMDGPU/SILowerSGPRSpills.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -531,7 +531,7 @@ bool SILowerSGPRSpills::run(MachineFunction &MF) {
531531
// free frame index ids by the later pass(es) like "stack slot coloring"
532532
// which in turn could mess up the bookkeeping of "frame index to VGPR
533533
// lane".
534-
FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
534+
FuncInfo->removeDeadFrameIndices(MF, /*ResetSGPRSpillStackIDs*/ false);
535535

536536
MadeChange = true;
537537
}

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -566,7 +566,8 @@ bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
566566
}
567567

568568
bool SIMachineFunctionInfo::removeDeadFrameIndices(
569-
MachineFrameInfo &MFI, bool ResetSGPRSpillStackIDs) {
569+
MachineFunction &MF, bool ResetSGPRSpillStackIDs) {
570+
MachineFrameInfo &MFI = MF.getFrameInfo();
570571
// Remove dead frame indices from function frame, however keep FP & BP since
571572
// spills for them haven't been inserted yet. And also make sure to remove the
572573
// frame indices from `SGPRSpillsToVirtualVGPRLanes` data structure,

llvm/lib/Target/AMDGPU/SIMachineFunctionInfo.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,6 +757,16 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
757757
}) != PrologEpilogSGPRSpills.end();
758758
}
759759

760+
// Remove the entry created for \p Reg, if one exists.
761+
void removePrologEpilogSGPRSpillEntry(Register Reg) {
762+
auto I = find_if(PrologEpilogSGPRSpills,
763+
[&Reg](const auto &Spill) { return Spill.first == Reg; });
764+
if (I == PrologEpilogSGPRSpills.end())
765+
return;
766+
767+
PrologEpilogSGPRSpills.erase(I);
768+
}
769+
760770
const PrologEpilogSGPRSaveRestoreInfo &
761771
getPrologEpilogSGPRSaveRestoreInfo(Register Reg) const {
762772
const auto *I = find_if(PrologEpilogSGPRSpills, [&Reg](const auto &Spill) {
@@ -835,8 +845,7 @@ class SIMachineFunctionInfo final : public AMDGPUMachineFunction,
835845

836846
/// If \p ResetSGPRSpillStackIDs is true, reset the stack ID from sgpr-spill
837847
/// to the default stack.
838-
bool removeDeadFrameIndices(MachineFrameInfo &MFI,
839-
bool ResetSGPRSpillStackIDs);
848+
bool removeDeadFrameIndices(MachineFunction &MF, bool ResetSGPRSpillStackIDs);
840849

841850
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI);
842851
std::optional<int> getOptionalScavengeFI() const { return ScavengeFI; }

llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ static cl::opt<bool> EnableSpillSGPRToVGPR(
3535
cl::ReallyHidden,
3636
cl::init(true));
3737

38+
static cl::opt<bool> EnableSpillCFISavedRegs(
39+
"amdgpu-spill-cfi-saved-regs",
40+
cl::desc("Enable spilling the registers required for CFI emission"),
41+
cl::ReallyHidden, cl::init(false), cl::ZeroOrMore);
42+
3843
std::array<std::vector<int16_t>, 32> SIRegisterInfo::RegSplitParts;
3944
std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
4045

@@ -561,6 +566,10 @@ unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
561566
return SubRegFromChannelTable[NumRegIndex - 1][Channel];
562567
}
563568

569+
bool SIRegisterInfo::isCFISavedRegsSpillEnabled() const {
570+
return EnableSpillCFISavedRegs;
571+
}
572+
564573
MCRegister
565574
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
566575
const unsigned Align,

llvm/lib/Target/AMDGPU/SIRegisterInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
8080
return SpillSGPRToVGPR;
8181
}
8282

83+
bool isCFISavedRegsSpillEnabled() const;
84+
8385
/// Return the largest available SGPR aligned to \p Align for the register
8486
/// class \p RC.
8587
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF,

0 commit comments

Comments
 (0)