Revert "[AMDGPU][SILowerSGPRSpills] Spill SGPRs to virtual VGPRs"
This reverts commit 40ba094.
cdevadas committed Dec 21, 2022
1 parent e73b356 commit a302823
Showing 82 changed files with 3,235 additions and 5,175 deletions.
3 changes: 0 additions & 3 deletions llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp
@@ -1450,9 +1450,6 @@ bool GCNTargetMachine::parseMachineFunctionInfo(
if (parseOptionalRegister(YamlMFI.VGPRForAGPRCopy, MFI->VGPRForAGPRCopy))
return true;

if (parseOptionalRegister(YamlMFI.SGPRForEXECCopy, MFI->SGPRForEXECCopy))
return true;

auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
// Create a diagnostic for a the register string literal.
const MemoryBuffer &Buffer =
7 changes: 0 additions & 7 deletions llvm/lib/Target/AMDGPU/SIDefines.h
@@ -909,13 +909,6 @@ enum Offset_COV5 : unsigned {
};

} // namespace ImplicitArg

namespace VirtRegFlag {
// Virtual Register Flags.
enum Register_Flag : uint8_t { WWM_REG = 0 };

} // namespace VirtRegFlag

} // namespace AMDGPU

#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
45 changes: 7 additions & 38 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
@@ -66,8 +66,7 @@ static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,

static void getVGPRSpillLaneOrTempRegister(
MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true) {
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
MachineFrameInfo &FrameInfo = MF.getFrameInfo();

@@ -78,12 +77,9 @@ static void getVGPRSpillLaneOrTempRegister(

// We need to save and restore the given SGPR.

Register ScratchSGPR;
// 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
// should have all the callee saved registers marked as used. For certain
// cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
// should have all the callee saved registers marked as used.
Register ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);

if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
@@ -1354,8 +1350,8 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
TII->getNamedOperand(MI, AMDGPU::OpName::vdata)->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI,
TRI->isAGPR(MRI, VReg))) {
RS->enterBasicBlockEnd(MBB);
RS->backward(MI);
// FIXME: change to enterBasicBlockEnd()
RS->enterBasicBlock(MBB);
TRI->eliminateFrameIndex(MI, 0, FIOp, RS);
SpillFIs.set(FI);
continue;
@@ -1452,10 +1448,8 @@ void SIFrameLowering::processFunctionBeforeFrameIndicesReplaced(
// The special SGPR spills like the one needed for FP, BP or any reserved
// registers delayed until frame lowering.
void SIFrameLowering::determinePrologEpilogSGPRSaves(
MachineFunction &MF, BitVector &SavedVGPRs,
bool NeedExecCopyReservedReg) const {
MachineFunction &MF, BitVector &SavedVGPRs) const {
MachineFrameInfo &FrameInfo = MF.getFrameInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
@@ -1467,27 +1461,6 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
for (unsigned I = 0; CSRegs[I]; ++I)
LiveRegs.addReg(CSRegs[I]);

if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy();
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
const TargetRegisterClass &RC = ST.isWave32()
? AMDGPU::SReg_32_XM0_XEXECRegClass
: AMDGPU::SGPR_64RegClass;
Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
if (UnusedScratchReg) {
// If found any unused scratch SGPR, reserve the register itself for Exec
// copy and there is no need for any spills in that case.
MFI->setSGPRForEXECCopy(UnusedScratchReg);
LiveRegs.addReg(UnusedScratchReg);
} else {
// Needs spill.
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
"Re-reserving spill slot for EXEC copy register");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
/* IncludeScratchCopy */ false);
}
}

// hasFP only knows about stack objects that already exist. We're now
// determining the stack slots that will be created, so we have to predict
// them. Stack objects force FP usage with calls.
@@ -1526,8 +1499,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,

const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
const SIInstrInfo *TII = ST.getInstrInfo();
bool NeedExecCopyReservedReg = false;

for (MachineBasicBlock &MBB : MF) {
for (MachineInstr &MI : MBB) {
@@ -1545,8 +1516,6 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
MFI->allocateWWMSpill(MF, MI.getOperand(0).getReg());
else if (MI.getOpcode() == AMDGPU::V_READLANE_B32)
MFI->allocateWWMSpill(MF, MI.getOperand(1).getReg());
else if (TII->isWWMRegSpillOpcode(MI.getOpcode()))
NeedExecCopyReservedReg = true;
}
}

@@ -1559,7 +1528,7 @@ void SIFrameLowering::determineCalleeSaves(MachineFunction &MF,
if (!ST.hasGFX90AInsts())
SavedVGPRs.clearBitsInMask(TRI->getAllAGPRRegMask());

determinePrologEpilogSGPRSaves(MF, SavedVGPRs, NeedExecCopyReservedReg);
determinePrologEpilogSGPRSaves(MF, SavedVGPRs);

// The Whole-Wave VGPRs need to be specially inserted in the prolog, so don't
// allow the default insertion to handle them.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.h
@@ -34,8 +34,8 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
RegScavenger *RS = nullptr) const override;
void determineCalleeSavesSGPR(MachineFunction &MF, BitVector &SavedRegs,
RegScavenger *RS = nullptr) const;
void determinePrologEpilogSGPRSaves(MachineFunction &MF, BitVector &SavedRegs,
bool NeedExecCopyReservedReg) const;
void determinePrologEpilogSGPRSaves(MachineFunction &MF,
BitVector &SavedRegs) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LivePhysRegs &LiveRegs, Register FrameReg,
8 changes: 0 additions & 8 deletions llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -12589,14 +12589,6 @@ void SITargetLowering::finalizeLowering(MachineFunction &MF) const {
}
}

// Reserve the SGPR(s) to save/restore EXEC for WWM spill/copy handling.
unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
Register SReg =
ST.isWave32()
? AMDGPU::SGPR_32RegClass.getRegister(MaxNumSGPRs - 1)
: AMDGPU::SGPR_64RegClass.getRegister((MaxNumSGPRs / 2) - 1);
Info->setSGPRForEXECCopy(SReg);

TargetLoweringBase::finalizeLowering(MF);
}

58 changes: 10 additions & 48 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1567,28 +1567,6 @@ static unsigned getAVSpillSaveOpcode(unsigned Size) {
}
}

static unsigned getWWMRegSpillSaveOpcode(unsigned Size) {
// Currently, there is only 32-bit WWM register spills needed.
if (Size != 4)
llvm_unreachable("unknown wwm register spill size");

return AMDGPU::SI_SPILL_WWM_V32_SAVE;
}

static unsigned getVectorRegSpillSaveOpcode(Register Reg,
const TargetRegisterClass *RC,
unsigned Size,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &MFI) {
// Choose the right opcode if spilling a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
return getWWMRegSpillSaveOpcode(Size);

return TRI.isVectorSuperClass(RC) ? getAVSpillSaveOpcode(Size)
: TRI.isAGPRClass(RC) ? getAGPRSpillSaveOpcode(Size)
: getVGPRSpillSaveOpcode(Size);
}

void SIInstrInfo::storeRegToStackSlot(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
bool isKill, int FrameIndex, const TargetRegisterClass *RC,
@@ -1633,8 +1611,11 @@ void SIInstrInfo::storeRegToStackSlot(
return;
}

unsigned Opcode = getVectorRegSpillSaveOpcode(VReg ? VReg : SrcReg, RC,
SpillSize, RI, *MFI);
unsigned Opcode = RI.isVectorSuperClass(RC)
? getAVSpillSaveOpcode(SpillSize)
: RI.isAGPRClass(RC)
? getAGPRSpillSaveOpcode(SpillSize)
: getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();

BuildMI(MBB, MI, DL, get(Opcode))
@@ -1785,27 +1766,6 @@ static unsigned getAVSpillRestoreOpcode(unsigned Size) {
}
}

static unsigned getWWMRegSpillRestoreOpcode(unsigned Size) {
// Currently, there is only 32-bit WWM register spills needed.
if (Size != 4)
llvm_unreachable("unknown wwm register spill size");

return AMDGPU::SI_SPILL_WWM_V32_RESTORE;
}

static unsigned
getVectorRegSpillRestoreOpcode(Register Reg, const TargetRegisterClass *RC,
unsigned Size, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &MFI) {
// Choose the right opcode if restoring a WWM register.
if (MFI.checkFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG))
return getWWMRegSpillRestoreOpcode(Size);

return TRI.isVectorSuperClass(RC) ? getAVSpillRestoreOpcode(Size)
: TRI.isAGPRClass(RC) ? getAGPRSpillRestoreOpcode(Size)
: getVGPRSpillRestoreOpcode(Size);
}

void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI,
Register DestReg, int FrameIndex,
@@ -1849,9 +1809,11 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}

unsigned Opcode = getVectorRegSpillRestoreOpcode(VReg ? VReg : DestReg, RC,
SpillSize, RI, *MFI);

unsigned Opcode = RI.isVectorSuperClass(RC)
? getAVSpillRestoreOpcode(SpillSize)
: RI.isAGPRClass(RC)
? getAGPRSpillRestoreOpcode(SpillSize)
: getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
5 changes: 0 additions & 5 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -627,11 +627,6 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return get(Opcode).TSFlags & SIInstrFlags::SGPRSpill;
}

static bool isWWMRegSpillOpcode(uint16_t Opcode) {
return Opcode == AMDGPU::SI_SPILL_WWM_V32_SAVE ||
Opcode == AMDGPU::SI_SPILL_WWM_V32_RESTORE;
}

static bool isDPP(const MachineInstr &MI) {
return MI.getDesc().TSFlags & SIInstrFlags::DPP;
}
2 changes: 0 additions & 2 deletions llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -890,8 +890,6 @@ defm SI_SPILL_AV384 : SI_SPILL_VGPR <AV_384, 1>;
defm SI_SPILL_AV512 : SI_SPILL_VGPR <AV_512, 1>;
defm SI_SPILL_AV1024 : SI_SPILL_VGPR <AV_1024, 1>;

defm SI_SPILL_WWM_V32 : SI_SPILL_VGPR <VGPR_32>;

def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
(ins si_ga:$ptr_lo, si_ga:$ptr_hi),
