Skip to content
This repository has been archived by the owner. It is now read-only.
Permalink
Browse files
[AMDGPU] gfx908 agpr spilling
Differential Revision: https://reviews.llvm.org/D64594

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@365833 91177308-0d34-0410-b5e6-96231b3b80d8
  • Loading branch information
rampitec committed Jul 11, 2019
1 parent 52d4c04 commit cb57db03360f8247a475e77dc895f7adb573c0b1
@@ -913,7 +913,6 @@ static bool allStackObjectsAreDead(const MachineFrameInfo &MFI) {
return true;
}


#ifndef NDEBUG
static bool allSGPRSpillsAreDead(const MachineFrameInfo &MFI,
Optional<int> FramePointerSaveIndex) {
@@ -947,6 +946,7 @@ void SIFrameLowering::processFunctionBeforeFrameFinalized(
const SIRegisterInfo *TRI = ST.getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

FuncInfo->removeDeadFrameIndices(MFI);
assert(allSGPRSpillsAreDead(MFI, None) &&
"SGPR spill should have been removed in SILowerSGPRSpills");

@@ -976,6 +976,8 @@ static unsigned getSGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S256_SAVE;
case 64:
return AMDGPU::SI_SPILL_S512_SAVE;
case 128:
return AMDGPU::SI_SPILL_S1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
@@ -997,6 +999,25 @@ static unsigned getVGPRSpillSaveOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V256_SAVE;
case 64:
return AMDGPU::SI_SPILL_V512_SAVE;
case 128:
return AMDGPU::SI_SPILL_V1024_SAVE;
default:
llvm_unreachable("unknown register size");
}
}

/// Map a spill size in bytes to the pseudo opcode that saves a full AGPR
/// tuple of that size to a stack slot.
/// Only sizes with a corresponding SI_SPILL_A*_SAVE pseudo are handled
/// (32/64/128/512/1024-bit tuples); any other size is a fatal error.
static unsigned getAGPRSpillSaveOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_A32_SAVE;
  case 8:
    return AMDGPU::SI_SPILL_A64_SAVE;
  case 16:
    return AMDGPU::SI_SPILL_A128_SAVE;
  case 64:
    return AMDGPU::SI_SPILL_A512_SAVE;
  case 128:
    return AMDGPU::SI_SPILL_A1024_SAVE;
  default:
    llvm_unreachable("unknown register size");
  }
@@ -1055,17 +1076,22 @@ void SIInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
return;
}

assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

unsigned Opcode = getVGPRSpillSaveOpcode(SpillSize);
unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillSaveOpcode(SpillSize)
: getVGPRSpillSaveOpcode(SpillSize);
MFI->setHasSpilledVGPRs();
BuildMI(MBB, MI, DL, get(Opcode))
.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);

auto MIB = BuildMI(MBB, MI, DL, get(Opcode));
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addReg(SrcReg, getKillRegState(isKill)) // data
.addFrameIndex(FrameIndex) // addr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
}

static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
@@ -1084,6 +1110,8 @@ static unsigned getSGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_S256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_S512_RESTORE;
case 128:
return AMDGPU::SI_SPILL_S1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
@@ -1105,6 +1133,25 @@ static unsigned getVGPRSpillRestoreOpcode(unsigned Size) {
return AMDGPU::SI_SPILL_V256_RESTORE;
case 64:
return AMDGPU::SI_SPILL_V512_RESTORE;
case 128:
return AMDGPU::SI_SPILL_V1024_RESTORE;
default:
llvm_unreachable("unknown register size");
}
}

/// Map a spill size in bytes to the pseudo opcode that reloads a full AGPR
/// tuple of that size from a stack slot.
/// Mirrors getAGPRSpillSaveOpcode: only 32/64/128/512/1024-bit tuples have a
/// SI_SPILL_A*_RESTORE pseudo; any other size is a fatal error.
static unsigned getAGPRSpillRestoreOpcode(unsigned Size) {
  switch (Size) {
  case 4:
    return AMDGPU::SI_SPILL_A32_RESTORE;
  case 8:
    return AMDGPU::SI_SPILL_A64_RESTORE;
  case 16:
    return AMDGPU::SI_SPILL_A128_RESTORE;
  case 64:
    return AMDGPU::SI_SPILL_A512_RESTORE;
  case 128:
    return AMDGPU::SI_SPILL_A1024_RESTORE;
  default:
    llvm_unreachable("unknown register size");
  }
@@ -1156,15 +1203,19 @@ void SIInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
return;
}

assert(RI.hasVGPRs(RC) && "Only VGPR spilling expected");

unsigned Opcode = getVGPRSpillRestoreOpcode(SpillSize);
BuildMI(MBB, MI, DL, get(Opcode), DestReg)
.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
unsigned Opcode = RI.hasAGPRs(RC) ? getAGPRSpillRestoreOpcode(SpillSize)
: getVGPRSpillRestoreOpcode(SpillSize);
auto MIB = BuildMI(MBB, MI, DL, get(Opcode), DestReg);
if (RI.hasAGPRs(RC)) {
MachineRegisterInfo &MRI = MF->getRegInfo();
unsigned Tmp = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MIB.addReg(Tmp, RegState::Define);
}
MIB.addFrameIndex(FrameIndex) // vaddr
.addReg(MFI->getScratchRSrcReg()) // scratch_rsrc
.addReg(MFI->getStackPtrOffsetReg()) // scratch_offset
.addImm(0) // offset
.addMemOperand(MMO);
}

/// \param @Offset Offset in bytes of the FrameIndex being spilled
@@ -513,6 +513,7 @@ defm SI_SPILL_S128 : SI_SPILL_SGPR <SReg_128>;
defm SI_SPILL_S160 : SI_SPILL_SGPR <SReg_160>;
defm SI_SPILL_S256 : SI_SPILL_SGPR <SReg_256>;
defm SI_SPILL_S512 : SI_SPILL_SGPR <SReg_512>;
defm SI_SPILL_S1024 : SI_SPILL_SGPR <SReg_1024>;

multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let UseNamedOperandTable = 1, VGPRSpill = 1,
@@ -524,7 +525,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayStore = 1;
let mayLoad = 0;
// (2 * 4) + (8 * num_subregs) bytes maximum
let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
// Size field is unsigned char and cannot fit more.
let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}

def _RESTORE : VPseudoInstSI <
@@ -535,7 +538,9 @@ multiclass SI_SPILL_VGPR <RegisterClass vgpr_class> {
let mayLoad = 1;

// (2 * 4) + (8 * num_subregs) bytes maximum
let Size = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 3), 8);
// Size field is unsigned char and cannot fit more.
let Size = !if(!le(MaxSize, 256), MaxSize, 252);
}
} // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
}
@@ -547,6 +552,44 @@ defm SI_SPILL_V128 : SI_SPILL_VGPR <VReg_128>;
defm SI_SPILL_V160 : SI_SPILL_VGPR <VReg_160>;
defm SI_SPILL_V256 : SI_SPILL_VGPR <VReg_256>;
defm SI_SPILL_V512 : SI_SPILL_VGPR <VReg_512>;
defm SI_SPILL_V1024 : SI_SPILL_VGPR <VReg_1024>;

// Spill/restore pseudos for AGPR tuples. Unlike the SI_SPILL_VGPR pseudos,
// each of these additionally defines a scratch VGPR_32 ($tmp) that the
// expansion can stage data through; $tmp is marked @earlyclobber so it is
// never allocated on top of the spilled tuple's operands.
multiclass SI_SPILL_AGPR <RegisterClass vgpr_class> {
  let UseNamedOperandTable = 1, VGPRSpill = 1,
      Constraints = "@earlyclobber $tmp",
      SchedRW = [WriteVMEM] in {
    def _SAVE : VPseudoInstSI <
      (outs VGPR_32:$tmp),
      (ins vgpr_class:$vdata, i32imm:$vaddr, SReg_128:$srsrc,
           SReg_32:$soffset, i32imm:$offset)> {
      let mayStore = 1;
      let mayLoad = 0;
      // (2 * 4) + (16 * num_subregs) bytes maximum
      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
      // Size field is unsigned char and cannot fit more.
      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
    }

    def _RESTORE : VPseudoInstSI <
      (outs vgpr_class:$vdata, VGPR_32:$tmp),
      (ins i32imm:$vaddr, SReg_128:$srsrc, SReg_32:$soffset,
           i32imm:$offset)> {
      let mayStore = 0;
      let mayLoad = 1;

      // (2 * 4) + (16 * num_subregs) bytes maximum
      int MaxSize = !add(!shl(!srl(vgpr_class.Size, 5), 4), 8);
      // Size field is unsigned char and cannot fit more.
      let Size = !if(!le(MaxSize, 256), MaxSize, 252);
    }
  } // End UseNamedOperandTable = 1, VGPRSpill = 1, SchedRW = [WriteVMEM]
}

// NOTE(review): only 32/64/128/512/1024-bit AGPR variants are instantiated,
// matching the switch cases in get{AGPRSpillSave,AGPRSpillRestore}Opcode —
// confirm no other AGPR tuple widths can reach the spill path.
defm SI_SPILL_A32 : SI_SPILL_AGPR <AGPR_32>;
defm SI_SPILL_A64 : SI_SPILL_AGPR <AReg_64>;
defm SI_SPILL_A128 : SI_SPILL_AGPR <AReg_128>;
defm SI_SPILL_A512 : SI_SPILL_AGPR <AReg_512>;
defm SI_SPILL_A1024 : SI_SPILL_AGPR <AReg_1024>;

def SI_PC_ADD_REL_OFFSET : SPseudoInstSI <
(outs SReg_64:$dst),
@@ -37,6 +37,12 @@ using MBBVector = SmallVector<MachineBasicBlock *, 4>;

namespace {

// Off-switch for redirecting VGPR spill slots to free AGPRs (and AGPR spills
// to free VGPRs) on subtargets with MAI instructions; hidden, on by default.
static cl::opt<bool> EnableSpillVGPRToAGPR(
  "amdgpu-spill-vgpr-to-agpr",
  cl::desc("Enable spilling VGPRs to AGPRs"),
  cl::ReallyHidden,
  cl::init(true));

class SILowerSGPRSpills : public MachineFunctionPass {
private:
const SIRegisterInfo *TRI = nullptr;
@@ -242,10 +248,22 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
return false;
}

MachineRegisterInfo &MRI = MF.getRegInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
bool AllSGPRSpilledToVGPRs = false;
const bool SpillVGPRToAGPR = ST.hasMAIInsts() && FuncInfo->hasSpilledVGPRs()
&& EnableSpillVGPRToAGPR;

bool MadeChange = false;

if (TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) {
const bool SpillToAGPR = EnableSpillVGPRToAGPR && ST.hasMAIInsts();

// TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
// handled as SpilledToReg in regular PrologEpilogInserter.
if ((TRI->spillSGPRToVGPR() && (HasCSRs || FuncInfo->hasSpilledSGPRs())) ||
SpillVGPRToAGPR) {
AllSGPRSpilledToVGPRs = true;

// Process all SGPR spills before frame offsets are finalized. Ideally SGPRs
// are spilled to VGPRs, in which case we can eliminate the stack usage.
//
@@ -257,6 +275,18 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
MachineInstr &MI = *I;
Next = std::next(I);

if (SpillToAGPR && TII->isVGPRSpill(MI)) {
// Try to eliminate stack used by VGPR spills before frame
// finalization.
unsigned FIOp = AMDGPU::getNamedOperandIdx(MI.getOpcode(),
AMDGPU::OpName::vaddr);
int FI = MI.getOperand(FIOp).getIndex();
unsigned VReg = TII->getNamedOperand(MI, AMDGPU::OpName::vdata)
->getReg();
if (FuncInfo->allocateVGPRSpillToAGPR(MF, FI, TRI->isAGPR(MRI, VReg)))
TRI->eliminateFrameIndex(MI, 0, FIOp, nullptr);
}

if (!TII->isSGPRSpill(MI))
continue;

@@ -266,18 +296,24 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(MI, FI, nullptr);
(void)Spilled;
assert(Spilled && "failed to spill SGPR to VGPR when allocated");
}

} else
AllSGPRSpilledToVGPRs = false;
}
}

for (MachineBasicBlock &MBB : MF) {
for (auto SSpill : FuncInfo->getSGPRSpillVGPRs())
MBB.addLiveIn(SSpill.VGPR);

for (MCPhysReg Reg : FuncInfo->getVGPRSpillAGPRs())
MBB.addLiveIn(Reg);

for (MCPhysReg Reg : FuncInfo->getAGPRSpillVGPRs())
MBB.addLiveIn(Reg);

MBB.sortUniqueLiveIns();
}

FuncInfo->removeSGPRToVGPRFrameIndices(MFI);
MadeChange = true;
}

@@ -319,7 +319,75 @@ bool SIMachineFunctionInfo::allocateSGPRSpillToVGPR(MachineFunction &MF,
return true;
}

void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI) {
/// Reserve AGPRs or VGPRs to support spilling for FrameIndex \p FI.
/// Either AGPR is spilled to VGPR or vice versa: \p isAGPRtoVGPR true means
/// an AGPR value is staged into scratch VGPRs, false the reverse.
/// Returns true if \p FI can be eliminated completely (every 32-bit lane of
/// the slot received a register); a partial allocation keeps the stack slot.
bool SIMachineFunctionInfo::allocateVGPRSpillToAGPR(MachineFunction &MF,
                                                    int FI,
                                                    bool isAGPRtoVGPR) {
  MachineRegisterInfo &MRI = MF.getRegInfo();
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();

  assert(ST.hasMAIInsts() && FrameInfo.isSpillSlotObjectIndex(FI));

  auto &Spill = VGPRToAGPRSpills[FI];

  // This has already been allocated.
  if (!Spill.Lanes.empty())
    return Spill.FullyAllocated;

  // One 32-bit register lane per 4 bytes of the spill slot.
  unsigned Size = FrameInfo.getObjectSize(FI);
  unsigned NumLanes = Size / 4;
  Spill.Lanes.resize(NumLanes, AMDGPU::NoRegister);

  // Allocate lanes from the opposite register bank.
  const TargetRegisterClass &RC =
      isAGPRtoVGPR ? AMDGPU::VGPR_32RegClass : AMDGPU::AGPR_32RegClass;
  auto Regs = RC.getRegisters();

  auto &SpillRegs = isAGPRtoVGPR ? SpillAGPR : SpillVGPR;
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  Spill.FullyAllocated = true;

  // FIXME: Move allocation logic out of MachineFunctionInfo and initialize
  // once.
  BitVector OtherUsedRegs;
  OtherUsedRegs.resize(TRI->getNumRegs());

  // Treat call-preserved registers as unavailable so spilling never clobbers
  // a register this function must preserve for its caller.
  const uint32_t *CSRMask =
      TRI->getCallPreservedMask(MF, MF.getFunction().getCallingConv());
  if (CSRMask)
    OtherUsedRegs.setBitsInMask(CSRMask);

  // TODO: Should include register tuples, but doesn't matter with current
  // usage.
  for (MCPhysReg Reg : SpillAGPR)
    OtherUsedRegs.set(Reg);
  for (MCPhysReg Reg : SpillVGPR)
    OtherUsedRegs.set(Reg);

  // Greedily walk the class' register list once, giving each lane the next
  // register that is allocatable, unused in this function, and not already
  // claimed above.
  SmallVectorImpl<MCPhysReg>::const_iterator NextSpillReg = Regs.begin();
  for (unsigned I = 0; I < NumLanes; ++I) {
    NextSpillReg = std::find_if(
        NextSpillReg, Regs.end(), [&MRI, &OtherUsedRegs](MCPhysReg Reg) {
          return MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg) &&
                 !OtherUsedRegs[Reg];
        });

    if (NextSpillReg == Regs.end()) { // Registers exhausted
      // Remaining lanes stay NoRegister; caller must keep the stack slot.
      Spill.FullyAllocated = false;
      break;
    }

    OtherUsedRegs.set(*NextSpillReg);
    SpillRegs.push_back(*NextSpillReg);
    Spill.Lanes[I] = *NextSpillReg++;
  }

  return Spill.FullyAllocated;
}

void SIMachineFunctionInfo::removeDeadFrameIndices(MachineFrameInfo &MFI) {
// The FP spill hasn't been inserted yet, so keep it around.
for (auto &R : SGPRToVGPRSpills) {
if (R.first != FramePointerSaveIndex)
@@ -332,6 +400,11 @@ void SIMachineFunctionInfo::removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI)
++i)
if (i != FramePointerSaveIndex)
MFI.setStackID(i, TargetStackID::Default);

for (auto &R : VGPRToAGPRSpills) {
if (R.second.FullyAllocated)
MFI.RemoveStackObject(R.first);
}
}

MCPhysReg SIMachineFunctionInfo::getNextUserSGPR() const {

0 comments on commit cb57db0

Please sign in to comment.