190 changes: 97 additions & 93 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIMachineFunctionInfo.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/Target/TargetMachine.h"
Expand All @@ -26,13 +26,17 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
cl::ReallyHidden,
cl::init(true));

// Find a register matching \p RC from \p LiveRegs which is unused and available
// throughout the function. On failure, returns AMDGPU::NoRegister.
// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
const LivePhysRegs &LiveRegs,
const LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC) {
for (MCRegister Reg : RC) {
if (!MRI.isPhysRegUsed(Reg) && LiveRegs.available(MRI, Reg))
if (!MRI.isPhysRegUsed(Reg) && LiveUnits.available(Reg) &&
!MRI.isReserved(Reg))
return Reg;
}
return MCRegister();
Expand All @@ -42,32 +46,31 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister(MachineRegisterInfo &MRI,
LivePhysRegs &LiveRegs,
const TargetRegisterClass &RC,
bool Unused = false) {
static MCRegister findScratchNonCalleeSaveRegister(
MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC, bool Unused = false) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs();
for (unsigned i = 0; CSRegs[i]; ++i)
LiveRegs.addReg(CSRegs[i]);
LiveUnits.addReg(CSRegs[i]);

// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
if (Unused)
return findUnusedRegister(MRI, LiveRegs, RC);
return findUnusedRegister(MRI, LiveUnits, RC);

for (MCRegister Reg : RC) {
if (LiveRegs.available(MRI, Reg))
if (LiveUnits.available(Reg) && !MRI.isReserved(Reg))
return Reg;
}

return MCRegister();
}

/// Query target location for spilling SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
/// \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister(
MachineFunction &MF, LivePhysRegs &LiveRegs, Register SGPR,
MachineFunction &MF, LiveRegUnits &LiveUnits, Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true) {
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
Expand All @@ -81,11 +84,11 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.

Register ScratchSGPR;
// 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
// should have all the callee saved registers marked as used. For certain
// cases we skip copy to scratch SGPR.
// 1: Try to save the given register into an unused scratch SGPR. The
// LiveUnits should have all the callee saved registers marked as used. For
// certain cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveRegs, RC);
ScratchSGPR = findUnusedRegister(MF.getRegInfo(), LiveUnits, RC);

if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject(Size, Alignment, true, nullptr,
Expand Down Expand Up @@ -118,7 +121,7 @@ static void getVGPRSpillLaneOrTempRegister(
MFI->addToPrologEpilogSGPRSpills(
SGPR, PrologEpilogSGPRSaveRestoreInfo(
SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
LiveRegs.addReg(ScratchSGPR);
LiveUnits.addReg(ScratchSGPR);
LLVM_DEBUG(dbgs() << "Saving " << printReg(SGPR, TRI) << " with copy to "
<< printReg(ScratchSGPR, TRI) << '\n');
}
Expand All @@ -129,7 +132,7 @@ static void getVGPRSpillLaneOrTempRegister(
// use.
static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI, Register FrameReg,
Expand All @@ -142,18 +145,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
MachineMemOperand *MMO = MF.getMachineMemOperand(
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
LiveRegs.addReg(SpillReg);
LiveUnits.addReg(SpillReg);
bool IsKill = !MBB.isLiveIn(SpillReg);
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
DwordOff, MMO, nullptr, &LiveRegs);
DwordOff, MMO, nullptr, &LiveUnits);
if (IsKill)
LiveRegs.removeReg(SpillReg);
LiveUnits.removeReg(SpillReg);
}

static void buildEpilogRestore(const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs, MachineFunction &MF,
LiveRegUnits &LiveUnits, MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI,
Expand All @@ -167,7 +170,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize(FI),
FrameInfo.getObjectAlign(FI));
TRI.buildSpillLoadStore(MBB, I, DL, Opc, FI, SpillReg, false, FrameReg,
DwordOff, MMO, nullptr, &LiveRegs);
DwordOff, MMO, nullptr, &LiveUnits);
}

static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Expand Down Expand Up @@ -195,18 +198,18 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg(GitPtrLo);
}

static void initLiveRegs(LivePhysRegs &LiveRegs, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo *FuncInfo,
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsProlog) {
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
static void initLiveUnits(LiveRegUnits &LiveUnits, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo *FuncInfo,
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsProlog) {
if (LiveUnits.empty()) {
LiveUnits.init(TRI);
if (IsProlog) {
LiveRegs.addLiveIns(MBB);
LiveUnits.addLiveIns(MBB);
} else {
// In epilog.
LiveRegs.addLiveOuts(MBB);
LiveRegs.stepBackward(*MBBI);
LiveUnits.addLiveOuts(MBB);
LiveUnits.stepBackward(*MBBI);
}
}
}
Expand All @@ -228,7 +231,7 @@ class PrologEpilogSGPRSpillBuilder {
const SIRegisterInfo &TRI;
Register SuperReg;
const PrologEpilogSGPRSaveRestoreInfo SI;
LivePhysRegs &LiveRegs;
LiveRegUnits &LiveUnits;
const DebugLoc &DL;
Register FrameReg;
ArrayRef<int16_t> SplitParts;
Expand All @@ -239,10 +242,10 @@ class PrologEpilogSGPRSpillBuilder {
MachineRegisterInfo &MRI = MF.getRegInfo();
assert(!MFI.isDeadObjectIndex(FI));

initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);
initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ true);

MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");

Expand All @@ -253,7 +256,7 @@ class PrologEpilogSGPRSpillBuilder {
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg(SubReg);

buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
DwordOff += 4;
}
Expand Down Expand Up @@ -287,9 +290,9 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromMemory(const int FI) {
MachineRegisterInfo &MRI = MF.getRegInfo();

initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MI, /*IsProlog*/ false);
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::VGPR_32RegClass);
MRI, LiveUnits, AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error("failed to find free scratch register");

Expand All @@ -298,8 +301,8 @@ class PrologEpilogSGPRSpillBuilder {
? SuperReg
: Register(TRI.getSubReg(SuperReg, SplitParts[I]));

buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MI, DL,
TmpVGPR, FI, FrameReg, DwordOff);
BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg(TmpVGPR, RegState::Kill);
DwordOff += 4;
Expand Down Expand Up @@ -335,11 +338,12 @@ class PrologEpilogSGPRSpillBuilder {
MachineBasicBlock::iterator MI,
const DebugLoc &DL, const SIInstrInfo *TII,
const SIRegisterInfo &TRI,
LivePhysRegs &LiveRegs, Register FrameReg)
LiveRegUnits &LiveUnits, Register FrameReg)
: MI(MI), MBB(MBB), MF(*MBB.getParent()),
ST(MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
FrameReg(FrameReg) {
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
SplitParts = TRI.getRegSplitParts(RC, EltSize);
NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
Expand Down Expand Up @@ -396,9 +400,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(

if (ST.isAmdPalOS()) {
// Extract the scratch offset from the descriptor in the GIT
LivePhysRegs LiveRegs;
LiveRegs.init(*TRI);
LiveRegs.addLiveIns(MBB);
LiveRegUnits LiveUnits;
LiveUnits.init(*TRI);
LiveUnits.addLiveIns(MBB);

// Find unused reg to load flat scratch init into
MachineRegisterInfo &MRI = MF.getRegInfo();
Expand All @@ -409,8 +413,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
std::min(static_cast<unsigned>(AllSGPR64s.size()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg(MF);
for (MCPhysReg Reg : AllSGPR64s) {
if (LiveRegs.available(MRI, Reg) && MRI.isAllocatable(Reg) &&
!TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
if (LiveUnits.available(Reg) && !MRI.isReserved(Reg) &&
MRI.isAllocatable(Reg) && !TRI->isSubRegisterEq(Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break;
}
Expand Down Expand Up @@ -872,7 +876,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {

// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
static Register buildScratchExecCopy(LiveRegUnits &LiveUnits,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Expand All @@ -885,14 +889,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const SIRegisterInfo &TRI = TII->getRegisterInfo();
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();

initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, IsProlog);

ScratchExecCopy = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, *TRI.getWaveMaskRegClass());
MRI, LiveUnits, *TRI.getWaveMaskRegClass());
if (!ScratchExecCopy)
report_fatal_error("failed to find free scratch register");

LiveRegs.addReg(ScratchExecCopy);
LiveUnits.addReg(ScratchExecCopy);

const unsigned SaveExecOpc =
ST.isWave32() ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
Expand All @@ -908,7 +912,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,

void SIFrameLowering::emitCSRSpillStores(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Expand All @@ -923,15 +927,15 @@ void SIFrameLowering::emitCSRSpillStores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true, /*EnableInactiveLanes*/ true);

auto StoreWWMRegisters =
[&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
buildPrologSpill(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
buildPrologSpill(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
Expand All @@ -942,7 +946,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ true,
/*EnableInactiveLanes*/ false);
}
Expand All @@ -954,7 +958,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned ExecMov = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(ExecMov), TRI.getExec())
.addReg(ScratchExecCopy, RegState::Kill);
LiveRegs.addReg(ScratchExecCopy);
LiveUnits.addReg(ScratchExecCopy);
}

Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Expand All @@ -970,7 +974,7 @@ void SIFrameLowering::emitCSRSpillStores(
continue;

PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
LiveRegs, FrameReg);
LiveUnits, FrameReg);
SB.save();
}

Expand All @@ -985,16 +989,16 @@ void SIFrameLowering::emitCSRSpillStores(

MBB.sortUniqueLiveIns();
}
if (!LiveRegs.empty()) {
if (!LiveUnits.empty()) {
for (MCPhysReg Reg : ScratchSGPRs)
LiveRegs.addReg(Reg);
LiveUnits.addReg(Reg);
}
}
}

void SIFrameLowering::emitCSRSpillRestores(
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits,
Register FrameReg, Register FramePtrRegScratchCopy) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
Expand All @@ -1014,7 +1018,7 @@ void SIFrameLowering::emitCSRSpillRestores(
continue;

PrologEpilogSGPRSpillBuilder SB(Reg, Spill.second, MBB, MBBI, DL, TII, TRI,
LiveRegs, FrameReg);
LiveUnits, FrameReg);
SB.restore();
}

Expand All @@ -1026,15 +1030,15 @@ void SIFrameLowering::emitCSRSpillRestores(
FuncInfo->splitWWMSpillRegisters(MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty())
ScratchExecCopy =
buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false, /*EnableInactiveLanes*/ true);

auto RestoreWWMRegisters =
[&](SmallVectorImpl<std::pair<Register, int>> &WWMRegs) {
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first;
int FI = Reg.second;
buildEpilogRestore(ST, TRI, *FuncInfo, LiveRegs, MF, MBB, MBBI, DL,
buildEpilogRestore(ST, TRI, *FuncInfo, LiveUnits, MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
Expand All @@ -1045,7 +1049,7 @@ void SIFrameLowering::emitCSRSpillRestores(
unsigned MovOpc = ST.isWave32() ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI(MBB, MBBI, DL, TII->get(MovOpc), TRI.getExec()).addImm(-1);
} else {
ScratchExecCopy = buildScratchExecCopy(LiveRegs, MF, MBB, MBBI, DL,
ScratchExecCopy = buildScratchExecCopy(LiveUnits, MF, MBB, MBBI, DL,
/*IsProlog*/ false,
/*EnableInactiveLanes*/ false);
}
Expand Down Expand Up @@ -1078,7 +1082,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrReg = FuncInfo->getFrameOffsetReg();
Register BasePtrReg =
TRI.hasBasePointer(MF) ? TRI.getBaseRegister() : Register();
LivePhysRegs LiveRegs;
LiveRegUnits LiveUnits;

MachineBasicBlock::iterator MBBI = MBB.begin();
// DebugLoc must be unknown since the first instruction with DebugLoc is used
Expand All @@ -1096,33 +1100,33 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrRegScratchCopy;
if (!HasFP && !hasFP(MF)) {
// Emit the CSR spill stores with SP base register.
emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
} else {
// CSR spill stores will use FP as base register.
Register SGPRForFPSaveRestoreCopy =
FuncInfo->getScratchSGPRCopyDstReg(FramePtrReg);

initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ true);
if (SGPRForFPSaveRestoreCopy) {
// Copy FP to the scratch register now and emit the CFI entry. It avoids
// the extra FP copy needed in the other two cases when FP is spilled to
// memory or to a VGPR lane.
PrologEpilogSGPRSpillBuilder SB(
FramePtrReg,
FuncInfo->getPrologEpilogSGPRSaveRestoreInfo(FramePtrReg), MBB, MBBI,
DL, TII, TRI, LiveRegs, FramePtrReg);
DL, TII, TRI, LiveUnits, FramePtrReg);
SB.save();
LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
// Copy FP into a new scratch register so that its previous value can be
// spilled after setting up the new frame.
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");

LiveRegs.addReg(FramePtrRegScratchCopy);
LiveUnits.addReg(FramePtrRegScratchCopy);
BuildMI(MBB, MBBI, DL, TII->get(AMDGPU::COPY), FramePtrRegScratchCopy)
.addReg(FramePtrReg);
}
Expand All @@ -1132,9 +1136,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const unsigned Alignment = MFI.getMaxAlign().value();

RoundedSize += Alignment;
if (LiveRegs.empty()) {
LiveRegs.init(TRI);
LiveRegs.addLiveIns(MBB);
if (LiveUnits.empty()) {
LiveUnits.init(TRI);
LiveUnits.addLiveIns(MBB);
}

// s_add_i32 s33, s32, NumBytes
Expand All @@ -1157,10 +1161,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,

// If FP is used, emit the CSR spills with FP base register.
if (HasFP) {
emitCSRSpillStores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
emitCSRSpillStores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
if (FramePtrRegScratchCopy)
LiveRegs.removeReg(FramePtrRegScratchCopy);
LiveUnits.removeReg(FramePtrRegScratchCopy);
}

// If we need a base pointer, set it up here. It's whatever the value of
Expand Down Expand Up @@ -1209,7 +1213,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo();
const SIRegisterInfo &TRI = TII->getRegisterInfo();
MachineRegisterInfo &MRI = MF.getRegInfo();
LivePhysRegs LiveRegs;
LiveRegUnits LiveUnits;
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end();
Expand Down Expand Up @@ -1239,19 +1243,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
// into a new scratch register and copy to FP later when other registers are
// restored from the current stack frame.
initLiveRegs(LiveRegs, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
initLiveUnits(LiveUnits, TRI, FuncInfo, MF, MBB, MBBI, /*IsProlog*/ false);
if (SGPRForFPSaveRestoreCopy) {
LiveRegs.addReg(SGPRForFPSaveRestoreCopy);
LiveUnits.addReg(SGPRForFPSaveRestoreCopy);
} else {
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister(
MRI, LiveRegs, AMDGPU::SReg_32_XM0_XEXECRegClass);
MRI, LiveUnits, AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error("failed to find free scratch register");

LiveRegs.addReg(FramePtrRegScratchCopy);
LiveUnits.addReg(FramePtrRegScratchCopy);
}

emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, FramePtrReg,
emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, FramePtrReg,
FramePtrRegScratchCopy);
}

Expand All @@ -1274,7 +1278,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.setMIFlag(MachineInstr::FrameDestroy);
} else {
// Insert the CSR spill restores with SP as the base register.
emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveRegs, StackPtrReg,
emitCSRSpillRestores(MF, MBB, MBBI, DL, LiveUnits, StackPtrReg,
FramePtrRegScratchCopy);
}
}
Expand Down Expand Up @@ -1471,30 +1475,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo();
LivePhysRegs LiveRegs;
LiveRegs.init(*TRI);
LiveRegUnits LiveUnits;
LiveUnits.init(*TRI);
// Initially mark callee saved registers as used so we will not choose them
// while looking for scratch SGPRs.
const MCPhysReg *CSRegs = MF.getRegInfo().getCalleeSavedRegs();
for (unsigned I = 0; CSRegs[I]; ++I)
LiveRegs.addReg(CSRegs[I]);
LiveUnits.addReg(CSRegs[I]);

const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass();

if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy();
assert(ReservedReg && "Should have reserved an SGPR for EXEC copy.");
Register UnusedScratchReg = findUnusedRegister(MRI, LiveRegs, RC);
Register UnusedScratchReg = findUnusedRegister(MRI, LiveUnits, RC);
if (UnusedScratchReg) {
// If found any unused scratch SGPR, reserve the register itself for Exec
// copy and there is no need for any spills in that case.
MFI->setSGPRForEXECCopy(UnusedScratchReg);
LiveRegs.addReg(UnusedScratchReg);
LiveUnits.addReg(UnusedScratchReg);
} else {
// Needs spill.
assert(!MFI->hasPrologEpilogSGPRSpillEntry(ReservedReg) &&
"Re-reserving spill slot for EXEC copy register");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, ReservedReg, RC,
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, ReservedReg, RC,
/*IncludeScratchCopy=*/false);
}
}
Expand All @@ -1515,14 +1519,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
Register FramePtrReg = MFI->getFrameOffsetReg();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(FramePtrReg) &&
"Re-reserving spill slot for FP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, FramePtrReg);
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, FramePtrReg);
}

if (TRI->hasBasePointer(MF)) {
Register BasePtrReg = TRI->getBaseRegister();
assert(!MFI->hasPrologEpilogSGPRSpillEntry(BasePtrReg) &&
"Re-reserving spill slot for BP");
getVGPRSpillLaneOrTempRegister(MF, LiveRegs, BasePtrReg);
getVGPRSpillLaneOrTempRegister(MF, LiveUnits, BasePtrReg);
}
}

Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AMDGPU/SIFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,11 @@ class SIFrameLowering final : public AMDGPUFrameLowering {
bool NeedExecCopyReservedReg) const;
void emitCSRSpillStores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LivePhysRegs &LiveRegs, Register FrameReg,
LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
void emitCSRSpillRestores(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL,
LivePhysRegs &LiveRegs, Register FrameReg,
LiveRegUnits &LiveUnits, Register FrameReg,
Register FramePtrRegScratchCopy) const;
bool
assignCalleeSavedSpillSlots(MachineFunction &MF,
Expand Down
18 changes: 9 additions & 9 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
Expand Down Expand Up @@ -1309,8 +1309,8 @@ void SIRegisterInfo::buildSpillLoadStore(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
RegScavenger *RS, LivePhysRegs *LiveRegs) const {
assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
RegScavenger *RS, LiveRegUnits *LiveUnits) const {
assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

MachineFunction *MF = MBB.getParent();
const SIInstrInfo *TII = ST.getInstrInfo();
Expand Down Expand Up @@ -1398,18 +1398,18 @@ void SIRegisterInfo::buildSpillLoadStore(
SOffset = MCRegister();

// We don't have access to the register scavenger if this function is called
// during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
// during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
// TODO: Clobbering SCC is not necessary for scratch instructions in the
// entry.
if (RS) {
SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);

// Piggy back on the liveness scan we just did see if SCC is dead.
CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
} else if (LiveRegs) {
CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
} else if (LiveUnits) {
CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
if (LiveRegs->available(MF->getRegInfo(), Reg)) {
if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
SOffset = Reg;
break;
}
Expand All @@ -1425,9 +1425,9 @@ void SIRegisterInfo::buildSpillLoadStore(
if (RS) {
TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
} else {
assert(LiveRegs);
assert(LiveUnits);
for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
if (LiveRegs->available(MF->getRegInfo(), Reg)) {
if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
TmpOffsetVGPR = Reg;
break;
}
Expand Down
6 changes: 3 additions & 3 deletions llvm/lib/Target/AMDGPU/SIRegisterInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace llvm {

class GCNSubtarget;
class LiveIntervals;
class LivePhysRegs;
class LiveRegUnits;
class RegisterBank;
struct SGPRSpillBuilder;

Expand Down Expand Up @@ -415,14 +415,14 @@ class SIRegisterInfo final : public AMDGPUGenRegisterInfo {
// Insert spill or restore instructions.
// When lowering spill pseudos, the RegScavenger should be set.
// For creating spill instructions during frame lowering, where no scavenger
// is available, LiveRegs can be used.
// is available, LiveUnits can be used.
void buildSpillLoadStore(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI, const DebugLoc &DL,
unsigned LoadStoreOp, int Index, Register ValueReg,
bool ValueIsKill, MCRegister ScratchOffsetReg,
int64_t InstrOffset, MachineMemOperand *MMO,
RegScavenger *RS,
LivePhysRegs *LiveRegs = nullptr) const;
LiveRegUnits *LiveUnits = nullptr) const;

// Return alignment in register file of first register in a register tuple.
unsigned getRegClassAlignmentNumBits(const TargetRegisterClass *RC) const {
Expand Down