Expand Up
@@ -11,7 +11,7 @@
#include " GCNSubtarget.h"
#include " MCTargetDesc/AMDGPUMCTargetDesc.h"
#include " SIMachineFunctionInfo.h"
#include " llvm/CodeGen/LivePhysRegs .h"
#include " llvm/CodeGen/LiveRegUnits .h"
#include " llvm/CodeGen/MachineFrameInfo.h"
#include " llvm/CodeGen/RegisterScavenging.h"
#include " llvm/Target/TargetMachine.h"
Expand All
@@ -26,13 +26,17 @@ static cl::opt<bool> EnableSpillVGPRToAGPR(
cl::ReallyHidden,
cl::init(true ));
// Find a register matching \p RC from \p LiveRegs which is unused and available
// throughout the function. On failure, returns AMDGPU::NoRegister.
// Find a register matching \p RC from \p LiveUnits which is unused and
// available throughout the function. On failure, returns AMDGPU::NoRegister.
// TODO: Rewrite the loop here to iterate over MCRegUnits instead of
// MCRegisters. This should reduce the number of iterations and avoid redundant
// checking.
static MCRegister findUnusedRegister (MachineRegisterInfo &MRI,
const LivePhysRegs &LiveRegs ,
const LiveRegUnits &LiveUnits ,
const TargetRegisterClass &RC) {
for (MCRegister Reg : RC) {
if (!MRI.isPhysRegUsed (Reg) && LiveRegs.available (MRI, Reg))
if (!MRI.isPhysRegUsed (Reg) && LiveUnits.available (Reg) &&
!MRI.isReserved (Reg))
return Reg;
}
return MCRegister ();
Expand All
@@ -42,32 +46,31 @@ static MCRegister findUnusedRegister(MachineRegisterInfo &MRI,
// callee-save registers since they may appear to be free when this is called
// from canUseAsPrologue (during shrink wrapping), but then no longer be free
// when this is called from emitPrologue.
static MCRegister findScratchNonCalleeSaveRegister (MachineRegisterInfo &MRI,
LivePhysRegs &LiveRegs,
const TargetRegisterClass &RC,
bool Unused = false ) {
static MCRegister findScratchNonCalleeSaveRegister (
MachineRegisterInfo &MRI, LiveRegUnits &LiveUnits,
const TargetRegisterClass &RC, bool Unused = false ) {
// Mark callee saved registers as used so we will not choose them.
const MCPhysReg *CSRegs = MRI.getCalleeSavedRegs ();
for (unsigned i = 0 ; CSRegs[i]; ++i)
LiveRegs .addReg (CSRegs[i]);
LiveUnits .addReg (CSRegs[i]);
// We are looking for a register that can be used throughout the entire
// function, so any use is unacceptable.
if (Unused)
return findUnusedRegister (MRI, LiveRegs , RC);
return findUnusedRegister (MRI, LiveUnits , RC);
for (MCRegister Reg : RC) {
if (LiveRegs .available (MRI, Reg))
if (LiveUnits .available (Reg) && !MRI. isReserved ( Reg))
return Reg;
}
return MCRegister ();
}
// / Query target location for spilling SGPRs
// / \p IncludeScratchCopy : Also look for free scratch SGPRs
// / \p IncludeScratchCopy : Also look for free scratch SGPRs
static void getVGPRSpillLaneOrTempRegister (
MachineFunction &MF, LivePhysRegs &LiveRegs , Register SGPR,
MachineFunction &MF, LiveRegUnits &LiveUnits , Register SGPR,
const TargetRegisterClass &RC = AMDGPU::SReg_32_XM0_XEXECRegClass,
bool IncludeScratchCopy = true ) {
SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
Expand All
@@ -81,11 +84,11 @@ static void getVGPRSpillLaneOrTempRegister(
// We need to save and restore the given SGPR.
Register ScratchSGPR;
// 1: Try to save the given register into an unused scratch SGPR. The LiveRegs
// should have all the callee saved registers marked as used. For certain
// cases we skip copy to scratch SGPR.
// 1: Try to save the given register into an unused scratch SGPR. The
// LiveUnits should have all the callee saved registers marked as used. For
// certain cases we skip copy to scratch SGPR.
if (IncludeScratchCopy)
ScratchSGPR = findUnusedRegister (MF.getRegInfo (), LiveRegs , RC);
ScratchSGPR = findUnusedRegister (MF.getRegInfo (), LiveUnits , RC);
if (!ScratchSGPR) {
int FI = FrameInfo.CreateStackObject (Size , Alignment, true , nullptr ,
Expand Down
Expand Up
@@ -118,7 +121,7 @@ static void getVGPRSpillLaneOrTempRegister(
MFI->addToPrologEpilogSGPRSpills (
SGPR, PrologEpilogSGPRSaveRestoreInfo (
SGPRSaveKind::COPY_TO_SCRATCH_SGPR, ScratchSGPR));
LiveRegs .addReg (ScratchSGPR);
LiveUnits .addReg (ScratchSGPR);
LLVM_DEBUG (dbgs () << " Saving " << printReg (SGPR, TRI) << " with copy to "
<< printReg (ScratchSGPR, TRI) << ' \n ' );
}
Expand All
@@ -129,7 +132,7 @@ static void getVGPRSpillLaneOrTempRegister(
// use.
static void buildPrologSpill (const GCNSubtarget &ST, const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs , MachineFunction &MF,
LiveRegUnits &LiveUnits , MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, const DebugLoc &DL,
Register SpillReg, int FI, Register FrameReg,
Expand All
@@ -142,18 +145,18 @@ static void buildPrologSpill(const GCNSubtarget &ST, const SIRegisterInfo &TRI,
MachineMemOperand *MMO = MF.getMachineMemOperand (
PtrInfo, MachineMemOperand::MOStore, FrameInfo.getObjectSize (FI),
FrameInfo.getObjectAlign (FI));
LiveRegs .addReg (SpillReg);
LiveUnits .addReg (SpillReg);
bool IsKill = !MBB.isLiveIn (SpillReg);
TRI.buildSpillLoadStore (MBB, I, DL, Opc, FI, SpillReg, IsKill, FrameReg,
DwordOff, MMO, nullptr , &LiveRegs );
DwordOff, MMO, nullptr , &LiveUnits );
if (IsKill)
LiveRegs .removeReg (SpillReg);
LiveUnits .removeReg (SpillReg);
}
static void buildEpilogRestore (const GCNSubtarget &ST,
const SIRegisterInfo &TRI,
const SIMachineFunctionInfo &FuncInfo,
LivePhysRegs &LiveRegs , MachineFunction &MF,
LiveRegUnits &LiveUnits , MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
const DebugLoc &DL, Register SpillReg, int FI,
Expand All
@@ -167,7 +170,7 @@ static void buildEpilogRestore(const GCNSubtarget &ST,
PtrInfo, MachineMemOperand::MOLoad, FrameInfo.getObjectSize (FI),
FrameInfo.getObjectAlign (FI));
TRI.buildSpillLoadStore (MBB, I, DL, Opc, FI, SpillReg, false , FrameReg,
DwordOff, MMO, nullptr , &LiveRegs );
DwordOff, MMO, nullptr , &LiveUnits );
}
static void buildGitPtr (MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
Expand Down
Expand Up
@@ -195,18 +198,18 @@ static void buildGitPtr(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
.addReg (GitPtrLo);
}
static void initLiveRegs (LivePhysRegs &LiveRegs , const SIRegisterInfo &TRI,
const SIMachineFunctionInfo *FuncInfo,
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsProlog) {
if (LiveRegs .empty ()) {
LiveRegs .init (TRI);
static void initLiveUnits (LiveRegUnits &LiveUnits , const SIRegisterInfo &TRI,
const SIMachineFunctionInfo *FuncInfo,
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, bool IsProlog) {
if (LiveUnits .empty ()) {
LiveUnits .init (TRI);
if (IsProlog) {
LiveRegs .addLiveIns (MBB);
LiveUnits .addLiveIns (MBB);
} else {
// In epilog.
LiveRegs .addLiveOuts (MBB);
LiveRegs .stepBackward (*MBBI);
LiveUnits .addLiveOuts (MBB);
LiveUnits .stepBackward (*MBBI);
}
}
}
Expand All
@@ -228,7 +231,7 @@ class PrologEpilogSGPRSpillBuilder {
const SIRegisterInfo &TRI;
Register SuperReg;
const PrologEpilogSGPRSaveRestoreInfo SI;
LivePhysRegs &LiveRegs ;
LiveRegUnits &LiveUnits ;
const DebugLoc &DL;
Register FrameReg;
ArrayRef<int16_t > SplitParts;
Expand All
@@ -239,10 +242,10 @@ class PrologEpilogSGPRSpillBuilder {
MachineRegisterInfo &MRI = MF.getRegInfo ();
assert (!MFI.isDeadObjectIndex (FI));
initLiveRegs (LiveRegs , TRI, FuncInfo, MF, MBB, MI, /* IsProlog*/ true );
initLiveUnits (LiveUnits , TRI, FuncInfo, MF, MBB, MI, /* IsProlog*/ true );
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister (
MRI, LiveRegs , AMDGPU::VGPR_32RegClass);
MRI, LiveUnits , AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error (" failed to find free scratch register" );
Expand All
@@ -253,7 +256,7 @@ class PrologEpilogSGPRSpillBuilder {
BuildMI (MBB, MI, DL, TII->get (AMDGPU::V_MOV_B32_e32), TmpVGPR)
.addReg (SubReg);
buildPrologSpill (ST, TRI, *FuncInfo, LiveRegs , MF, MBB, MI, DL, TmpVGPR,
buildPrologSpill (ST, TRI, *FuncInfo, LiveUnits , MF, MBB, MI, DL, TmpVGPR,
FI, FrameReg, DwordOff);
DwordOff += 4 ;
}
Expand Down
Expand Up
@@ -287,9 +290,9 @@ class PrologEpilogSGPRSpillBuilder {
void restoreFromMemory (const int FI) {
MachineRegisterInfo &MRI = MF.getRegInfo ();
initLiveRegs (LiveRegs , TRI, FuncInfo, MF, MBB, MI, /* IsProlog*/ false );
initLiveUnits (LiveUnits , TRI, FuncInfo, MF, MBB, MI, /* IsProlog*/ false );
MCPhysReg TmpVGPR = findScratchNonCalleeSaveRegister (
MRI, LiveRegs , AMDGPU::VGPR_32RegClass);
MRI, LiveUnits , AMDGPU::VGPR_32RegClass);
if (!TmpVGPR)
report_fatal_error (" failed to find free scratch register" );
Expand All
@@ -298,8 +301,8 @@ class PrologEpilogSGPRSpillBuilder {
? SuperReg
: Register (TRI.getSubReg (SuperReg, SplitParts[I]));
buildEpilogRestore (ST, TRI, *FuncInfo, LiveRegs , MF, MBB, MI, DL, TmpVGPR ,
FI, FrameReg, DwordOff);
buildEpilogRestore (ST, TRI, *FuncInfo, LiveUnits , MF, MBB, MI, DL,
TmpVGPR, FI, FrameReg, DwordOff);
BuildMI (MBB, MI, DL, TII->get (AMDGPU::V_READFIRSTLANE_B32), SubReg)
.addReg (TmpVGPR, RegState::Kill);
DwordOff += 4 ;
Expand Down
Expand Up
@@ -335,11 +338,12 @@ class PrologEpilogSGPRSpillBuilder {
MachineBasicBlock::iterator MI,
const DebugLoc &DL, const SIInstrInfo *TII,
const SIRegisterInfo &TRI,
LivePhysRegs &LiveRegs , Register FrameReg)
LiveRegUnits &LiveUnits , Register FrameReg)
: MI(MI), MBB(MBB), MF(*MBB.getParent()),
ST (MF.getSubtarget<GCNSubtarget>()), MFI(MF.getFrameInfo()),
FuncInfo(MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
SuperReg(Reg), SI(SI), LiveRegs(LiveRegs), DL(DL), FrameReg(FrameReg) {
SuperReg(Reg), SI(SI), LiveUnits(LiveUnits), DL(DL),
FrameReg(FrameReg) {
const TargetRegisterClass *RC = TRI.getPhysRegBaseClass (SuperReg);
SplitParts = TRI.getRegSplitParts (RC, EltSize);
NumSubRegs = SplitParts.empty () ? 1 : SplitParts.size ();
Expand Down
Expand Up
@@ -396,9 +400,9 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
if (ST.isAmdPalOS ()) {
// Extract the scratch offset from the descriptor in the GIT
LivePhysRegs LiveRegs ;
LiveRegs .init (*TRI);
LiveRegs .addLiveIns (MBB);
LiveRegUnits LiveUnits ;
LiveUnits .init (*TRI);
LiveUnits .addLiveIns (MBB);
// Find unused reg to load flat scratch init into
MachineRegisterInfo &MRI = MF.getRegInfo ();
Expand All
@@ -409,8 +413,8 @@ void SIFrameLowering::emitEntryFunctionFlatScratchInit(
std::min (static_cast <unsigned >(AllSGPR64s.size ()), NumPreloaded));
Register GITPtrLoReg = MFI->getGITPtrLoReg (MF);
for (MCPhysReg Reg : AllSGPR64s) {
if (LiveRegs .available (MRI, Reg) && MRI.isAllocatable (Reg) &&
!TRI->isSubRegisterEq (Reg, GITPtrLoReg)) {
if (LiveUnits .available (Reg) && ! MRI.isReserved (Reg) &&
MRI. isAllocatable (Reg) && !TRI->isSubRegisterEq (Reg, GITPtrLoReg)) {
FlatScrInit = Reg;
break ;
}
Expand Down
Expand Up
@@ -872,7 +876,7 @@ bool SIFrameLowering::isSupportedStackID(TargetStackID::Value ID) const {
// Activate only the inactive lanes when \p EnableInactiveLanes is true.
// Otherwise, activate all lanes. It returns the saved exec.
static Register buildScratchExecCopy (LivePhysRegs &LiveRegs ,
static Register buildScratchExecCopy (LiveRegUnits &LiveUnits ,
MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Expand All
@@ -885,14 +889,14 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
const SIRegisterInfo &TRI = TII->getRegisterInfo ();
SIMachineFunctionInfo *FuncInfo = MF.getInfo <SIMachineFunctionInfo>();
initLiveRegs (LiveRegs , TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
initLiveUnits (LiveUnits , TRI, FuncInfo, MF, MBB, MBBI, IsProlog);
ScratchExecCopy = findScratchNonCalleeSaveRegister (
MRI, LiveRegs , *TRI.getWaveMaskRegClass ());
MRI, LiveUnits , *TRI.getWaveMaskRegClass ());
if (!ScratchExecCopy)
report_fatal_error (" failed to find free scratch register" );
LiveRegs .addReg (ScratchExecCopy);
LiveUnits .addReg (ScratchExecCopy);
const unsigned SaveExecOpc =
ST.isWave32 () ? (EnableInactiveLanes ? AMDGPU::S_XOR_SAVEEXEC_B32
Expand All
@@ -908,7 +912,7 @@ static Register buildScratchExecCopy(LivePhysRegs &LiveRegs,
void SIFrameLowering::emitCSRSpillStores (
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs ,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits ,
Register FrameReg, Register FramePtrRegScratchCopy) const {
SIMachineFunctionInfo *FuncInfo = MF.getInfo <SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
Expand All
@@ -923,15 +927,15 @@ void SIFrameLowering::emitCSRSpillStores(
FuncInfo->splitWWMSpillRegisters (MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty ())
ScratchExecCopy =
buildScratchExecCopy (LiveRegs , MF, MBB, MBBI, DL,
buildScratchExecCopy (LiveUnits , MF, MBB, MBBI, DL,
/* IsProlog*/ true , /* EnableInactiveLanes*/ true );
auto StoreWWMRegisters =
[&](SmallVectorImpl<std::pair<Register, int >> &WWMRegs) {
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first ;
int FI = Reg.second ;
buildPrologSpill (ST, TRI, *FuncInfo, LiveRegs , MF, MBB, MBBI, DL,
buildPrologSpill (ST, TRI, *FuncInfo, LiveUnits , MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
Expand All
@@ -942,7 +946,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI (MBB, MBBI, DL, TII->get (MovOpc), TRI.getExec ()).addImm (-1 );
} else {
ScratchExecCopy = buildScratchExecCopy (LiveRegs , MF, MBB, MBBI, DL,
ScratchExecCopy = buildScratchExecCopy (LiveUnits , MF, MBB, MBBI, DL,
/* IsProlog*/ true ,
/* EnableInactiveLanes*/ false );
}
Expand All
@@ -954,7 +958,7 @@ void SIFrameLowering::emitCSRSpillStores(
unsigned ExecMov = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI (MBB, MBBI, DL, TII->get (ExecMov), TRI.getExec ())
.addReg (ScratchExecCopy, RegState::Kill);
LiveRegs .addReg (ScratchExecCopy);
LiveUnits .addReg (ScratchExecCopy);
}
Register FramePtrReg = FuncInfo->getFrameOffsetReg ();
Expand All
@@ -970,7 +974,7 @@ void SIFrameLowering::emitCSRSpillStores(
continue ;
PrologEpilogSGPRSpillBuilder SB (Reg, Spill.second , MBB, MBBI, DL, TII, TRI,
LiveRegs , FrameReg);
LiveUnits , FrameReg);
SB.save ();
}
Expand All
@@ -985,16 +989,16 @@ void SIFrameLowering::emitCSRSpillStores(
MBB.sortUniqueLiveIns ();
}
if (!LiveRegs .empty ()) {
if (!LiveUnits .empty ()) {
for (MCPhysReg Reg : ScratchSGPRs)
LiveRegs .addReg (Reg);
LiveUnits .addReg (Reg);
}
}
}
void SIFrameLowering::emitCSRSpillRestores (
MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LivePhysRegs &LiveRegs ,
MachineBasicBlock::iterator MBBI, DebugLoc &DL, LiveRegUnits &LiveUnits ,
Register FrameReg, Register FramePtrRegScratchCopy) const {
const SIMachineFunctionInfo *FuncInfo = MF.getInfo <SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
Expand All
@@ -1014,7 +1018,7 @@ void SIFrameLowering::emitCSRSpillRestores(
continue ;
PrologEpilogSGPRSpillBuilder SB (Reg, Spill.second , MBB, MBBI, DL, TII, TRI,
LiveRegs , FrameReg);
LiveUnits , FrameReg);
SB.restore ();
}
Expand All
@@ -1026,15 +1030,15 @@ void SIFrameLowering::emitCSRSpillRestores(
FuncInfo->splitWWMSpillRegisters (MF, WWMCalleeSavedRegs, WWMScratchRegs);
if (!WWMScratchRegs.empty ())
ScratchExecCopy =
buildScratchExecCopy (LiveRegs , MF, MBB, MBBI, DL,
buildScratchExecCopy (LiveUnits , MF, MBB, MBBI, DL,
/* IsProlog*/ false , /* EnableInactiveLanes*/ true );
auto RestoreWWMRegisters =
[&](SmallVectorImpl<std::pair<Register, int >> &WWMRegs) {
for (const auto &Reg : WWMRegs) {
Register VGPR = Reg.first ;
int FI = Reg.second ;
buildEpilogRestore (ST, TRI, *FuncInfo, LiveRegs , MF, MBB, MBBI, DL,
buildEpilogRestore (ST, TRI, *FuncInfo, LiveUnits , MF, MBB, MBBI, DL,
VGPR, FI, FrameReg);
}
};
Expand All
@@ -1045,7 +1049,7 @@ void SIFrameLowering::emitCSRSpillRestores(
unsigned MovOpc = ST.isWave32 () ? AMDGPU::S_MOV_B32 : AMDGPU::S_MOV_B64;
BuildMI (MBB, MBBI, DL, TII->get (MovOpc), TRI.getExec ()).addImm (-1 );
} else {
ScratchExecCopy = buildScratchExecCopy (LiveRegs , MF, MBB, MBBI, DL,
ScratchExecCopy = buildScratchExecCopy (LiveUnits , MF, MBB, MBBI, DL,
/* IsProlog*/ false ,
/* EnableInactiveLanes*/ false );
}
Expand Down
Expand Up
@@ -1078,7 +1082,7 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrReg = FuncInfo->getFrameOffsetReg ();
Register BasePtrReg =
TRI.hasBasePointer (MF) ? TRI.getBaseRegister () : Register ();
LivePhysRegs LiveRegs ;
LiveRegUnits LiveUnits ;
MachineBasicBlock::iterator MBBI = MBB.begin ();
// DebugLoc must be unknown since the first instruction with DebugLoc is used
Expand All
@@ -1096,33 +1100,33 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
Register FramePtrRegScratchCopy;
if (!HasFP && !hasFP (MF)) {
// Emit the CSR spill stores with SP base register.
emitCSRSpillStores (MF, MBB, MBBI, DL, LiveRegs , StackPtrReg,
emitCSRSpillStores (MF, MBB, MBBI, DL, LiveUnits , StackPtrReg,
FramePtrRegScratchCopy);
} else {
// CSR spill stores will use FP as base register.
Register SGPRForFPSaveRestoreCopy =
FuncInfo->getScratchSGPRCopyDstReg (FramePtrReg);
initLiveRegs (LiveRegs , TRI, FuncInfo, MF, MBB, MBBI, /* IsProlog*/ true );
initLiveUnits (LiveUnits , TRI, FuncInfo, MF, MBB, MBBI, /* IsProlog*/ true );
if (SGPRForFPSaveRestoreCopy) {
// Copy FP to the scratch register now and emit the CFI entry. It avoids
// the extra FP copy needed in the other two cases when FP is spilled to
// memory or to a VGPR lane.
PrologEpilogSGPRSpillBuilder SB (
FramePtrReg,
FuncInfo->getPrologEpilogSGPRSaveRestoreInfo (FramePtrReg), MBB, MBBI,
DL, TII, TRI, LiveRegs , FramePtrReg);
DL, TII, TRI, LiveUnits , FramePtrReg);
SB.save ();
LiveRegs .addReg (SGPRForFPSaveRestoreCopy);
LiveUnits .addReg (SGPRForFPSaveRestoreCopy);
} else {
// Copy FP into a new scratch register so that its previous value can be
// spilled after setting up the new frame.
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister (
MRI, LiveRegs , AMDGPU::SReg_32_XM0_XEXECRegClass);
MRI, LiveUnits , AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error (" failed to find free scratch register" );
LiveRegs .addReg (FramePtrRegScratchCopy);
LiveUnits .addReg (FramePtrRegScratchCopy);
BuildMI (MBB, MBBI, DL, TII->get (AMDGPU::COPY), FramePtrRegScratchCopy)
.addReg (FramePtrReg);
}
Expand All
@@ -1132,9 +1136,9 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
const unsigned Alignment = MFI.getMaxAlign ().value ();
RoundedSize += Alignment;
if (LiveRegs .empty ()) {
LiveRegs .init (TRI);
LiveRegs .addLiveIns (MBB);
if (LiveUnits .empty ()) {
LiveUnits .init (TRI);
LiveUnits .addLiveIns (MBB);
}
// s_add_i32 s33, s32, NumBytes
Expand All
@@ -1157,10 +1161,10 @@ void SIFrameLowering::emitPrologue(MachineFunction &MF,
// If FP is used, emit the CSR spills with FP base register.
if (HasFP) {
emitCSRSpillStores (MF, MBB, MBBI, DL, LiveRegs , FramePtrReg,
emitCSRSpillStores (MF, MBB, MBBI, DL, LiveUnits , FramePtrReg,
FramePtrRegScratchCopy);
if (FramePtrRegScratchCopy)
LiveRegs .removeReg (FramePtrRegScratchCopy);
LiveUnits .removeReg (FramePtrRegScratchCopy);
}
// If we need a base pointer, set it up here. It's whatever the value of
Expand Down
Expand Up
@@ -1209,7 +1213,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
const SIInstrInfo *TII = ST.getInstrInfo ();
const SIRegisterInfo &TRI = TII->getRegisterInfo ();
MachineRegisterInfo &MRI = MF.getRegInfo ();
LivePhysRegs LiveRegs ;
LiveRegUnits LiveUnits ;
// Get the insert location for the epilogue. If there were no terminators in
// the block, get the last instruction.
MachineBasicBlock::iterator MBBI = MBB.end ();
Expand Down
Expand Up
@@ -1239,19 +1243,19 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
// SGPRForFPSaveRestoreCopy is not true, restore the previous value of FP
// into a new scratch register and copy to FP later when other registers are
// restored from the current stack frame.
initLiveRegs (LiveRegs , TRI, FuncInfo, MF, MBB, MBBI, /* IsProlog*/ false );
initLiveUnits (LiveUnits , TRI, FuncInfo, MF, MBB, MBBI, /* IsProlog*/ false );
if (SGPRForFPSaveRestoreCopy) {
LiveRegs .addReg (SGPRForFPSaveRestoreCopy);
LiveUnits .addReg (SGPRForFPSaveRestoreCopy);
} else {
FramePtrRegScratchCopy = findScratchNonCalleeSaveRegister (
MRI, LiveRegs , AMDGPU::SReg_32_XM0_XEXECRegClass);
MRI, LiveUnits , AMDGPU::SReg_32_XM0_XEXECRegClass);
if (!FramePtrRegScratchCopy)
report_fatal_error (" failed to find free scratch register" );
LiveRegs .addReg (FramePtrRegScratchCopy);
LiveUnits .addReg (FramePtrRegScratchCopy);
}
emitCSRSpillRestores (MF, MBB, MBBI, DL, LiveRegs , FramePtrReg,
emitCSRSpillRestores (MF, MBB, MBBI, DL, LiveUnits , FramePtrReg,
FramePtrRegScratchCopy);
}
Expand All
@@ -1274,7 +1278,7 @@ void SIFrameLowering::emitEpilogue(MachineFunction &MF,
MIB.setMIFlag (MachineInstr::FrameDestroy);
} else {
// Insert the CSR spill restores with SP as the base register.
emitCSRSpillRestores (MF, MBB, MBBI, DL, LiveRegs , StackPtrReg,
emitCSRSpillRestores (MF, MBB, MBBI, DL, LiveUnits , StackPtrReg,
FramePtrRegScratchCopy);
}
}
Expand Down
Expand Up
@@ -1471,30 +1475,30 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
SIMachineFunctionInfo *MFI = MF.getInfo <SIMachineFunctionInfo>();
const GCNSubtarget &ST = MF.getSubtarget <GCNSubtarget>();
const SIRegisterInfo *TRI = ST.getRegisterInfo ();
LivePhysRegs LiveRegs ;
LiveRegs .init (*TRI);
LiveRegUnits LiveUnits ;
LiveUnits .init (*TRI);
// Initially mark callee saved registers as used so we will not choose them
// while looking for scratch SGPRs.
const MCPhysReg *CSRegs = MF.getRegInfo ().getCalleeSavedRegs ();
for (unsigned I = 0 ; CSRegs[I]; ++I)
LiveRegs .addReg (CSRegs[I]);
LiveUnits .addReg (CSRegs[I]);
const TargetRegisterClass &RC = *TRI->getWaveMaskRegClass ();
if (NeedExecCopyReservedReg) {
Register ReservedReg = MFI->getSGPRForEXECCopy ();
assert (ReservedReg && " Should have reserved an SGPR for EXEC copy." );
Register UnusedScratchReg = findUnusedRegister (MRI, LiveRegs , RC);
Register UnusedScratchReg = findUnusedRegister (MRI, LiveUnits , RC);
if (UnusedScratchReg) {
// If found any unused scratch SGPR, reserve the register itself for Exec
// copy and there is no need for any spills in that case.
MFI->setSGPRForEXECCopy (UnusedScratchReg);
LiveRegs .addReg (UnusedScratchReg);
LiveUnits .addReg (UnusedScratchReg);
} else {
// Needs spill.
assert (!MFI->hasPrologEpilogSGPRSpillEntry (ReservedReg) &&
" Re-reserving spill slot for EXEC copy register" );
getVGPRSpillLaneOrTempRegister (MF, LiveRegs , ReservedReg, RC,
getVGPRSpillLaneOrTempRegister (MF, LiveUnits , ReservedReg, RC,
/* IncludeScratchCopy=*/ false );
}
}
Expand All
@@ -1515,14 +1519,14 @@ void SIFrameLowering::determinePrologEpilogSGPRSaves(
Register FramePtrReg = MFI->getFrameOffsetReg ();
assert (!MFI->hasPrologEpilogSGPRSpillEntry (FramePtrReg) &&
" Re-reserving spill slot for FP" );
getVGPRSpillLaneOrTempRegister (MF, LiveRegs , FramePtrReg);
getVGPRSpillLaneOrTempRegister (MF, LiveUnits , FramePtrReg);
}
if (TRI->hasBasePointer (MF)) {
Register BasePtrReg = TRI->getBaseRegister ();
assert (!MFI->hasPrologEpilogSGPRSpillEntry (BasePtrReg) &&
" Re-reserving spill slot for BP" );
getVGPRSpillLaneOrTempRegister (MF, LiveRegs , BasePtrReg);
getVGPRSpillLaneOrTempRegister (MF, LiveUnits , BasePtrReg);
}
}
Expand Down