Skip to content

Commit

Permalink
[PowerPC] Move the stack pointer update instruction later in the prol…
Browse files Browse the repository at this point in the history
…ogue and earlier in the epilogue.

Move the stdu instruction in the prologue and epilogue.
This should provide a small performance boost in functions that are able to do
this. I've kept this change rather conservative at the moment and functions
with frame pointers or base pointers will not try to move the stack pointer
update.

Differential Revision: https://reviews.llvm.org/D42590

llvm-svn: 355085
  • Loading branch information
stefanp-ibm committed Feb 28, 2019
1 parent 134bc19 commit bd5429e
Show file tree
Hide file tree
Showing 10 changed files with 313 additions and 117 deletions.
168 changes: 145 additions & 23 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.cpp
Expand Up @@ -445,12 +445,26 @@ static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size. Update the MachineFunction object with the stack size.
unsigned
PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate) const {
unsigned NewMaxCallFrameSize = 0;
unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
&NewMaxCallFrameSize);
MF.getFrameInfo().setStackSize(FrameSize);
MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
return FrameSize;
}

/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
bool UpdateMF,
bool UseEstimate) const {
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned
PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
bool UseEstimate,
unsigned *NewMaxCallFrameSize) const {
const MachineFrameInfo &MFI = MF.getFrameInfo();

// Get the number of bytes to allocate from the FrameInfo
unsigned FrameSize =
Expand All @@ -476,10 +490,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,

// Check whether we can skip adjusting the stack pointer (by using red zone)
if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
NumNoNeedForFrame++;
// No need for frame
if (UpdateMF)
MFI.setStackSize(0);
return 0;
}

Expand All @@ -495,20 +506,16 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
if (MFI.hasVarSizedObjects())
maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;

// Update maximum call frame size.
if (UpdateMF)
MFI.setMaxCallFrameSize(maxCallFrameSize);
// Update the new max call frame size if the caller passes in a valid pointer.
if (NewMaxCallFrameSize)
*NewMaxCallFrameSize = maxCallFrameSize;

// Include call frame size in total.
FrameSize += maxCallFrameSize;

// Make sure the frame is aligned.
FrameSize = (FrameSize + AlignMask) & ~AlignMask;

// Update frame info.
if (UpdateMF)
MFI.setStackSize(FrameSize);

return FrameSize;
}

Expand Down Expand Up @@ -689,7 +696,7 @@ PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
MachineFunction &MF = *(MBB->getParent());
bool HasBP = RegInfo->hasBasePointer(MF);
unsigned FrameSize = determineFrameLayout(MF, false);
unsigned FrameSize = determineFrameLayout(MF);
int NegFrameSize = -FrameSize;
bool IsLargeFrame = !isInt<16>(NegFrameSize);
MachineFrameInfo &MFI = MF.getFrameInfo();
Expand All @@ -712,6 +719,50 @@ bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
return findScratchRegister(TmpMBB, true);
}

bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

// Abort if there is no register info or function info.
if (!RegInfo || !FI)
return false;

// Only move the stack update on ELFv2 ABI and PPC64.
if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
return false;

// Check the frame size first and return false if it does not fit the
// requirements.
// We need a non-zero frame size as well as a frame that will fit in the red
// zone. This is because by moving the stack pointer update we are now storing
// to the red zone until the stack pointer is updated. If we get an interrupt
// inside the prologue but before the stack update we now have a number of
// stores to the red zone and those stores must all fit.
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned FrameSize = MFI.getStackSize();
if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
return false;

// Frame pointers and base pointers complicate matters so don't do anything
// if we have them. For example having a frame pointer will sometimes require
// a copy of r1 into r31 and that makes keeping track of updates to r1 more
// difficult.
if (hasFP(MF) || RegInfo->hasBasePointer(MF))
return false;

// Calls to fast_cc functions use different rules for passing parameters on
// the stack from the ABI and using PIC base in the function imposes
// similar restrictions to using the base pointer. It is not generally safe
// to move the stack pointer update in these situations.
if (FI->hasFastCall() || FI->usesPICBase())
return false;

// Finally we can move the stack update if we do not require regiser
// scavenging. Register scavenging can introduce more spills and so
// may make the frame size larger than we have computed.
return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
MachineBasicBlock::iterator MBBI = MBB.begin();
Expand Down Expand Up @@ -747,7 +798,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
MBBI = MBB.begin();

// Work out frame sizes.
unsigned FrameSize = determineFrameLayout(MF);
unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
int NegFrameSize = -FrameSize;
if (!isInt<32>(NegFrameSize))
llvm_unreachable("Unhandled stack size!");
Expand Down Expand Up @@ -854,6 +905,45 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
assert((isPPC64 || !MustSaveCR) &&
"Prologue CR saving supported only in 64-bit mode");

// Check if we can move the stack update instruction (stdu) down the prologue
// past the callee saves. Hopefully this will avoid the situation where the
// saves are waiting for the update on the store with update to complete.
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
bool MovingStackUpdateDown = false;

// Check if we can move the stack update.
if (stackUpdateCanBeMoved(MF)) {
const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
for (CalleeSavedInfo CSI : Info) {
int FrIdx = CSI.getFrameIdx();
// If the frame index is not negative the callee saved info belongs to a
// stack object that is not a fixed stack object. We ignore non-fixed
// stack objects because we won't move the stack update pointer past them.
if (FrIdx >= 0)
continue;

if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
StackUpdateLoc++;
MovingStackUpdateDown = true;
} else {
// We need all of the Frame Indices to meet these conditions.
// If they do not, abort the whole operation.
StackUpdateLoc = MBBI;
MovingStackUpdateDown = false;
break;
}
}

// If the operation was not aborted then update the object offset.
if (MovingStackUpdateDown) {
for (CalleeSavedInfo CSI : Info) {
int FrIdx = CSI.getFrameIdx();
if (FrIdx < 0)
MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
}
}
}

// If we need to spill the CR and the LR but we don't have two separate
// registers available, we must spill them one at a time
if (MustSaveCR && SingleScratchReg && MustSaveLR) {
Expand Down Expand Up @@ -917,7 +1007,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
}

if (MustSaveLR)
BuildMI(MBB, MBBI, dl, StoreInst)
BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
.addReg(ScratchReg, getKillRegState(true))
.addImm(LROffset)
.addReg(SPReg);
Expand Down Expand Up @@ -985,7 +1075,7 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
HasSTUX = true;

} else if (!isLargeFrame) {
BuildMI(MBB, MBBI, dl, StoreUpdtInst, SPReg)
BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
.addReg(SPReg)
.addImm(NegFrameSize)
.addReg(SPReg);
Expand Down Expand Up @@ -1233,6 +1323,12 @@ void PPCFrameLowering::emitPrologue(MachineFunction &MF,
.addCFIIndex(CFIRegister);
} else {
int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
// We have changed the object offset above but we do not want to change
// the actual offsets in the CFI instruction so we have to undo the
// offset change here.
if (MovingStackUpdateDown)
Offset -= NegFrameSize;

unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
Expand Down Expand Up @@ -1379,6 +1475,32 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
unsigned RBReg = SPReg;
unsigned SPAdd = 0;

// Check if we can move the stack update instruction up the epilogue
// past the callee saves. This will allow the move to LR instruction
// to be executed before the restores of the callee saves which means
// that the callee saves can hide the latency from the MTLR instrcution.
MachineBasicBlock::iterator StackUpdateLoc = MBBI;
if (stackUpdateCanBeMoved(MF)) {
const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
for (CalleeSavedInfo CSI : Info) {
int FrIdx = CSI.getFrameIdx();
// If the frame index is not negative the callee saved info belongs to a
// stack object that is not a fixed stack object. We ignore non-fixed
// stack objects because we won't move the update of the stack pointer
// past them.
if (FrIdx >= 0)
continue;

if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
StackUpdateLoc--;
else {
// Abort the operation as we can't update all CSR restores.
StackUpdateLoc = MBBI;
break;
}
}
}

if (FrameSize) {
// In the prologue, the loaded (or persistent) stack pointer value is
// offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
Expand Down Expand Up @@ -1408,7 +1530,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
} else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
if (HasRedZone) {
BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
.addReg(SPReg)
.addImm(FrameSize);
} else {
Expand All @@ -1432,7 +1554,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(FPReg);
RBReg = FPReg;
}
BuildMI(MBB, MBBI, dl, LoadInst, RBReg)
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
.addImm(0)
.addReg(SPReg);
}
Expand Down Expand Up @@ -1465,7 +1587,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
// a base register anyway, because it may happen to be R0.
bool LoadedLR = false;
if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
.addImm(LROffset+SPAdd)
.addReg(RBReg);
LoadedLR = true;
Expand Down Expand Up @@ -1537,7 +1659,7 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
.addReg(TempReg, getKillRegState(i == e-1));

if (MustSaveLR)
BuildMI(MBB, MBBI, dl, MTLRInst).addReg(ScratchReg);
BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

// Callee pop calling convention. Pop parameter/linkage area. Used for tail
// call optimization
Expand Down Expand Up @@ -1946,7 +2068,7 @@ PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
// the 16-bit immediate. We don't know the complete frame size here
// because we've not yet computed callee-saved register spills or the
// needed alignment padding.
unsigned StackSize = determineFrameLayout(MF, false, true);
unsigned StackSize = determineFrameLayout(MF, true);
MachineFrameInfo &MFI = MF.getFrameInfo();
if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
Expand Down
23 changes: 20 additions & 3 deletions llvm/lib/Target/PowerPC/PPCFrameLowering.h
Expand Up @@ -72,12 +72,29 @@ class PPCFrameLowering: public TargetFrameLowering {
*/
void createTailCallBranchInstr(MachineBasicBlock &MBB) const;

/**
* Check if the conditions are correct to allow for the stack update
* to be moved past the CSR save/restore code.
*/
bool stackUpdateCanBeMoved(MachineFunction &MF) const;

public:
PPCFrameLowering(const PPCSubtarget &STI);

unsigned determineFrameLayout(MachineFunction &MF,
bool UpdateMF = true,
bool UseEstimate = false) const;
/**
* Determine the frame layout and update the machine function.
*/
unsigned determineFrameLayoutAndUpdate(MachineFunction &MF,
bool UseEstimate = false) const;

/**
* Determine the frame layout but do not update the machine function.
* The MachineFunction object can be const in this case as it is not
* modified.
*/
unsigned determineFrameLayout(const MachineFunction &MF,
bool UseEstimate = false,
unsigned *NewMaxCallFrameSize = nullptr) const;

/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
Expand Down
8 changes: 8 additions & 0 deletions llvm/lib/Target/PowerPC/PPCInstrInfo.cpp
Expand Up @@ -1065,6 +1065,10 @@ unsigned PPCInstrInfo::getStoreOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
} else if (PPC::SPE4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
Expand Down Expand Up @@ -1151,6 +1155,10 @@ PPCInstrInfo::getLoadOpcodeForSpill(unsigned Reg,
OpcodeIndex = SOK_Float8Spill;
} else if (PPC::F4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_Float4Spill;
} else if (PPC::SPERCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPESpill;
} else if (PPC::SPE4RCRegClass.contains(Reg)) {
OpcodeIndex = SOK_SPE4Spill;
} else if (PPC::CRRCRegClass.contains(Reg)) {
OpcodeIndex = SOK_CRSpill;
} else if (PPC::CRBITRCRegClass.contains(Reg)) {
Expand Down

0 comments on commit bd5429e

Please sign in to comment.