503 changes: 300 additions & 203 deletions llvm/lib/Target/ARM/ARMFrameLowering.cpp

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions llvm/lib/Target/ARM/ARMFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,12 @@ class ARMFrameLowering : public TargetFrameLowering {
private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
ArrayRef<CalleeSavedInfo> CSI, unsigned StmOpc,
unsigned StrOpc, bool NoGap, bool (*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs, unsigned MIFlags = 0) const;
unsigned StrOpc, bool NoGap,
function_ref<bool(unsigned)> Func) const;
void emitPopInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
MutableArrayRef<CalleeSavedInfo> CSI, unsigned LdmOpc,
unsigned LdrOpc, bool isVarArg, bool NoGap,
bool (*Func)(unsigned, bool),
unsigned NumAlignedDPRCS2Regs) const;
function_ref<bool(unsigned)> Func) const;

MachineBasicBlock::iterator
eliminateCallFramePseudoInstr(MachineFunction &MF,
Expand Down
33 changes: 28 additions & 5 deletions llvm/lib/Target/ARM/ARMSubtarget.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -485,11 +485,34 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF,
ARM::GPRRegClass.contains(PhysReg);
}

bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const {
// Decide how the pushes and pops of callee-saved general-purpose registers
// must be split for MF. The result is one of SplitR7, SplitR11WindowsSEH or
// NoSplit.
ARMSubtarget::PushPopSplitVariation
ARMSubtarget::getPushPopSplitVariation(const MachineFunction &MF) const {
const Function &F = MF.getFunction();
// NOTE(review): the early `return false` and the unconditional
// `return MFI.hasVarSizedObjects() || ...` below return a bool and would end
// the function before any of the split checks run; they look like leftovers
// of the removed splitFramePointerPush body -- confirm they are dropped.
if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() ||
!F.needsUnwindTableEntry())
return false;
const MachineFrameInfo &MFI = MF.getFrameInfo();
return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF);
// NOTE(review): CSI is declared by value and is not referenced anywhere else
// in this function; it looks like an unused copy of the callee-saved info --
// confirm and remove.
const std::vector<CalleeSavedInfo> CSI =
MF.getFrameInfo().getCalleeSavedInfo();

// Returns SplitR7 if the frame setup must be split into two separate pushes
// of r0-r7,lr and another containing r8-r11 (+r12 if necessary). This is
// always required on Thumb1-only targets, as the push and pop instructions
// can't access the high registers. This is also required when R7 is the frame
// pointer and frame pointer elimination is disabled, or when return address
// signing is enabled and no AAPCS frame chain is being created.
if ((MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress() &&
!createAAPCSFrameChain()) ||
(getFramePointerReg() == ARM::R7 &&
MF.getTarget().Options.DisableFramePointerElim(MF)) ||
isThumb1Only())
return SplitR7;

// Returns SplitR11WindowsSEH when the stack pointer needs to be restored
// from the frame pointer r11 + an offset (variable-sized objects or stack
// realignment) and Windows CFI is enabled with an unwind table entry needed.
// This stack unwinding cannot be expressed with SEH unwind opcodes when done
// with a single push, making it necessary to split the push into r4-r10, and
// another containing r11+lr.
if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI() &&
F.needsUnwindTableEntry() &&
(MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF)))
return SplitR11WindowsSEH;
// Neither condition applies: the callee-saved GPRs need no split.
return NoSplit;
}
41 changes: 28 additions & 13 deletions llvm/lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,32 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
SingleIssuePlusExtras,
};

/// How the push and pop instructions of callee saved general-purpose
/// registers should be split. getPushPopSplitVariation() picks one of these
/// values for a given function.
enum PushPopSplitVariation {
/// All GPRs can be pushed in a single instruction.
/// push {r0-r12, lr}
/// vpush {d8-d15}
NoSplit,

/// R7 and LR must be adjacent, because R7 is the frame pointer, and must
/// point to a frame record consisting of the previous frame pointer and the
/// return address. getPushPopSplitVariation() also selects this layout on
/// Thumb1-only targets, and when return address signing is enabled without
/// an AAPCS frame chain.
/// push {r0-r7, lr}
/// push {r8-r12}
/// vpush {d8-d15}
SplitR7,

/// When the stack frame size is not known (because of variable-sized
/// objects or realignment), Windows SEH requires the callee-saved registers
/// to be stored in three regions, with R11 and LR below the floating-point
/// registers.
/// push {r0-r10, r12}
/// vpush {d8-d15}
/// push {r11, lr}
SplitR11WindowsSEH,
};

protected:
// Bool members corresponding to the SubtargetFeatures defined in tablegen
#define GET_SUBTARGETINFO_MACRO(ATTRIBUTE, DEFAULT, GETTER) \
Expand Down Expand Up @@ -371,19 +397,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return ARM::R11;
}

/// Whether the frame setup uses two separate pushes (r0-r7,lr first, then
/// r8-r11), mainly so that the frame pointer ends up adjacent to lr.
/// Thumb1-only targets always need this, because their push and pop
/// instructions can't access the high registers.
bool splitFramePushPop(const MachineFunction &MF) const {
  // Return address signing forces the split regardless of the other checks.
  const bool SignsReturnAddress =
      MF.getInfo<ARMFunctionInfo>()->shouldSignReturnAddress();
  if (SignsReturnAddress)
    return true;

  // R7 only forces the split when it is the frame pointer and frame pointer
  // elimination is disabled for this function.
  const bool KeepsR7FramePointer =
      getFramePointerReg() == ARM::R7 &&
      MF.getTarget().Options.DisableFramePointerElim(MF);
  return KeepsR7FramePointer || isThumb1Only();
}

bool splitFramePointerPush(const MachineFunction &MF) const;
/// Returns how the pushes and pops of callee-saved general-purpose registers
/// must be split for the given function; see PushPopSplitVariation for the
/// possible layouts.
enum PushPopSplitVariation
getPushPopSplitVariation(const MachineFunction &MF) const;

bool useStride4VFPs() const;

Expand Down
13 changes: 5 additions & 8 deletions llvm/lib/Target/ARM/Thumb1FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
assert(NumBytes >= ArgRegsSaveSize &&
"ArgRegsSaveSize is included in NumBytes");
const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 &&
"Must use R7 spilt for Thumb1");

// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
Expand Down Expand Up @@ -221,11 +223,8 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R8:
case ARM::R9:
case ARM::R10:
if (STI.splitFramePushPop(MF)) {
GPRCS2Size += 4;
break;
}
[[fallthrough]];
GPRCS2Size += 4;
break;
case ARM::LR:
if (HasFrameRecordArea) {
FRSize += 4;
Expand Down Expand Up @@ -365,9 +364,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF,
case ARM::R10:
case ARM::R11:
case ARM::R12:
if (STI.splitFramePushPop(MF))
break;
[[fallthrough]];
break;
case ARM::R0:
case ARM::R1:
case ARM::R2:
Expand Down