[z/OS] Implement prologue and epilogue generation for z/OS target.
This patch adds support for prologue and epilogue generation for
the z/OS target under the XPLINK64 ABI for functions with a stack
size of less than 1048576 bytes, i.e. functions that do not need a
huge stack frame.
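
For a small frame this produces, roughly, the sequence sketched below (an
illustration based on the func0 test added in this patch; the exact
displacements depend on the frame size):

define void @func0() {
  call i64 (i64) @fun(i64 10)
  ret void
}

; Generated XPLINK64 prologue/epilogue for the 192-byte frame:
;   stmg 6, 7, 1872(4)    ; save GPRs at displacement 2064 - 192
;   aghi 4, -192          ; allocate the frame
;   ...
;   lg   7, 2072(4)       ; reload r7 (holds the return address)
;   aghi 4, 192           ; deallocate the frame
;   b    2(7)             ; return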

Reviewed by: uweigand, Kai

Differential Revision: https://reviews.llvm.org/D114457
Everybody0523 authored and redstar committed Dec 13, 2021
1 parent fd0b00b commit ffad4d7
Showing 7 changed files with 273 additions and 11 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -166,6 +166,7 @@ def CSR_SystemZ_NoRegs : CalleeSavedRegs<(add)>;
// any non-leaf function and restored in the epilogue for use by the
// return instruction so it functions exactly like a callee-saved register.
def CSR_SystemZ_XPLINK64 : CalleeSavedRegs<(add (sequence "R%dD", 7, 15),
(sequence "R%dD", 4, 4),
(sequence "F%dD", 15, 8))>;

def CSR_SystemZ_XPLINK64_Vector : CalleeSavedRegs<(add CSR_SystemZ_XPLINK64,
179 changes: 175 additions & 4 deletions llvm/lib/Target/SystemZ/SystemZFrameLowering.cpp
@@ -818,7 +818,7 @@ bool SystemZELFFrameLowering::usePackedStack(MachineFunction &MF) const {
}

SystemZXPLINKFrameLowering::SystemZXPLINKFrameLowering()
: SystemZFrameLowering(TargetFrameLowering::StackGrowsUp, Align(32), 128,
: SystemZFrameLowering(TargetFrameLowering::StackGrowsDown, Align(32), 0,
Align(32), /* StackRealignable */ false),
RegSpillOffsets(-1) {

@@ -990,12 +990,183 @@ bool SystemZXPLINKFrameLowering::spillCalleeSavedRegisters(
return true;
}

bool SystemZXPLINKFrameLowering::restoreCalleeSavedRegisters(
MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

if (CSI.empty())
return false;

MachineFunction &MF = *MBB.getParent();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
const TargetInstrInfo *TII = Subtarget.getInstrInfo();
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();

DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc();

// Restore FPRs in the normal TargetInstrInfo way.
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (SystemZ::FP64BitRegClass.contains(Reg))
TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
&SystemZ::FP64BitRegClass, TRI);
if (SystemZ::VR128BitRegClass.contains(Reg))
TII->loadRegFromStackSlot(MBB, MBBI, Reg, CSI[I].getFrameIdx(),
&SystemZ::VR128BitRegClass, TRI);
}

// Restore call-saved GPRs (but not call-clobbered varargs, which at
// this point might hold return values).
SystemZ::GPRRegs RestoreGPRs = ZFI->getRestoreGPRRegs();
if (RestoreGPRs.LowGPR) {
assert(isInt<20>(Regs.getStackPointerBias() + RestoreGPRs.GPROffset));
if (RestoreGPRs.LowGPR == RestoreGPRs.HighGPR)
// Build an LG/L instruction.
BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LG), RestoreGPRs.LowGPR)
.addReg(Regs.getStackPointerRegister())
.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset)
.addReg(0);
else {
// Build an LMG/LM instruction.
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(SystemZ::LMG));

// Add the explicit register operands.
MIB.addReg(RestoreGPRs.LowGPR, RegState::Define);
MIB.addReg(RestoreGPRs.HighGPR, RegState::Define);

// Add the address.
MIB.addReg(Regs.getStackPointerRegister());
MIB.addImm(Regs.getStackPointerBias() + RestoreGPRs.GPROffset);

// Do a second scan, marking the registers in between as implicitly
// defined by the instruction.
for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
unsigned Reg = CSI[I].getReg();
if (Reg > RestoreGPRs.LowGPR && Reg < RestoreGPRs.HighGPR)
MIB.addReg(Reg, RegState::ImplicitDefine);
}
}
}

return true;
}

void SystemZXPLINKFrameLowering::emitPrologue(MachineFunction &MF,
MachineBasicBlock &MBB) const {}
MachineBasicBlock &MBB) const {
assert(&MF.front() == &MBB && "Shrink-wrapping not yet supported");
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineBasicBlock::iterator MBBI = MBB.begin();
auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
MachineInstr *StoreInstr = nullptr;
bool HasFP = hasFP(MF);
// Debug location must be unknown since the first debug location is used
// to determine the end of the prologue.
DebugLoc DL;
uint64_t Offset = 0;

// TODO: Support leaf functions; only add size of save+reserved area when
// function is non-leaf.
MFFrame.setStackSize(MFFrame.getStackSize() + Regs.getCallFrameSize());
uint64_t StackSize = MFFrame.getStackSize();

// FIXME: Implement support for large stack sizes, when the stack extension
// routine needs to be called.
if (StackSize > 1024 * 1024) {
llvm_unreachable("Huge Stack Frame not yet supported on z/OS");
}

if (ZFI->getSpillGPRRegs().LowGPR) {
// Skip over the GPR saves.
if ((MBBI != MBB.end()) && ((MBBI->getOpcode() == SystemZ::STMG))) {
const int Operand = 3;
// Now we can set the offset for the operation, since the stack layout
// has been finalized.
Offset = Regs.getStackPointerBias() + MBBI->getOperand(Operand).getImm();
// Maximum displacement for STMG instruction.
if (isInt<20>(Offset - StackSize))
Offset -= StackSize;
else
StoreInstr = &*MBBI;
MBBI->getOperand(Operand).setImm(Offset);
++MBBI;
} else
llvm_unreachable("Couldn't skip over GPR saves");
}

if (StackSize) {
MachineBasicBlock::iterator InsertPt = StoreInstr ? StoreInstr : MBBI;
// Allocate StackSize bytes.
int64_t Delta = -int64_t(StackSize);

// If the STM(G) instruction also stores SP (R4) but the displacement is
// too large, the SP register is adjusted before the store, so the wrong
// value would be stored and retrieved later. In that case, we need to
// temporarily save the value of SP in a scratch register and store it to
// memory afterwards.
if (StoreInstr && HasFP) {
// Insert LR r0,r4 before STMG instruction.
BuildMI(MBB, InsertPt, DL, ZII->get(SystemZ::LGR))
.addReg(SystemZ::R0D, RegState::Define)
.addReg(SystemZ::R4D);
// Insert ST r0,xxx(,r4) after STMG instruction.
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::STG), SystemZ::R0D)
.addReg(SystemZ::R4D)
.addImm(Offset)
.addReg(0);
}

emitIncrement(MBB, InsertPt, DL, Regs.getStackPointerRegister(), Delta,
ZII);
}

if (HasFP) {
// Copy the base of the frame to the frame pointer register.
BuildMI(MBB, MBBI, DL, ZII->get(SystemZ::LGR),
Regs.getFramePointerRegister())
.addReg(Regs.getStackPointerRegister());

// Mark the FramePtr as live at the beginning of every block except
// the entry block. (We'll have marked R8 as live on entry when
// saving the GPRs.)
for (auto I = std::next(MF.begin()), E = MF.end(); I != E; ++I)
I->addLiveIn(Regs.getFramePointerRegister());
}
}

void SystemZXPLINKFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {}
MachineBasicBlock &MBB) const {
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
SystemZMachineFunctionInfo *ZFI = MF.getInfo<SystemZMachineFunctionInfo>();
MachineFrameInfo &MFFrame = MF.getFrameInfo();
auto *ZII = static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();

// Skip the return instruction.
assert(MBBI->isReturn() && "Can only insert epilogue into returning blocks");

uint64_t StackSize = MFFrame.getStackSize();
if (StackSize) {
unsigned SPReg = Regs.getStackPointerRegister();
if (ZFI->getRestoreGPRRegs().LowGPR != SPReg) {
DebugLoc DL = MBBI->getDebugLoc();
emitIncrement(MBB, MBBI, DL, SPReg, StackSize, ZII);
}
}
}

bool SystemZXPLINKFrameLowering::hasFP(const MachineFunction &MF) const {
return false;
return (MF.getFrameInfo().hasVarSizedObjects());
}

void SystemZXPLINKFrameLowering::processFunctionBeforeFrameFinalized(
MachineFunction &MF, RegScavenger *RS) const {
MachineFrameInfo &MFFrame = MF.getFrameInfo();
const SystemZSubtarget &Subtarget = MF.getSubtarget<SystemZSubtarget>();
auto &Regs = Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();

// Set up the stack frame offset.
MFFrame.setOffsetAdjustment(Regs.getStackPointerBias());
}
9 changes: 9 additions & 0 deletions llvm/lib/Target/SystemZ/SystemZFrameLowering.h
@@ -115,11 +115,20 @@ class SystemZXPLINKFrameLowering : public SystemZFrameLowering {
ArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;

bool
restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBII,
MutableArrayRef<CalleeSavedInfo> CSI,
const TargetRegisterInfo *TRI) const override;

void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

void emitPrologue(MachineFunction &MF, MachineBasicBlock &MBB) const override;

bool hasFP(const MachineFunction &MF) const override;

void processFunctionBeforeFrameFinalized(MachineFunction &MF,
RegScavenger *RS) const override;
};
} // end namespace llvm

12 changes: 10 additions & 2 deletions llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1500,8 +1500,16 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
assert(VA.isMemLoc() && "Argument not register or memory");

// Create the frame index object for this incoming parameter.
int FI = MFI.CreateFixedObject(LocVT.getSizeInBits() / 8,
VA.getLocMemOffset(), true);
// FIXME: Pre-include call frame size in the offset, should not
// need to manually add it here.
int64_t ArgSPOffset = VA.getLocMemOffset();
if (Subtarget.isTargetXPLINK64()) {
auto &XPRegs =
Subtarget.getSpecialRegisters<SystemZXPLINK64Registers>();
ArgSPOffset += XPRegs.getCallFrameSize();
}
int FI =
MFI.CreateFixedObject(LocVT.getSizeInBits() / 8, ArgSPOffset, true);

// Create the SelectionDAG nodes corresponding to a load
// from this parameter. Unpromoted ints and floats are
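For context: before this patch the XPLINK64 prologue and epilogue were empty,
so stack-passed incoming arguments were addressed at negative displacements
from the stack pointer. With the call frame size folded into the fixed-object
offset and the prologue now allocating the frame, they are addressed at
positive displacements; the exact value (2328 in the updated check below)
depends on the function's frame layout:

; Old check: ag 2, -{{[0-9]+}}(4)
; New check: ag 2, 2328(4)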
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/call-zos-01.ll
@@ -86,7 +86,7 @@ entry:
}

; CHECK-LABEL: pass_integrals0:
; CHECK: ag 2, -{{[0-9]+}}(4)
; CHECK: ag 2, 2328(4)
; CHECK-NEXT: lgr 3, 2
define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
entry:
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/SystemZ/call-zos-vec.ll
@@ -14,7 +14,7 @@ entry:
; CHECK: vaf 1, 1, 27
; CHECK: vaf 1, 1, 28
; CHECK: vaf 1, 1, 29
; CHECK: vl 0, 32(4), 4
; CHECK: vl 0, 2432(4), 4
; CHECK: vaf 1, 1, 30
; CHECK: vaf 1, 1, 31
; CHECK: vaf 24, 1, 0
79 changes: 76 additions & 3 deletions llvm/test/CodeGen/SystemZ/zos-prologue-epilog.ll
@@ -6,15 +6,27 @@

; Small stack frame.
; CHECK-LABEL: func0
; CHECK64: stmg 6, 7
; CHECK64: stmg 6, 7, 1872(4)
; The stmg instruction's displacement field must be 2064 - dsa_size,
; as required by the XPLINK64 ABI.
; CHECK64: aghi 4, -192

; CHECK64: lg 7, 2072(4)
; CHECK64: aghi 4, 192
; CHECK64: b 2(7)
define void @func0() {
call i64 (i64) @fun(i64 10)
ret void
}

; Spill all GPR CSRs
; CHECK-LABEL: func1
; CHECK64: stmg 6, 15
; CHECK64: stmg 6, 15, 1904(4)
; CHECK64: aghi 4, -160

; CHECK64: lmg 7, 15, 2072(4)
; CHECK64: aghi 4, 160
; CHECK64: b 2(7)
define void @func1(i64 *%ptr) {
%l01 = load volatile i64, i64 *%ptr
%l02 = load volatile i64, i64 *%ptr
@@ -67,6 +79,8 @@ define void @func1(i64 *%ptr) {

; Spill all FPRs and VRs
; CHECK-LABEL: func2
; CHECK64: stmg 6, 7, 1744(4)
; CHECK64: aghi 4, -320
; CHECK64: std 15, {{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 14, {{[0-9]+}}(4) * 8-byte Folded Spill
; CHECK64: std 13, {{[0-9]+}}(4) * 8-byte Folded Spill
@@ -83,6 +97,27 @@ define void @func1(i64 *%ptr) {
; CHECK64: vst 18, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
; CHECK64: vst 17, {{[0-9]+}}(4), 4 * 16-byte Folded Spill
; CHECK64: vst 16, {{[0-9]+}}(4), 4 * 16-byte Folded Spill

; CHECK64: ld 15, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 14, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 13, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 12, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 11, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 10, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 9, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: ld 8, {{[0-9]+}}(4) * 8-byte Folded Reload
; CHECK64: vl 23, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 22, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 21, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 20, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 19, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 18, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 17, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: vl 16, {{[0-9]+}}(4), 4 * 16-byte Folded Reload
; CHECK64: lg 7, 2072(4)
; CHECK64: aghi 4, 320
; CHECK64: b 2(7)

define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
%l00 = load volatile double, double *%ptr
%l01 = load volatile double, double *%ptr
@@ -232,5 +267,43 @@ define void @func2(double *%ptr, <2 x i64> *%vec_ptr) {
ret void
}

declare i64 @fun(i64 %arg0)
; Big stack frame: forces the use of agfi before stmg,
; even though the stack-extension routine is not required.
; CHECK64: agfi 4, -1040768
; CHECK64: stmg 6, 7, 2064(4)
; CHECK64: agfi 4, 1040768
define void @func3() {
%arr = alloca [130070 x i64], align 8
%ptr = bitcast [130070 x i64]* %arr to i8*
call i64 (i8*) @fun1(i8* %ptr)
ret void
}

; Requires saving r4 due to a variable-sized object
; in the stack frame (e.g. a VLA).
; CHECK64: stmg 4, 8, 1856(4)
; CHECK64: aghi 4, -192
; CHECK64: lmg 4, 8, 2048(4)
define i64 @func4(i64 %n) {
%vla = alloca i64, i64 %n, align 8
%call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* nonnull %vla)
ret i64 %call
}

; Requires saving r4 and, in addition, a displacement large enough
; to force the use of agfi before stmg.
; CHECK64: lgr 0, 4
; CHECK64: agfi 4, -1040192
; CHECK64: stmg 4, 8, 2048(4)
; CHECK64: lmg 4, 8, 2048(4)
define i64 @func5(i64 %n) {
%vla = alloca i64, i64 %n, align 8
%arr = alloca [130000 x i64], align 8
%ptr = bitcast [130000 x i64]* %arr to i64*
%call = call i64 @fun2(i64 %n, i64* nonnull %vla, i64* %ptr)
ret i64 %call
}

declare i64 @fun(i64 %arg0)
declare i64 @fun1(i8* %ptr)
declare i64 @fun2(i64 %n, i64* %arr0, i64* %arr1)
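
Frames of 1 MiB or more still take the unimplemented huge-frame path and hit
the llvm_unreachable in emitPrologue. A hypothetical function (not part of
the added tests) that would trigger it:

define void @func_huge() {
  %arr = alloca [131072 x i64], align 8   ; 1048576 bytes of locals, plus the call frame
  %ptr = bitcast [131072 x i64]* %arr to i8*
  call i64 (i8*) @fun1(i8* %ptr)
  ret void
}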
