Skip to content

Commit

Permalink
[AArch64] Implement stack probing for windows
Browse files Browse the repository at this point in the history
Differential Revision: https://reviews.llvm.org/D41131

llvm-svn: 321150
  • Loading branch information
mstorsjo committed Dec 20, 2017
1 parent 6528fb8 commit 2778fd0
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 9 deletions.
28 changes: 28 additions & 0 deletions llvm/docs/Extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -288,3 +288,31 @@ standard stack probe emission.

The MSVC environment does not emit code for VLAs currently.

Windows on ARM64
----------------

Stack Probe Emission
^^^^^^^^^^^^^^^^^^^^

The reference implementation (Microsoft Visual Studio 2017) emits stack probes
in the following fashion:

.. code-block:: gas
mov x15, #constant
bl __chkstk
sub sp, sp, x15, lsl #4
However, this has the limitation of 256 MiB (±128MiB). In order to accommodate
larger binaries, LLVM supports the use of ``-mcode-model=large`` to allow a 8GiB
(±4GiB) range via a slight deviation. It will generate an indirect jump as
follows:

.. code-block:: gas
mov x15, #constant
adrp x16, __chkstk
add x16, x16, :lo12:__chkstk
blr x16
sub sp, sp, x15, lsl #4
86 changes: 77 additions & 9 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
#include "AArch64RegisterInfo.h"
#include "AArch64Subtarget.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
Expand Down Expand Up @@ -335,6 +336,22 @@ bool AArch64FrameLowering::canUseAsPrologue(
return findScratchNonCalleeSaveRegister(TmpMBB) != AArch64::NoRegister;
}

static bool windowsRequiresStackProbe(MachineFunction &MF,
unsigned StackSizeInBytes) {
const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
if (!Subtarget.isTargetWindows())
return false;
const Function &F = MF.getFunction();
// TODO: When implementing stack protectors, take that into account
// for the probe threshold.
unsigned StackProbeSize = 4096;
if (F.hasFnAttribute("stack-probe-size"))
F.getFnAttribute("stack-probe-size")
.getValueAsString()
.getAsInteger(0, StackProbeSize);
return StackSizeInBytes >= StackProbeSize;
}

bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
MachineFunction &MF, unsigned StackBumpBytes) const {
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
Expand All @@ -347,7 +364,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(

// 512 is the maximum immediate for stp/ldp that will be used for
// callee-save save/restores
if (StackBumpBytes >= 512)
if (StackBumpBytes >= 512 || windowsRequiresStackProbe(MF, StackBumpBytes))
return false;

if (MFI.hasVarSizedObjects())
Expand Down Expand Up @@ -478,7 +495,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;

int NumBytes = (int)MFI.getStackSize();
if (!AFI->hasStackFrame()) {
if (!AFI->hasStackFrame() && !windowsRequiresStackProbe(MF, NumBytes)) {
assert(!HasFP && "unexpected function without stack frame but with FP");

// All of the stack allocation is for locals.
Expand Down Expand Up @@ -550,6 +567,44 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
MachineInstr::FrameSetup);
}

if (windowsRequiresStackProbe(MF, NumBytes)) {
uint32_t NumWords = NumBytes >> 4;

BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), AArch64::X15)
.addImm(NumWords)
.setMIFlags(MachineInstr::FrameSetup);

switch (MF.getTarget().getCodeModel()) {
case CodeModel::Small:
case CodeModel::Medium:
case CodeModel::Kernel:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
.addExternalSymbol("__chkstk")
.addReg(AArch64::X15, RegState::Implicit)
.setMIFlags(MachineInstr::FrameSetup);
break;
case CodeModel::Large:
BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVaddrEXT))
.addReg(AArch64::X16, RegState::Define)
.addExternalSymbol("__chkstk")
.addExternalSymbol("__chkstk")
.setMIFlags(MachineInstr::FrameSetup);

BuildMI(MBB, MBBI, DL, TII->get(AArch64::BLR))
.addReg(AArch64::X16, RegState::Kill)
.addReg(AArch64::X15, RegState::Implicit | RegState::Define)
.setMIFlags(MachineInstr::FrameSetup);
break;
}

BuildMI(MBB, MBBI, DL, TII->get(AArch64::SUBXrx64), AArch64::SP)
.addReg(AArch64::SP, RegState::Kill)
.addReg(AArch64::X15, RegState::Kill)
.addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 4))
.setMIFlags(MachineInstr::FrameSetup);
NumBytes = 0;
}

// Allocate space for the rest of the frame.
if (NumBytes) {
const bool NeedsRealignment = RegInfo->needsStackRealignment(MF);
Expand Down Expand Up @@ -1164,18 +1219,32 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
unsigned UnspilledCSGPR = AArch64::NoRegister;
unsigned UnspilledCSGPRPaired = AArch64::NoRegister;

MachineFrameInfo &MFI = MF.getFrameInfo();
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);

unsigned BasePointerReg = RegInfo->hasBasePointer(MF)
? RegInfo->getBaseRegister()
: (unsigned)AArch64::NoRegister;

unsigned SpillEstimate = SavedRegs.count();
for (unsigned i = 0; CSRegs[i]; ++i) {
unsigned Reg = CSRegs[i];
unsigned PairedReg = CSRegs[i ^ 1];
if (Reg == BasePointerReg)
SpillEstimate++;
if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
SpillEstimate++;
}
SpillEstimate += 2; // Conservatively include FP+LR in the estimate
unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;

// The frame record needs to be created by saving the appropriate registers
if (hasFP(MF)) {
if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
SavedRegs.set(AArch64::FP);
SavedRegs.set(AArch64::LR);
}

unsigned BasePointerReg = AArch64::NoRegister;
if (RegInfo->hasBasePointer(MF))
BasePointerReg = RegInfo->getBaseRegister();

unsigned ExtraCSSpill = 0;
const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
// Figure out which callee-saved registers to save/restore.
for (unsigned i = 0; CSRegs[i]; ++i) {
const unsigned Reg = CSRegs[i];
Expand Down Expand Up @@ -1217,7 +1286,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,

// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo &MFI = MF.getFrameInfo();
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
Expand Down
25 changes: 25 additions & 0 deletions llvm/test/CodeGen/AArch64/chkstk.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs %s -o - \
; RUN: | FileCheck -check-prefix CHECK-DEFAULT-CODE-MODEL %s

; RUN: llc -mtriple=aarch64-windows -verify-machineinstrs -code-model=large %s -o - \
; RUN: | FileCheck -check-prefix CHECK-LARGE-CODE-MODEL %s

define void @check_watermark() {
entry:
%buffer = alloca [4096 x i8], align 1
ret void
}

; CHECK-DEFAULT-CODE-MODEL: check_watermark:
; CHECK-DEFAULT-CODE-MODEL-DAG: stp x29, x30, [sp
; CHECK-DEFAULT-CODE-MODEL-DAG: orr x15, xzr, #0x100
; CHECK-DEFAULT-CODE-MODEL: bl __chkstk
; CHECK-DEFAULT-CODE-MODEL: sub sp, sp, x15, lsl #4

; CHECK-LARGE-CODE-MODEL: check_watermark:
; CHECK-LARGE-CODE-MODEL-DAG: stp x29, x30, [sp
; CHECK-LARGE-CODE-MODEL-DAG: orr x15, xzr, #0x100
; CHECK-LARGE-CODE-MODEL-DAG: adrp x16, __chkstk
; CHECK-LARGE-CODE-MODEL-DAG: add x16, x16, __chkstk
; CHECK-LARGE-CODE-MODEL: blr x16
; CHECK-LARGE-CODE-MODEL: sub sp, sp, x15, lsl #4

0 comments on commit 2778fd0

Please sign in to comment.