Skip to content

Commit

Permalink
[PEI, AArch64] Use empty spaces in stack area for local stack slot al…
Browse files Browse the repository at this point in the history
…location.

Summary:
If the target requests it, use empty spaces in the fixed and
callee-save stack area to allocate local stack objects.

AArch64: Change last callee-save reg stack object alignment instead of
size to leave a gap to take advantage of above change.

Reviewers: t.p.northover, qcolombet, MatzeB

Subscribers: rengolin, mcrosier, llvm-commits, aemerson

Differential Revision: http://reviews.llvm.org/D20220

llvm-svn: 271527
  • Loading branch information
geoffberry committed Jun 2, 2016
1 parent f509d85 commit 66f6b65
Show file tree
Hide file tree
Showing 7 changed files with 157 additions and 13 deletions.
7 changes: 7 additions & 0 deletions llvm/include/llvm/Target/TargetFrameLowering.h
Expand Up @@ -151,6 +151,13 @@ class TargetFrameLowering {
return false;
}

/// Returns true if the stack slot holes in the fixed and callee-save stack
/// area should be used when allocating other stack locations to reduce stack
/// size.
///
/// This default implementation always returns false, i.e. scavenging is
/// opt-in; a target enables it by overriding this hook.
///
/// \param MF the function being laid out (unused by the default
///           implementation).
virtual bool enableStackSlotScavenging(const MachineFunction &MF) const {
return false;
}

/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
virtual void emitPrologue(MachineFunction &MF,
Expand Down
127 changes: 122 additions & 5 deletions llvm/lib/CodeGen/PrologEpilogInserter.cpp
Expand Up @@ -577,6 +577,108 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
}
}

/// Compute which bytes of fixed and callee-save stack area are unused and keep
/// track of them in StackBytesFree.
///
/// \param MFI             frame info queried for the offset and size of every
///                        fixed and callee-save object.
/// \param StackGrowsDown  true if object offsets are negative distances from
///                        the start of the frame.
/// \param MinCSFrameIndex first callee-save frame index (inclusive).
/// \param MaxCSFrameIndex last callee-save frame index (inclusive). The range
///                        is treated as empty when MinCSFrameIndex is greater
///                        than MaxCSFrameIndex.
/// \param FixedCSEnd      size in bytes of the fixed and callee-save area.
/// \param StackBytesFree  [out] resized to FixedCSEnd bits; bit N stays set
///                        iff byte N of the area is not covered by any fixed
///                        or callee-save object. Left untouched if FixedCSEnd
///                        does not fit in an int.
static inline void
computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
                      unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
                      int64_t FixedCSEnd, BitVector &StackBytesFree) {
  // Avoid undefined int64_t -> int conversion below in extreme case.
  if (FixedCSEnd > std::numeric_limits<int>::max())
    return;

  // Start with every byte marked free, then knock out the occupied ranges.
  StackBytesFree.resize(FixedCSEnd, true);

  SmallVector<int, 16> AllocatedFrameSlots;
  // Add fixed objects (they occupy the negative frame indices).
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
    AllocatedFrameSlots.push_back(i);
  // Add callee-save objects, if there are any. The bounds are unsigned, so an
  // empty range (Min > Max) must be rejected before converting to int:
  // otherwise a large MinCSFrameIndex wraps and the loop would walk frame
  // indices that are not callee-save slots at all.
  // NOTE(review): presumably the caller leaves Min > Max as a sentinel when no
  // callee-saved register is spilled -- confirm against the caller.
  if (MinCSFrameIndex <= MaxCSFrameIndex)
    for (int i = MinCSFrameIndex; i <= static_cast<int>(MaxCSFrameIndex); ++i)
      AllocatedFrameSlots.push_back(i);

  for (int i : AllocatedFrameSlots) {
    // These are converted from int64_t, but they should always fit in int
    // because of the FixedCSEnd check above.
    int ObjOffset = MFI->getObjectOffset(i);
    int ObjSize = MFI->getObjectSize(i);
    int ObjStart, ObjEnd;
    if (StackGrowsDown) {
      // ObjOffset is negative when StackGrowsDown is true.
      ObjStart = -ObjOffset - ObjSize;
      ObjEnd = -ObjOffset;
    } else {
      ObjStart = ObjOffset;
      ObjEnd = ObjOffset + ObjSize;
    }
    // Ignore fixed holes that are in the previous stack frame.
    if (ObjEnd > 0)
      StackBytesFree.reset(ObjStart, ObjEnd);
  }
}

/// Try to place frame object \p FrameIdx into a hole of the fixed and
/// callee-save stack area described by \p StackBytesFree (one bit per byte,
/// set == free).
///
/// On success the object's offset is updated in \p MFI, the bytes it now
/// occupies are cleared in \p StackBytesFree, and true is returned. Returns
/// false (leaving the object unplaced) for variable sized objects, for objects
/// aligned more strictly than \p MaxAlign, or when no suitable hole exists.
static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
                                     bool StackGrowsDown, unsigned MaxAlign,
                                     BitVector &StackBytesFree) {
  if (MFI->isVariableSizedObjectIndex(FrameIdx))
    return false;

  // Nothing left to hand out: empty the vector so that the none() query made
  // by subsequent calls is cheap.
  if (StackBytesFree.none()) {
    StackBytesFree.clear();
    return false;
  }

  const unsigned Alignment = MFI->getObjectAlignment(FrameIdx);
  if (Alignment > MaxAlign)
    return false;

  const int64_t Size = MFI->getObjectSize(FrameIdx);

  // Walk the free bytes in increasing order; each one is a candidate for the
  // lowest byte of the object.
  int HoleStart = StackBytesFree.find_first();
  while (HoleStart != -1) {
    // The edge that has to be aligned is the far end of the object when the
    // stack grows down, and the near end otherwise.
    const unsigned AlignedEdge = StackGrowsDown ? HoleStart + Size : HoleStart;
    if (alignTo(AlignedEdge, Alignment) == AlignedEdge) {
      // Candidates only move upwards, so once the object would run past the
      // end of the tracked area no later candidate can fit either.
      if (HoleStart + Size > StackBytesFree.size())
        return false;

      // Count how many consecutive bytes starting at the candidate are free.
      unsigned Byte = 0;
      while (Byte < Size && StackBytesFree.test(HoleStart + Byte))
        ++Byte;
      // The whole object fits in this hole.
      if (!(Byte < Size))
        break;
    }
    HoleStart = StackBytesFree.find_next(HoleStart);
  }

  if (HoleStart == -1)
    return false;

  // Translate the position inside the tracked area back into a frame offset:
  // negative when the stack grows down, the position itself otherwise.
  const int NewOffset = StackGrowsDown ? -(HoleStart + Size) : HoleStart;
  DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << NewOffset
               << "]\n");
  MFI->setObjectOffset(FrameIdx, NewOffset);

  // The bytes are taken now.
  StackBytesFree.reset(HoleStart, HoleStart + Size);
  return true;
}

/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
static void
Expand Down Expand Up @@ -621,9 +723,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {

// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// We currently don't support filling in holes in between fixed sized
// objects, so we adjust 'Offset' to point to the end of last fixed sized
// preallocated object.
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
Expand Down Expand Up @@ -667,6 +768,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
}

// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
unsigned MaxAlign = MFI->getMaxAlignment();

// Make sure the special register scavenging spill slot is closest to the
Expand Down Expand Up @@ -798,10 +902,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
Fn.getTarget().Options.StackSymbolOrdering)
TFI.orderFrameObjects(Fn, ObjectsToAllocate);


// Keep track of which bytes in the fixed and callee-save range are used so we
// can use the holes when allocating later stack objects. Only do this if
// stack protector isn't being used and the target requests it and we're
// optimizing.
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);

// Now walk the objects and actually assign base offsets to them.
for (auto &Object : ObjectsToAllocate)
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
StackBytesFree))
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);

// Make sure the special register scavenging spill slot is closest to the
// stack pointer.
Expand Down
9 changes: 8 additions & 1 deletion llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Expand Up @@ -942,7 +942,8 @@ static void computeCalleeSaveRegisterPairs(
// callee-save area to ensure 16-byte alignment.
Offset -= 16;
assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
MFI->setObjectSize(RPI.FrameIdx, 16);
MFI->setObjectAlignment(RPI.FrameIdx, 16);
AFI->setCalleeSaveStackHasFreeSpace(true);
} else
Offset -= RPI.isPaired() ? 16 : 8;
assert(Offset % 8 == 0);
Expand Down Expand Up @@ -1190,3 +1191,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}

bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}
2 changes: 2 additions & 0 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.h
Expand Up @@ -67,6 +67,8 @@ class AArch64FrameLowering : public TargetFrameLowering {
return true;
}

bool enableStackSlotScavenging(const MachineFunction &MF) const override;

private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
Expand Up @@ -83,18 +83,24 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// frame is unknown at compile time. e.g., in case of VLAs.
bool StackRealigned;

/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
bool CalleeSaveStackHasFreeSpace;

public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false) {}
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {}

explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false) {
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {
(void)MF;
}

Expand All @@ -112,6 +118,13 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }

/// Returns the current value of the CalleeSaveStackHasFreeSpace flag.
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
/// Sets the CalleeSaveStackHasFreeSpace flag to \p s.
void setCalleeSaveStackHasFreeSpace(bool s) {
CalleeSaveStackHasFreeSpace = s;
}

bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }

Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AArch64/aarch64-dynamic-stack-layout.ll
Expand Up @@ -674,7 +674,7 @@ bb1:

define void @realign_conditional2(i1 %b) {
entry:
%tmp = alloca i8, i32 4
%tmp = alloca i8, i32 16
br i1 %b, label %bb0, label %bb1

bb0:
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/arm64-hello.ll
Expand Up @@ -14,14 +14,12 @@
; CHECK-NEXT: ret

; CHECK-LINUX-LABEL: main:
; CHECK-LINUX: sub sp, sp, #32
; CHECK-LINUX-NEXT: str x30, [sp, #16]
; CHECK-LINUX: str x30, [sp, #-16]!
; CHECK-LINUX-NEXT: str wzr, [sp, #12]
; CHECK-LINUX: adrp x0, .L.str
; CHECK-LINUX: add x0, x0, :lo12:.L.str
; CHECK-LINUX-NEXT: bl puts
; CHECK-LINUX-NEXT: ldr x30, [sp, #16]
; CHECK-LINUX-NEXT: add sp, sp, #32
; CHECK-LINUX-NEXT: ldr x30, [sp], #16
; CHECK-LINUX-NEXT: ret

@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
Expand Down

0 comments on commit 66f6b65

Please sign in to comment.