Skip to content

Commit

Permalink
[AArch64] Fix fastcc -tailcallopt epilog code generation.
Browse files Browse the repository at this point in the history
Summary:
Fix a bug in epilog generation where the incoming stack arguments were
not being popped for fastcc functions when -tailcallopt was passed.

Reviewers: t.p.northover, mcrosier, jmolloy, rengolin

Subscribers: aemerson, rengolin, mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D16894

llvm-svn: 261650
  • Loading branch information
geoffberry committed Feb 23, 2016
1 parent 40e7ba0 commit a1c6269
Show file tree
Hide file tree
Showing 2 changed files with 115 additions and 6 deletions.
29 changes: 23 additions & 6 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Expand Up @@ -607,7 +607,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
//
// AArch64TargetLowering::LowerCall figures out ArgumentPopSize and keeps
// it as the 2nd argument of AArch64ISD::TC_RETURN.
NumBytes += ArgumentPopSize;

// Move past the restores of the callee-saved registers.
MachineBasicBlock::iterator LastPopI = MBB.getFirstTerminator();
Expand All @@ -623,12 +622,23 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(NumBytes >= 0 && "Negative stack allocation size!?");

if (!hasFP(MF)) {
bool RedZone = canUseRedZone(MF);
// If this was a redzone leaf function, we don't need to restore the
// stack pointer.
if (!canUseRedZone(MF))
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP, NumBytes,
TII, MachineInstr::FrameDestroy);
return;
// stack pointer (but we may need to pop stack args for fastcc).
if (RedZone && ArgumentPopSize == 0)
return;

bool NoCalleeSaveRestore = AFI->getCalleeSavedStackSize() == 0;
int StackRestoreBytes = RedZone ? 0 : NumBytes;
if (NoCalleeSaveRestore)
StackRestoreBytes += ArgumentPopSize;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
StackRestoreBytes, TII, MachineInstr::FrameDestroy);
// If we were able to combine the local stack pop with the argument pop,
// then we're done.
if (NoCalleeSaveRestore || ArgumentPopSize == 0)
return;
NumBytes = 0;
}

// Restore the original stack pointer.
Expand All @@ -639,6 +649,13 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
-AFI->getCalleeSavedStackSize() + 16, TII,
MachineInstr::FrameDestroy);

// This must be placed after the callee-save restore code because that code
// assumes the SP is at the same location as it was after the callee-save save
// code in the prologue.
if (ArgumentPopSize)
emitFrameOffset(MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
ArgumentPopSize, TII, MachineInstr::FrameDestroy);
}

/// getFrameIndexReference - Provide a base+offset reference to an FI slot for
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/AArch64/fastcc.ll
@@ -1,5 +1,6 @@
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s -check-prefix CHECK-TAIL
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt -aarch64-redzone | FileCheck %s -check-prefix CHECK-TAIL-RZ

; Without tailcallopt fastcc still means the caller cleans up the
; stack, so try to make sure this is respected.
Expand Down Expand Up @@ -97,6 +98,7 @@ define fastcc void @func_stack8([8 x i32], i32 %stacked) {

; CHECK-TAIL: mov sp, x29
; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #16
; CHECK-TAIL-NEXT: ret
}

Expand Down Expand Up @@ -140,5 +142,95 @@ define fastcc void @func_stack32([8 x i32], i128 %stacked0, i128 %stacked1) {

; CHECK-TAIL: mov sp, x29
; CHECK-TAIL-NEXT: ldp x29, x30, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #32
; CHECK-TAIL-NEXT: ret
}

; Check that arg stack pop is done after callee-save restore when no frame pointer is used.
; The function has no locals, but the inline asm clobbers x20 so that exactly
; one callee-saved register is spilled and reloaded. Expected epilogues:
;   - default run: only the x20 reload; the caller cleans up the stack args.
;   - -tailcallopt run: the x20 reload, followed by a separate 32-byte SP pop
;     for the incoming stack arguments (%stacked0 and %stacked1).
;   - -tailcallopt -aarch64-redzone run: same epilogue as -tailcallopt, and no
;     "sub sp, sp" may appear between the x20 spill and the inline asm.
; NOTE(review): presumably the leading [8 x i32] argument exists to use up the
; argument registers so the two i128 values are passed on the stack - confirm.
define fastcc void @func_stack32_leaf([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-LABEL: func_stack32_leaf:
; CHECK: str x20, [sp, #-16]!
; CHECK: nop
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: ldr x20, [sp], #16
; CHECK-NEXT: ret

; CHECK-TAIL-LABEL: func_stack32_leaf:
; CHECK-TAIL: str x20, [sp, #-16]!
; CHECK-TAIL: nop
; CHECK-TAIL-NEXT: //NO_APP
; CHECK-TAIL-NEXT: ldr x20, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #32
; CHECK-TAIL-NEXT: ret

; CHECK-TAIL-RZ-LABEL: func_stack32_leaf:
; CHECK-TAIL-RZ: str x20, [sp, #-16]!
; CHECK-TAIL-RZ-NOT: sub sp, sp
; CHECK-TAIL-RZ: nop
; CHECK-TAIL-RZ-NEXT: //NO_APP
; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16
; CHECK-TAIL-RZ-NEXT: add sp, sp, #32
; CHECK-TAIL-RZ-NEXT: ret

; Make sure there is a callee-save register to save/restore.
call void asm sideeffect "nop", "~{x20}"() nounwind
ret void
}

; Check that the arg stack pop is done after both the local stack pop and the
; callee-save restore when no frame pointer is used.
; This variant has a 16-byte local (%val0) in addition to the x20 callee-save.
; Expected epilogues:
;   - default run: pop the 16 bytes of locals, then reload x20; the caller
;     cleans up the stack args.
;   - -tailcallopt run: pop the 16 bytes of locals, reload x20, and only then
;     pop the 32 bytes of incoming stack arguments as a separate instruction.
;   - -tailcallopt -aarch64-redzone run: no "sub sp, sp" is emitted for the
;     local, so the epilogue is just the x20 reload followed by the 32-byte
;     argument pop.
define fastcc void @func_stack32_leaf_local([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-LABEL: func_stack32_leaf_local:
; CHECK: str x20, [sp, #-16]!
; CHECK-NEXT: sub sp, sp, #16
; CHECK: nop
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: ldr x20, [sp], #16
; CHECK-NEXT: ret

; CHECK-TAIL-LABEL: func_stack32_leaf_local:
; CHECK-TAIL: str x20, [sp, #-16]!
; CHECK-TAIL-NEXT: sub sp, sp, #16
; CHECK-TAIL: nop
; CHECK-TAIL-NEXT: //NO_APP
; CHECK-TAIL-NEXT: add sp, sp, #16
; CHECK-TAIL-NEXT: ldr x20, [sp], #16
; CHECK-TAIL-NEXT: add sp, sp, #32
; CHECK-TAIL-NEXT: ret

; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local:
; CHECK-TAIL-RZ: str x20, [sp, #-16]!
; CHECK-TAIL-RZ-NOT: sub sp, sp
; CHECK-TAIL-RZ: nop
; CHECK-TAIL-RZ-NEXT: //NO_APP
; CHECK-TAIL-RZ-NEXT: ldr x20, [sp], #16
; CHECK-TAIL-RZ-NEXT: add sp, sp, #32
; CHECK-TAIL-RZ-NEXT: ret

; A 16-byte local so that the function has local stack to allocate and pop.
%val0 = alloca [2 x i64], align 8

; Make sure there is a callee-save register to save/restore.
call void asm sideeffect "nop", "~{x20}"() nounwind
ret void
}

; Check that the arg stack pop is combined with the local stack pop when there
; are no callee-saved registers to restore and no frame pointer is used.
; This variant has a 16-byte local (%val0) and no inline asm clobber, so
; nothing is spilled. Expected epilogues:
;   - default run: pop only the 16 bytes of locals; the caller cleans up the
;     stack args.
;   - -tailcallopt run: a single "add sp, sp, #48" covering the 16 bytes of
;     locals plus the 32 bytes of incoming stack arguments.
;   - -tailcallopt -aarch64-redzone run: only the 32-byte argument pop is
;     expected before the return.
define fastcc void @func_stack32_leaf_local_nocs([8 x i32], i128 %stacked0, i128 %stacked1) {
; CHECK-LABEL: func_stack32_leaf_local_nocs:
; CHECK: sub sp, sp, #16
; CHECK: add sp, sp, #16
; CHECK-NEXT: ret

; CHECK-TAIL-LABEL: func_stack32_leaf_local_nocs:
; CHECK-TAIL: sub sp, sp, #16
; CHECK-TAIL: add sp, sp, #48
; CHECK-TAIL-NEXT: ret

; CHECK-TAIL-RZ-LABEL: func_stack32_leaf_local_nocs:
; CHECK-TAIL-RZ: add sp, sp, #32
; CHECK-TAIL-RZ-NEXT: ret

; A 16-byte local so that the function has local stack to allocate and pop.
%val0 = alloca [2 x i64], align 8

ret void
}

0 comments on commit a1c6269

Please sign in to comment.