Skip to content

Commit

Permalink
Revert "Revert "Reapply D70800: Fix AArch64 AAPCS frame record chain""
Browse files Browse the repository at this point in the history
This reverts commit bc9a29b.

The reasoning that this patch was wrong was itself incorrect
(see discussion on llvm-commits). This patch does seem to be exposing
a latent SVE code generation bug on non-public tests, which should
not block a correctness fix for public, non-SVE use cases.
  • Loading branch information
resistor committed Sep 1, 2020
1 parent ae95cee commit 5987da8
Show file tree
Hide file tree
Showing 6 changed files with 125 additions and 19 deletions.
36 changes: 19 additions & 17 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1024,10 +1024,6 @@ static bool needsWinCFI(const MachineFunction &MF) {
F.needsUnwindTableEntry();
}

static bool isTargetDarwin(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetDarwin();
}

static bool isTargetWindows(const MachineFunction &MF) {
return MF.getSubtarget<AArch64Subtarget>().isTargetWindows();
}
Expand Down Expand Up @@ -1185,7 +1181,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// For funclets the FP belongs to the containing function.
if (!IsFunclet && HasFP) {
// Only set up FP if we actually need to.
int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0;
int64_t FPOffset = AFI->getCalleeSaveBaseToFrameRecordOffset();

if (CombineSPBump)
FPOffset += AFI->getLocalStackSize();
Expand Down Expand Up @@ -1409,11 +1405,6 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
}

if (needsFrameMoves) {
const DataLayout &TD = MF.getDataLayout();
const int StackGrowth = isTargetDarwin(MF)
? (2 * -TD.getPointerSize(0))
: -AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
Expand Down Expand Up @@ -1481,10 +1472,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// .cfi_offset w28, -32

if (HasFP) {
const int OffsetToFirstCalleeSaveFromFP =
AFI->getCalleeSaveBaseToFrameRecordOffset() -
AFI->getCalleeSavedStackSize();
Register FramePtr = RegInfo->getFrameRegister(MF);

// Define the current CFA rule to use the provided FP.
unsigned Reg = RegInfo->getDwarfRegNum(FramePtr, true);
unsigned CFIIndex = MF.addFrameInst(
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - StackGrowth));
MCCFIInstruction::cfiDefCfa(nullptr, Reg, FixedObject - OffsetToFirstCalleeSaveFromFP));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
.addCFIIndex(CFIIndex)
.setMIFlags(MachineInstr::FrameSetup);
Expand Down Expand Up @@ -1775,10 +1771,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// non-post-indexed loads for the restores if we aren't actually going to
// be able to save any instructions.
if (!IsFunclet && (MFI.hasVarSizedObjects() || AFI->isStackRealigned())) {
int64_t OffsetToFrameRecord =
isTargetDarwin(MF) ? (-(int64_t)AFI->getCalleeSavedStackSize() + 16) : 0;
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::FP,
{OffsetToFrameRecord, MVT::i8},
{-AFI->getCalleeSaveBaseToFrameRecordOffset(), MVT::i8},
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI);
} else if (NumBytes)
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
Expand Down Expand Up @@ -1839,11 +1833,11 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset)
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
bool IsWin64 =
Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());

unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
unsigned FPAdjust = isTargetDarwin(MF)
? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
int64_t FPAdjust =
CalleeSaveSize - AFI->getCalleeSaveBaseToFrameRecordOffset();
return {ObjectOffset + FixedObject + FPAdjust, MVT::i8};
}

Expand Down Expand Up @@ -2231,6 +2225,14 @@ static void computeCalleeSaveRegisterPairs(
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");

// Save the offset to frame record so that the FP register can point to the
// innermost frame record (spilled FP and LR registers).
if (NeedsFrameRecord && ((!IsWindows && RPI.Reg1 == AArch64::LR &&
RPI.Reg2 == AArch64::FP) ||
(IsWindows && RPI.Reg1 == AArch64::FP &&
RPI.Reg2 == AArch64::LR)))
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);

RegPairs.push_back(RPI);
if (RPI.isPaired())
++i;
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64InstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3442,8 +3442,8 @@ void llvm::emitFrameOffset(MachineBasicBlock &MBB,

// First emit non-scalable frame offsets, or a simple 'mov'.
if (Bytes || (!Offset && SrcReg != DestReg)) {
assert((DestReg != AArch64::SP || Bytes % 16 == 0) &&
"SP increment/decrement not 16-byte aligned");
assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
"SP increment/decrement not 8-byte aligned");
unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
if (Bytes < 0) {
Bytes = -Bytes;
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// e.g. Tail Call, Thunk, or Function if none apply.
Optional<std::string> OutliningStyle;

// Offset from SP-after-callee-saved-spills (i.e. SP-at-entry minus
// CalleeSavedStackSize) to the address of the frame record.
int CalleeSaveBaseToFrameRecordOffset = 0;

public:
AArch64FunctionInfo() = default;

Expand Down Expand Up @@ -338,6 +342,13 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
TaggedBasePointerOffset = Offset;
}

int getCalleeSaveBaseToFrameRecordOffset() const {
return CalleeSaveBaseToFrameRecordOffset;
}
void setCalleeSaveBaseToFrameRecordOffset(int Offset) {
CalleeSaveBaseToFrameRecordOffset = Offset;
}

private:
// Hold the lists of LOHs.
MILOHContainer LOHContainerSet;
Expand Down
22 changes: 22 additions & 0 deletions llvm/test/CodeGen/AArch64/framelayout-fp-csr.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -disable-post-ra --frame-pointer=all < %s | FileCheck %s

; The purpose of this test is to verify that frame pointer (x29)
; is correctly setup in the presence of callee-saved floating
; point registers. The frame pointer should point to the frame
; record, which is located 16 bytes above the end of the CSR
; space when a single FP CSR is in use.
define void @test1(i32) #26 {
entry:
call void asm sideeffect "nop", "~{d8}"() #26
ret void
}
; CHECK-LABEL: test1:
; CHECK: str d8, [sp, #-32]!
; CHECK-NEXT: stp x29, x30, [sp, #16]
; CHECK-NEXT: add x29, sp, #16
; CHECK: nop
; CHECK: ldp x29, x30, [sp, #16]
; CHECK-NEXT: ldr d8, [sp], #32
; CHECK-NEXT: ret

attributes #26 = { nounwind }
29 changes: 29 additions & 0 deletions llvm/test/CodeGen/AArch64/framelayout-frame-record.mir
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# RUN: llc -mtriple=aarch64-linux-gnu -start-before prologepilog %s -o - | FileCheck %s

---
name: TestFrameRecordLocation
tracksRegLiveness: true
frameInfo:
isFrameAddressTaken: true
body: |
bb.0:
$d8 = IMPLICIT_DEF
$d9 = IMPLICIT_DEF
$x19 = IMPLICIT_DEF
RET_ReallyLR
# CHECK-LABEL: TestFrameRecordLocation

# CHECK: stp d9, d8, [sp, #-48]!
# CHECK: stp x29, x30, [sp, #16]
# CHECK: str x19, [sp, #32]

# CHECK: add x29, sp, #16

# CHECK: .cfi_def_cfa w29, 32
# CHECK: .cfi_offset w19, -16
# CHECK: .cfi_offset w30, -24
# CHECK: .cfi_offset w29, -32
# CHECK: .cfi_offset b8, -40
# CHECK: .cfi_offset b9, -48
...
42 changes: 42 additions & 0 deletions llvm/test/CodeGen/AArch64/framelayout-unaligned-fp.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
; RUN: llc -verify-machineinstrs < %s | FileCheck %s

; The purpose of this test is to construct a scenario where an odd number
; of callee-saved GPRs as well as an odd number of callee-saved FPRs are
; used. This caused the frame pointer to be aligned to a multiple of 8
; on non-Darwin platforms, rather than a multiple of 16 as usual.

target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-unknown-linux-gnu"

@a = global i64 0, align 4


define i64 @b() {
entry:
%call = tail call i64 @d()
%0 = alloca i8, i64 ptrtoint (i64 ()* @d to i64), align 16
%1 = ptrtoint i8* %0 to i64
store i64 %1, i64* @a, align 4
%call1 = call i64 @e()
%conv = sitofp i64 %call1 to float
%2 = load i64, i64* @a, align 4
%call2 = call i64 @f(i64 %2)
%conv3 = fptosi float %conv to i64
ret i64 %conv3
}

; CHECK-LABEL: b:
; CHECK: str d8, [sp, #-32]!
; CHECK-NEXT: stp x29, x30, [sp, #8]
; CHECK-NEXT: str x19, [sp, #24]
; CHECK-NEXT: add x29, sp, #8

; CHECK: sub sp, x29, #8
; CHECK-NEXT: ldr x19, [sp, #24]
; CHECK-NEXT: ldp x29, x30, [sp, #8]
; CHECK-NEXT: ldr d8, [sp], #32
; CHECK-NEXT: ret

declare i64 @d()
declare i64 @e()
declare i64 @f(i64)

0 comments on commit 5987da8

Please sign in to comment.