From b485511b083e80f677e71df8f4fe734a33ab05e4 Mon Sep 17 00:00:00 2001 From: Daniel Frampton Date: Tue, 5 May 2020 10:00:36 -0700 Subject: [PATCH] Backport fixes for aarch64-pc-windows-msvc (#46) * [AArch64] Fix mismatch in prologue and epilogue for funclets on Windows The generated code for a funclet can have an add to sp in the epilogue for which there is no corresponding sub in the prologue. This patch removes the early return from emitPrologue that was preventing the sub to sp, and instead conditionalizes the appropriate parts of the rest of the function. Fixes https://bugs.llvm.org/show_bug.cgi?id=45345 Differential Revision: https://reviews.llvm.org/D77015 * [AArch64] Change AArch64 Windows EH UnwindHelp object to be a fixed object The UnwindHelp object is used during exception handling by runtime code. It must be findable from a fixed offset from FP. This change allocates the UnwindHelp object as a fixed object (as is done for x86_64) to ensure that both the generated code and runtime agree on the location of the object. Fixes https://bugs.llvm.org/show_bug.cgi?id=45346 Differential Revision: https://reviews.llvm.org/D77016 --- .../Target/AArch64/AArch64FrameLowering.cpp | 101 ++++++++++-------- .../AArch64/funclet-match-add-sub-stack.ll | 62 +++++++++++ llvm/test/CodeGen/AArch64/seh-finally.ll | 24 ++--- .../CodeGen/AArch64/wineh-try-catch-cbz.ll | 7 +- .../AArch64/wineh-try-catch-realign.ll | 2 +- llvm/test/CodeGen/AArch64/wineh-try-catch.ll | 14 +-- .../AArch64/wineh-unwindhelp-via-fp.ll | 69 ++++++++++++ 7 files changed, 212 insertions(+), 67 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll create mode 100644 llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index ea3e800a1ad20..651ad9ad4c83f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -211,6 +211,24 @@ AArch64FrameLowering::getStackIDForScalableVectors() const { return TargetStackID::SVEVector; } +/// Returns the size of the fixed object area (allocated next to sp on entry) +/// On Win64 this may include a var args area and an UnwindHelp object for EH. +static unsigned getFixedObjectSize(const MachineFunction &MF, + const AArch64FunctionInfo *AFI, bool IsWin64, + bool IsFunclet) { + if (!IsWin64 || IsFunclet) { + // Only Win64 uses fixed objects, and then only for the function (not + // funclets) + return 0; + } else { + // Var args are stored here in the primary function. + const unsigned VarArgsArea = AFI->getVarArgsGPRSize(); + // To support EH funclets we allocate an UnwindHelp object + const unsigned UnwindHelpObject = (MF.hasEHFunclets() ? 8 : 0); + return alignTo(VarArgsArea + UnwindHelpObject, 16); + } +} + /// Returns the size of the entire SVE stackframe (calleesaves + spills). static StackOffset getSVEStackSize(const MachineFunction &MF) { const AArch64FunctionInfo *AFI = MF.getInfo(); @@ -959,10 +977,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = (IsWin64 && !IsFunclet) ? - alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; // All of the remaining stack allocations are for locals. @@ -993,32 +1008,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, ++MBBI; } - // The code below is not applicable to funclets. We have emitted all the SEH - // opcodes that we needed to emit. The FP and BP belong to the containing - // function. - if (IsFunclet) { - if (NeedsWinCFI) { - HasWinCFI = true; - BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_PrologEnd)) - .setMIFlag(MachineInstr::FrameSetup); - } - - // SEH funclets are passed the frame pointer in X1. If the parent - // function uses the base register, then the base register is used - // directly, and is not retrieved from X1. - if (F.hasPersonalityFn()) { - EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); - if (isAsynchronousEHPersonality(Per)) { - BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) - .addReg(AArch64::X1).setMIFlag(MachineInstr::FrameSetup); - MBB.addLiveIn(AArch64::X1); - } - } - - return; - } - - if (HasFP) { + // For funclets the FP belongs to the containing function. + if (!IsFunclet && HasFP) { // Only set up FP if we actually need to. int64_t FPOffset = isTargetDarwin(MF) ? (AFI->getCalleeSavedStackSize() - 16) : 0; @@ -1161,7 +1152,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // Allocate space for the rest of the frame. if (NumBytes) { - const bool NeedsRealignment = RegInfo->needsStackRealignment(MF); + // Alignment is required for the parent frame, not the funclet + const bool NeedsRealignment = + !IsFunclet && RegInfo->needsStackRealignment(MF); unsigned scratchSPReg = AArch64::SP; if (NeedsRealignment) { @@ -1215,7 +1208,8 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, // FIXME: Clarify FrameSetup flags here. // Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is // needed. - if (RegInfo->hasBasePointer(MF)) { + // For funclets the BP belongs to the containing function. + if (!IsFunclet && RegInfo->hasBasePointer(MF)) { TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP, false); if (NeedsWinCFI) { @@ -1232,6 +1226,19 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF, .setMIFlag(MachineInstr::FrameSetup); } + // SEH funclets are passed the frame pointer in X1. If the parent + // function uses the base register, then the base register is used + // directly, and is not retrieved from X1. + if (IsFunclet && F.hasPersonalityFn()) { + EHPersonality Per = classifyEHPersonality(F.getPersonalityFn()); + if (isAsynchronousEHPersonality(Per)) { + BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::COPY), AArch64::FP) + .addReg(AArch64::X1) + .setMIFlag(MachineInstr::FrameSetup); + MBB.addLiveIn(AArch64::X1); + } + } + if (needsFrameMoves) { const DataLayout &TD = MF.getDataLayout(); const int StackGrowth = isTargetDarwin(MF) @@ -1450,10 +1457,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF, bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - // Var args are accounted for in the containing function, so don't - // include them for funclets. - unsigned FixedObject = - (IsWin64 && !IsFunclet) ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet); uint64_t AfterCSRPopSize = ArgumentPopSize; auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject; @@ -1679,7 +1683,9 @@ static StackOffset getFPOffset(const MachineFunction &MF, int64_t ObjectOffset) const auto &Subtarget = MF.getSubtarget(); bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()); - unsigned FixedObject = IsWin64 ? alignTo(AFI->getVarArgsGPRSize(), 16) : 0; + + unsigned FixedObject = + getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false); unsigned FPAdjust = isTargetDarwin(MF) ? 16 : AFI->getCalleeSavedStackSize(MF.getFrameInfo()); return {ObjectOffset + FixedObject + FPAdjust, MVT::i8}; @@ -2632,9 +2638,14 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized( ++MBBI; // Create an UnwindHelp object. - int UnwindHelpFI = - MFI.CreateStackObject(/*size*/8, /*alignment*/16, false); + // The UnwindHelp object is allocated at the start of the fixed object area + int64_t FixedObject = + getFixedObjectSize(MF, AFI, /*IsWin64*/ true, /*IsFunclet*/ false); + int UnwindHelpFI = MFI.CreateFixedObject(/*Size*/ 8, + /*SPOffset*/ -FixedObject, + /*IsImmutable=*/false); EHInfo.UnwindHelpFrameIdx = UnwindHelpFI; + // We need to store -2 into the UnwindHelp object at the start of the // function. DebugLoc DL; @@ -2656,10 +2667,14 @@ int AArch64FrameLowering::getFrameIndexReferencePreferSP( const MachineFunction &MF, int FI, unsigned &FrameReg, bool IgnoreSPUpdates) const { const MachineFrameInfo &MFI = MF.getFrameInfo(); - LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " - << MFI.getObjectOffset(FI) << "\n"); - FrameReg = AArch64::SP; - return MFI.getObjectOffset(FI); + if (IgnoreSPUpdates) { + LLVM_DEBUG(dbgs() << "Offset from the SP for " << FI << " is " + << MFI.getObjectOffset(FI) << "\n"); + FrameReg = AArch64::SP; + return MFI.getObjectOffset(FI); + } + + return getFrameIndexReference(MF, FI, FrameReg); } /// The parent frame offset (aka dispFrame) is only used on X86_64 to retrieve diff --git a/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll b/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll new file mode 100644 index 0000000000000..67e9c49675cfd --- /dev/null +++ b/llvm/test/CodeGen/AArch64/funclet-match-add-sub-stack.ll @@ -0,0 +1,62 @@ +; RUN: llc -o - %s -mtriple=aarch64-windows | FileCheck %s +; Check that the stack bump around a funclet is computed correctly in both the +; prologue and epilogue in the case we have a MaxCallFrameSize > 0 and are doing alloca +target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-pc-windows-msvc19.25.28611" + +; // requires passing arguments on the stack +; void test2(void*, int, int, int, int, int, int, int, int); +; +; // function with the funclet being checked +; void test1(size_t bytes) +; { +; // alloca forces a separate callee save bump and stack bump +; void *data = _alloca(bytes); +; try { +; test2(data, 0, 1, 2, 3, 4, 5, 6, 7); +; } catch (...) { +; // the funclet being checked +; } +; } + +; CHECK-LABEL: ?catch$2@?0??test1@@YAX_K@Z@4HA +; CHECK: sub sp, sp, #16 +; CHECK: add sp, sp, #16 +; Function Attrs: uwtable +define dso_local void @"?test1@@YAX_K@Z"(i64 %0) #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { + %2 = alloca i64, align 8 + %3 = alloca i8*, align 8 + store i64 %0, i64* %2, align 8 + %4 = load i64, i64* %2, align 8 + %5 = alloca i8, i64 %4, align 16 + store i8* %5, i8** %3, align 8 + %6 = load i8*, i8** %3, align 8 + invoke void @"?test2@@YAXPEAXHHHHHHHH@Z"(i8* %6, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7) + to label %13 unwind label %7 + +7: ; preds = %1 + %8 = catchswitch within none [label %9] unwind to caller + +9: ; preds = %7 + %10 = catchpad within %8 [i8* null, i32 64, i8* null] + catchret from %10 to label %11 + +11: ; preds = %9 + br label %12 + +12: ; preds = %11, %13 + ret void + +13: ; preds = %1 + br label %12 +} + +declare dso_local void @"?test2@@YAXPEAXHHHHHHHH@Z"(i8*, i32, i32, i32, i32, i32, i32, i32, i32) #1 + +declare dso_local i32 @__CxxFrameHandler3(...) + +attributes #0 = { uwtable } + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"wchar_size", i32 2} diff --git a/llvm/test/CodeGen/AArch64/seh-finally.ll b/llvm/test/CodeGen/AArch64/seh-finally.ll index 66558c90a79cd..dbc6c4b0804bf 100644 --- a/llvm/test/CodeGen/AArch64/seh-finally.ll +++ b/llvm/test/CodeGen/AArch64/seh-finally.ll @@ -37,7 +37,7 @@ entry: ; CHECK-LABEL: simple_seh ; CHECK: add x29, sp, #16 ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x29, #-16] +; CHECK: stur x0, [x29, #16] ; CHECK: .set .Lsimple_seh$frame_escape_0, -8 ; CHECK: ldur w0, [x29, #-8] ; CHECK: bl foo @@ -87,13 +87,13 @@ define void @stack_realign() #0 personality i8* bitcast (i32 (...)* @__C_specifi entry: ; CHECK-LABEL: stack_realign ; CHECK: mov x29, sp -; CHECK: sub x9, sp, #64 +; CHECK: sub x9, sp, #16 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x19, #16] -; CHECK: .set .Lstack_realign$frame_escape_0, 32 -; CHECK: ldr w0, [x19, #32] +; CHECK: stur x0, [x29, #32] +; CHECK: .set .Lstack_realign$frame_escape_0, 0 +; CHECK: ldr w0, [x19] ; CHECK: bl foo %o = alloca %struct.S, align 32 @@ -142,7 +142,7 @@ entry: ; CHECK-LABEL: vla_present ; CHECK: add x29, sp, #32 ; CHECK: mov x1, #-2 -; CHECK: stur x1, [x29, #-32] +; CHECK: stur x1, [x29, #16] ; CHECK: .set .Lvla_present$frame_escape_0, -4 ; CHECK: stur w0, [x29, #-4] ; CHECK: ldur w8, [x29, #-4] @@ -206,17 +206,17 @@ define void @vla_and_realign(i32 %n) #0 personality i8* bitcast (i32 (...)* @__C entry: ; CHECK-LABEL: vla_and_realign ; CHECK: mov x29, sp -; CHECK: sub x9, sp, #64 +; CHECK: sub x9, sp, #48 ; CHECK: and sp, x9, #0xffffffffffffffe0 ; CHECK: mov x19, sp ; CHECK: mov x1, #-2 -; CHECK: stur x1, [x19] +; CHECK: stur x1, [x29, #32] ; CHECK: .set .Lvla_and_realign$frame_escape_0, 32 -; CHECK: str w0, [x29, #28] -; CHECK: ldr w8, [x29, #28] +; CHECK: str w0, [x29, #44] +; CHECK: ldr w8, [x29, #44] ; CHECK: mov x9, sp -; CHECK: str x9, [x19, #24] -; CHECK: str x8, [x19, #16] +; CHECK: str x9, [x29, #24] +; CHECK: str x8, [x19, #24] ; CHECK: ldr w0, [x19, #32] ; CHECK: bl foo diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll index d84c07f8bc1a5..cbed64ab99e3d 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-cbz.ll @@ -4,11 +4,10 @@ ; but the original issue only reproduced if the cbz was immediately ; after the frame setup.) -; CHECK: sub sp, sp, #32 -; CHECK-NEXT: stp x29, x30, [sp, #16] -; CHECK-NEXT: add x29, sp, #16 +; CHECK: stp x29, x30, [sp, #-32]! +; CHECK-NEXT: mov x29, sp ; CHECK-NEXT: mov x1, #-2 -; CHECK-NEXT: stur x1, [x29, #-16] +; CHECK-NEXT: stur x1, [x29, #16] ; CHECK-NEXT: cbz w0, .LBB0_2 target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll index b10a0f3033a02..a66ec10748e77 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch-realign.ll @@ -12,7 +12,7 @@ ; CHECK: stp x29, x30, [sp, #-32]! ; CHECK-NEXT: str x28, [sp, #16] ; CHECK-NEXT: str x19, [sp, #24] -; CHECK-NEXT: add x0, x19, #64 +; CHECK-NEXT: add x0, x19, #0 ; CHECK-NEXT: mov w1, wzr ; CHECK-NEXT: bl "?bb@@YAXPEAHH@Z" ; CHECK-NEXT: adrp x0, .LBB0_1 diff --git a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll index 3ae2df37efe4b..73909825d377d 100644 --- a/llvm/test/CodeGen/AArch64/wineh-try-catch.ll +++ b/llvm/test/CodeGen/AArch64/wineh-try-catch.ll @@ -11,11 +11,11 @@ ; and the parent function. ; The following checks that the unwind help object has -2 stored into it at -; fp - 400 - 256 = fp - 656, which is on-entry sp - 48 + 32 - 656 = -; on-entry sp - 672. We check this offset in the table later on. +; fp + 16, which is on-entry sp - 16. +; We check this offset in the table later on. ; CHECK-LABEL: "?func@@YAHXZ": -; CHECK: stp x29, x30, [sp, #-48]! +; CHECK: stp x29, x30, [sp, #-64]! ; CHECK: str x28, [sp, #16] ; CHECK: str x21, [sp, #24] ; CHECK: stp x19, x20, [sp, #32] @@ -23,7 +23,7 @@ ; CHECK: sub sp, sp, #624 ; CHECK: mov x19, sp ; CHECK: mov x0, #-2 -; CHECK: stur x0, [x19] +; CHECK: stur x0, [x29, #48] ; Now check that x is stored at fp - 20. We check that this is the same ; location accessed from the funclet to retrieve x. @@ -72,7 +72,7 @@ ; Now check that the offset of the unwind help object from the stack pointer on ; entry to func is encoded in cppxdata that is passed to __CxxFrameHandler3. As -; computed above, this comes to -672. +; computed above, this comes to -16. ; CHECK-LABEL: "$cppxdata$?func@@YAHXZ": ; CHECK-NEXT: .word 429065506 ; MagicNumber ; CHECK-NEXT: .word 2 ; MaxState @@ -81,7 +81,7 @@ ; CHECK-NEXT: .word ("$tryMap$?func@@YAHXZ")@IMGREL ; TryBlockMap ; CHECK-NEXT: .word 4 ; IPMapEntries ; CHECK-NEXT: .word ("$ip2state$?func@@YAHXZ")@IMGREL ; IPToStateXData -; CHECK-NEXT: .word -672 ; UnwindHelp +; CHECK-NEXT: .word -16 ; UnwindHelp ; UNWIND: Function: ?func@@YAHXZ (0x0) ; UNWIND: Prologue [ @@ -91,7 +91,7 @@ ; UNWIND-NEXT: ; stp x19, x20, [sp, #32] ; UNWIND-NEXT: ; str x21, [sp, #24] ; UNWIND-NEXT: ; str x28, [sp, #16] -; UNWIND-NEXT: ; stp x29, x30, [sp, #-48]! +; UNWIND-NEXT: ; stp x29, x30, [sp, #-64]! ; UNWIND-NEXT: ; end ; UNWIND: Function: ?catch$2@?0??func@@YAHXZ@4HA ; UNWIND: Prologue [ diff --git a/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll b/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll new file mode 100644 index 0000000000000..6ec78087020c4 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/wineh-unwindhelp-via-fp.ll @@ -0,0 +1,69 @@ +; RUN: llc -o - %s -mtriple=aarch64-windows | FileCheck %s +; Check that we allocate the unwind help stack object in a fixed location from fp +; so that the runtime can find it when handling an exception +target datalayout = "e-m:w-p:64:64-i32:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-pc-windows-msvc19.25.28611" + +; Check that the store to the unwind help object for func2 is via FP +; CHECK-LABEL: ?func2@@YAXXZ +; CHECK: mov x[[#SCRATCH_REG:]], #-2 +; CHECK: stur x[[#SCRATCH_REG:]], [x29, #[[#]]] +; +; // struct that requires greater than stack alignment +; struct alignas(32) A +; { +; // data that would be invalid for unwind help (> 0) +; int _x[4]{42, 42, 42, 42}; +; ~A() {} +; }; +; +; // cause us to run the funclet in func2 +; void func3() +; { +; throw 1; +; } +; +; // the funclet that ensures we have the unwind help correct +; void func2() +; { +; A a; +; func3(); +; } +; +; // function to ensure we are misaligned in func2 +; void func1() +; { +; func2(); +; } +; +; // set things up and ensure alignment for func1 +; void test() +; { +; try { +; A a; +; func1(); +; } catch(...) {} +; } + +%struct.A = type { [4 x i32], [16 x i8] } +declare dso_local %struct.A* @"??0A@@QEAA@XZ"(%struct.A* returned %0) +declare dso_local void @"??1A@@QEAA@XZ"(%struct.A* %0) +declare dso_local i32 @__CxxFrameHandler3(...) +declare dso_local void @"?func3@@YAXXZ"() + +; Function Attrs: noinline optnone uwtable +define dso_local void @"?func2@@YAXXZ"() #0 personality i8* bitcast (i32 (...)* @__CxxFrameHandler3 to i8*) { + %1 = alloca %struct.A, align 32 + %2 = call %struct.A* @"??0A@@QEAA@XZ"(%struct.A* %1) #3 + invoke void @"?func3@@YAXXZ"() + to label %3 unwind label %4 + +3: ; preds = %0 + call void @"??1A@@QEAA@XZ"(%struct.A* %1) #3 + ret void + +4: ; preds = %0 + %5 = cleanuppad within none [] + call void @"??1A@@QEAA@XZ"(%struct.A* %1) #3 [ "funclet"(token %5) ] + cleanupret from %5 unwind to caller +}