diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index 70c5c29149288..00c2957d2820e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -1554,8 +1554,10 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL, !AFL.requiresSaveVG(MF) && !AFI->isSVECC(); } -static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, - bool NeedsWinCFI, bool IsFirst, +static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile, + unsigned SpillCount, unsigned Reg1, + unsigned Reg2, bool NeedsWinCFI, + bool IsFirst, const TargetRegisterInfo *TRI) { // If we are generating register pairs for a Windows function that requires // EH support, then pair consecutive registers only. There are no unwind @@ -1568,8 +1570,18 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, return true; if (!NeedsWinCFI) return false; + + // ARM64EC introduced `save_any_regp`, which expects 16-byte alignment. + // This is handled by only allowing paired spills for registers spilled at + // even positions (which should be 16-byte aligned, as other GPRs/FPRs are + // 8-bytes). We carve out an exception for {FP,LR}, which does not require + // 16-byte alignment in the uop representation. if (TRI->getEncodingValue(Reg2) == TRI->getEncodingValue(Reg1) + 1) - return false; + return SpillExtendedVolatile + ? !((Reg1 == AArch64::FP && Reg2 == AArch64::LR) || + (SpillCount % 2) == 0) + : false; + // If pairing a GPR with LR, the pair can be described by the save_lrpair // opcode. If this is the first register pair, it would end up with a // predecrement, but there's no save_lrpair_x opcode, so we can only do this @@ -1585,12 +1597,15 @@ static bool invalidateWindowsRegisterPairing(unsigned Reg1, unsigned Reg2, /// WindowsCFI requires that only consecutive registers can be paired. 
/// LR and FP need to be allocated together when the frame needs to save /// the frame-record. This means any other register pairing with LR is invalid. -static bool invalidateRegisterPairing(unsigned Reg1, unsigned Reg2, - bool UsesWinAAPCS, bool NeedsWinCFI, - bool NeedsFrameRecord, bool IsFirst, +static bool invalidateRegisterPairing(bool SpillExtendedVolatile, + unsigned SpillCount, unsigned Reg1, + unsigned Reg2, bool UsesWinAAPCS, + bool NeedsWinCFI, bool NeedsFrameRecord, + bool IsFirst, const TargetRegisterInfo *TRI) { if (UsesWinAAPCS) - return invalidateWindowsRegisterPairing(Reg1, Reg2, NeedsWinCFI, IsFirst, + return invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount, + Reg1, Reg2, NeedsWinCFI, IsFirst, TRI); // If we need to store the frame record, don't pair any register @@ -1688,6 +1703,17 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, } bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize(); + // Windows AAPCS has x9-x15 as volatile registers, x16-x17 as intra-procedural + // scratch, x18 as platform reserved. However, clang has extended calling + // conventions such as preserve_most and preserve_all which treat these as + // CSR. As such, the ARM64 unwind uOPs bias registers by 19. We use ARM64EC + // uOPs which have separate restrictions. We need to check for that. + bool SpillExtendedVolatile = + IsWindows && std::any_of(std::begin(CSI), std::end(CSI), + [](const CalleeSavedInfo &CSI) { + const auto &Reg = CSI.getReg(); + return Reg > AArch64::X8 && Reg < AArch64::X19; + }); int ZPRByteOffset = 0; int PPRByteOffset = 0; @@ -1749,17 +1775,19 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL, if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) { MCRegister NextReg = CSI[i + RegInc].getReg(); bool IsFirst = i == FirstReg; + unsigned SpillCount = NeedsWinCFI ? 
FirstReg - i : i; switch (RPI.Type) { case RegPairInfo::GPR: if (AArch64::GPR64RegClass.contains(NextReg) && - !invalidateRegisterPairing(RPI.Reg1, NextReg, IsWindows, - NeedsWinCFI, NeedsFrameRecord, IsFirst, - TRI)) + !invalidateRegisterPairing( + SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows, + NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI)) RPI.Reg2 = NextReg; break; case RegPairInfo::FPR64: if (AArch64::FPR64RegClass.contains(NextReg) && - !invalidateWindowsRegisterPairing(RPI.Reg1, NextReg, NeedsWinCFI, + !invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount, + RPI.Reg1, NextReg, NeedsWinCFI, IsFirst, TRI)) RPI.Reg2 = NextReg; break; diff --git a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll index ecc22703ef584..54f8e3f4c5a64 100644 --- a/llvm/test/CodeGen/AArch64/seh-extended-spills.ll +++ b/llvm/test/CodeGen/AArch64/seh-extended-spills.ll @@ -1,4 +1,5 @@ ; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype asm -o - %s | FileCheck %s +; RUN: llc -mtriple aarch64-unknown-windows-msvc -filetype obj -o - %s | llvm-readobj -u - | FileCheck %s -check-prefix CHECK-UNWIND declare dso_local void @g(ptr noundef) define dso_local preserve_mostcc void @f(ptr noundef %p) #0 { @@ -12,23 +13,38 @@ entry: attributes #0 = { nounwind uwtable(sync) } -; CHECK: stp x9, x10, [sp, #[[OFFSET_0:[0-9]+]]] -; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]] -; CHECK: stp x11, x12, [sp, #[[OFFSET_1:[0-9]+]]] -; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]] -; CHECK: stp x13, x14, [sp, #[[OFFSET_2:[0-9]+]]] -; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]] -; CHECK: str x15, [sp, #[[OFFSET_3:[0-9]+]]] -; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]] +; CHECK: str x30, [sp, #16] +; CHECK-NEXT: .seh_save_reg x30, 16 +; CHECK: str x9, [sp, #24] +; CHECK-NEXT: .seh_save_any_reg x9, 24 +; CHECK: stp x10, x11, [sp, #32 +; CHECK-NEXT: .seh_save_any_reg_p x10, 32 +; CHECK: stp x12, x13, [sp, 
#48] +; CHECK-NEXT: .seh_save_any_reg_p x12, 48 +; CHECK: stp x14, x15, [sp, #64] +; CHECK-NEXT: .seh_save_any_reg_p x14, 64 ; CHECK: .seh_endprologue ; CHECK: .seh_startepilogue -; CHECK: ldr x15, [sp, #[[OFFSET_3]]] -; CHECK-NEXT: .seh_save_any_reg x15, [[OFFSET_3]] -; CHECK: ldp x13, x14, [sp, #[[OFFSET_2]]] -; CHECK-NEXT: .seh_save_any_reg_p x13, [[OFFSET_2]] -; CHECK: ldp x11, x12, [sp, #[[OFFSET_1]]] -; CHECK-NEXT: .seh_save_any_reg_p x11, [[OFFSET_1]] -; CHECK: ldp x9, x10, [sp, #[[OFFSET_0]]] -; CHECK-NEXT: .seh_save_any_reg_p x9, [[OFFSET_0]] +; CHECK: ldp x14, x15, [sp, #64] +; CHECK-NEXT: .seh_save_any_reg_p x14, 64 +; CHECK: ldp x12, x13, [sp, #48] +; CHECK-NEXT: .seh_save_any_reg_p x12, 48 +; CHECK: ldp x10, x11, [sp, #32 +; CHECK-NEXT: .seh_save_any_reg_p x10, 32 +; CHECK: ldr x9, [sp, #24] +; CHECK-NEXT: .seh_save_any_reg x9, 24 +; CHECK: ldr x30, [sp, #16] +; CHECK-NEXT: .seh_save_reg x30, 16 + ; CHECK: .seh_endepilogue + +; CHECK-UNWIND: Prologue [ +; CHECK-UNWIND: 0xe74e04 ; stp x14, x15, [sp, #64] +; CHECK-UNWIND: 0xe74c03 ; stp x12, x13, [sp, #48] +; CHECK-UNWIND: 0xe74a02 ; stp x10, x11, [sp, #32] +; CHECK-UNWIND: 0xe70903 ; str x9, [sp, #24] +; CHECK-UNWIND: 0xd2c2 ; str x30, [sp, #16] +; CHECK-UNWIND: 0x05 ; sub sp, #80 +; CHECK-UNWIND: 0xe4 ; end +; CHECK-UNWIND: ]