From 2ab19bfa4125d803c16e60b5df4df9d03838e0ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Fri, 26 Nov 2021 15:17:47 +0200 Subject: [PATCH] [ARM] Adjust the frame pointer when it's needed for SEH unwinding For functions that require restoring SP from FP (e.g. that need to align the stack, or that have variable-sized allocations), the prologue and epilogue previously looked like this: push {r4-r5, r11, lr} add r11, sp, #8 ... sub r4, r11, #8 mov sp, r4 pop {r4-r5, r11, pc} This is problematic, because this unwinding operation (restoring sp from r11 - offset) can't be expressed with the SEH unwind opcodes (probably because this unwind procedure doesn't map exactly to individual instructions; note the detour via r4 in the epilogue too). To make unwinding work, the GPR push is split into two: the first one pushing all other registers, and the second one pushing r11+lr, so that r11 can be set to point at this spot on the stack: push {r4-r5} push {r11, lr} mov r11, sp ... mov sp, r11 pop {r11, lr} pop {r4-r5} bx lr For the same setup, MSVC generates code that uses two registers; r11 still pointing at the {r11,lr} pair, but a separate register used for restoring the stack at the end: push {r4-r5, r7, r11, lr} add r11, sp, #12 mov r7, sp ... mov sp, r7 pop {r4-r5, r7, r11, pc} When float/vector registers are clobbered, they are pushed after the other GPRs, before the {r11,lr} pair. 
Differential Revision: https://reviews.llvm.org/D125649 --- llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 2 + llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 27 +++ llvm/lib/Target/ARM/ARMCallingConv.td | 4 + llvm/lib/Target/ARM/ARMFrameLowering.cpp | 224 ++++++++++++------ llvm/lib/Target/ARM/ARMSubtarget.cpp | 10 + llvm/lib/Target/ARM/ARMSubtarget.h | 2 + .../CodeGen/ARM/Windows/wineh-framepointer.ll | 62 +++++ 7 files changed, 265 insertions(+), 66 deletions(-) create mode 100644 llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index c543d02ff75aa..cae72e465c7bf 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -73,6 +73,8 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // GHC set of callee saved regs is empty as all those regs are // used for passing STG regs around return CSR_NoRegs_SaveList; + } else if (STI.splitFramePointerPush(*MF)) { + return CSR_Win_SplitFP_SaveList; } else if (F.getCallingConv() == CallingConv::CFGuard_Check) { return CSR_Win_AAPCS_CFGuard_Check_SaveList; } else if (F.getCallingConv() == CallingConv::SwiftTail) { diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 7ee9bd56452ee..73ed300ccff46 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -71,6 +71,33 @@ static inline bool isARMArea2Register(unsigned Reg, bool SplitFramePushPop) { } } +static inline bool isSplitFPArea1Register(unsigned Reg, + bool SplitFramePushPop) { + using namespace ARM; + + switch (Reg) { + case R0: case R1: case R2: case R3: + case R4: case R5: case R6: case R7: + case R8: case R9: case R10: case R12: + case SP: case PC: + return true; + default: + return false; + } +} + +static inline bool isSplitFPArea2Register(unsigned Reg, + bool SplitFramePushPop) { + using 
namespace ARM; + + switch (Reg) { + case R11: case LR: + return true; + default: + return false; + } +} + static inline bool isARMArea3Register(unsigned Reg, bool SplitFramePushPop) { using namespace ARM; diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index a6dbe563a4ab7..45b9e482fc434 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -289,6 +289,10 @@ def CSR_AAPCS_SplitPush : CalleeSavedRegs<(add LR, R7, R6, R5, R4, R11, R10, R9, R8, (sequence "D%u", 15, 8))>; +def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, + (sequence "D%u", 15, 8), + LR, R11)>; + // R8 is used to pass swifterror, remove it from CSR. def CSR_AAPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS_SplitPush, R8)>; diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 71801f420217a..0916df31ac956 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -300,7 +300,6 @@ static MachineBasicBlock::iterator insertSEH(MachineBasicBlock::iterator MBBI, break; case ARM::t2ADDri: // add.w r11, sp, #xx case ARM::t2ADDri12: // add.w r11, sp, #xx - case ARM::t2SUBri: // sub.w r4, r11, #xx case ARM::t2MOVTi16: // movt r4, #xx case ARM::t2MOVi16: // movw r4, #xx case ARM::tBL: // bl __chkstk @@ -633,15 +632,23 @@ static void emitAligningInstructions(MachineFunction &MF, ARMFunctionInfo *AFI, /// Unfortunately we cannot determine this value in determineCalleeSaves() yet /// as assignCalleeSavedSpillSlots() hasn't run at this point. Instead we use /// this to produce a conservative estimate that we check in an assert() later. 
-static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI) { +static int getMaxFPOffset(const ARMSubtarget &STI, const ARMFunctionInfo &AFI, + const MachineFunction &MF) { // For Thumb1, push.w isn't available, so the first push will always push // r7 and lr onto the stack first. if (AFI.isThumb1OnlyFunction()) return -AFI.getArgRegsSaveSize() - (2 * 4); // This is a conservative estimation: Assume the frame pointer being r7 and // pc("r15") up to r8 getting spilled before (= 8 registers). - int FPCXTSaveSize = (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0; - return - FPCXTSaveSize - AFI.getArgRegsSaveSize() - (8 * 4); + int MaxRegBytes = 8 * 4; + if (STI.splitFramePointerPush(MF)) { + // Here, r11 can be stored below all of r4-r15 (3 registers more than + // above), plus d8-d15. + MaxRegBytes = 11 * 4 + 8 * 8; + } + int FPCXTSaveSize = + (STI.hasV8_1MMainlineOps() && AFI.isCmseNSEntryFunction()) ? 4 : 0; + return -FPCXTSaveSize - AFI.getArgRegsSaveSize() - MaxRegBytes; } void ARMFrameLowering::emitPrologue(MachineFunction &MF, @@ -704,42 +711,80 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes. - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) { + if (STI.splitFramePointerPush(MF)) { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R11: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; GPRCS2Size += 4; break; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R12: + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. 
Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; + } + } + } else { + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) { + GPRCS2Size += 4; + break; + } + LLVM_FALLTHROUGH; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + if (Reg == FramePtr) + FramePtrSpillFI = FI; + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. + if (Reg == ARM::D8) + D8SpillFI = FI; + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) + DPRCSSize += 8; } - LLVM_FALLTHROUGH; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; - break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. - if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; } } @@ -774,15 +819,23 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; Align DPRAlign = DPRCSSize ? 
std::min(Align(8), Alignment) : Align(4); - unsigned DPRGapSize = - (GPRCS1Size + GPRCS2Size + FPCXTSaveSize + ArgRegsSaveSize) % - DPRAlign.value(); + unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize; + if (!STI.splitFramePointerPush(MF)) { + DPRGapSize += GPRCS2Size; + } + DPRGapSize %= DPRAlign.value(); - unsigned DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + unsigned DPRCSOffset; + if (STI.splitFramePointerPush(MF)) { + DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; + GPRCS2Offset = DPRCSOffset - GPRCS2Size; + } else { + DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; + } int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); - assert(getMaxFPOffset(STI, *AFI) <= FPOffset && + assert(getMaxFPOffset(STI, *AFI, MF) <= FPOffset && "Max FP estimation is wrong"); FramePtrOffsetInPush = FPOffset + ArgRegsSaveSize + FPCXTSaveSize; AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + @@ -793,7 +846,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); // Move past area 2. 
- if (GPRCS2Size > 0) { + if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) { GPRCS2Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } @@ -833,6 +886,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } else NumBytes = DPRCSOffset; + if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) { + GPRCS2Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + } + + bool NeedsWinCFIStackAlloc = NeedsWinCFI; + if (STI.splitFramePointerPush(MF) && HasFP) + NeedsWinCFIStackAlloc = false; + if (STI.isTargetWindows() && WindowsRequiresStackProbe(MF, NumBytes)) { uint32_t NumWords = NumBytes >> 2; @@ -888,7 +950,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, .setMIFlags(MachineInstr::FrameSetup) .add(predOps(ARMCC::AL)) .add(condCodeOp()); - if (NeedsWinCFI) { + if (NeedsWinCFIStackAlloc) { SEH = BuildMI(MF, dl, TII.get(ARM::SEH_StackAlloc)) .addImm(NumBytes) .addImm(/*Wide=*/1) @@ -927,13 +989,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // into spill area 1, including the FP in R11. In either case, it // is in area one and the adjustment needs to take place just after // that push. 
+ MachineBasicBlock::iterator AfterPush; if (HasFP) { - MachineBasicBlock::iterator AfterPush = std::next(GPRCS1Push); + AfterPush = std::next(GPRCS1Push); unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); - emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, - dl, TII, FramePtr, ARM::SP, - PushSize + FramePtrOffsetInPush, - MachineInstr::FrameSetup); + int FPOffset = PushSize + FramePtrOffsetInPush; + if (STI.splitFramePointerPush(MF)) { + AfterPush = std::next(GPRCS2Push); + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); + } else { + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, + FramePtr, ARM::SP, FPOffset, + MachineInstr::FrameSetup); + } if (!NeedsWinCFI) { if (FramePtrOffsetInPush + PushSize != 0) { unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( @@ -956,8 +1025,11 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Emit a SEH opcode indicating the prologue end. The rest of the prologue // instructions below don't need to be replayed to unwind the stack. if (NeedsWinCFI && MBBI != MBB.begin()) { - insertSEHRange(MBB, {}, MBBI, TII, MachineInstr::FrameSetup); - BuildMI(MBB, MBBI, dl, TII.get(ARM::SEH_PrologEnd)) + MachineBasicBlock::iterator End = MBBI; + if (HasFP && STI.splitFramePointerPush(MF)) + End = AfterPush; + insertSEHRange(MBB, {}, End, TII, MachineInstr::FrameSetup); + BuildMI(MBB, End, dl, TII.get(ARM::SEH_PrologEnd)) .setMIFlag(MachineInstr::FrameSetup); MF.setHasWinCFI(true); } @@ -1483,7 +1555,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, continue; if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && !isCmseEntry && !isTrap && AFI->getArgumentStackToRestore() == 0 && - STI.hasV5TOps() && MBB.succ_empty() && !hasPAC) { + STI.hasV5TOps() && MBB.succ_empty() && !hasPAC && + !STI.splitFramePointerPush(MF)) { Reg = ARM::PC; // Fold the return instruction into the LDM. 
DeleteRet = true; @@ -1847,12 +1920,21 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( .addImm(-4) .add(predOps(ARMCC::AL)); } - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, - MachineInstr::FrameSetup); - emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, - NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + if (STI.splitFramePointerPush(MF)) { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea1Register, 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, + &isSplitFPArea2Register, 0, MachineInstr::FrameSetup); + } else { + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, + 0, MachineInstr::FrameSetup); + emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, + NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); + } // The code above does not insert spill code for the aligned DPRCS2 registers. // The stack realignment code will be inserted between the push instructions @@ -1880,14 +1962,24 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters( emitAlignedDPRCS2Restores(MBB, MI, NumAlignedDPRCS2Regs, CSI, TRI); unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; - unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST :ARM::LDR_POST_IMM; + unsigned LdrOpc = + AFI->isThumbFunction() ? 
ARM::t2LDR_POST : ARM::LDR_POST_IMM; unsigned FltOpc = ARM::VLDMDIA_UPD; - emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, - NumAlignedDPRCS2Regs); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea2Register, 0); - emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, - &isARMArea1Register, 0); + if (STI.splitFramePointerPush(MF)) { + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea2Register, 0); + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isSplitFPArea1Register, 0); + } else { + emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, + NumAlignedDPRCS2Regs); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea2Register, 0); + emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, + &isARMArea1Register, 0); + } return true; } @@ -2287,7 +2379,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, // // We could do slightly better on Thumb1; in some cases, an sp-relative // offset would be legal even though an fp-relative offset is not. 
- int MaxFPOffset = getMaxFPOffset(STI, *AFI); + int MaxFPOffset = getMaxFPOffset(STI, *AFI, MF); bool HasLargeArgumentList = HasFP && (MaxFixedOffset - MaxFPOffset) > (int)EstimatedRSFixedSizeLimit; diff --git a/llvm/lib/Target/ARM/ARMSubtarget.cpp b/llvm/lib/Target/ARM/ARMSubtarget.cpp index 89e5b8762d80f..5b48292d8baf7 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.cpp +++ b/llvm/lib/Target/ARM/ARMSubtarget.cpp @@ -27,6 +27,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalValue.h" @@ -491,3 +492,12 @@ bool ARMSubtarget::ignoreCSRForAllocationOrder(const MachineFunction &MF, return isThumb2() && MF.getFunction().hasMinSize() && ARM::GPRRegClass.contains(PhysReg); } + +bool ARMSubtarget::splitFramePointerPush(const MachineFunction &MF) const { + const Function &F = MF.getFunction(); + if (!MF.getTarget().getMCAsmInfo()->usesWindowsCFI() || + !F.needsUnwindTableEntry()) + return false; + const MachineFrameInfo &MFI = MF.getFrameInfo(); + return MFI.hasVarSizedObjects() || getRegisterInfo()->hasStackRealignment(MF); +} diff --git a/llvm/lib/Target/ARM/ARMSubtarget.h b/llvm/lib/Target/ARM/ARMSubtarget.h index 8d56c70e80949..3f978f51a1d9e 100644 --- a/llvm/lib/Target/ARM/ARMSubtarget.h +++ b/llvm/lib/Target/ARM/ARMSubtarget.h @@ -447,6 +447,8 @@ class ARMSubtarget : public ARMGenSubtargetInfo { isThumb1Only(); } + bool splitFramePointerPush(const MachineFunction &MF) const; + bool useStride4VFPs() const; bool useMovt() const; diff --git a/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll new file mode 100644 index 0000000000000..6b9a8a9b05209 --- /dev/null +++ b/llvm/test/CodeGen/ARM/Windows/wineh-framepointer.ll @@ -0,0 +1,62 @@ +;; Check that this produces the expected assembly output +; RUN: llc 
-mtriple=thumbv7-windows -o - %s -verify-machineinstrs | FileCheck %s +;; Also try to write an object file, which verifies that the SEH opcodes +;; match the actual prologue/epilogue length. +; RUN: llc -mtriple=thumbv7-windows -filetype=obj -o %t.obj %s -verify-machineinstrs + +; CHECK-LABEL: alloc_local: +; CHECK-NEXT: .seh_proc alloc_local +; CHECK-NEXT: @ %bb.0: @ %entry +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10} +; CHECK-NEXT: .seh_save_regs_w {r4-r10} +; CHECK-NEXT: sub sp, #4 +; CHECK-NEXT: .seh_stackalloc 4 +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .seh_save_fregs {d8-d15} +; CHECK-NEXT: push.w {r11, lr} +; CHECK-NEXT: .seh_save_regs_w {r11, lr} +; CHECK-NEXT: mov r11, sp +; CHECK-NEXT: .seh_save_sp r11 +; CHECK-NEXT: .seh_endprologue +; CHECK-NEXT: movw r4, #1256 +; CHECK-NEXT: bl __chkstk +; CHECK-NEXT: sub.w sp, sp, r4 +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: bfc r4, #0, #4 +; CHECK-NEXT: mov sp, r4 + +; CHECK: ldr.w [[TMP:r[0-9]]], [r11, #104] +; CHECK: mov r0, [[TMP]] + +; CHECK: .seh_startepilogue +; CHECK-NEXT: mov sp, r11 +; CHECK-NEXT: .seh_save_sp r11 +; CHECK-NEXT: pop.w {r11, lr} +; CHECK-NEXT: .seh_save_regs_w {r11, lr} +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: .seh_save_fregs {d8-d15} +; CHECK-NEXT: add sp, #4 +; CHECK-NEXT: .seh_stackalloc 4 +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10} +; CHECK-NEXT: .seh_save_regs_w {r4-r10} +; CHECK-NEXT: bx lr +; CHECK-NEXT: .seh_nop +; CHECK-NEXT: .seh_endepilogue +; CHECK-NEXT: .seh_endproc + +define arm_aapcs_vfpcc void @alloc_local(i32 noundef %a, i32 noundef %b, i32 noundef %c, i32 noundef %d, i32 noundef %e) uwtable { +entry: + %buf2 = alloca [5000 x i8], align 16 + %vla = alloca i8, i32 %a, align 1 + call void @llvm.lifetime.start.p0(i64 5000, ptr nonnull %buf2) #3 + call arm_aapcs_vfpcc void @other(i32 noundef %e, ptr noundef nonnull %vla, ptr noundef nonnull %buf2) + call void asm sideeffect "", 
"~{r4},~{r5},~{r6},~{r7},~{r8},~{r9},~{r10},~{r11},~{r12}"() + call void asm sideeffect "", "~{d8},~{d9},~{d10},~{d11},~{d12},~{d13},~{d14},~{d15}"() + call void @llvm.lifetime.end.p0(i64 5000, ptr nonnull %buf2) #3 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) + +declare arm_aapcs_vfpcc void @other(i32 noundef, ptr noundef, ptr noundef)