diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 0225598cbbe8a..c13b886ff08bd 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -957,6 +957,22 @@ def ARMInterrupt : InheritableAttr, TargetSpecificAttr { let Documentation = [ARMInterruptDocs]; } +def ARMInterruptSaveFP : InheritableAttr, TargetSpecificAttr { + let Spellings = [GNU<"interrupt_save_fp">]; + let Args = [EnumArgument<"Interrupt", "InterruptType", /*is_string=*/true, + ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", ""], + ["IRQ", "FIQ", "SWI", "ABORT", "UNDEF", "Generic"], + 1>]; + let HasCustomParsing = 0; + let Documentation = [ARMInterruptSaveFPDocs]; +} + +def ARMSaveFP : InheritableAttr, TargetSpecificAttr { + let Spellings = []; + let Subjects = SubjectList<[Function]>; + let Documentation = [InternalOnly]; +} + def AVRInterrupt : InheritableAttr, TargetSpecificAttr { let Spellings = [GCC<"interrupt">]; let Subjects = SubjectList<[Function]>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 8e6faabfae647..aa7aca096b561 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2239,6 +2239,19 @@ The semantics are as follows: }]; } +def ARMInterruptSaveFPDocs : Documentation { + let Category = DocCatFunction; + let Heading = "interrupt_save_fp (ARM)"; + let Content = [{ +Clang supports the GNU style ``__attribute__((interrupt_save_fp("TYPE")))`` +on ARM targets. This attribute behaves the same way as the ARM interrupt +attribute, except the general purpose floating point registers are also saved, +along with FPEXC and FPSCR. Note, even on M-class CPUs, where the floating +point context can be automatically saved depending on the FPCCR, the general +purpose floating point registers will be saved. 
+ }]; +} + def BPFPreserveAccessIndexDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 9a0bae9c216de..f3e2f07b873d3 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -337,7 +337,14 @@ def warn_anyx86_excessive_regsave : Warning< " or be compiled with '-mgeneral-regs-only'">, InGroup>; def warn_arm_interrupt_calling_convention : Warning< - "call to function without interrupt attribute could clobber interruptee's VFP registers">, + "call to function without interrupt attribute could clobber interruptee's " + "VFP registers; consider using the `interrupt_save_fp` attribute to prevent " + "this behavior">, + InGroup; +def warn_arm_interrupt_save_fp_without_vfp_unit : Warning< + "`interrupt_save_fp` only applies to targets that have a VFP unit enabled " + "for this compilation; this will be treated as a regular `interrupt` " + "attribute">, InGroup; def warn_interrupt_attribute_invalid : Warning< "%select{MIPS|MSP430|RISC-V}0 'interrupt' attribute only applies to " diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index 885d9c77d0e76..a45858e2e2e58 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -185,6 +185,12 @@ class ARMTargetCodeGenInfo : public TargetCodeGenInfo { Fn->addFnAttr("interrupt", Kind); + // Note: the ARMSaveFPAttr can only exist if we also have an interrupt + // attribute + const ARMSaveFPAttr *SaveFPAttr = FD->getAttr(); + if (SaveFPAttr) + Fn->addFnAttr("save-fp"); + ARMABIKind ABI = getABIInfo().getABIKind(); if (ABI == ARMABIKind::APCS) return; diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 363ae93cb62df..931c98a790b63 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -7524,6 +7524,20 @@ static 
void handleARMInterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { D->addAttr(::new (S.Context) ARMInterruptAttr(S.Context, AL, Kind)); } +static void handleARMInterruptSaveFPAttr(Sema &S, Decl *D, + const ParsedAttr &AL) { + handleARMInterruptAttr(S, D, AL); + + bool VFP = S.Context.getTargetInfo().hasFeature("vfp"); + + if (!VFP) { + S.Diag(D->getLocation(), diag::warn_arm_interrupt_save_fp_without_vfp_unit); + return; + } + + D->addAttr(::new (S.Context) ARMSaveFPAttr(S.Context, AL)); +} + static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { // MSP430 'interrupt' attribute is applied to // a function with no parameters and void return type. @@ -9134,9 +9148,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, if (AL.isCXX11Attribute() && !Options.IncludeCXX11Attributes) return; - // Unknown attributes are automatically warned on. Target-specific attributes - // which do not apply to the current target architecture are treated as - // though they were unknown attributes. + // Unknown attributes are automatically warned on. Target-specific + // attributes which do not apply to the current target architecture are + // treated as though they were unknown attributes. 
if (AL.getKind() == ParsedAttr::UnknownAttribute || !AL.existsInTarget(S.Context.getTargetInfo())) { S.Diag(AL.getLoc(), @@ -9241,6 +9255,9 @@ ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, const ParsedAttr &AL, case ParsedAttr::AT_Interrupt: handleInterruptAttr(S, D, AL); break; + case ParsedAttr::AT_ARMInterruptSaveFP: + handleARMInterruptSaveFPAttr(S, D, AL); + break; case ParsedAttr::AT_X86ForceAlignArgPointer: handleX86ForceAlignArgPointerAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index b1322f30fa6b6..9bd2a2072f939 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -6939,7 +6939,8 @@ ExprResult Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, // no_caller_saved_registers since there is no efficient way to // save and restore the non-GPR state. if (auto *Caller = getCurFunctionDecl()) { - if (Caller->hasAttr()) { + if (Caller->hasAttr() && + !Caller->hasAttr()) { bool VFP = Context.getTargetInfo().hasFeature("vfp"); if (VFP && (!FDecl || !FDecl->hasAttr())) { Diag(Fn->getExprLoc(), diag::warn_arm_interrupt_calling_convention); diff --git a/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c b/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c new file mode 100644 index 0000000000000..457f725f8d3af --- /dev/null +++ b/clang/test/CodeGen/arm-interrupt-save-fp-attr-status-regs.c @@ -0,0 +1,34 @@ +// REQUIRES: arm-registered-target +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -marm -S -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-R +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r5 -mfpu=vfpv3-d16 -mthumb -S -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-R +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -marm -S -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-R +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-r4 -mfpu=vfpv3-d16 -mthumb -S -o - %s \ +// RUN: 
| FileCheck %s --check-prefix=CHECK-R +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -S -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-M +// RUN: %clang -target arm-none-none-eabihf -mcpu=cortex-m33 -mfpu=fpv5-sp-d16 -S -o - %s \ +// RUN: | FileCheck %s --check-prefix=CHECK-M + +void bar(); + +__attribute__((interrupt_save_fp)) void test_generic_interrupt() { + // CHECK-R: vmrs r4, fpscr + // CHECK-R-NEXT: vmrs r5, fpexc + // CHECK-R-NEXT: .save {fpscr, fpexc} + // CHECK-R-NEXT: push {r4, r5} + // ..... + // CHECK-R: pop {r4, r5} + // CHECK-R-NEXT: vmsr fpscr, r4 + // CHECK-R-NEXT: vmsr fpexc, r5 + + // CHECK-M: vmrs r4, fpscr + // CHECK-M-NEXT: .save {fpscr} + // CHECK-M-NEXT: push {r4} + // ..... + // CHECK-M: pop {r4} + // CHECK-M-NEXT: vmsr fpscr, r4 + bar(); +} diff --git a/clang/test/CodeGen/arm-interrupt-save-fp-attr.c b/clang/test/CodeGen/arm-interrupt-save-fp-attr.c new file mode 100644 index 0000000000000..5db8b3daa7212 --- /dev/null +++ b/clang/test/CodeGen/arm-interrupt-save-fp-attr.c @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple thumb-apple-darwin -target-abi aapcs -target-feature +vfp4 -target-cpu cortex-m3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple arm-apple-darwin -target-abi apcs-gnu -target-feature +vfp4 -emit-llvm -o - %s | FileCheck %s --check-prefix=CHECK-APCS + +__attribute__((interrupt_save_fp)) void test_generic_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]] + + // CHECK-APCS: define{{.*}} void @test_generic_interrupt() [[GENERIC_ATTR:#[0-9]+]] +} + +__attribute__((interrupt_save_fp("IRQ"))) void test_irq_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_irq_interrupt() [[IRQ_ATTR:#[0-9]+]] +} + +__attribute__((interrupt_save_fp("FIQ"))) void test_fiq_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_fiq_interrupt() [[FIQ_ATTR:#[0-9]+]] +} + +__attribute__((interrupt_save_fp("SWI"))) void 
test_swi_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_swi_interrupt() [[SWI_ATTR:#[0-9]+]] +} + +__attribute__((interrupt_save_fp("ABORT"))) void test_abort_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_abort_interrupt() [[ABORT_ATTR:#[0-9]+]] +} + + +__attribute__((interrupt_save_fp("UNDEF"))) void test_undef_interrupt() { + // CHECK: define{{.*}} arm_aapcscc void @test_undef_interrupt() [[UNDEF_ATTR:#[0-9]+]] +} + + +// CHECK: attributes [[GENERIC_ATTR]] = { {{.*}} {{"interrupt"[^=]}}{{.*}} "save-fp" +// CHECK: attributes [[IRQ_ATTR]] = { {{.*}} "interrupt"="IRQ" {{.*}} "save-fp" +// CHECK: attributes [[FIQ_ATTR]] = { {{.*}} "interrupt"="FIQ" {{.*}} "save-fp" +// CHECK: attributes [[SWI_ATTR]] = { {{.*}} "interrupt"="SWI" {{.*}} "save-fp" +// CHECK: attributes [[ABORT_ATTR]] = { {{.*}} "interrupt"="ABORT" {{.*}} "save-fp" +// CHECK: attributes [[UNDEF_ATTR]] = { {{.*}} "interrupt"="UNDEF" {{.*}} "save-fp" + +// CHECK-APCS: attributes [[GENERIC_ATTR]] = { {{.*}} "interrupt" {{.*}} "save-fp" \ No newline at end of file diff --git a/clang/test/Sema/arm-interrupt-attr.c b/clang/test/Sema/arm-interrupt-attr.c index 3537fba8521ad..937fd929483da 100644 --- a/clang/test/Sema/arm-interrupt-attr.c +++ b/clang/test/Sema/arm-interrupt-attr.c @@ -31,13 +31,13 @@ void caller1(void) { #ifndef SOFT __attribute__((interrupt("IRQ"))) void caller2(void) { - callee1(); // expected-warning {{call to function without interrupt attribute could clobber interruptee's VFP registers}} + callee1(); // expected-warning {{call to function without interrupt attribute could clobber interruptee's VFP registers; consider using the `interrupt_save_fp` attribute to prevent this behavior}} callee2(); } void (*callee3)(void); __attribute__((interrupt("IRQ"))) void caller3(void) { - callee3(); // expected-warning {{call to function without interrupt attribute could clobber interruptee's VFP registers}} + callee3(); // expected-warning {{call to function without 
interrupt attribute could clobber interruptee's VFP registers; consider using the `interrupt_save_fp` attribute to prevent this behavior}} } #else __attribute__((interrupt("IRQ"))) void caller2(void) { diff --git a/clang/test/Sema/arm-interrupt-save-fp-attr.c b/clang/test/Sema/arm-interrupt-save-fp-attr.c new file mode 100644 index 0000000000000..e0fd4e2c4d128 --- /dev/null +++ b/clang/test/Sema/arm-interrupt-save-fp-attr.c @@ -0,0 +1,59 @@ +// RUN: %clang_cc1 %s -triple arm-apple-darwin -target-feature +vfp2 -verify -fsyntax-only +// RUN: %clang_cc1 %s -triple thumb-apple-darwin -target-feature +vfp3 -verify -fsyntax-only +// RUN: %clang_cc1 %s -triple armeb-none-eabi -target-feature +vfp4 -verify -fsyntax-only +// RUN: %clang_cc1 %s -triple thumbeb-none-eabi -target-feature +neon -verify -fsyntax-only +// RUN: %clang_cc1 %s -triple thumbeb-none-eabi -target-feature +neon -target-feature +soft-float -DSOFT -verify -fsyntax-only + +#ifndef SOFT +__attribute__((interrupt_save_fp(IRQ))) void foo() {} // expected-error {{'interrupt_save_fp' attribute requires a string}} +__attribute__((interrupt_save_fp("irq"))) void foo1() {} // expected-warning {{'interrupt_save_fp' attribute argument not supported: irq}} + +__attribute__((interrupt_save_fp("IRQ", 1))) void foo2() {} // expected-error {{'interrupt_save_fp' attribute takes no more than 1 argument}} +__attribute__((interrupt_save_fp("IRQ"))) void foo3() {} +__attribute__((interrupt_save_fp("FIQ"))) void foo4() {} +__attribute__((interrupt_save_fp("SWI"))) void foo5() {} +__attribute__((interrupt_save_fp("ABORT"))) void foo6() {} +__attribute__((interrupt_save_fp("UNDEF"))) void foo7() {} +__attribute__((interrupt_save_fp)) void foo8() {} +__attribute__((interrupt_save_fp())) void foo9() {} +__attribute__((interrupt_save_fp(""))) void foo10() {} +void callee1(); +__attribute__((interrupt_save_fp("IRQ"))) void callee2(); +void caller1() { + callee1(); + callee2(); +} +__attribute__((interrupt_save_fp("IRQ"))) void 
caller2() { + callee1(); + callee2(); +} + +void (*callee3)(); +__attribute__((interrupt_save_fp("IRQ"))) void caller3() { + callee3(); +} +#else +__attribute__((interrupt_save_fp("IRQ"))) void foo3() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp("FIQ"))) void foo4() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp("SWI"))) void foo5() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp("ABORT"))) void foo6() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp("UNDEF"))) void foo7() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp)) void foo8() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp())) void foo9() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +__attribute__((interrupt_save_fp(""))) void foo10() {} // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} 
+void callee1(); +__attribute__((interrupt_save_fp("IRQ"))) void callee2(); // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} +void caller1() { + callee1(); + callee2(); +} +__attribute__((interrupt_save_fp("IRQ"))) void caller2() { // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} + callee1(); + callee2(); +} + +void (*callee3)(); +__attribute__((interrupt_save_fp("IRQ"))) void caller3() { // expected-warning {{`interrupt_save_fp` only applies to targets that have a VFP unit enabled for this compilation; this will be treated as a regular `interrupt` attribute}} + callee3(); +} +#endif \ No newline at end of file diff --git a/llvm/include/llvm/IR/IntrinsicsARM.td b/llvm/include/llvm/IR/IntrinsicsARM.td index 11b9877091a8e..91f9d40f2e05f 100644 --- a/llvm/include/llvm/IR/IntrinsicsARM.td +++ b/llvm/include/llvm/IR/IntrinsicsARM.td @@ -311,7 +311,7 @@ def int_arm_isb : ClangBuiltin<"__builtin_arm_isb">, MSBuiltin<"__isb">, // VFP def int_arm_get_fpscr : ClangBuiltin<"__builtin_arm_get_fpscr">, - DefaultAttrsIntrinsic<[llvm_i32_ty], [], []>; + DefaultAttrsIntrinsic<[llvm_i32_ty], [], [IntrReadMem]>; def int_arm_set_fpscr : ClangBuiltin<"__builtin_arm_set_fpscr">, DefaultAttrsIntrinsic<[], [llvm_i32_ty], []>; def int_arm_vcvtr : DefaultAttrsIntrinsic<[llvm_float_ty], diff --git a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 642739a29d6b0..353f5d1a57540 100644 --- a/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -1207,6 +1207,14 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { SrcReg = ~0U; DstReg = MI->getOperand(0).getReg(); break; + case ARM::VMRS: + SrcReg = ARM::FPSCR; + DstReg = MI->getOperand(0).getReg(); + break; + 
case ARM::VMRS_FPEXC: + SrcReg = ARM::FPEXC; + DstReg = MI->getOperand(0).getReg(); + break; default: SrcReg = MI->getOperand(1).getReg(); DstReg = MI->getOperand(0).getReg(); @@ -1369,6 +1377,13 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { // correct ".save" later. AFI->EHPrologueRemappedRegs[DstReg] = SrcReg; break; + case ARM::VMRS: + case ARM::VMRS_FPEXC: + // If a function spills FPSCR or FPEXC, we copy the values to low + // registers before pushing them. Record the copy so we can emit the + // correct ".save" later. + AFI->EHPrologueRemappedRegs[DstReg] = SrcReg; + break; case ARM::tLDRpci: { // Grab the constpool index and check, whether it corresponds to // original or cloned constpool entry. diff --git a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9adf758b46c48..ed774ea185f85 100644 --- a/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -79,11 +79,35 @@ ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { : (UseSplitPush ? CSR_ATPCS_SplitPush_SwiftTail_SaveList : CSR_AAPCS_SwiftTail_SaveList); } else if (F.hasFnAttribute("interrupt")) { + + // Don't bother saving the floating point registers if target is not hard + // float. This will prevent the Thumb1FrameLowering (cortex-m0) from + // crashing due to an llvm_unreachable being triggered when a D-class + // register is in the calling convention. + if (STI.isTargetHardFloat() && F.hasFnAttribute("save-fp")) { + bool HasNEON = STI.hasNEON(); + + if (STI.isMClass()) { + assert(!HasNEON && "NEON is only for Cortex-R/A"); + return UseSplitPush ? CSR_ATPCS_SplitPush_FP_SaveList + : CSR_AAPCS_FP_SaveList; + } + if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { + return HasNEON ? CSR_FIQ_FP_NEON_SaveList : CSR_FIQ_FP_SaveList; + } + return HasNEON ? 
CSR_GenericInt_FP_NEON_SaveList + : CSR_GenericInt_FP_SaveList; + } + if (STI.isMClass()) { // M-class CPUs have hardware which saves the registers needed to allow a // function conforming to the AAPCS to function as a handler. + // Additionally, M-class has hardware support for saving VFP registers, + // but the option can be disabled. return UseSplitPush ? CSR_ATPCS_SplitPush_SaveList : CSR_AAPCS_SaveList; - } else if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { + } + + if (F.getFnAttribute("interrupt").getValueAsString() == "FIQ") { // Fast interrupt mode gives the handler a private copy of R8-R14, so less // need to be saved to restore user-mode state. return CSR_FIQ_SaveList; diff --git a/llvm/lib/Target/ARM/ARMCallingConv.td b/llvm/lib/Target/ARM/ARMCallingConv.td index d14424c2decac..18bb0c6571cf3 100644 --- a/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/llvm/lib/Target/ARM/ARMCallingConv.td @@ -267,19 +267,14 @@ def CC_ARM_Win32_CFGuard_Check : CallingConv<[ def CSR_NoRegs : CalleeSavedRegs<(add)>; def CSR_FPRegs : CalleeSavedRegs<(add (sequence "D%u", 0, 31))>; +def CSR_FP_Interrupt_Regs : CalleeSavedRegs<(add FPSCR, FPEXC, (sequence "D%u", 15, 0))>; +def CSR_FP_NEON_Interrupt_Regs : CalleeSavedRegs<(add CSR_FP_Interrupt_Regs, + (sequence "D%u", 31, 16))>; + def CSR_AAPCS : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8))>; -// The Windows Control Flow Guard Check function preserves the same registers as -// AAPCS, and also preserves all floating point registers. -def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, - R6, R5, R4, (sequence "D%u", 15, 0))>; - -// R8 is used to pass swifterror, remove it from CSR. -def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>; - -// R10 is used to pass swiftself, remove it from CSR. 
-def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>; +def CSR_AAPCS_FP : CalleeSavedRegs<(add CSR_AAPCS, CSR_FP_Interrupt_Regs)>; // The order of callee-saved registers needs to match the order we actually push // them in FrameLowering, because this order is what's used by @@ -293,6 +288,21 @@ def CSR_Win_SplitFP : CalleeSavedRegs<(add R10, R9, R8, R7, R6, R5, R4, (sequence "D%u", 15, 8), LR, R11)>; +def CSR_ATPCS_SplitPush_FP : CalleeSavedRegs<(add CSR_ATPCS_SplitPush, + CSR_FP_Interrupt_Regs)>; + +// The Windows Control Flow Guard Check function preserves the same registers as +// AAPCS, and also preserves all floating point registers. +def CSR_Win_AAPCS_CFGuard_Check : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, + R6, R5, R4, (sequence "D%u", 15, 0))>; + +// R8 is used to pass swifterror, remove it from CSR. +def CSR_AAPCS_SwiftError : CalleeSavedRegs<(sub CSR_AAPCS, R8)>; + +// R10 is used to pass swiftself, remove it from CSR. +def CSR_AAPCS_SwiftTail : CalleeSavedRegs<(sub CSR_AAPCS, R10)>; + + // R8 is used to pass swifterror, remove it from CSR. def CSR_ATPCS_SplitPush_SwiftError : CalleeSavedRegs<(sub CSR_ATPCS_SplitPush, R8)>; @@ -357,6 +367,13 @@ def CSR_iOS_CXX_TLS_ViaCopy : CalleeSavedRegs<(sub CSR_iOS_CXX_TLS, // generally does rather than tracking its liveness as a normal register. def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; +def CSR_GenericInt_FP : CalleeSavedRegs<(add CSR_GenericInt, + CSR_FP_Interrupt_Regs)>; + +def CSR_GenericInt_FP_NEON : CalleeSavedRegs<(add CSR_GenericInt_FP, + CSR_FP_NEON_Interrupt_Regs)>; + + // The fast interrupt handlers have more private state and get their own copies // of R8-R12, in addition to SP and LR. As before, mark LR for saving too. @@ -365,4 +382,9 @@ def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; // registers. 
def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; +def CSR_FIQ_FP : CalleeSavedRegs<(add CSR_FIQ, CSR_FP_Interrupt_Regs)>; + +def CSR_FIQ_FP_NEON : CalleeSavedRegs<(add CSR_FIQ_FP, + CSR_FP_NEON_Interrupt_Regs)>; + diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 11496a6e032dd..8df316b83270a 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -759,7 +759,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Determine the sizes of each callee-save spill areas and record which frame // belongs to which callee-save spill areas. - unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; + unsigned GPRCS1Size = 0, GPRCS2Size = 0, FPStatusSize = 0, DPRCS1Size = 0, + GPRCS3Size = 0, DPRCS2Size = 0; int FramePtrSpillFI = 0; int D8SpillFI = 0; @@ -790,84 +791,67 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Determine spill area sizes. - if (STI.splitFramePointerPush(MF)) { - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R11: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS2Size += 4; - break; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R12: - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; - break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. 
- if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; + for (const CalleeSavedInfo &I : CSI) { + Register Reg = I.getReg(); + int FI = I.getFrameIdx(); + + if (Reg == FramePtr) { + FramePtrSpillFI = FI; + } + + if (STI.splitFramePointerPush(MF)) { + if (Reg == ARM::LR || Reg == ARM::R11) { + GPRCS3Size += 4; + continue; } } - } else { - for (const CalleeSavedInfo &I : CSI) { - Register Reg = I.getReg(); - int FI = I.getFrameIdx(); - switch (Reg) { - case ARM::R8: - case ARM::R9: - case ARM::R10: - case ARM::R11: - case ARM::R12: - if (STI.splitFramePushPop(MF)) { - GPRCS2Size += 4; - break; - } - [[fallthrough]]; - case ARM::R0: - case ARM::R1: - case ARM::R2: - case ARM::R3: - case ARM::R4: - case ARM::R5: - case ARM::R6: - case ARM::R7: - case ARM::LR: - if (Reg == FramePtr) - FramePtrSpillFI = FI; - GPRCS1Size += 4; - break; - case ARM::FPCXTNS: - FPCXTSaveSize = 4; + + switch (Reg) { + case ARM::R8: + case ARM::R9: + case ARM::R10: + case ARM::R11: + case ARM::R12: + if (STI.splitFramePushPop(MF)) { + GPRCS2Size += 4; break; - default: - // This is a DPR. Exclude the aligned DPRCS2 spills. - if (Reg == ARM::D8) - D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) - DPRCSSize += 8; + } + [[fallthrough]]; + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: + case ARM::R4: + case ARM::R5: + case ARM::R6: + case ARM::R7: + case ARM::LR: + case ARM::SP: + case ARM::PC: + GPRCS1Size += 4; + break; + case ARM::FPCXTNS: + FPCXTSaveSize = 4; + break; + case ARM::FPSCR: + case ARM::FPEXC: + FPStatusSize += 4; + break; + default: + // This is a DPR. Exclude the aligned DPRCS2 spills. 
+ if (Reg == ARM::D8) + D8SpillFI = FI; + + if (ARM::D8 <= Reg && Reg < ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { + DPRCS2Size += 8; + } else { + DPRCS1Size += 8; } } } - MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push; + MachineBasicBlock::iterator LastPush = MBB.end(), GPRCS1Push, GPRCS2Push, + GPRCS3Push; // Move past the PAC computation. if (AFI->shouldSignReturnAddress()) @@ -897,20 +881,15 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned FPCXTOffset = NumBytes - ArgRegsSaveSize - FPCXTSaveSize; unsigned GPRCS1Offset = FPCXTOffset - GPRCS1Size; unsigned GPRCS2Offset = GPRCS1Offset - GPRCS2Size; - Align DPRAlign = DPRCSSize ? std::min(Align(8), Alignment) : Align(4); - unsigned DPRGapSize = GPRCS1Size + FPCXTSaveSize + ArgRegsSaveSize; - if (!STI.splitFramePointerPush(MF)) { - DPRGapSize += GPRCS2Size; - } - DPRGapSize %= DPRAlign.value(); + unsigned FPStatusOffset = GPRCS2Offset - FPStatusSize; + + Align DPRAlign = DPRCS1Size ? std::min(Align(8), Alignment) : Align(4); + unsigned DPRGapSize = (ArgRegsSaveSize + FPCXTSaveSize + GPRCS1Size + + GPRCS2Size + FPStatusSize) % + DPRAlign.value(); + + unsigned DPRCS1Offset = FPStatusOffset - DPRGapSize - DPRCS1Size; - unsigned DPRCSOffset; - if (STI.splitFramePointerPush(MF)) { - DPRCSOffset = GPRCS1Offset - DPRGapSize - DPRCSSize; - GPRCS2Offset = DPRCSOffset - GPRCS2Size; - } else { - DPRCSOffset = GPRCS2Offset - DPRGapSize - DPRCSSize; - } int FramePtrOffsetInPush = 0; if (HasFP) { int FPOffset = MFI.getObjectOffset(FramePtrSpillFI); @@ -922,14 +901,28 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); - AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + AFI->setDPRCalleeSavedAreaOffset(DPRCS1Offset); // Move past area 2. 
- if (GPRCS2Size > 0 && !STI.splitFramePointerPush(MF)) { + if (GPRCS2Size > 0) { + assert(STI.splitFramePushPop(MF)); GPRCS2Push = LastPush = MBBI++; DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); } + // Move past FP status save area. + if (FPStatusSize > 0) { + while (MBBI != MBB.end()) { + unsigned Opc = MBBI->getOpcode(); + if (Opc == ARM::VMRS || Opc == ARM::VMRS_FPEXC) + MBBI++; + else + break; + } + LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, FPStatusSize); + } + // Prolog/epilog inserter assumes we correctly align DPRs on the stack, so our // .cfi_offset operations will reflect that. if (DPRGapSize) { @@ -945,7 +938,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Move past area 3. - if (DPRCSSize > 0) { + if (DPRCS1Size > 0) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::VSTMDDB_UPD) { @@ -955,7 +948,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } // Move past the aligned DPRCS2 area. - if (AFI->getNumAlignedDPRCS2Regs() > 0) { + if (DPRCS2Size > 0) { MBBI = skipAlignedDPRCS2Spills(MBBI, AFI->getNumAlignedDPRCS2Regs()); // The code inserted by emitAlignedDPRCS2Spills realigns the stack, and // leaves the stack pointer pointing to the DPRCS2 area. @@ -963,11 +956,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, // Adjust NumBytes to represent the stack slots below the DPRCS2 area. 
NumBytes += MFI.getObjectOffset(D8SpillFI); } else - NumBytes = DPRCSOffset; + NumBytes = DPRCS1Offset; - if (GPRCS2Size > 0 && STI.splitFramePointerPush(MF)) { - GPRCS2Push = LastPush = MBBI++; - DefCFAOffsetCandidates.addInst(LastPush, GPRCS2Size); + // Move past split LR + R11 push + if (GPRCS3Size > 0) { + assert(STI.splitFramePointerPush(MF)); + GPRCS3Push = LastPush = MBBI++; + DefCFAOffsetCandidates.addInst(LastPush, GPRCS3Size); } bool NeedsWinCFIStackAlloc = NeedsWinCFI; @@ -1077,7 +1072,7 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, unsigned PushSize = sizeOfSPAdjustment(*GPRCS1Push); int FPOffset = PushSize + FramePtrOffsetInPush; if (STI.splitFramePointerPush(MF)) { - AfterPush = std::next(GPRCS2Push); + AfterPush = std::next(GPRCS3Push); emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, AfterPush, dl, TII, FramePtr, ARM::SP, 0, MachineInstr::FrameSetup); } else { @@ -1153,7 +1148,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, } } - if (GPRCS2Size > 0 && !NeedsWinCFI) { + if (GPRCS2Size > 0) { + assert(STI.splitFramePushPop(MF)); MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); for (const auto &Entry : CSI) { Register Reg = Entry.getReg(); @@ -1164,22 +1160,20 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, case ARM::R10: case ARM::R11: case ARM::R12: - if (STI.splitFramePushPop(MF)) { - unsigned DwarfReg = MRI->getDwarfRegNum( - Reg == ARM::R12 ? ARM::RA_AUTH_CODE : Reg, true); - unsigned Offset = MFI.getObjectOffset(FI); - unsigned CFIIndex = MF.addFrameInst( - MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); - BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) - .addCFIIndex(CFIIndex) - .setMIFlags(MachineInstr::FrameSetup); - } + unsigned DwarfReg = MRI->getDwarfRegNum( + Reg == ARM::R12 ? 
ARM::RA_AUTH_CODE : Reg, true); + unsigned Offset = MFI.getObjectOffset(FI); + unsigned CFIIndex = MF.addFrameInst( + MCCFIInstruction::createOffset(nullptr, DwarfReg, Offset)); + BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) + .addCFIIndex(CFIIndex) + .setMIFlags(MachineInstr::FrameSetup); break; } } } - if (DPRCSSize > 0 && !NeedsWinCFI) { + if (DPRCS1Size > 0 && !NeedsWinCFI) { // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. MachineBasicBlock::iterator Pos = std::next(LastPush); @@ -1213,8 +1207,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF, AFI->setFPCXTSaveAreaSize(FPCXTSaveSize); AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); + AFI->setFPStatusSavesSize(FPStatusSize); AFI->setDPRCalleeSavedGapSize(DPRGapSize); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + AFI->setDPRCalleeSavedArea1Size(DPRCS1Size); + AFI->setGPRCalleeSavedArea3Size(GPRCS3Size); // If we need dynamic stack realignment, do it here. Be paranoid and make // sure if we also have VLAs, we have a base pointer for frame access. @@ -1335,8 +1331,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, AFI->getFPCXTSaveAreaSize() + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + + AFI->getFPStatusSavesSize() + AFI->getDPRCalleeSavedGapSize() + - AFI->getDPRCalleeSavedAreaSize()); + AFI->getDPRCalleeSavedArea1Size() + + AFI->getGPRCalleeSavedArea3Size()); // Reset SP based on frame pointer only if the stack frame extends beyond // frame pointer stack slot or target is ELF and the function has FP. @@ -1384,10 +1382,12 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); // Increment past our save areas. 
- if (AFI->getGPRCalleeSavedArea2Size() && STI.splitFramePointerPush(MF)) + if (AFI->getGPRCalleeSavedArea3Size()) { + assert(STI.splitFramePointerPush(MF)); MBBI++; + } - if (MBBI != MBB.end() && AFI->getDPRCalleeSavedAreaSize()) { + if (MBBI != MBB.end() && AFI->getDPRCalleeSavedArea1Size()) { MBBI++; // Since vpop register list cannot have gaps, there may be multiple vpop // instructions in the epilogue. @@ -1401,8 +1401,10 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, MachineInstr::FrameDestroy); } - if (AFI->getGPRCalleeSavedArea2Size() && !STI.splitFramePointerPush(MF)) + if (AFI->getGPRCalleeSavedArea2Size()) { + assert(STI.splitFramePushPop(MF)); MBBI++; + } if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; if (ReservedArgStack || IncomingArgStackToRestore) { @@ -1710,6 +1712,108 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, } } +void ARMFrameLowering::emitFPStatusSaves(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + ArrayRef CSI, + unsigned PushOpc) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + SmallVector Regs; + auto RegPresent = [&CSI](Register Reg) { + return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) { + return C.getReg() == Reg; + }); + }; + + // If we need to save FPSCR, then we must move FPSCR into R4 with the VMRS + // instruction. + if (RegPresent(ARM::FPSCR)) { + BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS), ARM::R4) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + + Regs.push_back(ARM::R4); + } + + // If we need to save FPEXC, then we must move FPEXC into R5 with the + // VMRS_FPEXC instruction. + if (RegPresent(ARM::FPEXC)) { + BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::VMRS_FPEXC), ARM::R5) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + + Regs.push_back(ARM::R5); + } + + // If neither FPSCR nor FPEXC is present, then do nothing.
+ if (Regs.size() == 0) + return; + + // Push both R4 and R5 onto the stack, if present. + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DebugLoc(), TII.get(PushOpc), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameSetup); + + for (Register Reg : Regs) { + MIB.addReg(Reg); + } +} + +void ARMFrameLowering::emitFPStatusRestores( + MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + MutableArrayRef CSI, unsigned LdmOpc) const { + MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo(); + + SmallVector Regs; + auto RegPresent = [&CSI](Register Reg) { + return llvm::any_of(CSI, [Reg](const CalleeSavedInfo &C) { + return C.getReg() == Reg; + }); + }; + + // Do nothing if we don't need to restore any FP status registers. + if (!RegPresent(ARM::FPSCR) && !RegPresent(ARM::FPEXC)) + return; + + // Pop registers off of the stack. + MachineInstrBuilder MIB = + BuildMI(MBB, MI, DebugLoc(), TII.get(LdmOpc), ARM::SP) + .addReg(ARM::SP) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + + // If FPSCR was saved, it will be popped into R4. + if (RegPresent(ARM::FPSCR)) { + MIB.addReg(ARM::R4, RegState::Define); + } + + // If FPEXC was saved, it will be popped into R5. + if (RegPresent(ARM::FPEXC)) { + MIB.addReg(ARM::R5, RegState::Define); + } + + // Move the FPSCR value back into the register with the VMSR instruction. + if (RegPresent(ARM::FPSCR)) { + BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR)) + .addReg(ARM::R4) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + } + + // Move the FPEXC value back into the register with the VMSR_FPEXC + // instruction. 
+ if (RegPresent(ARM::FPEXC)) { + BuildMI(MBB, MI, DebugLoc(), STI.getInstrInfo()->get(ARM::VMSR_FPEXC)) + .addReg(ARM::R5) + .add(predOps(ARMCC::AL)) + .setMIFlags(MachineInstr::FrameDestroy); + } +} + /// Emit aligned spill instructions for NumAlignedDPRCS2Regs D-registers /// starting from d8. Also insert stack realignment code and leave the stack /// pointer pointing to the d8 spill slot. @@ -2005,6 +2109,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( .addImm(-4) .add(predOps(ARMCC::AL)); } + if (STI.splitFramePointerPush(MF)) { emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isSplitFPArea1Register, 0, MachineInstr::FrameSetup); @@ -2017,6 +2122,7 @@ bool ARMFrameLowering::spillCalleeSavedRegisters( 0, MachineInstr::FrameSetup); emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register, 0, MachineInstr::FrameSetup); + emitFPStatusSaves(MBB, MI, CSI, PushOpc); emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register, NumAlignedDPRCS2Regs, MachineInstr::FrameSetup); } @@ -2060,6 +2166,7 @@ bool ARMFrameLowering::restoreCalleeSavedRegisters( } else { emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register, NumAlignedDPRCS2Regs); + emitFPStatusRestores(MBB, MI, CSI, PopOpc); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, &isARMArea2Register, 0); emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, @@ -2278,6 +2385,7 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, unsigned NumFPRSpills = 0; SmallVector UnspilledCS1GPRs; SmallVector UnspilledCS2GPRs; + const Function &F = MF.getFunction(); const ARMBaseRegisterInfo *RegInfo = static_cast( MF.getSubtarget().getRegisterInfo()); const ARMBaseInstrInfo &TII = @@ -2289,6 +2397,21 @@ void ARMFrameLowering::determineCalleeSaves(MachineFunction &MF, (void)TRI; // Silence unused warning in non-assert builds. 
Register FramePtr = RegInfo->getFrameRegister(MF); + // For a floating point interrupt, save these registers always, since LLVM + // currently doesn't model reads/writes to these registers. + if (F.hasFnAttribute("interrupt") && F.hasFnAttribute("save-fp")) { + SavedRegs.set(ARM::FPSCR); + SavedRegs.set(ARM::R4); + + // This register will only be present on non-MClass targets. + if (STI.isMClass()) { + SavedRegs.reset(ARM::FPEXC); + } else { + SavedRegs.set(ARM::FPEXC); + SavedRegs.set(ARM::R5); + } + } + // Spill R4 if Thumb2 function requires stack realignment - it will be used as // scratch register. Also spill R4 if Thumb2 function has varsized objects, // since it's not always possible to restore sp from fp in a single diff --git a/llvm/lib/Target/ARM/ARMFrameLowering.h b/llvm/lib/Target/ARM/ARMFrameLowering.h index 3c7358d8cd53e..7a67effe21461 100644 --- a/llvm/lib/Target/ARM/ARMFrameLowering.h +++ b/llvm/lib/Target/ARM/ARMFrameLowering.h @@ -97,6 +97,15 @@ class ARMFrameLowering : public TargetFrameLowering { bool (*Func)(unsigned, bool), unsigned NumAlignedDPRCS2Regs) const; + void emitFPStatusSaves(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, + ArrayRef CSI, + unsigned PushOneOpc) const; + + void emitFPStatusRestores(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + MutableArrayRef CSI, + unsigned LdrOpc) const; + MachineBasicBlock::iterator eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td index 3094a4db2b4d1..4837eb57c54f5 100644 --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2488,6 +2488,9 @@ class MovFromVFP opc19_16, dag oops, dag iops, string opc, string asm, let Inst{3-0} = 0b0000; let Unpredictable{7-5} = 0b111; let Unpredictable{3-0} = 0b1111; + + // Needed to avoid errors when a MachineInstr::FrameSetup flag is set.
+ let mayStore = 0; } let DecoderMethod = "DecodeForVMRSandVMSR" in { diff --git a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h index b9ff3a08f998f..25a78d6c5382c 100644 --- a/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h +++ b/llvm/lib/Target/ARM/ARMMachineFunctionInfo.h @@ -89,8 +89,10 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned FRSaveSize = 0; unsigned GPRCS1Size = 0; unsigned GPRCS2Size = 0; + unsigned FPStatusSize = 0; unsigned DPRCSAlignGapSize = 0; - unsigned DPRCSSize = 0; + unsigned DPRCS1Size = 0; + unsigned GPRCS3Size = 0; /// NumAlignedDPRCS2Regs - The number of callee-saved DPRs that are saved in /// the aligned portion of the stack frame. This is always a contiguous @@ -204,15 +206,19 @@ class ARMFunctionInfo : public MachineFunctionInfo { unsigned getFrameRecordSavedAreaSize() const { return FRSaveSize; } unsigned getGPRCalleeSavedArea1Size() const { return GPRCS1Size; } unsigned getGPRCalleeSavedArea2Size() const { return GPRCS2Size; } + unsigned getFPStatusSavesSize() const { return FPStatusSize; } unsigned getDPRCalleeSavedGapSize() const { return DPRCSAlignGapSize; } - unsigned getDPRCalleeSavedAreaSize() const { return DPRCSSize; } + unsigned getDPRCalleeSavedArea1Size() const { return DPRCS1Size; } + unsigned getGPRCalleeSavedArea3Size() const { return GPRCS3Size; } void setFPCXTSaveAreaSize(unsigned s) { FPCXTSaveSize = s; } void setFrameRecordSavedAreaSize(unsigned s) { FRSaveSize = s; } void setGPRCalleeSavedArea1Size(unsigned s) { GPRCS1Size = s; } void setGPRCalleeSavedArea2Size(unsigned s) { GPRCS2Size = s; } + void setFPStatusSavesSize(unsigned s) { FPStatusSize = s; } void setDPRCalleeSavedGapSize(unsigned s) { DPRCSAlignGapSize = s; } - void setDPRCalleeSavedAreaSize(unsigned s) { DPRCSSize = s; } + void setDPRCalleeSavedArea1Size(unsigned s) { DPRCS1Size = s; } + void setGPRCalleeSavedArea3Size(unsigned s) { GPRCS3Size = s; } unsigned getArgumentStackSize() const { 
return ArgumentStackSize; } void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; } diff --git a/llvm/lib/Target/ARM/ARMRegisterInfo.td b/llvm/lib/Target/ARM/ARMRegisterInfo.td index 212f22651f9f9..e35790844bb6e 100644 --- a/llvm/lib/Target/ARM/ARMRegisterInfo.td +++ b/llvm/lib/Target/ARM/ARMRegisterInfo.td @@ -415,6 +415,13 @@ def VCCR : RegisterClass<"ARM", [i32, v16i1, v8i1, v4i1, v2i1], 32, (add VPR)> { // output to an instruction such as MVE VADC. def cl_FPSCR_NZCV : RegisterClass<"ARM", [i32], 32, (add FPSCR_NZCV)>; +// This RegisterClass is required to add FPSCR and FPEXC into a calling +// convention. +def FP_STATUS_REGS : RegisterClass<"ARM", [i32], 32, (add FPSCR, FPEXC)> { + let CopyCost = -1; // Don't allow copying of status registers. + let isAllocatable = 0; +} + // Scalar single precision floating point register class.. // FIXME: Allocation order changed to s0, s2, ... or s0, s4, ... as a quick hack // to avoid partial-write dependencies on D or Q (depending on platform) diff --git a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp index e908f1fb95124..f2d01547c6b76 100644 --- a/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/llvm/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -445,7 +445,7 @@ void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); - AFI->setDPRCalleeSavedAreaSize(DPRCSSize); + AFI->setDPRCalleeSavedArea1Size(DPRCSSize); if (RegInfo->hasStackRealignment(MF)) { const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); @@ -534,7 +534,7 @@ void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, NumBytes -= (AFI->getFrameRecordSavedAreaSize() + AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + - AFI->getDPRCalleeSavedAreaSize() + + AFI->getDPRCalleeSavedArea1Size() + ArgRegsSaveSize); // We are likely to need a scratch register and we know all callee-save diff --git 
a/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir new file mode 100644 index 0000000000000..e94790ab71411 --- /dev/null +++ b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr-status-regs.mir @@ -0,0 +1,280 @@ +# RUN: llc -mtriple=armv7-ti-none-eabihf -mcpu=cortex-r5 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-R-ARM %s +# RUN: llc -mtriple=armv7-ti-none-eabihf -mcpu=cortex-r4 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-R-ARM %s +# RUN: llc -mtriple=thumbv7-ti-none-eabihf -mcpu=cortex-r5 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-R-THUMB %s +# RUN: llc -mtriple=thumbv7-ti-none-eabihf -mcpu=cortex-r4 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-R-THUMB %s +# RUN: llc -mtriple=thumbv7-ti-none-eabihf -mcpu=cortex-m3 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s +# RUN: llc -mtriple=thumbv7-ti-none-eabihf -mcpu=cortex-m4 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s +# RUN: llc -mtriple=thumbv8-ti-none-eabihf -mcpu=cortex-m33 -o - %s -run-pass=prologepilog \ +# RUN: | FileCheck --check-prefix=CHECK-M-THUMB %s + +# ============================================================================= +# ============================ cortex-r arm-mode ============================== +# ============================================================================= +# This IRQ will save 112 bytes: +# +# |---------+------+----------| +# | reg | size | zone | +# |---------+------+----------| +# | LR | 4x1 | GPR | +# | R12-R10 | 4x3 | | +# | R5-R0 | 4x6 | | +# |---------+------+----------| +# | FPEXC | 4x1 | FPStatus | +# | FPSCR | 4x1 | | +# |---------+------+----------| +# | D7-D0 | 8x8 | FPRegs | +# |---------+------+----------| +# | | 112 | | +# |---------+------+----------| +# +# ================================= 
Prologue ================================= +# +# Frame pointer (r11) will be stored at $original_sp - 12, but we can't save the +# FP until after we save the GPR zone of registers. The GPR zone of registers +# moves the stack by 40 bytes. So $original_sp = $current_sp + 40. Thus, +# $current_sp + 40 - 12 = $current_sp + 28. Thus, we see the instruction: +# +# $r11 = ADDri $sp, 28 +# +# We don't have dwarf information for the FPEXC and FPSCR registers, so there's +# no CFI_INSTRUCTION for those saves. So, we should see an 8 byte disparity in +# the register offsets. $r0 is -40, and $d7 is -56. +# +# (-40) - (-56) = 16. +# 16 = 8 (bytes from $d7) + 8 (bytes from FPSCR + FPEXC) +# +# There's an extra BFC to force the stack to be aligned. +# +# $sp = BFC $sp, 4294967288 /* ~0x7 */ +# +# ================================= Epilogue ================================= +# +# We use the frame pointer to restore the SP. Since $r11 is currently pointing +# to the previous $r11's stack position (aka base_of_stack - 12), and we +# allocated 112 bytes, $sp = $r11 - (112 - 12), or $sp = $r11 - 100, which is +# why we see this instruction: +# +# $sp = SUBri $r11, 100 + +# CHECK-R-ARM-LABEL: name: irq_fn +# CHECK-R-ARM-LABEL: bb.0 (%ir-block.0): +# CHECK-R-ARM: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r0, killed $r1, killed $r2, killed $r3, killed $r4, killed $r5, killed $r10, killed $r11, killed $r12, killed $lr +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 40 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r12, -8 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r11, -12 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r10, -16 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -20 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -24 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r3, -28 +# CHECK-R-ARM-NEXT: frame-setup
CFI_INSTRUCTION offset $r2, -32 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r1, -36 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $r0, -40 +# CHECK-R-ARM-NEXT: $r11 = frame-setup ADDri killed $sp, 28, 14 /* CC::al */, $noreg, $noreg +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION def_cfa $r11, 12 +# CHECK-R-ARM-NEXT: $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr +# CHECK-R-ARM-NEXT: $r5 = frame-setup VMRS_FPEXC 14 /* CC::al */, $noreg, implicit $fpscr +# CHECK-R-ARM-NEXT: $sp = frame-setup STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5 +# CHECK-R-ARM-NEXT: $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d7, -56 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d6, -64 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d5, -72 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d4, -80 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d3, -88 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d2, -96 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d1, -104 +# CHECK-R-ARM-NEXT: frame-setup CFI_INSTRUCTION offset $d0, -112 +# CHECK-R-ARM-NEXT: $sp = BFC killed $sp, 4294967288, 14 /* CC::al */, $noreg +# CHECK-R-ARM-NEXT: BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp +# CHECK-R-ARM-NEXT: $sp = frame-destroy SUBri killed $r11, 100, 14 /* CC::al */, $noreg, $noreg +# CHECK-R-ARM-NEXT: $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7 +# CHECK-R-ARM-NEXT: $sp = frame-destroy LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5 +# CHECK-R-ARM-NEXT: frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr +# CHECK-R-ARM-NEXT: frame-destroy VMSR_FPEXC $r5, 14 /* CC::al */, $noreg, implicit-def $fpscr + +# 
============================================================================= +# =========================== cortex-r thumb-mode ============================= +# ============================================================================= +# This IRQ will save 112 bytes: +# +# |-------+------+----------| +# | reg | size | zone | +# |-------+------+----------| +# | LR | 4x1 | GPR | +# | R12 | 4x1 | | +# | R7-R0 | 4x8 | | +# |-------+------+----------| +# | FPEXC | 4x1 | FPStatus | +# | FPSCR | 4x1 | | +# |-------+------+----------| +# | D7-D0 | 8x8 | FPRegs | +# |-------+------+----------| +# | | 112 | | +# |-------+------+----------| +# +# ================================= Prologue ================================= +# +# Frame pointer (r7) will be stored at $original_sp - 12, but we can't save the +# FP until after we save the GPR zone of registers. The GPR zone of registers +# moves the stack by 40 bytes. So $original_sp = $current_sp + 40. Thus, +# $current_sp + 40 - 12 = $current_sp + 28. Thus, we see the instruction: +# +# $r7 = t2ADDri $sp, 28 +# +# We don't have dwarf information for the FPEXC and FPSCR registers, so there's +# no CFI_INSTRUCTION for those saves. So, we should see an 8 byte disparity in +# the register offsets. $r0 is -40, and $d7 is -56. +# +# (-40) - (-56) = 16. +# 16 = 8 (bytes from $d7) + 8 (bytes from FPSCR + FPEXC) +# +# There's an extra BFC to force the stack to be aligned. This is done in 3 +# steps, because the value of $sp needs to be moved into a low register +# (r0-r7), and then operated on, and then moved back. +# +# $r4 = tMOVr $sp, 14 +# $r4 = t2BFC $r4, 4294967288 /* ~0x7 */, 14 +# $sp = tMOVr $r4, 14 +# +# ================================= Epilogue ================================= +# +# We use the frame pointer to restore the SP. 
Since $r7 is currently pointing +# to the previous $r7's stack position (aka base_of_stack - 12), and we +# allocated 112 bytes, $sp = $r7 - (112 - 12), or $sp = $r7 - 100, which is +# why we see this instruction: +# +# $r4 = t2SUBri $r7, 100 +# $sp = tMOVr $r4 + +# CHECK-R-THUMB-LABEL: name: irq_fn +# CHECK-R-THUMB-LABEL: bb.0 (%ir-block.0): +# CHECK-R-THUMB: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r0, killed $r1, killed $r2, killed $r3, killed $r4, killed $r5, killed $r6, killed $r7, killed $r12, killed $lr +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 40 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r12, -8 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -12 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -16 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r5, -20 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -24 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r3, -28 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r2, -32 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r1, -36 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r0, -40 +# CHECK-R-THUMB-NEXT: $r7 = frame-setup t2ADDri killed $sp, 28, 14 /* CC::al */, $noreg, $noreg +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa $r7, 12 +# CHECK-R-THUMB-NEXT: $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr +# CHECK-R-THUMB-NEXT: $r5 = frame-setup VMRS_FPEXC 14 /* CC::al */, $noreg, implicit $fpscr +# CHECK-R-THUMB-NEXT: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4, $r5 +# CHECK-R-THUMB-NEXT: $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d7, -56 +# CHECK-R-THUMB-NEXT: frame-setup 
CFI_INSTRUCTION offset $d6, -64 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d5, -72 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d4, -80 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d3, -88 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d2, -96 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d1, -104 +# CHECK-R-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d0, -112 +# CHECK-R-THUMB-NEXT: $r4 = tMOVr killed $sp, 14 /* CC::al */, $noreg +# CHECK-R-THUMB-NEXT: $r4 = t2BFC killed $r4, 4294967288, 14 /* CC::al */, $noreg +# CHECK-R-THUMB-NEXT: $sp = tMOVr killed $r4, 14 /* CC::al */, $noreg +# CHECK-R-THUMB-NEXT: BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp +# CHECK-R-THUMB-NEXT: $r4 = frame-destroy t2SUBri killed $r7, 100, 14 /* CC::al */, $noreg, $noreg +# CHECK-R-THUMB-NEXT: $sp = frame-destroy tMOVr $r4, 14 /* CC::al */, $noreg +# CHECK-R-THUMB-NEXT: $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7 +# CHECK-R-THUMB-NEXT: $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r5 +# CHECK-R-THUMB-NEXT: frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr +# CHECK-R-THUMB-NEXT: frame-destroy VMSR_FPEXC $r5, 14 /* CC::al */, $noreg, implicit-def $fpscr +# CHECK-R-THUMB-NEXT: $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r0, def $r1, def $r2, def $r3, def $r4, def $r5, def $r6, def $r7, def $r12, def $lr +# CHECK-R-THUMB-NEXT: SUBS_PC_LR 4, 14 /* CC::al */, $noreg + +# ============================================================================= +# ============================== cortex-m thumb =============================== +# ============================================================================= +# This IRQ will save 88 bytes: +# +# |---------+------+----------| +# | reg | size | zone | +# |---------+------+----------| +# | LR 
| 4x1 | GPR | +# | R7-R6 | 4x2 | | +# | R4 | 4x1 | | +# |---------+------+----------| +# | FPSCR | 4x1 | FPStatus | +# |---------+------+----------| +# | EMPTY | 4x1 | Align | +# |---------+------+----------| +# | D7-D0 | 8x8 | FPRegs | +# |---------+------+----------| +# | | 88 | | +# |---------+------+----------| + +# CHECK-M-THUMB-LABEL: name: irq_fn +# CHECK-M-THUMB-LABEL: bb.0 (%ir-block.0): +# CHECK-M-THUMB: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r4, killed $r6, killed $r7, killed $lr +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $lr, -4 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r7, -8 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r6, -12 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $r4, -16 +# CHECK-M-THUMB-NEXT: $r7 = frame-setup t2ADDri killed $sp, 8, 14 /* CC::al */, $noreg, $noreg +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION def_cfa $r7, 8 +# CHECK-M-THUMB-NEXT: $r4 = frame-setup VMRS 14 /* CC::al */, $noreg, implicit $fpscr +# CHECK-M-THUMB-NEXT: $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, $r4 +# CHECK-M-THUMB-NEXT: $sp = frame-setup tSUBspi $sp, 1, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: $sp = frame-setup VSTMDDB_UPD $sp, 14 /* CC::al */, $noreg, killed $d0, killed $d1, killed $d2, killed $d3, killed $d4, killed $d5, killed $d6, killed $d7 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d7, -32 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d6, -40 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d5, -48 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d4, -56 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d3, -64 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d2, -72 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d1, -80 +# CHECK-M-THUMB-NEXT: frame-setup CFI_INSTRUCTION offset $d0, -88 +# 
CHECK-M-THUMB-NEXT: $r4 = tMOVr killed $sp, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: $r4 = t2BFC killed $r4, 4294967288, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: $sp = tMOVr killed $r4, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp +# CHECK-M-THUMB-NEXT: $r4 = frame-destroy t2SUBri killed $r7, 80, 14 /* CC::al */, $noreg, $noreg +# CHECK-M-THUMB-NEXT: $sp = frame-destroy tMOVr $r4, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: $sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d0, def $d1, def $d2, def $d3, def $d4, def $d5, def $d6, def $d7 +# CHECK-M-THUMB-NEXT: $sp = frame-destroy tADDspi $sp, 1, 14 /* CC::al */, $noreg +# CHECK-M-THUMB-NEXT: $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4 +# CHECK-M-THUMB-NEXT: frame-destroy VMSR $r4, 14 /* CC::al */, $noreg, implicit-def $fpscr +# CHECK-M-THUMB-NEXT: $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r4, def $r6, def $r7, def $lr +# CHECK-M-THUMB-NEXT: SUBS_PC_LR 4, 14 /* CC::al */, $noreg + +--- | + ; ModuleID = '/scratch/benson/tools2/llvm_cgt/llvm-project/llvm/test/CodeGen/ARM/fp-attr-fpscr.ll' + source_filename = "/scratch/benson/tools2/llvm_cgt/llvm-project/llvm/test/CodeGen/ARM/fp-attr-fpscr.ll" + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + + declare arm_aapcscc void @bar() + + ; Function Attrs: alignstack(8) + define arm_aapcscc void @irq_fn() #1 { + call arm_aapcscc void @bar() + ret void + } + + attributes #1 = { alignstack=8 "interrupt"="IRQ" "save-fp" } + +... 
+--- +name: irq_fn +alignment: 16 +frameInfo: + adjustsStack: true + hasCalls: true +body: | + bb.0 (%ir-block.0): + ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp + BL @bar, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, -1, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp + SUBS_PC_LR 4, 14 /* CC::al */, $noreg +... diff --git a/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll new file mode 100644 index 0000000000000..e96dfa4a38e12 --- /dev/null +++ b/llvm/test/CodeGen/ARM/interrupt-save-fp-attr.ll @@ -0,0 +1,303 @@ +; RUN: llc -mtriple=arm-ti-none-eabihf -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A %s +; RUN: llc -mtriple=thumb-ti-none-eabihf -mcpu=cortex-a15 -o - %s | FileCheck --check-prefix=CHECK-A-THUMB %s +; RUN: llc -mtriple=thumb-ti-none-eabihf -mcpu=cortex-m4 -o - %s | FileCheck --check-prefix=CHECK-M %s +; RUN: llc -mtriple=thumbv7em-ti-none-eabihf -mcpu=cortex-m4 -o - %s | FileCheck --check-prefix=CHECK-M %s +; RUN: llc -mtriple=thumbv7r5-ti-none-eabihf -mcpu=cortex-r5 -o - %s | FileCheck --check-prefix=CHECK-R-THUMB %s +; RUN: llc -mtriple=armv7r5-ti-none-eabihf -mcpu=cortex-r5 -o - %s | FileCheck --check-prefix=CHECK-R %s + +declare arm_aapcscc void @bar() + +@bigvar = global [16 x i32] zeroinitializer + +define arm_aapcscc void @irq_fn() alignstack(8) "interrupt"="IRQ" "save-fp"{ + ; Must save all registers except banked sp and lr (we save lr anyway because + ; we actually need it at the end to execute the return ourselves). + + ; Also need special function return setting pc and CPSR simultaneously. + ; CHECK-A-LABEL: irq_fn: + ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A: add r11, sp, #28 + ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] 
+ ; CHECK-A: sub sp, r11, #228 + ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A-LABEL: .Lfunc_end0 + + ; CHECK-A-THUMB-LABEL: irq_fn: + ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB: add r7, sp, #28 + ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] + ; CHECK-A-THUMB: sub.w r4, r7, #228 + ; CHECK-A-THUMB: mov sp, r4 + ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB-LABEL: .Lfunc_end0 + + ; CHECK-R-LABEL: irq_fn: + ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R: add r11, sp, #28 + ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-R: sub sp, r11, #100 + ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R-LABEL: .Lfunc_end0 + + ; CHECK-R-THUMB-LABEL: irq_fn: + ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB: add r7, sp, #28 + ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-R-THUMB: sub.w r4, r7, #100 + ; CHECK-R-THUMB: mov sp, r4 + ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB-LABEL: .Lfunc_end0 + + ; Normal AAPCS function (r0-r3 pushed onto stack by hardware, lr set to + ; appropriate sentinel so no special return needed). + ; CHECK-M-LABEL: irq_fn: + ; CHECK-M: push {r4, r6, r7, lr} + ; CHECK-M: add r7, sp, #8 + ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] 
+ ; CHECK-M: sub.w r4, r7, #80 + ; CHECK-M: mov sp, r4 + ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-M: pop {r4, r6, r7, pc} + ; CHECK-M-LABEL: .Lfunc_end0 + + call arm_aapcscc void @bar() + ret void +} + +; We don't push/pop r12, as it is banked for FIQ +define arm_aapcscc void @fiq_fn() alignstack(8) "interrupt"="FIQ" "save-fp" { ; no call here, only a volatile copy of @bigvar; the directives below match integer push/pop only + ; CHECK-A-LABEL: fiq_fn: + ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-A: add r11, sp, #32 + ; [...] + ; CHECK-A: sub sp, r11, #40 + ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-A-LABEL: .Lfunc_end1 + + ; CHECK-A-THUMB-LABEL: fiq_fn: + ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-A-THUMB: add r7, sp, #28 + ; [...] + ; CHECK-A-THUMB: sub.w r4, r7, #36 + ; CHECK-A-THUMB: mov sp, r4 + ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-A-THUMB-LABEL: .Lfunc_end1 + + ; CHECK-R-LABEL: fiq_fn: + ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-R: add r11, sp, #32 + ; [...] + ; CHECK-R: sub sp, r11, #40 + ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-R-LABEL: .Lfunc_end1 + + ; CHECK-R-THUMB-LABEL: fiq_fn: + ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-R-THUMB: add r7, sp, #28 + ; [...] + ; CHECK-R-THUMB: sub.w r4, r7, #36 + ; CHECK-R-THUMB: mov sp, r4 + ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r11, lr} + ; CHECK-R-THUMB-LABEL: .Lfunc_end1 + + ; CHECK-M-LABEL: fiq_fn: + ; CHECK-M: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ; CHECK-M: add r7, sp, #12 + ; [...] 
+ ; CHECK-M: sub.w r4, r7, #16 + ; CHECK-M: mov sp, r4 + ; CHECK-M: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + ; CHECK-M-LABEL: .Lfunc_end1 + + %val = load volatile [16 x i32], [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @swi_fn() alignstack(8) "interrupt"="SWI" "save-fp" { ; no call here, only a volatile copy of @bigvar; the directives below match integer push/pop only + ; CHECK-A-LABEL: swi_fn: + ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-A: add r11, sp, #44 + ; [...] + ; CHECK-A: sub sp, r11, #52 + ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-A-LABEL: .Lfunc_end2 + + ; CHECK-A-THUMB-LABEL: swi_fn: + ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-A-THUMB: add r7, sp, #28 + ; [...] + ; CHECK-A-THUMB: sub.w r4, r7, #36 + ; CHECK-A-THUMB: mov sp, r4 + ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-A-THUMB-LABEL: .Lfunc_end2 + + ; CHECK-R-LABEL: swi_fn: + ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-R: add r11, sp, #44 + ; [...] + ; CHECK-R: sub sp, r11, #52 + ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-R-LABEL: .Lfunc_end2 + + ; CHECK-R-THUMB-LABEL: swi_fn: + ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-R-THUMB: add r7, sp, #28 + ; [...] + ; CHECK-R-THUMB: sub.w r4, r7, #36 + ; CHECK-R-THUMB: mov sp, r4 + ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, lr} + ; CHECK-R-THUMB-LABEL: .Lfunc_end2 + + ; CHECK-M-LABEL: swi_fn: + ; CHECK-M: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} + ; CHECK-M: add r7, sp, #12 + ; [...] 
+ ; CHECK-M: sub.w r4, r7, #16 + ; CHECK-M: mov sp, r4 + ; CHECK-M: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} + ; CHECK-M-LABEL: .Lfunc_end2 + + %val = load volatile [16 x i32], [16 x i32]* @bigvar + store volatile [16 x i32] %val, [16 x i32]* @bigvar + ret void +} + +define arm_aapcscc void @undef_fn() alignstack(8) "interrupt"="UNDEF" "save-fp" { ; calls @bar; the directives below also match vpush/vpop of d-registers + ; CHECK-A-LABEL: undef_fn: + ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A: add r11, sp, #28 + ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] + ; CHECK-A: sub sp, r11, #228 + ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A-LABEL: .Lfunc_end3 + + ; CHECK-A-THUMB-LABEL: undef_fn: + ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB: add r7, sp, #28 + ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] + ; CHECK-A-THUMB: sub.w r4, r7, #228 + ; CHECK-A-THUMB: mov sp, r4 + ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB-LABEL: .Lfunc_end3 + + ; CHECK-R-LABEL: undef_fn: + ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R: add r11, sp, #28 + ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] 
+ ; CHECK-R: sub sp, r11, #100 + ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R-LABEL: .Lfunc_end3 + + ; CHECK-R-THUMB-LABEL: undef_fn: + ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB: add r7, sp, #28 + ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-R-THUMB: sub.w r4, r7, #100 + ; CHECK-R-THUMB: mov sp, r4 + ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB-LABEL: .Lfunc_end3 + + ; CHECK-M-LABEL: undef_fn: + ; CHECK-M: push {r4, r6, r7, lr} + ; CHECK-M: add r7, sp, #8 + ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-M: sub.w r4, r7, #80 + ; CHECK-M: mov sp, r4 + ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-M: pop {r4, r6, r7, pc} + ; CHECK-M-LABEL: .Lfunc_end3 + + call arm_aapcscc void @bar() + ret void +} + +define arm_aapcscc void @abort_fn() alignstack(8) "interrupt"="ABORT" "save-fp" { ; calls @bar; the directives below also match vpush/vpop of d-registers + ; CHECK-A-LABEL: abort_fn: + ; CHECK-A: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A: add r11, sp, #28 + ; CHECK-A: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] + ; CHECK-A: sub sp, r11, #228 + ; CHECK-A: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-A-LABEL: .Lfunc_end4 + + ; CHECK-A-THUMB-LABEL: abort_fn: + ; CHECK-A-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB: add r7, sp, #28 + ; CHECK-A-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: vpush {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; [...] 
+ ; CHECK-A-THUMB: sub.w r4, r7, #228 + ; CHECK-A-THUMB: mov sp, r4 + ; CHECK-A-THUMB: vpop {d16, d17, d18, d19, d20, d21, d22, d23, d24, d25, d26, d27, d28, d29, d30, d31} + ; CHECK-A-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-A-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-A-THUMB-LABEL: .Lfunc_end4 + + ; CHECK-R-LABEL: abort_fn: + ; CHECK-R: push {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R: add r11, sp, #28 + ; CHECK-R: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-R: sub sp, r11, #100 + ; CHECK-R: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R: pop {r0, r1, r2, r3, r4, r5, r10, r11, r12, lr} + ; CHECK-R-LABEL: .Lfunc_end4 + + ; CHECK-R-THUMB-LABEL: abort_fn: + ; CHECK-R-THUMB: push.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB: add r7, sp, #28 + ; CHECK-R-THUMB: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-R-THUMB: sub.w r4, r7, #100 + ; CHECK-R-THUMB: mov sp, r4 + ; CHECK-R-THUMB: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-R-THUMB: pop.w {r0, r1, r2, r3, r4, r5, r6, r7, r12, lr} + ; CHECK-R-THUMB-LABEL: .Lfunc_end4 + + ; CHECK-M-LABEL: abort_fn: + ; CHECK-M: push {r4, r6, r7, lr} + ; CHECK-M: add r7, sp, #8 + ; CHECK-M: vpush {d0, d1, d2, d3, d4, d5, d6, d7} + ; [...] + ; CHECK-M: sub.w r4, r7, #80 + ; CHECK-M: mov sp, r4 + ; CHECK-M: vpop {d0, d1, d2, d3, d4, d5, d6, d7} + ; CHECK-M: pop {r4, r6, r7, pc} + ; CHECK-M-LABEL: .Lfunc_end4 + + call arm_aapcscc void @bar() + ret void +} + +@var = global double 0.0