diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 7ff860beff3ab..da7f4310839bb 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -5032,8 +5032,6 @@ bool AArch64FastISel::selectAtomicCmpXchg(const AtomicCmpXchgInst *I) { } bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { - if (TLI.fallBackToDAGISel(*I)) - return false; switch (I->getOpcode()) { default: break; @@ -5116,10 +5114,5 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) { FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { - - SMEAttrs CallerAttrs(*FuncInfo.Fn); - if (CallerAttrs.hasZAState() || - (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody())) - return nullptr; return new AArch64FastISel(FuncInfo, LibInfo); } diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ead3fd7f2cd3b..cc77a37f5998c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -22054,15 +22054,6 @@ bool AArch64TargetLowering::fallBackToDAGISel(const Instruction &Inst) const { return true; } - // Checks to allow the use of SME instructions - if (auto *Base = dyn_cast<CallBase>(&Inst)) { - auto CallerAttrs = SMEAttrs(*Inst.getFunction()); - auto CalleeAttrs = SMEAttrs(*Base); - if (CallerAttrs.requiresSMChange(CalleeAttrs, - /*BodyOverridesInterface=*/false) || - CallerAttrs.requiresLazySave(CalleeAttrs)) - return true; - } return false; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 0879826202ca0..972a0dcbf66ed 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -537,12 +537,6 @@ bool AArch64CallLowering::fallBackToDAGISel(const
MachineFunction &MF) const { LLVM_DEBUG(dbgs() << "Falling back to SDAG because we don't support no-NEON\n"); return true; } - - SMEAttrs Attrs(F); - if (Attrs.hasNewZAInterface() || - (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody())) - return true; - return false; } diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll deleted file mode 100644 index 3f83419e3fe15..0000000000000 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ /dev/null @@ -1,359 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -fast-isel=true -global-isel=false -fast-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-FISEL -; RUN: llc -fast-isel=false -global-isel=true -global-isel-abort=0 -mtriple=aarch64-linux-gnu -mattr=+sme < %s \ -; RUN: | FileCheck %s --check-prefixes=CHECK-COMMON,CHECK-GISEL - - -declare double @streaming_callee(double) "aarch64_pstate_sm_enabled" -declare double @normal_callee(double) - -define double @nonstreaming_caller_streaming_callee(double %x) nounwind noinline optnone { -; CHECK-FISEL-LABEL: nonstreaming_caller_streaming_callee: -; CHECK-FISEL: // %bb.0: // %entry -; CHECK-FISEL-NEXT: sub sp, sp, #96 -; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: bl streaming_callee -; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstop sm -; CHECK-FISEL-NEXT: adrp x8, .LCPI0_0 -; 
CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI0_0] -; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: fadd d0, d1, d0 -; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: add sp, sp, #96 -; CHECK-FISEL-NEXT: ret -; -; CHECK-GISEL-LABEL: nonstreaming_caller_streaming_callee: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: sub sp, sp, #96 -; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: bl streaming_callee -; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 -; CHECK-GISEL-NEXT: fmov d0, x8 -; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: fadd d0, d1, d0 -; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: add sp, sp, #96 -; CHECK-GISEL-NEXT: ret -entry: - %call = call double @streaming_callee(double %x) "aarch64_pstate_sm_enabled" - %add = 
fadd double %call, 4.200000e+01 - ret double %add -} - - -define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" { -; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-FISEL: // %bb.0: // %entry -; CHECK-FISEL-NEXT: sub sp, sp, #96 -; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstop sm -; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: bl normal_callee -; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0 -; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] -; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: fadd d0, d1, d0 -; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: add sp, sp, #96 -; CHECK-FISEL-NEXT: ret -; -; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: sub sp, sp, #96 -; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill 
-; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: bl normal_callee -; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 -; CHECK-GISEL-NEXT: fmov d0, x8 -; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: fadd d0, d1, d0 -; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: add sp, sp, #96 -; CHECK-GISEL-NEXT: ret -entry: - %call = call double @normal_callee(double %x) - %add = fadd double %call, 4.200000e+01 - ret double %add -} - -define double @locally_streaming_caller_normal_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_body" { -; CHECK-COMMON-LABEL: locally_streaming_caller_normal_callee: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: sub sp, sp, #96 -; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp, #88] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: bl normal_callee -; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 -; CHECK-COMMON-NEXT: fmov d0, x8 -; 
CHECK-COMMON-NEXT: ldr d1, [sp, #8] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: fadd d0, d1, d0 -; CHECK-COMMON-NEXT: str d0, [sp] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr d0, [sp] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #96 -; CHECK-COMMON-NEXT: ret - %call = call double @normal_callee(double %x); - %add = fadd double %call, 4.200000e+01 - ret double %add; -} - -define double @normal_caller_to_locally_streaming_callee(double %x) nounwind noinline optnone { -; CHECK-COMMON-LABEL: normal_caller_to_locally_streaming_callee: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-COMMON-NEXT: bl locally_streaming_caller_normal_callee -; CHECK-COMMON-NEXT: adrp x8, .LCPI3_0 -; CHECK-COMMON-NEXT: ldr d1, [x8, :lo12:.LCPI3_0] -; CHECK-COMMON-NEXT: fadd d0, d0, d1 -; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ret - %call = call double @locally_streaming_caller_normal_callee(double %x) "aarch64_pstate_sm_body"; - %add = fadd double %call, 4.200000e+01 - ret double %add; -} - -; Check attribute in the call itself - -define void @locally_streaming_caller_streaming_callee_ptr(ptr %p) nounwind noinline optnone "aarch64_pstate_sm_body" { -; CHECK-COMMON-LABEL: locally_streaming_caller_streaming_callee_ptr: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: blr x0 -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ret - call void %p() "aarch64_pstate_sm_enabled" - ret void -} - -define void @normal_call_to_streaming_callee_ptr(ptr %p) nounwind noinline optnone { -; CHECK-COMMON-LABEL: normal_call_to_streaming_callee_ptr: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: blr x0 -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ret - call void %p() "aarch64_pstate_sm_enabled" - ret void -} - -; -; Check ZA state -; - -declare double @za_shared_callee(double) "aarch64_pstate_za_shared" - -define double @za_new_caller_to_za_shared_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_new"{ -; CHECK-COMMON-LABEL: za_new_caller_to_za_shared_callee: -; CHECK-COMMON: // %bb.0: // %prelude -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: sub sp, sp, #16 -; CHECK-COMMON-NEXT: rdsvl x8, #1 -; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: msub x8, x8, x8, x9 -; CHECK-COMMON-NEXT: mov sp, x8 -; CHECK-COMMON-NEXT: str x8, [x29] -; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-COMMON-NEXT: cbz x8, .LBB6_2 -; CHECK-COMMON-NEXT: b .LBB6_1 -; CHECK-COMMON-NEXT: .LBB6_1: // %save.za -; CHECK-COMMON-NEXT: bl __arm_tpidr2_save -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr -; CHECK-COMMON-NEXT: b .LBB6_2 -; CHECK-COMMON-NEXT: .LBB6_2: // %entry -; CHECK-COMMON-NEXT: smstart za -; CHECK-COMMON-NEXT: bl za_shared_callee -; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 -; CHECK-COMMON-NEXT: fmov d1, x8 -; CHECK-COMMON-NEXT: fadd d0, d0, d1 -; CHECK-COMMON-NEXT: smstop za -; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ret -entry: - %call = call double @za_shared_callee(double %x) - %add = fadd double %call, 4.200000e+01 - ret double %add; -} - -define double @za_shared_caller_to_za_none_callee(double %x) nounwind noinline optnone "aarch64_pstate_za_shared"{ -; CHECK-COMMON-LABEL: za_shared_caller_to_za_none_callee: -; CHECK-COMMON: // %bb.0: // %entry -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: sub sp, sp, #16 -; CHECK-COMMON-NEXT: rdsvl x8, #1 -; CHECK-COMMON-NEXT: mul x8, x8, x8 -; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: subs x9, x9, x8 -; CHECK-COMMON-NEXT: mov sp, x9 -; CHECK-COMMON-NEXT: str x9, [x29] -; CHECK-COMMON-NEXT: sturh w8, [x29, #-8] -; CHECK-COMMON-NEXT: sub x8, x29, #16 -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x8 -; CHECK-COMMON-NEXT: bl normal_callee -; CHECK-COMMON-NEXT: smstart za -; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-COMMON-NEXT: sub x0, x29, #16 -; CHECK-COMMON-NEXT: cbz x8, .LBB7_1 -; CHECK-COMMON-NEXT: b .LBB7_2 -; CHECK-COMMON-NEXT: .LBB7_1: // %entry -; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore -; CHECK-COMMON-NEXT: b .LBB7_2 -; CHECK-COMMON-NEXT: .LBB7_2: // %entry -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr -; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 -; CHECK-COMMON-NEXT: fmov d1, x8 -; CHECK-COMMON-NEXT: fadd d0, d0, d1 -; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ret -entry: - %call = call double @normal_callee(double %x) - %add = fadd double %call, 4.200000e+01 - ret double %add; -} - -; Ensure we set up and restore the lazy save correctly for instructions which are lowered to lib calls. -define fp128 @f128_call_za(fp128 %a, fp128 %b) "aarch64_pstate_za_shared" nounwind { -; CHECK-COMMON-LABEL: f128_call_za: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill -; CHECK-COMMON-NEXT: mov x29, sp -; CHECK-COMMON-NEXT: sub sp, sp, #16 -; CHECK-COMMON-NEXT: rdsvl x8, #1 -; CHECK-COMMON-NEXT: mov x9, sp -; CHECK-COMMON-NEXT: mul x8, x8, x8 -; CHECK-COMMON-NEXT: sub x9, x9, x8 -; CHECK-COMMON-NEXT: mov sp, x9 -; CHECK-COMMON-NEXT: sub x10, x29, #16 -; CHECK-COMMON-NEXT: sturh w8, [x29, #-8] -; CHECK-COMMON-NEXT: str x9, [x29] -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x10 -; CHECK-COMMON-NEXT: bl __addtf3 -; CHECK-COMMON-NEXT: smstart za -; CHECK-COMMON-NEXT: add x0, x29, #0 -; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-COMMON-NEXT: cbnz x8, .LBB8_2 -; CHECK-COMMON-NEXT: // %bb.1: -; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore -; CHECK-COMMON-NEXT: .LBB8_2: -; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr -; CHECK-COMMON-NEXT: mov sp, x29 -; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ret - %res = fadd fp128 %a, %b - ret fp128 %res -} - - -; Ensure we fall back to SelectionDAG isel here so that we temporarily disable streaming mode to lower the fadd (with function calls). 
-define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounwind { -; CHECK-COMMON-LABEL: f128_call_sm: -; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: sub sp, sp, #112 -; CHECK-COMMON-NEXT: stp d15, d14, [sp, #32] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d13, d12, [sp, #48] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d11, d10, [sp, #64] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: stp d9, d8, [sp, #80] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: str x30, [sp, #96] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: stp q0, q1, [sp] // 32-byte Folded Spill -; CHECK-COMMON-NEXT: smstop sm -; CHECK-COMMON-NEXT: ldp q0, q1, [sp] // 32-byte Folded Reload -; CHECK-COMMON-NEXT: bl __addtf3 -; CHECK-COMMON-NEXT: str q0, [sp, #16] // 16-byte Folded Spill -; CHECK-COMMON-NEXT: smstart sm -; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #80] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #64] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #48] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #32] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr q0, [sp, #16] // 16-byte Folded Reload -; CHECK-COMMON-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload -; CHECK-COMMON-NEXT: add sp, sp, #112 -; CHECK-COMMON-NEXT: ret - %res = fadd fp128 %a, %b - ret fp128 %res -}