diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
index aec764d5bc7a0..406f9bdddaec0 100644
--- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
+++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
@@ -1169,8 +1169,6 @@ INITIALIZE_PASS(MachineSMEABI, "aarch64-machine-sme-abi", "Machine SME ABI",
 
 bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
   Subtarget = &MF.getSubtarget<AArch64Subtarget>();
-  if (!Subtarget->hasSME())
-    return false;
 
   AFI = MF.getInfo<AArch64FunctionInfo>();
   SMEAttrs SMEFnAttrs = AFI->getSMEFnAttrs();
@@ -1178,6 +1176,12 @@ bool MachineSMEABI::runOnMachineFunction(MachineFunction &MF) {
       !SMEFnAttrs.hasAgnosticZAInterface())
     return false;
 
+  // Agnostic-ZA functions must preserve ZA around private-ZA calls even when
+  // built without +sme (using the __arm_sme_save/__arm_sme_restore routines),
+  // so only skip non-SME targets when the function is not agnostic-ZA.
+  if (!Subtarget->hasSME() && !SMEFnAttrs.hasAgnosticZAInterface())
+    return false;
+
   assert(MF.getRegInfo().isSSA() && "Expected to be run on SSA form!");
 
   this->MF = &MF;
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za-without-sme.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za-without-sme.ll
new file mode 100644
index 0000000000000..43fbf96d8b472
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za-without-sme.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s | FileCheck %s
+
+target triple = "aarch64"
+
+; Check we preserve ZA in agnostic ZA functions without +sme.
+define void @agnostic_calls_private_za() "aarch64_za_state_agnostic" {
+; CHECK-LABEL: agnostic_calls_private_za:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK-NEXT:    str x19, [sp, #16] // 8-byte Spill
+; CHECK-NEXT:    mov x29, sp
+; CHECK-NEXT:    .cfi_def_cfa w29, 32
+; CHECK-NEXT:    .cfi_offset w19, -16
+; CHECK-NEXT:    .cfi_offset w30, -24
+; CHECK-NEXT:    .cfi_offset w29, -32
+; CHECK-NEXT:    bl __arm_sme_state_size
+; CHECK-NEXT:    sub sp, sp, x0
+; CHECK-NEXT:    mov x19, sp
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_save
+; CHECK-NEXT:    bl private_za
+; CHECK-NEXT:    mov x0, x19
+; CHECK-NEXT:    bl __arm_sme_restore
+; CHECK-NEXT:    mov sp, x29
+; CHECK-NEXT:    ldr x19, [sp, #16] // 8-byte Reload
+; CHECK-NEXT:    ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK-NEXT:    ret
+  tail call void @private_za()
+  ret void
+}
+
+declare void @private_za()