From c8fa6c1a2f01cd569d872e7debe91aa85a788e0e Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Fri, 26 Sep 2025 16:58:28 +0000 Subject: [PATCH 1/3] [AArch64][SME] Fixup ABI routine insertion points to avoid clobbering NZCV This updates the `MachineSMEABIPass` to find insertion points for state changes (i.e., calls to ABI routines), where the NZCV register (status flags) are not live. It works by stepping backwards from where the state change is needed until we find an instruction where NZCV is not live, a previous state change, or a call sequence. We conservatively don't move into/over calls, as they may require a different state before the start of the call sequence. Change-Id: I545f1dec92dba406745c11e4e9be5a245770f2c7 --- llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 100 ++++++-- .../machine-sme-abi-find-insert-pt.mir | 227 ++++++++++++++++++ llvm/test/CodeGen/AArch64/sme-agnostic-za.ll | 4 +- .../AArch64/sme-lazy-sve-nzcv-live.mir | 12 +- 4 files changed, 310 insertions(+), 33 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index cced0faa28889..b1503ebc6e92f 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); + /// Attempts to find an insertion point before \p Inst where the status flags + /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of + /// the block is found. + std::pair + findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl::const_iterator Inst); void emitStateChange(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ZAState From, ZAState To, LiveRegs PhysLiveRegs); @@ -337,6 +343,28 @@ struct MachineSMEABI : public MachineFunctionPass { MachineRegisterInfo *MRI = nullptr; }; +static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) { + LiveRegs PhysLiveRegs = LiveRegs::None; + if (!LiveUnits.available(AArch64::NZCV)) + PhysLiveRegs |= LiveRegs::NZCV; + // We have to track W0 and X0 separately as otherwise things can get + // confused if we attempt to preserve X0 but only W0 was defined. + if (!LiveUnits.available(AArch64::W0)) + PhysLiveRegs |= LiveRegs::W0; + if (!LiveUnits.available(AArch64::W0_HI)) + PhysLiveRegs |= LiveRegs::W0_HI; + return PhysLiveRegs; +} + +static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { + if (PhysLiveRegs & LiveRegs::NZCV) + LiveUnits.addReg(AArch64::NZCV); + if (PhysLiveRegs & LiveRegs::W0) + LiveUnits.addReg(AArch64::W0); + if (PhysLiveRegs & LiveRegs::W0_HI) + LiveUnits.addReg(AArch64::W0_HI); +} + FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && @@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { LiveRegUnits LiveUnits(*TRI); LiveUnits.addLiveOuts(MBB); - auto GetPhysLiveRegs = [&] { - LiveRegs PhysLiveRegs = LiveRegs::None; - if (!LiveUnits.available(AArch64::NZCV)) - PhysLiveRegs |= LiveRegs::NZCV; - // We have to track W0 and X0 separately as otherwise things can get - // confused if we attempt to preserve X0 but only W0 was defined. - if (!LiveUnits.available(AArch64::W0)) - PhysLiveRegs |= LiveRegs::W0; - if (!LiveUnits.available(AArch64::W0_HI)) - PhysLiveRegs |= LiveRegs::W0_HI; - return PhysLiveRegs; - }; - - Block.PhysLiveRegsAtExit = GetPhysLiveRegs(); + Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits); auto FirstTerminatorInsertPt = MBB.getFirstTerminator(); auto FirstNonPhiInsertPt = MBB.getFirstNonPHI(); for (MachineInstr &MI : reverse(MBB)) { MachineBasicBlock::iterator MBBI(MI); LiveUnits.stepBackward(MI); - LiveRegs PhysLiveRegs = GetPhysLiveRegs(); + LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits); // The SMEStateAllocPseudo marker is added to a function if the save // buffer was allocated in SelectionDAG. It marks the end of the // allocation -- which is a safe point for this pass to insert any TPIDR2 @@ -476,6 +491,41 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, return BundleStates; } +std::pair +MachineSMEABI::findStateChangeInsertionPoint( + MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl::const_iterator Inst) { + LiveRegs PhysLiveRegs; + MachineBasicBlock::iterator InsertPt; + if (Inst != Block.Insts.end()) { + InsertPt = Inst->InsertPt; + PhysLiveRegs = Inst->PhysLiveRegs; + } else { + InsertPt = MBB.getFirstTerminator(); + PhysLiveRegs = Block.PhysLiveRegsAtExit; + } + if (!(PhysLiveRegs & LiveRegs::NZCV)) + return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags). + // Find the previous state change. We can not move before this point. + MachineBasicBlock::iterator PrevStateChangeI; + if (Inst == Block.Insts.begin()) + PrevStateChangeI = MBB.begin(); + else + PrevStateChangeI = std::prev(Inst)->InsertPt; + // Note: LiveUnits will only accurately track X0 and NZCV. + LiveRegUnits LiveUnits(*TRI); + setPhysLiveRegs(LiveUnits, PhysLiveRegs); + for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) { + // Don't move before/into a call (which may have a state change before it). + if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall()) + break; + LiveUnits.stepBackward(*I); + if (LiveUnits.available(AArch64::NZCV)) + return {I, getPhysLiveRegs(LiveUnits)}; + } + return {InsertPt, PhysLiveRegs}; +} + void MachineSMEABI::insertStateChanges(EmitContext &Context, const FunctionInfo &FnInfo, const EdgeBundles &Bundles, @@ -490,10 +540,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, CurrentState = InState; for (auto &Inst : Block.Insts) { - if (CurrentState != Inst.NeededState) - emitStateChange(Context, MBB, Inst.InsertPt, CurrentState, - Inst.NeededState, Inst.PhysLiveRegs); - CurrentState = Inst.NeededState; + if (CurrentState != Inst.NeededState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, &Inst); + emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState, + PhysLiveRegs); + CurrentState = Inst.NeededState; + } } if (MBB.succ_empty()) @@ -501,9 +554,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, ZAState OutState = BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)]; - if (CurrentState != OutState) - emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState, - OutState, Block.PhysLiveRegsAtExit); + if (CurrentState != OutState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, Block.Insts.end()); + emitStateChange(Context, MBB, InsertPt, CurrentState, OutState, + PhysLiveRegs); + } } } diff --git a/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir new file mode 100644 index 0000000000000..3f174a62128a8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir @@ -0,0 +1,227 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -run-pass=aarch64-machine-sme-abi -verify-machineinstrs %s -o - | FileCheck %s + +--- | + ; Test moving a state change to be before a $nzcv def + define void @move_before_nzcv_def() "aarch64_inout_za" { ret void } + + ; Test moving a state change to a point where $x0 is live + define void @move_to_x0_live() "aarch64_inout_za" { ret void } + + ; Test we don't move before a previous state change. + define void @do_not_move_before_prior_state_change() "aarch64_za_state_agnostic" { ret void } + + ; Test we don't move into a call sequence. + define void @do_not_move_into_call() "aarch64_inout_za" { ret void } + + declare void @clobber() + declare void @inout_call() "aarch64_inout_za" +... +--- +name: move_before_nzcv_def +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: move_before_nzcv_def + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $nzcv = IMPLICIT_DEF + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... +--- +name: move_to_x0_live +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: move_to_x0_live + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: $x0 = COPY [[COPY2]] + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $x0 + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $x0 = IMPLICIT_DEF + + $nzcv = IMPLICIT_DEF + FAKE_USE $x0 + + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... +--- +name: do_not_move_before_prior_state_change +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + ; CHECK-LABEL: name: do_not_move_before_prior_state_change + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BL &__arm_sme_state_size, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit-def $x0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: $sp = SUBXrx64 $sp, [[COPY]], 24 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_save, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 2, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: RET_ReallyLR + bb.0: + successors: %bb.1, %bb.2 + + ; The insertion point can move before the $nzcv def (as that would require + ; moving before a $zab0 def -- that requires the ACTIVE state). + $nzcv = IMPLICIT_DEF + $zab0 = IMPLICIT_DEF + Bcc 2, %bb.1, implicit $nzcv + B %bb.2 + ; bb.1 and bb.2 both require ZA saved on entry (to force bb.0's exit bundle to + ; pick the LOCAL_SAVED state). + bb.1: + liveins: $nzcv + FAKE_USE $nzcv + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + RET_ReallyLR + bb.2: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + RET_ReallyLR +... +--- +name: do_not_move_into_call +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: do_not_move_into_call + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + + ; This is artificial test where NZCV is def'd inside a call, so we can't + ; move the insert point before it's definition. + $nzcv = IMPLICIT_DEF + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index e3007a3723484..e4f9efae0e98f 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -391,11 +391,9 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s ; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0 ; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEWLOWERING-NEXT: cmp sp, x19 ; CHECK-NEWLOWERING-NEXT: mov x0, x19 -; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save -; CHECK-NEWLOWERING-NEXT: msr NZCV, x8 +; CHECK-NEWLOWERING-NEXT: cmp sp, x19 ; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3 ; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 ; CHECK-NEWLOWERING-NEXT: mov x0, x19 diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir index 18764d508d0fa..9f33c0614cee0 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir +++ b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir @@ -62,14 +62,12 @@ body: | ; CHECK-NEXT: RequiresZASavePseudo ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv - ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv ; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 ; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 ; CHECK-NEXT: MSR 56965, $xzr - ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv ; CHECK-NEXT: Bcc 11, %bb.2, implicit $nzcv ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -116,16 +114,14 @@ body: | # CHECK-ASM-LABEL: cmp_branch # CHECK-ASM: msr TPIDR2_EL0, x10 # CHECK-ASM-NEXT: bl clobber -# CHECK-ASM-NEXT: cmp w20, #101 -# CHECK-ASM-NEXT: mrs x8, NZCV # CHECK-ASM-NEXT: smstart za -# CHECK-ASM-NEXT: mrs x9, TPIDR2_EL0 +# CHECK-ASM-NEXT: mrs x8, TPIDR2_EL0 # CHECK-ASM-NEXT: sub x0, x29, #16 -# CHECK-ASM-NEXT: cbnz x9, .LBB0_2 +# CHECK-ASM-NEXT: cbnz x8, .LBB0_2 # CHECK-ASM: bl __arm_tpidr2_restore # CHECK-ASM-NEXT: .LBB0_2: +# CHECK-ASM-NEXT: cmp w20, #101 # CHECK-ASM-NEXT: msr TPIDR2_EL0, xzr -# CHECK-ASM-NEXT: msr NZCV, x8 # CHECK-ASM-NEXT: b.lt .LBB0_4 # CHECK-ASM: bl inout_call # CHECK-ASM-NEXT: .LBB0_4: From 49155f4b6defc5791491470eaa73d28dd8a5eba8 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 7 Oct 2025 12:19:58 +0000 Subject: [PATCH 2/3] Add note Change-Id: If922c75df07dd4e027c30710330002bcf7628ad6 --- llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index b1503ebc6e92f..e04749b37ffba 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -511,6 +511,10 @@ MachineSMEABI::findStateChangeInsertionPoint( if (Inst == Block.Insts.begin()) PrevStateChangeI = MBB.begin(); else + // Note: `std::prev(Inst)` is the previous InstInfo. We only create an + // InstInfo object for instructions that require a specific ZA state, so the + // InstInfo is the site of the previous state change in the block (which can + // be several MIs earlier). PrevStateChangeI = std::prev(Inst)->InsertPt; // Note: LiveUnits will only accurately track X0 and NZCV. LiveRegUnits LiveUnits(*TRI); From 70d6e694e5fe44e615ef488b1e99e9afe60387d6 Mon Sep 17 00:00:00 2001 From: Benjamin Maxwell Date: Tue, 7 Oct 2025 12:23:05 +0000 Subject: [PATCH 3/3] Add braces (due to large comment) Change-Id: I10c419790e1291ca8cca7926fba72bb26d515201 --- llvm/lib/Target/AArch64/MachineSMEABIPass.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index e04749b37ffba..417757eec89ca 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -508,14 +508,15 @@ MachineSMEABI::findStateChangeInsertionPoint( return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags). // Find the previous state change. We can not move before this point. MachineBasicBlock::iterator PrevStateChangeI; - if (Inst == Block.Insts.begin()) + if (Inst == Block.Insts.begin()) { PrevStateChangeI = MBB.begin(); - else + } else { // Note: `std::prev(Inst)` is the previous InstInfo. We only create an // InstInfo object for instructions that require a specific ZA state, so the // InstInfo is the site of the previous state change in the block (which can // be several MIs earlier). PrevStateChangeI = std::prev(Inst)->InsertPt; + } // Note: LiveUnits will only accurately track X0 and NZCV. LiveRegUnits LiveUnits(*TRI); setPhysLiveRegs(LiveUnits, PhysLiveRegs);