diff --git a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp index cced0faa28889..417757eec89ca 100644 --- a/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp +++ b/llvm/lib/Target/AArch64/MachineSMEABIPass.cpp @@ -294,6 +294,12 @@ struct MachineSMEABI : public MachineFunctionPass { MachineBasicBlock::iterator MBBI, LiveRegs PhysLiveRegs); + /// Attempts to find an insertion point before \p Inst where the status flags + /// are not live. If \p Inst is `Block.Insts.end()` a point before the end of + /// the block is found. + std::pair + findStateChangeInsertionPoint(MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl::const_iterator Inst); void emitStateChange(EmitContext &, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, ZAState From, ZAState To, LiveRegs PhysLiveRegs); @@ -337,6 +343,28 @@ struct MachineSMEABI : public MachineFunctionPass { MachineRegisterInfo *MRI = nullptr; }; +static LiveRegs getPhysLiveRegs(LiveRegUnits const &LiveUnits) { + LiveRegs PhysLiveRegs = LiveRegs::None; + if (!LiveUnits.available(AArch64::NZCV)) + PhysLiveRegs |= LiveRegs::NZCV; + // We have to track W0 and X0 separately as otherwise things can get + // confused if we attempt to preserve X0 but only W0 was defined. + if (!LiveUnits.available(AArch64::W0)) + PhysLiveRegs |= LiveRegs::W0; + if (!LiveUnits.available(AArch64::W0_HI)) + PhysLiveRegs |= LiveRegs::W0_HI; + return PhysLiveRegs; +} + +static void setPhysLiveRegs(LiveRegUnits &LiveUnits, LiveRegs PhysLiveRegs) { + if (PhysLiveRegs & LiveRegs::NZCV) + LiveUnits.addReg(AArch64::NZCV); + if (PhysLiveRegs & LiveRegs::W0) + LiveUnits.addReg(AArch64::W0); + if (PhysLiveRegs & LiveRegs::W0_HI) + LiveUnits.addReg(AArch64::W0_HI); +} + FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { assert((SMEFnAttrs.hasAgnosticZAInterface() || SMEFnAttrs.hasZT0State() || SMEFnAttrs.hasZAState()) && @@ -362,26 +390,13 @@ FunctionInfo MachineSMEABI::collectNeededZAStates(SMEAttrs SMEFnAttrs) { LiveRegUnits LiveUnits(*TRI); LiveUnits.addLiveOuts(MBB); - auto GetPhysLiveRegs = [&] { - LiveRegs PhysLiveRegs = LiveRegs::None; - if (!LiveUnits.available(AArch64::NZCV)) - PhysLiveRegs |= LiveRegs::NZCV; - // We have to track W0 and X0 separately as otherwise things can get - // confused if we attempt to preserve X0 but only W0 was defined. - if (!LiveUnits.available(AArch64::W0)) - PhysLiveRegs |= LiveRegs::W0; - if (!LiveUnits.available(AArch64::W0_HI)) - PhysLiveRegs |= LiveRegs::W0_HI; - return PhysLiveRegs; - }; - - Block.PhysLiveRegsAtExit = GetPhysLiveRegs(); + Block.PhysLiveRegsAtExit = getPhysLiveRegs(LiveUnits); auto FirstTerminatorInsertPt = MBB.getFirstTerminator(); auto FirstNonPhiInsertPt = MBB.getFirstNonPHI(); for (MachineInstr &MI : reverse(MBB)) { MachineBasicBlock::iterator MBBI(MI); LiveUnits.stepBackward(MI); - LiveRegs PhysLiveRegs = GetPhysLiveRegs(); + LiveRegs PhysLiveRegs = getPhysLiveRegs(LiveUnits); // The SMEStateAllocPseudo marker is added to a function if the save // buffer was allocated in SelectionDAG. It marks the end of the // allocation -- which is a safe point for this pass to insert any TPIDR2 @@ -476,6 +491,46 @@ MachineSMEABI::assignBundleZAStates(const EdgeBundles &Bundles, return BundleStates; } +std::pair +MachineSMEABI::findStateChangeInsertionPoint( + MachineBasicBlock &MBB, const BlockInfo &Block, + SmallVectorImpl::const_iterator Inst) { + LiveRegs PhysLiveRegs; + MachineBasicBlock::iterator InsertPt; + if (Inst != Block.Insts.end()) { + InsertPt = Inst->InsertPt; + PhysLiveRegs = Inst->PhysLiveRegs; + } else { + InsertPt = MBB.getFirstTerminator(); + PhysLiveRegs = Block.PhysLiveRegsAtExit; + } + if (!(PhysLiveRegs & LiveRegs::NZCV)) + return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags). + // Find the previous state change. We can not move before this point. + MachineBasicBlock::iterator PrevStateChangeI; + if (Inst == Block.Insts.begin()) { + PrevStateChangeI = MBB.begin(); + } else { + // Note: `std::prev(Inst)` is the previous InstInfo. We only create an + // InstInfo object for instructions that require a specific ZA state, so the + // InstInfo is the site of the previous state change in the block (which can + // be several MIs earlier). + PrevStateChangeI = std::prev(Inst)->InsertPt; + } + // Note: LiveUnits will only accurately track X0 and NZCV. + LiveRegUnits LiveUnits(*TRI); + setPhysLiveRegs(LiveUnits, PhysLiveRegs); + for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) { + // Don't move before/into a call (which may have a state change before it). + if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall()) + break; + LiveUnits.stepBackward(*I); + if (LiveUnits.available(AArch64::NZCV)) + return {I, getPhysLiveRegs(LiveUnits)}; + } + return {InsertPt, PhysLiveRegs}; +} + void MachineSMEABI::insertStateChanges(EmitContext &Context, const FunctionInfo &FnInfo, const EdgeBundles &Bundles, @@ -490,10 +545,13 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, CurrentState = InState; for (auto &Inst : Block.Insts) { - if (CurrentState != Inst.NeededState) - emitStateChange(Context, MBB, Inst.InsertPt, CurrentState, - Inst.NeededState, Inst.PhysLiveRegs); - CurrentState = Inst.NeededState; + if (CurrentState != Inst.NeededState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, &Inst); + emitStateChange(Context, MBB, InsertPt, CurrentState, Inst.NeededState, + PhysLiveRegs); + CurrentState = Inst.NeededState; + } } if (MBB.succ_empty()) @@ -501,9 +559,12 @@ void MachineSMEABI::insertStateChanges(EmitContext &Context, ZAState OutState = BundleStates[Bundles.getBundle(MBB.getNumber(), /*Out=*/true)]; - if (CurrentState != OutState) - emitStateChange(Context, MBB, MBB.getFirstTerminator(), CurrentState, - OutState, Block.PhysLiveRegsAtExit); + if (CurrentState != OutState) { + auto [InsertPt, PhysLiveRegs] = + findStateChangeInsertionPoint(MBB, Block, Block.Insts.end()); + emitStateChange(Context, MBB, InsertPt, CurrentState, OutState, + PhysLiveRegs); + } } } diff --git a/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir new file mode 100644 index 0000000000000..3f174a62128a8 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir @@ -0,0 +1,227 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6 +# RUN: llc -mtriple=aarch64 -mattr=+sve -mattr=+sme -run-pass=aarch64-machine-sme-abi -verify-machineinstrs %s -o - | FileCheck %s + +--- | + ; Test moving a state change to be before a $nzcv def + define void @move_before_nzcv_def() "aarch64_inout_za" { ret void } + + ; Test moving a state change to a point where $x0 is live + define void @move_to_x0_live() "aarch64_inout_za" { ret void } + + ; Test we don't move before a previous state change. + define void @do_not_move_before_prior_state_change() "aarch64_za_state_agnostic" { ret void } + + ; Test we don't move into a call sequence. + define void @do_not_move_into_call() "aarch64_inout_za" { ret void } + + declare void @clobber() + declare void @inout_call() "aarch64_inout_za" +... +--- +name: move_before_nzcv_def +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: move_before_nzcv_def + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $nzcv = IMPLICIT_DEF + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... +--- +name: move_to_x0_live +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: move_to_x0_live + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = IMPLICIT_DEF + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: $x0 = COPY [[COPY2]] + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $x0 + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $x0 = IMPLICIT_DEF + + $nzcv = IMPLICIT_DEF + FAKE_USE $x0 + + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... +--- +name: do_not_move_before_prior_state_change +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + ; CHECK-LABEL: name: do_not_move_before_prior_state_change + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: BL &__arm_sme_state_size, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit-def $x0 + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 + ; CHECK-NEXT: $sp = SUBXrx64 $sp, [[COPY]], 24 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_save, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: Bcc 2, %bb.1, implicit $nzcv + ; CHECK-NEXT: B %bb.2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: liveins: $nzcv + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: RET_ReallyLR + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.2: + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: $x0 = COPY [[COPY1]] + ; CHECK-NEXT: BL &__arm_sme_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x1, implicit-def $lr, implicit $sp, implicit $x0 + ; CHECK-NEXT: RET_ReallyLR + bb.0: + successors: %bb.1, %bb.2 + + ; The insertion point can move before the $nzcv def (as that would require + ; moving before a $zab0 def -- that requires the ACTIVE state). + $nzcv = IMPLICIT_DEF + $zab0 = IMPLICIT_DEF + Bcc 2, %bb.1, implicit $nzcv + B %bb.2 + ; bb.1 and bb.2 both require ZA saved on entry (to force bb.0's exit bundle to + ; pick the LOCAL_SAVED state). + bb.1: + liveins: $nzcv + FAKE_USE $nzcv + + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + RET_ReallyLR + bb.2: + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + RET_ReallyLR +... +--- +name: do_not_move_into_call +tracksRegLiveness: true +isSSA: true +noVRegs: false +body: | + bb.0: + + ; CHECK-LABEL: name: do_not_move_into_call + ; CHECK: [[RDSVLI_XI:%[0-9]+]]:gpr64 = RDSVLI_XI 1, implicit $vg + ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $sp + ; CHECK-NEXT: [[MSUBXrrr:%[0-9]+]]:gpr64 = MSUBXrrr [[RDSVLI_XI]], [[RDSVLI_XI]], [[COPY]] + ; CHECK-NEXT: $sp = COPY [[MSUBXrrr]] + ; CHECK-NEXT: STPXi [[MSUBXrrr]], [[RDSVLI_XI]], %stack.0, 0 + ; CHECK-NEXT: [[ADDXri:%[0-9]+]]:gpr64sp = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY [[ADDXri]] + ; CHECK-NEXT: MSR 56965, [[COPY1]] + ; CHECK-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: RequiresZASavePseudo + ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + ; CHECK-NEXT: $nzcv = IMPLICIT_DEF + ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv + ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv + ; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv + ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 + ; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 + ; CHECK-NEXT: MSR 56965, $xzr + ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: $zab0 = IMPLICIT_DEF + ; CHECK-NEXT: FAKE_USE $nzcv + ; CHECK-NEXT: RET_ReallyLR + ADJCALLSTACKDOWN 0, 0, implicit-def dead $sp, implicit $sp + RequiresZASavePseudo + BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp + + ; This is artificial test where NZCV is def'd inside a call, so we can't + ; move the insert point before it's definition. + $nzcv = IMPLICIT_DEF + ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp + + $zab0 = IMPLICIT_DEF + FAKE_USE $nzcv + + RET_ReallyLR +... diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index e3007a3723484..e4f9efae0e98f 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -391,11 +391,9 @@ define void @agnostic_za_buffer_alloc_with_stack_probes() nounwind "aarch64_za_s ; CHECK-NEWLOWERING-NEXT: sub x19, x8, x0 ; CHECK-NEWLOWERING-NEXT: .LBB7_1: // =>This Inner Loop Header: Depth=1 ; CHECK-NEWLOWERING-NEXT: sub sp, sp, #16, lsl #12 // =65536 -; CHECK-NEWLOWERING-NEXT: cmp sp, x19 ; CHECK-NEWLOWERING-NEXT: mov x0, x19 -; CHECK-NEWLOWERING-NEXT: mrs x8, NZCV ; CHECK-NEWLOWERING-NEXT: bl __arm_sme_save -; CHECK-NEWLOWERING-NEXT: msr NZCV, x8 +; CHECK-NEWLOWERING-NEXT: cmp sp, x19 ; CHECK-NEWLOWERING-NEXT: b.le .LBB7_3 ; CHECK-NEWLOWERING-NEXT: // %bb.2: // in Loop: Header=BB7_1 Depth=1 ; CHECK-NEWLOWERING-NEXT: mov x0, x19 diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir index 18764d508d0fa..9f33c0614cee0 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir +++ b/llvm/test/CodeGen/AArch64/sme-lazy-sve-nzcv-live.mir @@ -62,14 +62,12 @@ body: | ; CHECK-NEXT: RequiresZASavePseudo ; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp ; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp - ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv - ; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 55824, implicit-def $nzcv, implicit $nzcv ; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv ; CHECK-NEXT: [[MRS1:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv ; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0 ; CHECK-NEXT: RestoreZAPseudo [[MRS1]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0 ; CHECK-NEXT: MSR 56965, $xzr - ; CHECK-NEXT: MSR 55824, [[MRS]], implicit-def $nzcv + ; CHECK-NEXT: [[SUBSWri:%[0-9]+]]:gpr32 = SUBSWri [[COPY1]], 101, 0, implicit-def $nzcv ; CHECK-NEXT: Bcc 11, %bb.2, implicit $nzcv ; CHECK-NEXT: B %bb.1 ; CHECK-NEXT: {{ $}} @@ -116,16 +114,14 @@ body: | # CHECK-ASM-LABEL: cmp_branch # CHECK-ASM: msr TPIDR2_EL0, x10 # CHECK-ASM-NEXT: bl clobber -# CHECK-ASM-NEXT: cmp w20, #101 -# CHECK-ASM-NEXT: mrs x8, NZCV # CHECK-ASM-NEXT: smstart za -# CHECK-ASM-NEXT: mrs x9, TPIDR2_EL0 +# CHECK-ASM-NEXT: mrs x8, TPIDR2_EL0 # CHECK-ASM-NEXT: sub x0, x29, #16 -# CHECK-ASM-NEXT: cbnz x9, .LBB0_2 +# CHECK-ASM-NEXT: cbnz x8, .LBB0_2 # CHECK-ASM: bl __arm_tpidr2_restore # CHECK-ASM-NEXT: .LBB0_2: +# CHECK-ASM-NEXT: cmp w20, #101 # CHECK-ASM-NEXT: msr TPIDR2_EL0, xzr -# CHECK-ASM-NEXT: msr NZCV, x8 # CHECK-ASM-NEXT: b.lt .LBB0_4 # CHECK-ASM: bl inout_call # CHECK-ASM-NEXT: .LBB0_4: