diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index a5746684308c9..1af53a000490e 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -3175,21 +3175,24 @@ MachineBasicBlock * AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI, MachineBasicBlock *BB) const { MachineFunction *MF = BB->getParent(); - AArch64FunctionInfo *FuncInfo = MF->getInfo(); const TargetInstrInfo *TII = Subtarget->getInstrInfo(); + const DebugLoc &DL = MI.getDebugLoc(); Register ResultReg = MI.getOperand(0).getReg(); - if (FuncInfo->isPStateSMRegUsed()) { + if (MF->getRegInfo().use_empty(ResultReg)) { + // Nothing to do. Pseudo erased below. + } else if (Subtarget->hasSME()) { + BuildMI(*BB, MI, DL, TII->get(AArch64::MRS), ResultReg) + .addImm(AArch64SysReg::SVCR) + .addReg(AArch64::VG, RegState::Implicit); + } else { RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE; const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); - BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL)) + BuildMI(*BB, MI, DL, TII->get(AArch64::BL)) .addExternalSymbol(getLibcallName(LC)) .addReg(AArch64::X0, RegState::ImplicitDefine) .addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC))); - BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg) + BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), ResultReg) .addReg(AArch64::X0); - } else { - assert(MI.getMF()->getRegInfo().use_empty(ResultReg) && - "Expected no users of the entry pstate.sm!"); } MI.eraseFromParent(); return BB; @@ -9102,7 +9105,6 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL, SmallVector Ops = {Chain, MSROp}; unsigned Opcode; if (Condition != AArch64SME::Always) { - FuncInfo->setPStateSMRegUsed(true); Register PStateReg = FuncInfo->getPStateSMReg(); assert(PStateReg.isValid() && "PStateSM Register is invalid"); SDValue PStateSM = diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h index 1fde87e65a34b..1dd89c3e0abb8 100644 --- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h +++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h @@ -228,9 +228,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { // on function entry to record the initial pstate of a function. Register PStateSMReg = MCRegister::NoRegister; - // true if PStateSMReg is used. - bool PStateSMRegUsed = false; - // Has the PNReg used to build PTRUE instruction. // The PTRUE is used for the LD/ST of ZReg pairs in save and restore. unsigned PredicateRegForFillSpill = 0; @@ -273,9 +270,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo { Register getPStateSMReg() const { return PStateSMReg; }; void setPStateSMReg(Register Reg) { PStateSMReg = Reg; }; - unsigned isPStateSMRegUsed() const { return PStateSMRegUsed; }; - void setPStateSMRegUsed(bool Used = true) { PStateSMRegUsed = Used; }; - bool isSVECC() const { return IsSVECC; }; void setIsSVECC(bool s) { IsSVECC = s; }; diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll index 25a7b87d37d9e..b31ae68e87ec8 100644 --- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll +++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll @@ -143,40 +143,39 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee( ; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill ; CHECK-NEXT: add x29, sp, #64 ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x20, SVCR ; CHECK-NEXT: bl __arm_sme_state_size ; CHECK-NEXT: sub sp, sp, x0 -; CHECK-NEXT: mov x20, sp -; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_save -; CHECK-NEXT: tbz w19, #0, .LBB5_2 +; CHECK-NEXT: tbz w20, #0, .LBB5_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB5_2: ; CHECK-NEXT: mov x0, x8 ; CHECK-NEXT: bl private_za_decl ; CHECK-NEXT: mov x1, x0 -; CHECK-NEXT: tbz w19, #0, .LBB5_4 +; CHECK-NEXT: tbz w20, #0, .LBB5_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB5_4: -; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_restore -; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_save -; CHECK-NEXT: tbz w19, #0, .LBB5_6 +; CHECK-NEXT: tbz w20, #0, .LBB5_6 ; CHECK-NEXT: // %bb.5: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB5_6: ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: bl private_za_decl ; CHECK-NEXT: mov x1, x0 -; CHECK-NEXT: tbz w19, #0, .LBB5_8 +; CHECK-NEXT: tbz w20, #0, .LBB5_8 ; CHECK-NEXT: // %bb.7: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB5_8: -; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: mov x0, x19 ; CHECK-NEXT: bl __arm_sme_restore ; CHECK-NEXT: mov x0, x1 ; CHECK-NEXT: sub sp, x29, #64 diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll index 8d6432ced8e1d..cf42db7aa65bd 100644 --- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll +++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll @@ -42,8 +42,7 @@ define void @fbyte( %v) #0{ ; NOPAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; NOPAIR-NEXT: addvl sp, sp, #-1 ; NOPAIR-NEXT: str z0, [sp] // 16-byte Folded Spill -; NOPAIR-NEXT: bl __arm_sme_state -; NOPAIR-NEXT: mov x19, x0 +; NOPAIR-NEXT: mrs x19, SVCR ; NOPAIR-NEXT: tbz w19, #0, .LBB0_2 ; NOPAIR-NEXT: // %bb.1: ; NOPAIR-NEXT: smstop sm @@ -123,8 +122,7 @@ define void @fbyte( %v) #0{ ; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill ; PAIR-NEXT: addvl sp, sp, #-1 ; PAIR-NEXT: str z0, [sp] // 16-byte Folded Spill -; PAIR-NEXT: bl __arm_sme_state -; PAIR-NEXT: mov x19, x0 +; PAIR-NEXT: mrs x19, SVCR ; PAIR-NEXT: tbz w19, #0, .LBB0_2 ; PAIR-NEXT: // %bb.1: ; PAIR-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 937dd417b9ec2..05d636158b92b 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -409,8 +409,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati ; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill -; CHECK-COMMON-NEXT: bl __arm_sme_state -; CHECK-COMMON-NEXT: mov x19, x0 +; CHECK-COMMON-NEXT: mrs x19, SVCR ; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2 ; CHECK-COMMON-NEXT: // %bb.1: ; CHECK-COMMON-NEXT: smstop sm diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll index 67199d9c0970c..a7d51968c5157 100644 --- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll +++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll @@ -155,10 +155,9 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za ; CHECK-NEXT: add x29, sp, #64 ; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill ; CHECK-NEXT: sub sp, sp, #16 -; CHECK-NEXT: bl __arm_sme_state ; CHECK-NEXT: rdsvl x8, #1 ; CHECK-NEXT: mov x9, sp -; CHECK-NEXT: mov x20, x0 +; CHECK-NEXT: mrs x20, SVCR ; CHECK-NEXT: msub x9, x8, x8, x9 ; CHECK-NEXT: mov sp, x9 ; CHECK-NEXT: sub x10, x29, #80 @@ -205,8 +204,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za ; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9 ; CHECK-NEWLOWERING-NEXT: mov sp, x9 ; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80] -; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state -; CHECK-NEWLOWERING-NEXT: mov x20, x0 +; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR ; CHECK-NEWLOWERING-NEXT: sub x8, x29, #80 ; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8 ; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_2 diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll index ab7c661d27187..80827c2547780 100644 --- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll +++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll @@ -63,8 +63,7 @@ define void @test2() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbz w19, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm @@ -95,8 +94,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" { ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbnz w19, #0, .LBB3_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll index 39ea180e7ed81..1f0581a142c4a 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll @@ -8,26 +8,24 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible"; define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind { ; CHECK-LABEL: sm_body_sm_compatible_simple: ; CHECK: // %bb.0: -; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill +; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill -; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: tbnz w0, #0, .LBB0_2 +; CHECK-NEXT: mrs x8, SVCR +; CHECK-NEXT: tbnz w8, #0, .LBB0_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB0_2: -; CHECK-NEXT: tbnz w0, #0, .LBB0_4 +; CHECK-NEXT: tbnz w8, #0, .LBB0_4 ; CHECK-NEXT: // %bb.3: ; CHECK-NEXT: smstop sm ; CHECK-NEXT: .LBB0_4: ; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload ; CHECK-NEXT: fmov s0, wzr ; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload ; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload +; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload ; CHECK-NEXT: ret ret float zeroinitializer } @@ -40,8 +38,7 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbnz w19, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm @@ -69,17 +66,15 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill ; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: mov w8, w0 ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbnz w19, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: // %entry ; CHECK-NEXT: smstart sm ; CHECK-NEXT: .LBB2_2: // %entry -; CHECK-NEXT: cbz w8, .LBB2_6 +; CHECK-NEXT: cbz w0, .LBB2_6 ; CHECK-NEXT: // %bb.3: // %if.else ; CHECK-NEXT: bl streaming_compatible_callee ; CHECK-NEXT: tbnz w19, #0, .LBB2_5 diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll index ff4f36363edcf..9088986ee9b72 100644 --- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll +++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll @@ -41,8 +41,7 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbz w19, #0, .LBB1_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstop sm @@ -77,8 +76,7 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c ; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: tbnz w19, #0, .LBB2_2 ; CHECK-NEXT: // %bb.1: ; CHECK-NEXT: smstart sm @@ -134,10 +132,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) " ; CHECK-NEXT: add x8, sp, #16 ; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 ; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill -; CHECK-NEXT: bl __arm_sme_state -; CHECK-NEXT: add x8, sp, #16 -; CHECK-NEXT: mov x19, x0 -; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload +; CHECK-NEXT: mrs x19, SVCR ; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 ; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill ; CHECK-NEXT: tbz w19, #0, .LBB4_2 @@ -209,8 +204,7 @@ define @streaming_compatible_with_scalable_vectors( @streaming_compatible_with_predicate_vectors(:0:0: 2 spills 2.000000e+00 total spills cost 3 reloads 3.000000e+00 total reloads cost 1 virtual registers copies 1.000000e+00 total copies cost generated in function +; CHECK: remark: :0:0: 2 spills 2.000000e+00 total spills cost 3 reloads 3.000000e+00 total reloads cost generated in function define @streaming_compatible_with_predicate_vectors( %arg) "aarch64_pstate_sm_compatible" nounwind #0 { %res = call @normal_callee_predicate_vec_arg( %arg) %and = and %res, %arg diff --git a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll index 7ad95429949a0..6021f9fab2cdd 100644 --- a/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll +++ b/llvm/test/CodeGen/AArch64/streaming-compatible-memory-ops.ll @@ -169,15 +169,14 @@ define void @sc_memcpy(i64 noundef %n) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-NO-SME-ROUTINES-LABEL: sc_memcpy: ; CHECK-NO-SME-ROUTINES: // %bb.0: // %entry ; CHECK-NO-SME-ROUTINES-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill -; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: mov x2, x0 +; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst +; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill ; CHECK-NO-SME-ROUTINES-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill -; CHECK-NO-SME-ROUTINES-NEXT: bl __arm_sme_state -; CHECK-NO-SME-ROUTINES-NEXT: mov x19, x0 -; CHECK-NO-SME-ROUTINES-NEXT: adrp x0, :got:dst -; CHECK-NO-SME-ROUTINES-NEXT: adrp x1, :got:src +; CHECK-NO-SME-ROUTINES-NEXT: mrs x19, SVCR ; CHECK-NO-SME-ROUTINES-NEXT: ldr x0, [x0, :got_lo12:dst] ; CHECK-NO-SME-ROUTINES-NEXT: ldr x1, [x1, :got_lo12:src] ; CHECK-NO-SME-ROUTINES-NEXT: tbz w19, #0, .LBB3_2