-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[AArch64][SME] Use mrs %reg, SVCR
rather than ABI routine with +sme
#156878
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
Calling `__arm_sme_state` does more than we need in most cases and clobbers both x0 and x1. This also removes (set|get)PStateSMRegUsed, since we can directly check the virtual register instead.
@llvm/pr-subscribers-backend-aarch64 Author: Benjamin Maxwell (MacDue) Changes: Calling `__arm_sme_state` does more than we need in most cases and clobbers both x0 and x1. This also removes `(set|get)PStateSMRegUsed`, since we can directly check the virtual register instead. Patch is 24.43 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/156878.diff 13 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index a5746684308c9..d7c248e444826 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -3175,21 +3175,25 @@ MachineBasicBlock *
AArch64TargetLowering::EmitEntryPStateSM(MachineInstr &MI,
MachineBasicBlock *BB) const {
MachineFunction *MF = BB->getParent();
- AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
const TargetInstrInfo *TII = Subtarget->getInstrInfo();
+ const DebugLoc &DL = MI.getDebugLoc();
Register ResultReg = MI.getOperand(0).getReg();
- if (FuncInfo->isPStateSMRegUsed()) {
+ if (MF->getRegInfo().use_empty(ResultReg)) {
+ // Nothing to do. Pseudo erased below.
+ } else if (Subtarget->hasSME()) {
+ BuildMI(*BB, MI, DL, TII->get(AArch64::MRS))
+ .addReg(ResultReg, RegState::Define)
+ .addImm(AArch64SysReg::SVCR)
+ .addReg(AArch64::VG, RegState::Implicit);
+ } else {
RTLIB::Libcall LC = RTLIB::SMEABI_SME_STATE;
const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo();
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(AArch64::BL))
+ BuildMI(*BB, MI, DL, TII->get(AArch64::BL))
.addExternalSymbol(getLibcallName(LC))
.addReg(AArch64::X0, RegState::ImplicitDefine)
.addRegMask(TRI->getCallPreservedMask(*MF, getLibcallCallingConv(LC)));
- BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(TargetOpcode::COPY), ResultReg)
+ BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY), ResultReg)
.addReg(AArch64::X0);
- } else {
- assert(MI.getMF()->getRegInfo().use_empty(ResultReg) &&
- "Expected no users of the entry pstate.sm!");
}
MI.eraseFromParent();
return BB;
@@ -9102,7 +9106,6 @@ SDValue AArch64TargetLowering::changeStreamingMode(SelectionDAG &DAG, SDLoc DL,
SmallVector<SDValue> Ops = {Chain, MSROp};
unsigned Opcode;
if (Condition != AArch64SME::Always) {
- FuncInfo->setPStateSMRegUsed(true);
Register PStateReg = FuncInfo->getPStateSMReg();
assert(PStateReg.isValid() && "PStateSM Register is invalid");
SDValue PStateSM =
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index 1fde87e65a34b..1dd89c3e0abb8 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -228,9 +228,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
// on function entry to record the initial pstate of a function.
Register PStateSMReg = MCRegister::NoRegister;
- // true if PStateSMReg is used.
- bool PStateSMRegUsed = false;
-
// Has the PNReg used to build PTRUE instruction.
// The PTRUE is used for the LD/ST of ZReg pairs in save and restore.
unsigned PredicateRegForFillSpill = 0;
@@ -273,9 +270,6 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
Register getPStateSMReg() const { return PStateSMReg; };
void setPStateSMReg(Register Reg) { PStateSMReg = Reg; };
- unsigned isPStateSMRegUsed() const { return PStateSMRegUsed; };
- void setPStateSMRegUsed(bool Used = true) { PStateSMRegUsed = Used; };
-
bool isSVECC() const { return IsSVECC; };
void setIsSVECC(bool s) { IsSVECC = s; };
diff --git a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
index 25a7b87d37d9e..b31ae68e87ec8 100644
--- a/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
+++ b/llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
@@ -143,40 +143,39 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
; CHECK-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: bl __arm_sme_state_size
; CHECK-NEXT: sub sp, sp, x0
-; CHECK-NEXT: mov x20, sp
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x19, sp
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
-; CHECK-NEXT: tbz w19, #0, .LBB5_2
+; CHECK-NEXT: tbz w20, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_2:
; CHECK-NEXT: mov x0, x8
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w19, #0, .LBB5_4
+; CHECK-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_4:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_save
-; CHECK-NEXT: tbz w19, #0, .LBB5_6
+; CHECK-NEXT: tbz w20, #0, .LBB5_6
; CHECK-NEXT: // %bb.5:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB5_6:
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: bl private_za_decl
; CHECK-NEXT: mov x1, x0
-; CHECK-NEXT: tbz w19, #0, .LBB5_8
+; CHECK-NEXT: tbz w20, #0, .LBB5_8
; CHECK-NEXT: // %bb.7:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB5_8:
-; CHECK-NEXT: mov x0, x20
+; CHECK-NEXT: mov x0, x19
; CHECK-NEXT: bl __arm_sme_restore
; CHECK-NEXT: mov x0, x1
; CHECK-NEXT: sub sp, x29, #64
diff --git a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
index 8d6432ced8e1d..cf42db7aa65bd 100644
--- a/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
+++ b/llvm/test/CodeGen/AArch64/sme-callee-save-restore-pairs.ll
@@ -42,8 +42,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
; NOPAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; NOPAIR-NEXT: addvl sp, sp, #-1
; NOPAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
-; NOPAIR-NEXT: bl __arm_sme_state
-; NOPAIR-NEXT: mov x19, x0
+; NOPAIR-NEXT: mrs x19, SVCR
; NOPAIR-NEXT: tbz w19, #0, .LBB0_2
; NOPAIR-NEXT: // %bb.1:
; NOPAIR-NEXT: smstop sm
@@ -123,8 +122,7 @@ define void @fbyte(<vscale x 16 x i8> %v) #0{
; PAIR-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; PAIR-NEXT: addvl sp, sp, #-1
; PAIR-NEXT: str z0, [sp] // 16-byte Folded Spill
-; PAIR-NEXT: bl __arm_sme_state
-; PAIR-NEXT: mov x19, x0
+; PAIR-NEXT: mrs x19, SVCR
; PAIR-NEXT: tbz w19, #0, .LBB0_2
; PAIR-NEXT: // %bb.1:
; PAIR-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
index 937dd417b9ec2..05d636158b92b 100644
--- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
+++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
@@ -409,8 +409,7 @@ define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compati
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
-; CHECK-COMMON-NEXT: bl __arm_sme_state
-; CHECK-COMMON-NEXT: mov x19, x0
+; CHECK-COMMON-NEXT: mrs x19, SVCR
; CHECK-COMMON-NEXT: tbz w19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
diff --git a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
index 67199d9c0970c..a7d51968c5157 100644
--- a/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
+++ b/llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
@@ -155,10 +155,9 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEXT: add x29, sp, #64
; CHECK-NEXT: stp x20, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-NEXT: sub sp, sp, #16
-; CHECK-NEXT: bl __arm_sme_state
; CHECK-NEXT: rdsvl x8, #1
; CHECK-NEXT: mov x9, sp
-; CHECK-NEXT: mov x20, x0
+; CHECK-NEXT: mrs x20, SVCR
; CHECK-NEXT: msub x9, x8, x8, x9
; CHECK-NEXT: mov sp, x9
; CHECK-NEXT: sub x10, x29, #80
@@ -205,8 +204,7 @@ define void @test_lazy_save_and_conditional_smstart() nounwind "aarch64_inout_za
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
-; CHECK-NEWLOWERING-NEXT: bl __arm_sme_state
-; CHECK-NEWLOWERING-NEXT: mov x20, x0
+; CHECK-NEWLOWERING-NEXT: mrs x20, SVCR
; CHECK-NEWLOWERING-NEXT: sub x8, x29, #80
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x8
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB3_2
diff --git a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
index ab7c661d27187..80827c2547780 100644
--- a/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
+++ b/llvm/test/CodeGen/AArch64/sme-peephole-opts.ll
@@ -63,8 +63,7 @@ define void @test2() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -95,8 +94,7 @@ define void @test3() nounwind "aarch64_pstate_sm_compatible" {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB3_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
index 39ea180e7ed81..1f0581a142c4a 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-body-streaming-compatible-interface.ll
@@ -8,26 +8,24 @@ declare void @streaming_compatible_callee() "aarch64_pstate_sm_compatible";
define float @sm_body_sm_compatible_simple() "aarch64_pstate_sm_compatible" "aarch64_pstate_sm_body" nounwind {
; CHECK-LABEL: sm_body_sm_compatible_simple:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #-64]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: tbnz w0, #0, .LBB0_2
+; CHECK-NEXT: mrs x8, SVCR
+; CHECK-NEXT: tbnz w8, #0, .LBB0_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB0_2:
-; CHECK-NEXT: tbnz w0, #0, .LBB0_4
+; CHECK-NEXT: tbnz w8, #0, .LBB0_4
; CHECK-NEXT: // %bb.3:
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB0_4:
; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-NEXT: fmov s0, wzr
; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp], #64 // 16-byte Folded Reload
; CHECK-NEXT: ret
ret float zeroinitializer
}
@@ -40,8 +38,7 @@ define void @sm_body_caller_sm_compatible_caller_normal_callee() "aarch64_pstate
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -69,17 +66,15 @@ define void @streaming_body_and_streaming_compatible_interface_multi_basic_block
; CHECK: // %bb.0: // %entry
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB2_2: // %entry
-; CHECK-NEXT: cbz w8, .LBB2_6
+; CHECK-NEXT: cbz w0, .LBB2_6
; CHECK-NEXT: // %bb.3: // %if.else
; CHECK-NEXT: bl streaming_compatible_callee
; CHECK-NEXT: tbnz w19, #0, .LBB2_5
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
index ff4f36363edcf..9088986ee9b72 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-compatible-interface.ll
@@ -41,8 +41,7 @@ define void @streaming_compatible_caller_normal_callee() "aarch64_pstate_sm_comp
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB1_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -77,8 +76,7 @@ define void @streaming_compatible_caller_streaming_callee() "aarch64_pstate_sm_c
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB2_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -134,10 +132,7 @@ define <2 x double> @streaming_compatible_with_neon_vectors(<2 x double> %arg) "
; CHECK-NEXT: add x8, sp, #16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: str z0, [x8] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: add x8, sp, #16
-; CHECK-NEXT: mov x19, x0
-; CHECK-NEXT: ldr z0, [x8] // 16-byte Folded Reload
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT: str q0, [sp] // 16-byte Folded Spill
; CHECK-NEXT: tbz w19, #0, .LBB4_2
@@ -209,8 +204,7 @@ define <vscale x 2 x double> @streaming_compatible_with_scalable_vectors(<vscale
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-2
; CHECK-NEXT: str z0, [sp, #1, mul vl] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB5_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -301,8 +295,7 @@ define <vscale x 2 x i1> @streaming_compatible_with_predicate_vectors(<vscale x
; CHECK-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill
; CHECK-NEXT: addvl sp, sp, #-1
; CHECK-NEXT: str p0, [sp, #7, mul vl] // 2-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB6_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -365,8 +358,7 @@ define i32 @conditional_smstart_unreachable_block() "aarch64_pstate_sm_compatibl
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbnz w19, #0, .LBB7_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstart sm
@@ -381,18 +373,16 @@ define void @conditional_smstart_no_successor_block(i1 %p) "aarch64_pstate_sm_co
; CHECK: // %bb.0:
; CHECK-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: tbz w8, #0, .LBB8_5
+; CHECK-NEXT: mrs x19, SVCR
+; CHECK-NEXT: tbz w0, #0, .LBB8_5
; CHECK-NEXT: // %bb.1: // %if.then
-; CHECK-NEXT: tbnz w0, #0, .LBB8_3
+; CHECK-NEXT: tbnz w19, #0, .LBB8_3
; CHECK-NEXT: // %bb.2: // %if.then
; CHECK-NEXT: smstart sm
; CHECK-NEXT: .LBB8_3: // %if.then
-; CHECK-NEXT: mov x19, x0
; CHECK-NEXT: bl streaming_callee
; CHECK-NEXT: tbnz w19, #0, .LBB8_5
; CHECK-NEXT: // %bb.4: // %if.then
@@ -422,8 +412,7 @@ define void @disable_tailcallopt() "aarch64_pstate_sm_compatible" nounwind {
; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-NEXT: stp x30, x19, [sp, #64] // 16-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB9_2
; CHECK-NEXT: // %bb.1:
; CHECK-NEXT: smstop sm
@@ -469,19 +458,14 @@ define void @call_to_non_streaming_pass_args(ptr nocapture noundef readnone %ptr
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
; CHECK-NEXT: stp d2, d3, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: mov x8, x1
-; CHECK-NEXT: mov x9, x0
; CHECK-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19, SVCR
; CHECK-NEXT: tbz w19, #0, .LBB10_2
; CHECK-NEXT: // %bb.1: // %entry
; CHECK-NEXT: smstop sm
; CHECK-NEXT: .LBB10_2: // %entry
; CHECK-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
-; CHECK-NEXT: mov x0, x9
; CHECK-NEXT: ldp d2, d3, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: mov x1, x8
; CHECK-NEXT: bl bar
; CHECK-NEXT: tbz w19, #0, .LBB10_4
; CHECK-NEXT: // %bb.3: // %entry
diff --git a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
index 991776f11ae40..7be5e6fe29869 100644
--- a/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
+++ b/llvm/test/CodeGen/AArch64/sme-streaming-mode-changes-unwindinfo.ll
@@ -283,8 +283,7 @@ define aarch64_sve_vector_pcs void @streaming_compatible_caller_conditional_mode
; CHECK: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d13 @ cfa - 48 * IncomingVG - 48
; CHECK: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d14 @ cfa - 56 * IncomingVG - 48
; CHECK: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x60, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x50, 0x22 // $d15 @ cfa - 64 * IncomingVG - 48
-; CHECK: bl __arm_sme_state
-; CHECK: mov x19, x0
+; CHECK: mrs x19, SVCR
; CHECK: tbnz w19, #0, .LBB5_2
; CHECK: smstart sm
; CHECK: .LBB5_2:
diff --git a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
index dec8eb0d8a936..c72077bd311b4 100644
--- a/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
+++ b/llvm/test/CodeGen/AArch64/sme-vg-to-stack.ll
@@ -786,8 +786,7 @@ define void @streaming_compatible_to_streaming() #4 {
; CHECK-NEXT: .cfi_offset b13, -80
; CHECK-NEXT: .cfi_offset b14, -88
; CHECK-NEXT: .cfi_offset b15, -96
-; CHECK-NEXT: bl __arm_sme_state
-; CHECK-NEXT: mov x19, x0
+; CHECK-NEXT: mrs x19,...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is there still any test remaining that covers streaming-compatible -> non-streaming without +sme?
Yes, there's at least one aptly named test that covers it (the test name, an inline code span, was lost in extraction).
Calling `__arm_sme_state` does more than we need in most cases and clobbers both x0 and x1. This also removes `(set|get)PStateSMRegUsed`, since we can directly check the virtual register instead.