diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index 1ae3709e9588a..b2c46939e5846 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) { SMEAttrs CallerAttrs(*FuncInfo.Fn); - if (CallerAttrs.hasZAState() || - (!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody())) + if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() || + CallerAttrs.hasStreamingCompatibleInterface()) return nullptr; return new AArch64FastISel(FuncInfo, LibInfo); } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index e78d8bb487a93..1c5d1e904b87a 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -532,8 +532,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const { } SMEAttrs Attrs(F); - if (Attrs.hasNewZAInterface() || - (!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody())) + if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() || + Attrs.hasStreamingCompatibleInterface()) return true; return false; diff --git a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll index 46ef8f96320a4..75cf0838e4913 100644 --- a/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll +++ b/llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll @@ -68,57 +68,31 @@ entry: define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" { -; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-FISEL: // %bb.0: // %entry -; CHECK-FISEL-NEXT: sub sp, sp, #96 -; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: 
stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstop sm -; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: bl normal_callee -; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-FISEL-NEXT: smstart sm -; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0 -; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0] -; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: fadd d0, d1, d0 -; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-FISEL-NEXT: add sp, sp, #96 -; CHECK-FISEL-NEXT: ret -; -; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee: -; CHECK-GISEL: // %bb.0: // %entry -; CHECK-GISEL-NEXT: sub sp, sp, #96 -; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill -; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstop sm -; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: bl normal_callee -; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill -; CHECK-GISEL-NEXT: smstart sm -; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 -; CHECK-GISEL-NEXT: fmov d0, x8 -; 
CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: fadd d0, d1, d0 -; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload -; CHECK-GISEL-NEXT: add sp, sp, #96 -; CHECK-GISEL-NEXT: ret +; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee: +; CHECK-COMMON: // %bb.0: // %entry +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: bl normal_callee +; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000 +; CHECK-COMMON-NEXT: fmov d0, x8 +; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: fadd d0, d1, d0 +; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 +; CHECK-COMMON-NEXT: ret entry: %call = call double @normal_callee(double %x) %add = fadd double %call, 4.200000e+01 @@ -358,64 
+332,95 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw ret fp128 %res } -; FIXME: As above this should use Selection DAG to make sure the libcall call is lowered correctly. +; As above this should use Selection DAG to make sure the libcall is lowered correctly. define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind { -; CHECK-FISEL-LABEL: frem_call_za: -; CHECK-FISEL: // %bb.0: -; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill -; CHECK-FISEL-NEXT: mov x29, sp -; CHECK-FISEL-NEXT: sub sp, sp, #16 -; CHECK-FISEL-NEXT: rdsvl x8, #1 -; CHECK-FISEL-NEXT: mov x9, sp -; CHECK-FISEL-NEXT: mul x8, x8, x8 -; CHECK-FISEL-NEXT: sub x9, x9, x8 -; CHECK-FISEL-NEXT: mov sp, x9 -; CHECK-FISEL-NEXT: stur x9, [x29, #-16] -; CHECK-FISEL-NEXT: sub x9, x29, #16 -; CHECK-FISEL-NEXT: sturh w8, [x29, #-8] -; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x9 -; CHECK-FISEL-NEXT: bl fmod -; CHECK-FISEL-NEXT: smstart za -; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0 -; CHECK-FISEL-NEXT: sub x0, x29, #16 -; CHECK-FISEL-NEXT: cbnz x8, .LBB10_2 -; CHECK-FISEL-NEXT: // %bb.1: -; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore -; CHECK-FISEL-NEXT: .LBB10_2: -; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr -; CHECK-FISEL-NEXT: mov sp, x29 -; CHECK-FISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload -; CHECK-FISEL-NEXT: ret -; -; CHECK-GISEL-LABEL: frem_call_za: -; CHECK-GISEL: // %bb.0: -; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill -; CHECK-GISEL-NEXT: bl fmod -; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload -; CHECK-GISEL-NEXT: ret +; CHECK-COMMON-LABEL: frem_call_za: +; CHECK-COMMON: // %bb.0: +; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill +; CHECK-COMMON-NEXT: mov x29, sp +; CHECK-COMMON-NEXT: sub sp, sp, #16 +; CHECK-COMMON-NEXT: rdsvl x8, #1 +; CHECK-COMMON-NEXT: mov x9, sp +; CHECK-COMMON-NEXT: mul x8, x8, x8 +; CHECK-COMMON-NEXT: sub x9, x9, x8 +; CHECK-COMMON-NEXT: mov sp, x9 +; CHECK-COMMON-NEXT: stur x9, [x29, #-16] +; CHECK-COMMON-NEXT: sub x9, x29, #16 +; CHECK-COMMON-NEXT: sturh w8, [x29, #-8] +; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9 +; CHECK-COMMON-NEXT: bl fmod +; CHECK-COMMON-NEXT: smstart za +; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0 +; CHECK-COMMON-NEXT: sub x0, x29, #16 +; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2 +; CHECK-COMMON-NEXT: // %bb.1: +; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore +; CHECK-COMMON-NEXT: .LBB10_2: +; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr +; CHECK-COMMON-NEXT: mov sp, x29 +; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ret %res = frem double %a, %b ret double %res } -; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly. +; As above this should use Selection DAG to make sure the libcall is lowered correctly. define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind { ; CHECK-COMMON-LABEL: frem_call_sm: ; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! 
// 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf -; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload ; CHECK-COMMON-NEXT: ret %res = frem float %a, %b ret float %res } -; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly. +; As above this should use Selection DAG to make sure the libcall is lowered correctly. define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind { ; CHECK-COMMON-LABEL: frem_call_sm_compat: ; CHECK-COMMON: // %bb.0: -; CHECK-COMMON-NEXT: str x30, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK-COMMON-NEXT: sub sp, sp, #96 +; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill +; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill +; CHECK-COMMON-NEXT: bl __arm_sme_state +; CHECK-COMMON-NEXT: and x19, x0, #0x1 +; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2 +; CHECK-COMMON-NEXT: // %bb.1: +; CHECK-COMMON-NEXT: smstop sm +; CHECK-COMMON-NEXT: .LBB12_2: +; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload ; CHECK-COMMON-NEXT: bl fmodf -; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload +; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill +; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4 +; CHECK-COMMON-NEXT: // %bb.3: +; CHECK-COMMON-NEXT: smstart sm +; CHECK-COMMON-NEXT: .LBB12_4: +; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload +; CHECK-COMMON-NEXT: add sp, sp, #96 ; CHECK-COMMON-NEXT: ret %res = frem float %a, %b ret float %res