Skip to content

Commit

Permalink
[AArch64] Disable GlobalISel/FastISel for more SME functions
Browse files Browse the repository at this point in the history
The patch D136361 disabled GlobalISel and FastISel for some SME functions, as
the saving and restoring of SM is not yet handled. Several tests were added
for fp128 fadd, which is expanded to a libcall, but these only happened to
work by accident, and other cases such as f32/f64 frem libcalls were not
handled.

This extends the cases where GlobalISel / FastISel is disabled for functions
with SME attributes, under the assumption that it is difficult to tell reliably
what will become a libcall, and so we should fall back for all such functions
until GlobalISel and/or FastISel can handle them.

Differential Revision: https://reviews.llvm.org/D158490
  • Loading branch information
davemgreen committed Aug 22, 2023
1 parent 08f034f commit 13c2514
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 95 deletions.
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/AArch64FastISel.cpp
Expand Up @@ -5187,8 +5187,8 @@ FastISel *AArch64::createFastISel(FunctionLoweringInfo &FuncInfo,
const TargetLibraryInfo *LibInfo) {

SMEAttrs CallerAttrs(*FuncInfo.Fn);
if (CallerAttrs.hasZAState() ||
(!CallerAttrs.hasStreamingInterface() && CallerAttrs.hasStreamingBody()))
if (CallerAttrs.hasZAState() || CallerAttrs.hasStreamingInterfaceOrBody() ||
CallerAttrs.hasStreamingCompatibleInterface())
return nullptr;
return new AArch64FastISel(FuncInfo, LibInfo);
}
4 changes: 2 additions & 2 deletions llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Expand Up @@ -532,8 +532,8 @@ bool AArch64CallLowering::fallBackToDAGISel(const MachineFunction &MF) const {
}

SMEAttrs Attrs(F);
if (Attrs.hasNewZAInterface() ||
(!Attrs.hasStreamingInterface() && Attrs.hasStreamingBody()))
if (Attrs.hasZAState() || Attrs.hasStreamingInterfaceOrBody() ||
Attrs.hasStreamingCompatibleInterface())
return true;

return false;
Expand Down
187 changes: 96 additions & 91 deletions llvm/test/CodeGen/AArch64/sme-disable-gisel-fisel.ll
Expand Up @@ -68,57 +68,31 @@ entry:


define double @streaming_caller_nonstreaming_callee(double %x) nounwind noinline optnone "aarch64_pstate_sm_enabled" {
; CHECK-FISEL-LABEL: streaming_caller_nonstreaming_callee:
; CHECK-FISEL: // %bb.0: // %entry
; CHECK-FISEL-NEXT: sub sp, sp, #96
; CHECK-FISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-FISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstop sm
; CHECK-FISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: bl normal_callee
; CHECK-FISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
; CHECK-FISEL-NEXT: smstart sm
; CHECK-FISEL-NEXT: adrp x8, .LCPI1_0
; CHECK-FISEL-NEXT: ldr d0, [x8, :lo12:.LCPI1_0]
; CHECK-FISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: fadd d0, d1, d0
; CHECK-FISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-FISEL-NEXT: add sp, sp, #96
; CHECK-FISEL-NEXT: ret
;
; CHECK-GISEL-LABEL: streaming_caller_nonstreaming_callee:
; CHECK-GISEL: // %bb.0: // %entry
; CHECK-GISEL-NEXT: sub sp, sp, #96
; CHECK-GISEL-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-GISEL-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstop sm
; CHECK-GISEL-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: bl normal_callee
; CHECK-GISEL-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
; CHECK-GISEL-NEXT: smstart sm
; CHECK-GISEL-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-GISEL-NEXT: fmov d0, x8
; CHECK-GISEL-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: fadd d0, d1, d0
; CHECK-GISEL-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-GISEL-NEXT: add sp, sp, #96
; CHECK-GISEL-NEXT: ret
; CHECK-COMMON-LABEL: streaming_caller_nonstreaming_callee:
; CHECK-COMMON: // %bb.0: // %entry
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: str d0, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldr d0, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl normal_callee
; CHECK-COMMON-NEXT: str d0, [sp, #88] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: mov x8, #4631107791820423168 // =0x4045000000000000
; CHECK-COMMON-NEXT: fmov d0, x8
; CHECK-COMMON-NEXT: ldr d1, [sp, #88] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: fadd d0, d1, d0
; CHECK-COMMON-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #96
; CHECK-COMMON-NEXT: ret
entry:
%call = call double @normal_callee(double %x)
%add = fadd double %call, 4.200000e+01
Expand Down Expand Up @@ -358,64 +332,95 @@ define fp128 @f128_call_sm(fp128 %a, fp128 %b) "aarch64_pstate_sm_enabled" nounw
ret fp128 %res
}

; FIXME: As above this should use Selection DAG to make sure the libcall call is lowered correctly.
; As above this should use Selection DAG to make sure the libcall is lowered correctly.
define double @frem_call_za(double %a, double %b) "aarch64_pstate_za_shared" nounwind {
; CHECK-FISEL-LABEL: frem_call_za:
; CHECK-FISEL: // %bb.0:
; CHECK-FISEL-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-FISEL-NEXT: mov x29, sp
; CHECK-FISEL-NEXT: sub sp, sp, #16
; CHECK-FISEL-NEXT: rdsvl x8, #1
; CHECK-FISEL-NEXT: mov x9, sp
; CHECK-FISEL-NEXT: mul x8, x8, x8
; CHECK-FISEL-NEXT: sub x9, x9, x8
; CHECK-FISEL-NEXT: mov sp, x9
; CHECK-FISEL-NEXT: stur x9, [x29, #-16]
; CHECK-FISEL-NEXT: sub x9, x29, #16
; CHECK-FISEL-NEXT: sturh w8, [x29, #-8]
; CHECK-FISEL-NEXT: msr TPIDR2_EL0, x9
; CHECK-FISEL-NEXT: bl fmod
; CHECK-FISEL-NEXT: smstart za
; CHECK-FISEL-NEXT: mrs x8, TPIDR2_EL0
; CHECK-FISEL-NEXT: sub x0, x29, #16
; CHECK-FISEL-NEXT: cbnz x8, .LBB10_2
; CHECK-FISEL-NEXT: // %bb.1:
; CHECK-FISEL-NEXT: bl __arm_tpidr2_restore
; CHECK-FISEL-NEXT: .LBB10_2:
; CHECK-FISEL-NEXT: msr TPIDR2_EL0, xzr
; CHECK-FISEL-NEXT: mov sp, x29
; CHECK-FISEL-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-FISEL-NEXT: ret
;
; CHECK-GISEL-LABEL: frem_call_za:
; CHECK-GISEL: // %bb.0:
; CHECK-GISEL-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-GISEL-NEXT: bl fmod
; CHECK-GISEL-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-GISEL-NEXT: ret
; CHECK-COMMON-LABEL: frem_call_za:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: mov x29, sp
; CHECK-COMMON-NEXT: sub sp, sp, #16
; CHECK-COMMON-NEXT: rdsvl x8, #1
; CHECK-COMMON-NEXT: mov x9, sp
; CHECK-COMMON-NEXT: mul x8, x8, x8
; CHECK-COMMON-NEXT: sub x9, x9, x8
; CHECK-COMMON-NEXT: mov sp, x9
; CHECK-COMMON-NEXT: stur x9, [x29, #-16]
; CHECK-COMMON-NEXT: sub x9, x29, #16
; CHECK-COMMON-NEXT: sturh w8, [x29, #-8]
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, x9
; CHECK-COMMON-NEXT: bl fmod
; CHECK-COMMON-NEXT: smstart za
; CHECK-COMMON-NEXT: mrs x8, TPIDR2_EL0
; CHECK-COMMON-NEXT: sub x0, x29, #16
; CHECK-COMMON-NEXT: cbnz x8, .LBB10_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: bl __arm_tpidr2_restore
; CHECK-COMMON-NEXT: .LBB10_2:
; CHECK-COMMON-NEXT: msr TPIDR2_EL0, xzr
; CHECK-COMMON-NEXT: mov sp, x29
; CHECK-COMMON-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = frem double %a, %b
ret double %res
}

; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
; As above this should use Selection DAG to make sure the libcall is lowered correctly.
define float @frem_call_sm(float %a, float %b) "aarch64_pstate_sm_enabled" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #-80]! // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #72] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #72] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: str s0, [sp, #76] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr s0, [sp, #76] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp], #80 // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ret
%res = frem float %a, %b
ret float %res
}

; FIXME: As above this should use Selection DAG to make sure the libcall is lowered correctly.
; As above this should use Selection DAG to make sure the libcall is lowered correctly.
define float @frem_call_sm_compat(float %a, float %b) "aarch64_pstate_sm_compatible" nounwind {
; CHECK-COMMON-LABEL: frem_call_sm_compat:
; CHECK-COMMON: // %bb.0:
; CHECK-COMMON-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-COMMON-NEXT: sub sp, sp, #96
; CHECK-COMMON-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp x30, x19, [sp, #80] // 16-byte Folded Spill
; CHECK-COMMON-NEXT: stp s0, s1, [sp, #8] // 8-byte Folded Spill
; CHECK-COMMON-NEXT: bl __arm_sme_state
; CHECK-COMMON-NEXT: and x19, x0, #0x1
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_2
; CHECK-COMMON-NEXT: // %bb.1:
; CHECK-COMMON-NEXT: smstop sm
; CHECK-COMMON-NEXT: .LBB12_2:
; CHECK-COMMON-NEXT: ldp s0, s1, [sp, #8] // 8-byte Folded Reload
; CHECK-COMMON-NEXT: bl fmodf
; CHECK-COMMON-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: str s0, [sp, #12] // 4-byte Folded Spill
; CHECK-COMMON-NEXT: tbz x19, #0, .LBB12_4
; CHECK-COMMON-NEXT: // %bb.3:
; CHECK-COMMON-NEXT: smstart sm
; CHECK-COMMON-NEXT: .LBB12_4:
; CHECK-COMMON-NEXT: ldp x30, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldr s0, [sp, #12] // 4-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
; CHECK-COMMON-NEXT: add sp, sp, #96
; CHECK-COMMON-NEXT: ret
%res = frem float %a, %b
ret float %res
Expand Down

0 comments on commit 13c2514

Please sign in to comment.