Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 11 additions & 5 deletions llvm/lib/Target/AArch64/MachineSMEABIPass.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -632,8 +632,8 @@ MachineSMEABI::findStateChangeInsertionPoint(
PhysLiveRegs = Block.PhysLiveRegsAtExit;
}

if (!(PhysLiveRegs & LiveRegs::NZCV))
return {InsertPt, PhysLiveRegs}; // Nothing to do (no live flags).
if (PhysLiveRegs == LiveRegs::None)
return {InsertPt, PhysLiveRegs}; // Nothing to do (no live regs).

// Find the previous state change. We can not move before this point.
MachineBasicBlock::iterator PrevStateChangeI;
Expand All @@ -650,15 +650,21 @@ MachineSMEABI::findStateChangeInsertionPoint(
// Note: LiveUnits will only accurately track X0 and NZCV.
LiveRegUnits LiveUnits(*TRI);
setPhysLiveRegs(LiveUnits, PhysLiveRegs);
auto BestCandidate = std::make_pair(InsertPt, PhysLiveRegs);
for (MachineBasicBlock::iterator I = InsertPt; I != PrevStateChangeI; --I) {
// Don't move before/into a call (which may have a state change before it).
if (I->getOpcode() == TII->getCallFrameDestroyOpcode() || I->isCall())
break;
LiveUnits.stepBackward(*I);
if (LiveUnits.available(AArch64::NZCV))
return {I, getPhysLiveRegs(LiveUnits)};
LiveRegs CurrentPhysLiveRegs = getPhysLiveRegs(LiveUnits);
// Find places where NZCV is available, but keep looking for locations where
// both NZCV and X0 are available, which can avoid some copies.
if (!(CurrentPhysLiveRegs & LiveRegs::NZCV))
BestCandidate = {I, CurrentPhysLiveRegs};
if (CurrentPhysLiveRegs == LiveRegs::None)
break;
}
return {InsertPt, PhysLiveRegs};
return BestCandidate;
}

void MachineSMEABI::insertStateChanges(EmitContext &Context,
Expand Down
4 changes: 1 addition & 3 deletions llvm/test/CodeGen/AArch64/machine-sme-abi-find-insert-pt.mir
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,12 @@ body: |
; CHECK-NEXT: RequiresZASavePseudo
; CHECK-NEXT: BL @clobber, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp, implicit-def $sp
; CHECK-NEXT: ADJCALLSTACKUP 0, 0, implicit-def dead $sp, implicit $sp
; CHECK-NEXT: $x0 = IMPLICIT_DEF
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64 = COPY $x0
; CHECK-NEXT: MSRpstatesvcrImm1 2, 1, implicit-def $nzcv
; CHECK-NEXT: [[MRS:%[0-9]+]]:gpr64 = MRS 56965, implicit-def $nzcv
; CHECK-NEXT: $x0 = ADDXri %stack.0, 0, 0
; CHECK-NEXT: RestoreZAPseudo [[MRS]], $x0, &__arm_tpidr2_restore, csr_aarch64_sme_abi_support_routines_preservemost_from_x0
; CHECK-NEXT: MSR 56965, $xzr
; CHECK-NEXT: $x0 = COPY [[COPY2]]
; CHECK-NEXT: $x0 = IMPLICIT_DEF
; CHECK-NEXT: $nzcv = IMPLICIT_DEF
; CHECK-NEXT: FAKE_USE $x0
; CHECK-NEXT: $zab0 = IMPLICIT_DEF
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/sme-agnostic-za.ll
Original file line number Diff line number Diff line change
Expand Up @@ -67,10 +67,10 @@ define i64 @agnostic_caller_private_za_callee(i64 %v) nounwind "aarch64_za_state
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x1, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov x0, x1
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
Expand Down Expand Up @@ -170,11 +170,11 @@ define i64 @streaming_agnostic_caller_nonstreaming_private_za_callee(i64 %v) nou
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: mov x1, x0
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x20
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov x0, x1
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
Expand Down Expand Up @@ -267,14 +267,14 @@ define i64 @streaming_compatible_agnostic_caller_nonstreaming_private_za_callee(
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: bl private_za_decl
; CHECK-NEWLOWERING-NEXT: mov x1, x0
; CHECK-NEWLOWERING-NEXT: tbz w20, #0, .LBB5_4
; CHECK-NEWLOWERING-NEXT: // %bb.3:
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: .LBB5_4:
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov x0, x1
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: ldp x20, x19, [sp, #80] // 16-byte Folded Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
Expand Down Expand Up @@ -336,10 +336,10 @@ define i64 @test_many_callee_arguments(
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x1, x0
; CHECK-NEWLOWERING-NEXT: mov x0, x19
; CHECK-NEWLOWERING-NEXT: bl __arm_sme_restore
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov x0, x1
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload
; CHECK-NEWLOWERING-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
Expand Down
6 changes: 2 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-dynamic-tls.ll
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ define i32 @load_tls_shared_za() nounwind "aarch64_inout_za" {
; CHECK-NEXT: .tlsdesccall x
; CHECK-NEXT: blr x1
; CHECK-NEXT: mrs x8, TPIDR_EL0
; CHECK-NEXT: ldr w0, [x8, x0]
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ldr w8, [x8, x0]
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #16
Expand Down Expand Up @@ -133,8 +132,7 @@ define i32 @load_tls_streaming_shared_za() nounwind "aarch64_inout_za" "aarch64_
; CHECK-NEXT: blr x1
; CHECK-NEXT: smstart sm
; CHECK-NEXT: mrs x8, TPIDR_EL0
; CHECK-NEXT: ldr w0, [x8, x0]
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: ldr w8, [x8, x0]
; CHECK-NEXT: smstart za
; CHECK-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEXT: sub x0, x29, #80
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/CodeGen/AArch64/sme-lazy-save-call.ll
Original file line number Diff line number Diff line change
Expand Up @@ -621,15 +621,15 @@ define i64 @test_many_callee_arguments(
; CHECK-NEWLOWERING-NEXT: stp x10, x11, [sp, #-16]!
; CHECK-NEWLOWERING-NEXT: bl many_args_private_za_callee
; CHECK-NEWLOWERING-NEXT: add sp, sp, #16
; CHECK-NEWLOWERING-NEXT: mov x8, x0
; CHECK-NEWLOWERING-NEXT: mov x1, x0
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #16
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB9_2
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB9_2
; CHECK-NEWLOWERING-NEXT: // %bb.1:
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB9_2:
; CHECK-NEWLOWERING-NEXT: mov x0, x8
; CHECK-NEWLOWERING-NEXT: mov x0, x1
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: mov sp, x29
; CHECK-NEWLOWERING-NEXT: ldr x19, [sp, #16] // 8-byte Reload
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/AArch64/sve-stack-frame-layout.ll
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ define i32 @csr_d8_allocnxv4i32i32f64(double %d) "aarch64_pstate_sm_compatible"
; CHECK-COMMON-NEXT: ldr x29, [sp, #8] // 8-byte Reload
; CHECK-COMMON-NEXT: ldr d8, [sp], #16 // 8-byte Folded Reload
; CHECK-COMMON-NEXT: ret
; CHECK-COMMON-NE
; CHECK-NE
entry:
%a = alloca <vscale x 4 x i32>
%b = alloca i32
Expand Down Expand Up @@ -626,23 +626,21 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK-NEWLOWERING-NEXT: mov x9, sp
; CHECK-NEWLOWERING-NEXT: msub x9, x8, x8, x9
; CHECK-NEWLOWERING-NEXT: mov sp, x9
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #80
; CHECK-NEWLOWERING-NEXT: mov w20, w0
; CHECK-NEWLOWERING-NEXT: sub x10, x29, #80
; CHECK-NEWLOWERING-NEXT: stp x9, x8, [x29, #-80]
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, x10
; CHECK-NEWLOWERING-NEXT: smstop sm
; CHECK-NEWLOWERING-NEXT: bl other
; CHECK-NEWLOWERING-NEXT: smstart sm
; CHECK-NEWLOWERING-NEXT: mov w0, w20
; CHECK-NEWLOWERING-NEXT: mov w8, w0
; CHECK-NEWLOWERING-NEXT: smstart za
; CHECK-NEWLOWERING-NEXT: mrs x9, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: mrs x8, TPIDR2_EL0
; CHECK-NEWLOWERING-NEXT: sub x0, x29, #80
; CHECK-NEWLOWERING-NEXT: cbnz x9, .LBB8_2
; CHECK-NEWLOWERING-NEXT: cbnz x8, .LBB8_2
; CHECK-NEWLOWERING-NEXT: // %bb.1: // %entry
; CHECK-NEWLOWERING-NEXT: bl __arm_tpidr2_restore
; CHECK-NEWLOWERING-NEXT: .LBB8_2: // %entry
; CHECK-NEWLOWERING-NEXT: mov w0, w8
; CHECK-NEWLOWERING-NEXT: mov w0, w20
; CHECK-NEWLOWERING-NEXT: msr TPIDR2_EL0, xzr
; CHECK-NEWLOWERING-NEXT: sub sp, x29, #64
; CHECK-NEWLOWERING-NEXT: .cfi_def_cfa wsp, 112
Expand Down Expand Up @@ -671,4 +669,4 @@ entry:
tail call void @other()
ret i32 %x
}
declare void @other()
declare void @other()