-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AArch64] Check X16&X17 in prologue if the fn has a SwiftAsyncContext. #73945
[AArch64] Check X16&X17 in prologue if the fn has a SwiftAsyncContext. #73945
Conversation
Simpler initial alternative to #73332 |
@llvm/pr-subscribers-backend-aarch64 Author: Florian Hahn (fhahn) Changes: StoreSwiftAsyncContext clobbers X16 & X17. Make sure they are available in canUseAsPrologue, to avoid shrink wrapping moving the pseudo to a place where X16 or X17 are live. Patch is 20.96 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/73945.diff 2 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index fd47970bd050596..417d699300ec587 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -859,6 +859,16 @@ void AArch64FrameLowering::emitZeroCallUsedRegs(BitVector RegsToZero,
}
}
+static void getLiveRegsForEntryMBB(LivePhysRegs &LiveRegs,
+ const MachineBasicBlock &MBB) {
+ const MachineFunction *MF = MBB.getParent();
+ LiveRegs.addLiveIns(MBB);
+ // Mark callee saved registers as used so we will not choose them.
+ const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
+ for (unsigned i = 0; CSRegs[i]; ++i)
+ LiveRegs.addReg(CSRegs[i]);
+}
+
// Find a scratch register that we can use at the start of the prologue to
// re-align the stack pointer. We avoid using callee-save registers since they
// may appear to be free when this is called from canUseAsPrologue (during
@@ -880,12 +890,7 @@ static unsigned findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB) {
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
LivePhysRegs LiveRegs(TRI);
- LiveRegs.addLiveIns(*MBB);
-
- // Mark callee saved registers as used so we will not choose them.
- const MCPhysReg *CSRegs = MF->getRegInfo().getCalleeSavedRegs();
- for (unsigned i = 0; CSRegs[i]; ++i)
- LiveRegs.addReg(CSRegs[i]);
+ getLiveRegsForEntryMBB(LiveRegs, *MBB);
// Prefer X9 since it was historically used for the prologue scratch reg.
const MachineRegisterInfo &MRI = MF->getRegInfo();
@@ -905,6 +910,19 @@ bool AArch64FrameLowering::canUseAsPrologue(
MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
const AArch64Subtarget &Subtarget = MF->getSubtarget<AArch64Subtarget>();
const AArch64RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const AArch64FunctionInfo *AFI = MF->getInfo<AArch64FunctionInfo>();
+
+ if (AFI->hasSwiftAsyncContext()) {
+ const AArch64RegisterInfo &TRI = *Subtarget.getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ LivePhysRegs LiveRegs(TRI);
+ getLiveRegsForEntryMBB(LiveRegs, MBB);
+ // The StoreSwiftAsyncContext clobbers X16 and X17. Make sure they are
+ // available.
+ if (!LiveRegs.available(MRI, AArch64::X16) ||
+ !LiveRegs.available(MRI, AArch64::X17))
+ return false;
+ }
// Don't need a scratch register if we're not going to re-align the stack.
if (!RegInfo->hasStackRealignment(*MF))
diff --git a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
index 217fb9bbfbeb9c3..a202bfb6bca42fc 100644
--- a/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
+++ b/llvm/test/CodeGen/AArch64/store-swift-async-context-clobber-live-reg.ll
@@ -1,10 +1,64 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: not --crash llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s
-; REQUIRES: asserts
+; RUN: llc -o - -mtriple=arm64e-apple-macosx -aarch64-min-jump-table-entries=2 %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define swifttailcc void @test_async_with_jumptable_x16_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x16_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x9, LJTI0_0@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: add x9, x9, LJTI0_0@PAGEOFF
+; CHECK-NEXT: Ltmp0:
+; CHECK-NEXT: adr x10, Ltmp0
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB0_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB0_3
+; CHECK-NEXT: LBB0_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB0_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI0_0:
+; CHECK-NEXT: .long LBB0_3-Ltmp0
+; CHECK-NEXT: .long LBB0_1-Ltmp0
+; CHECK-NEXT: .long LBB0_1-Ltmp0
+; CHECK-NEXT: .long LBB0_2-Ltmp0
entry:
%x16 = tail call i64 asm "", "={x16}"()
%l = load i64, ptr %src, align 8
@@ -37,6 +91,61 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x17_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x17_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x9, LJTI1_0@PAGE
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: add x9, x9, LJTI1_0@PAGEOFF
+; CHECK-NEXT: Ltmp1:
+; CHECK-NEXT: adr x10, Ltmp1
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB1_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB1_3
+; CHECK-NEXT: LBB1_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB1_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh2, Lloh3
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI1_0:
+; CHECK-NEXT: .long LBB1_3-Ltmp1
+; CHECK-NEXT: .long LBB1_1-Ltmp1
+; CHECK-NEXT: .long LBB1_1-Ltmp1
+; CHECK-NEXT: .long LBB1_2-Ltmp1
entry:
%x17 = tail call i64 asm "", "={x17}"()
%l = load i64, ptr %src, align 8
@@ -69,6 +178,61 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x1_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh4:
+; CHECK-NEXT: adrp x9, LJTI2_0@PAGE
+; CHECK-NEXT: Lloh5:
+; CHECK-NEXT: add x9, x9, LJTI2_0@PAGEOFF
+; CHECK-NEXT: Ltmp2:
+; CHECK-NEXT: adr x10, Ltmp2
+; CHECK-NEXT: ldrsw x11, [x9, x8, lsl #2]
+; CHECK-NEXT: add x10, x10, x11
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x10
+; CHECK-NEXT: LBB2_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB2_3
+; CHECK-NEXT: LBB2_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB2_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh4, Lloh5
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI2_0:
+; CHECK-NEXT: .long LBB2_3-Ltmp2
+; CHECK-NEXT: .long LBB2_1-Ltmp2
+; CHECK-NEXT: .long LBB2_1-Ltmp2
+; CHECK-NEXT: .long LBB2_2-Ltmp2
entry:
%x1 = tail call i64 asm "", "={x1}"()
%l = load i64, ptr %src, align 8
@@ -101,6 +265,65 @@ exit:
}
define swifttailcc void @test_async_with_jumptable_x1_x9_clobbered(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_x1_x9_clobbered:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x8, [x0]
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh6:
+; CHECK-NEXT: adrp x10, LJTI3_0@PAGE
+; CHECK-NEXT: Lloh7:
+; CHECK-NEXT: add x10, x10, LJTI3_0@PAGEOFF
+; CHECK-NEXT: Ltmp3:
+; CHECK-NEXT: adr x11, Ltmp3
+; CHECK-NEXT: ldrsw x12, [x10, x8, lsl #2]
+; CHECK-NEXT: add x11, x11, x12
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x11
+; CHECK-NEXT: LBB3_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB3_3
+; CHECK-NEXT: LBB3_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB3_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh6, Lloh7
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI3_0:
+; CHECK-NEXT: .long LBB3_3-Ltmp3
+; CHECK-NEXT: .long LBB3_1-Ltmp3
+; CHECK-NEXT: .long LBB3_1-Ltmp3
+; CHECK-NEXT: .long LBB3_2-Ltmp3
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x9 = tail call i64 asm "", "={x9}"()
@@ -136,6 +359,117 @@ exit:
; There are 2 available scratch registers left, shrink-wrapping can happen.
define swifttailcc void @test_async_with_jumptable_2_available_regs_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_2_available_regs_left:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: str x19, [sp, #-32]! ; 8-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #8
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #8]
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh8:
+; CHECK-NEXT: adrp x17, LJTI4_0@PAGE
+; CHECK-NEXT: Lloh9:
+; CHECK-NEXT: add x17, x17, LJTI4_0@PAGEOFF
+; CHECK-NEXT: Ltmp4:
+; CHECK-NEXT: adr x0, Ltmp4
+; CHECK-NEXT: ldrsw x19, [x17, x10, lsl #2]
+; CHECK-NEXT: add x0, x0, x19
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x0
+; CHECK-NEXT: LBB4_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB4_3
+; CHECK-NEXT: LBB4_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB4_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: bl _foo
+; CHECK-NEXT: mov x2, x0
+; CHECK-NEXT: mov x0, x19
+; CHECK-NEXT: mov x1, x20
+; CHECK-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x19, [sp], #32 ; 8-byte Folded Reload
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: br x2
+; CHECK-NEXT: .loh AdrpAdd Lloh8, Lloh9
+; CHECK-NEXT: .cfi_endproc
+; CHECK-NEXT: .section __TEXT,__const
+; CHECK-NEXT: .p2align 2, 0x0
+; CHECK-NEXT: LJTI4_0:
+; CHECK-NEXT: .long LBB4_3-Ltmp4
+; CHECK-NEXT: .long LBB4_1-Ltmp4
+; CHECK-NEXT: .long LBB4_1-Ltmp4
+; CHECK-NEXT: .long LBB4_2-Ltmp4
entry:
%x1 = tail call i64 asm "", "={x1}"()
%x2 = tail call i64 asm "", "={x2}"()
@@ -198,6 +532,124 @@ exit:
; There is only 1 available scratch registers left, shrink-wrapping cannot
; happen because StoreSwiftAsyncContext needs 2 free scratch registers.
define swifttailcc void @test_async_with_jumptable_1_available_reg_left(ptr %src, ptr swiftasync %as) #0 {
+; CHECK-LABEL: test_async_with_jumptable_1_available_reg_left:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x21, x19, [sp, #8] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: add x16, sp, #24
+; CHECK-NEXT: movk x16, #49946, lsl #48
+; CHECK-NEXT: mov x17, x22
+; CHECK-NEXT: pacdb x17, x16
+; CHECK-NEXT: str x17, [sp, #24]
+; CHECK-NEXT: add x29, sp, #32
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -32
+; CHECK-NEXT: .cfi_offset w21, -40
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ldr x10, [x0]
+; CHECK-NEXT: mov x20, x22
+; CHECK-NEXT: mov x22, x0
+; CHECK-NEXT: Lloh10:
+; CHECK-NEXT: adrp x0, LJTI5_0@PAGE
+; CHECK-NEXT: Lloh11:
+; CHECK-NEXT: add x0, x0, LJTI5_0@PAGEOFF
+; CHECK-NEXT: Ltmp5:
+; CHECK-NEXT: adr x21, Ltmp5
+; CHECK-NEXT: ldrsw x19, [x0, x10, lsl #2]
+; CHECK-NEXT: add x21, x21, x19
+; CHECK-NEXT: mov x19, x20
+; CHECK-NEXT: br x21
+; CHECK-NEXT: LBB5_1: ; %then.2
+; CHECK-NEXT: mov x19, #0 ; =0x0
+; CHECK-NEXT: b LBB5_3
+; CHECK-NEXT: LBB5_2: ; %then.3
+; CHECK-NEXT: mov x19, x22
+; CHECK-NEXT: LBB5_3: ; %exit
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm End
+; CHECK-NEXT: ; InlineAsm Start
+; CHECK-NEXT: ; InlineAsm ...
[truncated]
|
Think this is reasonable. |
StoreSwiftAsyncContext clobbers X16 & X17. Make sure they are available in canUseAsPrologue, to avoid shrink wrapping moving the pseudo to a place where X16 or X17 are live.
0da857b
to
8e8e24c
Compare
…t. (llvm#73945) StoreSwiftAsyncContext clobbers X16 & X17. Make sure they are available in canUseAsPrologue, to avoid shrink wrapping moving the pseudo to a place where X16 or X17 are live. (cherry-picked from 58dcac3)
…ping The issue was fixed in llvm/llvm-project/pull/73945 rdar://117925937
…ping The issue was fixed in llvm/llvm-project/pull/73945 rdar://117925937
StoreSwiftAsyncContext clobbers X16 & X17. Make sure they are available in canUseAsPrologue, to avoid shrink wrapping moving the pseudo to a place where X16 or X17 are live.