diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index f110558fcb0d4..7e03b97584fe1 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   }
 
   bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
-  // Assume we can't combine the last pop with the sp restore.
-  bool CombineAfterCSRBump = false;
+
+  unsigned ProloguePopSize = PrologueSaveSize;
   if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+    // With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
+    // that needs to be popped until we reach the start of the SVE save area.
+    // The "FixedObject" stack occurs after the SVE area and must be popped
+    // later.
+    ProloguePopSize -= FixedObject;
     AfterCSRPopSize += FixedObject;
-  } else if (!CombineSPBump && PrologueSaveSize != 0) {
+  }
+
+  // Assume we can't combine the last pop with the sp restore.
+  if (!CombineSPBump && ProloguePopSize != 0) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
     while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
-           AArch64InstrInfo::isSEHInstruction(*Pop))
+           AArch64InstrInfo::isSEHInstruction(*Pop) ||
+           (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
+            isPartOfSVECalleeSaves(Pop)))
       Pop = std::prev(Pop);
     // Converting the last ldp to a post-index ldp is valid only if the last
     // ldp's offset is 0.
@@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() {
     // may clobber), convert it to a post-index ldp.
     if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
       convertCalleeSaveRestoreToSPPrePostIncDec(
-          Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
-          PrologueSaveSize);
+          Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
+          ProloguePopSize);
+    } else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
+      MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
+      if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
+        ++AfterLastPop;
+      // If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
+      // callee-save non-SVE registers to move the stack pointer to the start of
+      // the SVE area.
+      emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
+                      StackOffset::getFixed(ProloguePopSize), TII,
+                      MachineInstr::FrameDestroy, false, NeedsWinCFI,
+                      &HasWinCFI);
     } else {
-      // If not, make sure to emit an add after the last ldp.
+      // Otherwise, make sure to emit an add after the last ldp.
       // We're doing this by transferring the size to be restored from the
       // adjustment *before* the CSR pops to the adjustment *after* the CSR
       // pops.
-      AfterCSRPopSize += PrologueSaveSize;
-      CombineAfterCSRBump = true;
+      AfterCSRPopSize += ProloguePopSize;
     }
   }
-
   // Move past the restores of the callee-saved registers.
   // If we plan on combining the sp bump of the local stack size and the callee
   // save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() {
     --SEHEpilogueStartI;
   }
 
+  // Determine the ranges of SVE callee-saves. This is done before emitting any
+  // code at the end of the epilogue (for Swift async), which can get in the way
+  // of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
+  auto [PPR, ZPR] = getSVEStackFrameSizes();
+  auto [PPRRange, ZPRRange] = partitionSVECS(
+      MBB,
+      SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
+          ? MBB.getFirstTerminator()
+          : FirstGPRRestoreI,
+      PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
+
   if (HasFP && AFI->hasSwiftAsyncContext())
     emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
 
@@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
   NumBytes -= PrologueSaveSize;
   assert(NumBytes >= 0 && "Negative stack allocation size!?");
 
-  auto [PPR, ZPR] = getSVEStackFrameSizes();
-  auto [PPRRange, ZPRRange] = partitionSVECS(
-      MBB,
-      SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
-          ? MBB.getFirstTerminator()
-          : FirstGPRRestoreI,
-      PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
-
   StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
   StackOffset SVEStackSize =
       SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
@@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
                       NeedsWinCFI, &HasWinCFI);
     }
 
-    // Deallocate callee-save non-SVE registers.
-    emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
-    // Deallocate fixed objects.
-    emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
-                    StackOffset::getFixed(FixedObject), TII,
-                    MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
     // Deallocate callee-save SVE registers.
     emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
                     SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
@@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
         MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
         StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
         false, NeedsWinCFI, &HasWinCFI, EmitCFI,
-        StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
+        StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
   }
 }
 
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index 5933c5daa67ed..b8302e64f282d 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -380,10 +380,8 @@ body: |
   ; CHECK-NEXT: frame-destroy SEH_EpilogStart
   ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
   ; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
-  ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
-  ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
-  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-  ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+  ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
+  ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
   ; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
   ; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
   ; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
   ; CHECK-NEXT: frame-destroy SEH_EpilogStart
   ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
   ; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
-  ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
-  ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
-  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-  ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+  ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
+  ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
   ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
   ; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
   ; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
   ; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
   ; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
   ; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
-  ; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
-  ; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
-  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
-  ; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
+  ; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
+  ; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
   ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
   ; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
   ; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
   ; CHECK-NEXT: frame-destroy SEH_EpilogStart
   ; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
   ; CHECK-NEXT: frame-destroy SEH_SetFP
-  ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
-  ; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
-  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-  ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+  ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
+  ; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
   ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
   ; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
   ; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
   ; CHECK-NEXT: frame-destroy SEH_EpilogStart
   ; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
   ; CHECK-NEXT: frame-destroy SEH_AllocZ 7
-  ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
-  ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
-  ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
-  ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+  ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
+  ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
   ; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
   ; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
   ; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)
diff --git a/llvm/test/CodeGen/AArch64/win-sve.ll b/llvm/test/CodeGen/AArch64/win-sve.ll
index 53ac9344175a3..3ba4a1c10a024 100644
--- a/llvm/test/CodeGen/AArch64/win-sve.ll
+++ b/llvm/test/CodeGen/AArch64/win-sve.ll
@@ -75,10 +75,8 @@ define i32 @f( %x) {
 ; CHECK-NEXT: .seh_startepilogue
 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -234,10 +232,8 @@ define void @f2(i64 %n, %x) {
 ; CHECK-NEXT: .seh_save_fplr 16
 ; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -384,10 +380,8 @@ define void @f3(i64 %n, %x) {
 ; CHECK-NEXT: .seh_stackalloc 16
 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -538,10 +532,8 @@ define void @f4(i64 %n, %x) {
 ; CHECK-NEXT: .seh_stackalloc 16
 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -702,10 +694,8 @@ define void @f5(i64 %n, %x) {
 ; CHECK-NEXT: .seh_save_fplr 16
 ; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -860,10 +850,10 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr
 ; CHECK-NEXT: stur x0, [x8, #16]
 ; CHECK-NEXT: addvl x8, x29, #18
 ; CHECK-NEXT: ldr x1, [x8, #32]
-; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .Ltmp0: // EH_LABEL
 ; CHECK-NEXT: add x0, x19, #0
 ; CHECK-NEXT: bl g6
-; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: .Ltmp1: // EH_LABEL
 ; CHECK-NEXT: // %bb.1: // %invoke.cont
 ; CHECK-NEXT: .seh_startepilogue
 ; CHECK-NEXT: add sp, sp, #64
@@ -872,10 +862,8 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr
 ; CHECK-NEXT: .seh_save_fplr 16
 ; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -932,8 +920,6 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr
 ; CHECK-NEXT: .seh_save_preg p14, 10
 ; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload
 ; CHECK-NEXT: .seh_save_preg p15, 11
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
 ; CHECK-NEXT: addvl sp, sp, #18
 ; CHECK-NEXT: .seh_allocz 18
 ; CHECK-NEXT: add sp, sp, #16
@@ -1024,10 +1010,8 @@ define void @f6( %x, [8 x i64] %pad, i64 %n9) personality ptr
 ; CHECK-NEXT: .seh_save_fplr 16
 ; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1144,10 +1128,8 @@ define void @f8( %v) {
 ; CHECK-NEXT: //APP
 ; CHECK-NEXT: //NO_APP
 ; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1196,14 +1178,10 @@ define void @f9( %v, ...) {
 ; CHECK-NEXT: //APP
 ; CHECK-NEXT: //NO_APP
 ; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: .seh_allocz 1
 ; CHECK-NEXT: add sp, sp, #64
@@ -1301,10 +1279,8 @@ define void @f10(i64 %n, %x) "frame-pointer"="all" {
 ; CHECK-NEXT: .seh_stackalloc 16
 ; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
 ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 2
 ; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1390,10 +1366,8 @@ define i32 @f11(double %d, %vs) "aarch64_pstate_sm_compatible
 ; CHECK-NEXT: //NO_APP
 ; CHECK-NEXT: str d0, [sp, #8]
 ; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1431,10 +1405,8 @@ define i32 @f12(double %d, %vs) "aarch64_pstate_sm_compatible
 ; CHECK-NEXT: .seh_startepilogue
 ; CHECK-NEXT: addvl sp, sp, #1
 ; CHECK-NEXT: .seh_allocz 1
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1475,10 +1447,8 @@ define i32 @f13(double %d, %vs) "frame-pointer"="all" {
 ; CHECK-NEXT: .seh_startepilogue
 ; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1521,10 +1491,8 @@ define i32 @f14(double %d, %vs) "frame-pointer"="all" {
 ; CHECK-NEXT: .seh_allocz 1
 ; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1572,10 +1540,8 @@ define tailcc void @f15(double %d, %vs, [9 x i64], i32 %i) {
 ; CHECK-NEXT: .seh_stackalloc 16
 ; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
 ; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
 ; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
 ; CHECK-NEXT: .seh_save_zreg z8, 0
 ; CHECK-NEXT: addvl sp, sp, #1
@@ -1594,3 +1560,53 @@ define tailcc void @f15(double %d, %vs, [9 x i64], i32 %i) {
   store i32 %i, ptr %a
   ret void
 }
+
+declare ptr @llvm.swift.async.context.addr()
+
+define void @f16(ptr swiftasync %ctx, %foo) {
+; CHECK-LABEL: f16:
+; CHECK: .seh_proc f16
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: orr x29, x29, #0x1000000000000000
+; CHECK-NEXT: .seh_nop
+; CHECK-NEXT: addvl sp, sp, #-1
+; CHECK-NEXT: .seh_allocz 1
+; CHECK-NEXT: str z8, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_zreg z8, 0
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_fplr 8
+; CHECK-NEXT: str x22, [sp]
+; CHECK-NEXT: .seh_nop
+; CHECK-NEXT: add x29, sp, #8
+; CHECK-NEXT: .seh_add_fp 8
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: //APP
+; CHECK-NEXT: //NO_APP
+; CHECK-NEXT: ldr x8, [x22]
+; CHECK-NEXT: stur x8, [x29, #-8]
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_fplr 8
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_zreg z8, 0
+; CHECK-NEXT: and x29, x29, #0xefffffffffffffff
+; CHECK-NEXT: .seh_nop
+; CHECK-NEXT: addvl sp, sp, #1
+; CHECK-NEXT: .seh_allocz 1
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
+  tail call void asm sideeffect "", "~{z8}"()
+  %1 = load ptr, ptr %ctx, align 8
+  %2 = tail call ptr @llvm.swift.async.context.addr()
+  store ptr %1, ptr %2, align 8
+  ret void
+}
diff --git a/llvm/test/DebugInfo/COFF/AArch64/codeview-sve.ll b/llvm/test/DebugInfo/COFF/AArch64/codeview-sve.ll
index 446a84dc0294c..ffdc80a350a24 100644
--- a/llvm/test/DebugInfo/COFF/AArch64/codeview-sve.ll
+++ b/llvm/test/DebugInfo/COFF/AArch64/codeview-sve.ll
@@ -101,7 +101,7 @@
 ; CHECK-NEXT: LocalVariableAddrRange {
 ; CHECK-NEXT: OffsetStart: .text+0x0
 ; CHECK-NEXT: ISectStart: 0x0
-; CHECK-NEXT: Range: 0xBC
+; CHECK-NEXT: Range: 0xB8
 ; CHECK-NEXT: }
 ; CHECK-NEXT: }
 ; CHECK-NEXT: ProcEnd {