diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7e03b97584fe1..45b7120112af2 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -370,6 +370,22 @@ SVEFrameSizes AArch64PrologueEpilogueCommon::getSVEStackFrameSizes() const { {ZPRCalleeSavesSize, PPRLocalsSize + ZPRLocalsSize}}; } +SVEStackAllocations AArch64PrologueEpilogueCommon::getSVEStackAllocations( + SVEFrameSizes const &SVE) { + StackOffset AfterZPRs = SVE.ZPR.LocalsSize; + StackOffset BeforePPRs = SVE.ZPR.CalleeSavesSize + SVE.PPR.CalleeSavesSize; + StackOffset AfterPPRs = {}; + if (SVELayout == SVEStackLayout::Split) { + BeforePPRs = SVE.PPR.CalleeSavesSize; + // If there are no ZPR CSRs, place all local allocations after the ZPRs. + if (SVE.ZPR.CalleeSavesSize) + AfterPPRs += SVE.PPR.LocalsSize + SVE.ZPR.CalleeSavesSize; + else + AfterZPRs += SVE.PPR.LocalsSize; // Group allocation of locals. + } + return {BeforePPRs, AfterPPRs, AfterZPRs}; +} + struct SVEPartitions { struct { MachineBasicBlock::iterator Begin, End; @@ -687,16 +703,19 @@ void AArch64PrologueEmitter::emitPrologue() { // All of the remaining stack allocations are for locals. determineLocalsStackSize(NumBytes, PrologueSaveSize); + auto [PPR, ZPR] = getSVEStackFrameSizes(); + SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR}); + MachineBasicBlock::iterator FirstGPRSaveI = PrologueBeginI; if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { + assert(!SVEAllocs.AfterPPRs && + "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord"); // If we're doing SVE saves first, we need to immediately allocate space // for fixed objects, then space for the SVE callee saves. // // Windows unwind requires that the scalable size is a multiple of 16; // that's handled when the callee-saved size is computed. 
- auto SaveSize = - StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) + - StackOffset::getFixed(FixedObject); + auto SaveSize = SVEAllocs.BeforePPRs + StackOffset::getFixed(FixedObject); allocateStackSpace(PrologueBeginI, 0, SaveSize, false, StackOffset{}, /*FollowupAllocs=*/true); NumBytes -= FixedObject; @@ -764,12 +783,11 @@ void AArch64PrologueEmitter::emitPrologue() { if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding); - auto [PPR, ZPR] = getSVEStackFrameSizes(); - StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; StackOffset NonSVELocalsSize = StackOffset::getFixed(NumBytes); + SVEAllocs.AfterZPRs += NonSVELocalsSize; + StackOffset CFAOffset = StackOffset::getFixed(MFI.getStackSize()) - NonSVELocalsSize; - MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI; // Allocate space for the callee saves and PPR locals (if any). if (SVELayout != SVEStackLayout::CalleeSavesAboveFrameRecord) { @@ -780,31 +798,23 @@ void AArch64PrologueEmitter::emitPrologue() { if (EmitAsyncCFI) emitCalleeSavedSVELocations(AfterSVESavesI); - StackOffset AllocateBeforePPRs = SVECalleeSavesSize; - StackOffset AllocateAfterPPRs = PPR.LocalsSize; - if (SVELayout == SVEStackLayout::Split) { - AllocateBeforePPRs = PPR.CalleeSavesSize; - AllocateAfterPPRs = PPR.LocalsSize + ZPR.CalleeSavesSize; - } - allocateStackSpace(PPRRange.Begin, 0, AllocateBeforePPRs, + allocateStackSpace(PPRRange.Begin, 0, SVEAllocs.BeforePPRs, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || AllocateAfterPPRs || - ZPR.LocalsSize || NonSVELocalsSize); - CFAOffset += AllocateBeforePPRs; + MFI.hasVarSizedObjects() || SVEAllocs.AfterPPRs || + SVEAllocs.AfterZPRs); + CFAOffset += SVEAllocs.BeforePPRs; assert(PPRRange.End == ZPRRange.Begin && "Expected ZPR callee saves after PPR locals"); - allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, + 
allocateStackSpace(PPRRange.End, RealignmentPadding, SVEAllocs.AfterPPRs, EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || ZPR.LocalsSize || - NonSVELocalsSize); - CFAOffset += AllocateAfterPPRs; + MFI.hasVarSizedObjects() || SVEAllocs.AfterZPRs); + CFAOffset += SVEAllocs.AfterPPRs; } else { assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); - // Note: With CalleeSavesAboveFrameRecord, the SVE CS have already been - // allocated (and separate PPR locals are not supported, all SVE locals, - // both PPR and ZPR, are within the ZPR locals area). - assert(!PPR.LocalsSize && "Unexpected PPR locals!"); - CFAOffset += SVECalleeSavesSize; + // Note: With CalleeSavesAboveFrameRecord, the SVE CS (BeforePPRs) have + // already been allocated. Separate PPR locals are not supported, so + // AfterPPRs (only set for split SVE) is zero here (asserted above). + CFAOffset += SVEAllocs.BeforePPRs; } // Allocate space for the rest of the frame including ZPR locals. Align the @@ -815,9 +825,9 @@ void AArch64PrologueEmitter::emitPrologue() { // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have the // correct value here, as NumBytes also includes padding bytes, which // shouldn't be counted here. - allocateStackSpace( - AfterSVESavesI, RealignmentPadding, ZPR.LocalsSize + NonSVELocalsSize, - EmitAsyncCFI && !HasFP, CFAOffset, MFI.hasVarSizedObjects()); + allocateStackSpace(AfterSVESavesI, RealignmentPadding, SVEAllocs.AfterZPRs, + EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects()); } // If we need a base pointer, set it up here. 
It's whatever the value of the @@ -1472,27 +1482,26 @@ void AArch64EpilogueEmitter::emitEpilogue() { assert(NumBytes >= 0 && "Negative stack allocation size!?"); StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; - StackOffset SVEStackSize = - SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; + SVEStackAllocations SVEAllocs = getSVEStackAllocations({PPR, ZPR}); MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; - MachineBasicBlock::iterator RestoreEnd = PPRRange.End; // Deallocate the SVE area. if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { - StackOffset SVELocalsSize = ZPR.LocalsSize + PPR.LocalsSize; + assert(!SVEAllocs.AfterPPRs && + "unexpected SVE allocs after PPRs with CalleeSavesAboveFrameRecord"); // If the callee-save area is before FP, restoring the FP implicitly - // deallocates non-callee-save SVE allocations. Otherwise, deallocate them + // deallocates non-callee-save SVE allocations. Otherwise, deallocate them // explicitly. if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) { emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP, - SVELocalsSize, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI); } // Deallocate callee-save SVE registers. 
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, - SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI); + emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, + SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI); } else if (AFI->hasSVEStackSize()) { // If we have stack realignment or variable-sized objects we must use the FP // to restore SVE callee saves (as there is an unknown amount of @@ -1524,46 +1533,33 @@ void AArch64EpilogueEmitter::emitEpilogue() { emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase, -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy); } else if (BaseForSVEDealloc == AArch64::SP) { - auto CFAOffset = - SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); - - if (SVECalleeSavesSize) { - // Deallocate the non-SVE locals first before we can deallocate (and - // restore callee saves) from the SVE area. - auto NonSVELocals = StackOffset::getFixed(NumBytes); - emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, - NonSVELocals, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); - CFAOffset -= NonSVELocals; - NumBytes = 0; - } - - if (ZPR.LocalsSize) { - emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, - ZPR.LocalsSize, TII, MachineInstr::FrameDestroy, false, - NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, CFAOffset); - CFAOffset -= ZPR.LocalsSize; + auto NonSVELocals = StackOffset::getFixed(NumBytes); + auto CFAOffset = NonSVELocals + StackOffset::getFixed(PrologueSaveSize) + + SVEAllocs.totalSize(); + + if (SVECalleeSavesSize || SVELayout == SVEStackLayout::Split) { + // Deallocate non-SVE locals now. This is needed to reach the SVE callee + // saves, but may also allow combining stack hazard bumps for split SVE. 
+ SVEAllocs.AfterZPRs += NonSVELocals; + NumBytes -= NonSVELocals.getFixed(); + } - - StackOffset SVECalleeSavesToDealloc = SVECalleeSavesSize; - if (SVELayout == SVEStackLayout::Split && - (PPR.LocalsSize || ZPR.CalleeSavesSize)) { - assert(PPRRange.Begin == ZPRRange.End && - "Expected PPR restores after ZPR"); - emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, - PPR.LocalsSize + ZPR.CalleeSavesSize, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI, EmitCFI && !HasFP, CFAOffset); - CFAOffset -= PPR.LocalsSize + ZPR.CalleeSavesSize; - SVECalleeSavesToDealloc -= ZPR.CalleeSavesSize; - } - - // If split SVE is on, this dealloc PPRs, otherwise, deallocs ZPRs + PPRs: - if (SVECalleeSavesToDealloc) - emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, - SVECalleeSavesToDealloc, TII, - MachineInstr::FrameDestroy, false, NeedsWinCFI, - &HasWinCFI, EmitCFI && !HasFP, CFAOffset); + // To deallocate the SVE stack, adjust by the allocations in reverse. 
+ emitFrameOffset(MBB, ZPRRange.Begin, DL, AArch64::SP, AArch64::SP, + SVEAllocs.AfterZPRs, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, + CFAOffset); + CFAOffset -= SVEAllocs.AfterZPRs; + assert(PPRRange.Begin == ZPRRange.End && + "Expected PPR restores after ZPR"); + emitFrameOffset(MBB, PPRRange.Begin, DL, AArch64::SP, AArch64::SP, + SVEAllocs.AfterPPRs, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, + CFAOffset); + CFAOffset -= SVEAllocs.AfterPPRs; + emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, + SVEAllocs.BeforePPRs, TII, MachineInstr::FrameDestroy, + false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP, + CFAOffset); } if (EmitCFI) diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index bccaddaad9eec..6e0e28324a0ac 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -33,6 +33,11 @@ struct SVEFrameSizes { } PPR, ZPR; }; +struct SVEStackAllocations { + StackOffset BeforePPRs, AfterPPRs, AfterZPRs; + StackOffset totalSize() const { return BeforePPRs + AfterPPRs + AfterZPRs; } +}; + class AArch64PrologueEpilogueCommon { public: AArch64PrologueEpilogueCommon(MachineFunction &MF, MachineBasicBlock &MBB, @@ -66,6 +71,7 @@ class AArch64PrologueEpilogueCommon { bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const; SVEFrameSizes getSVEStackFrameSizes() const; + SVEStackAllocations getSVEStackAllocations(SVEFrameSizes const &); MachineFunction &MF; MachineBasicBlock &MBB; diff --git a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir index 35eafe8b7d99c..f535e0fe8b387 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir @@ -68,13 +68,9 @@ # CHECK: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, 
-16 :: (store (s64) into %stack.4) # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 # # CHECK-NEXT: $x8 = ADDXri $sp, 1040, 0 @@ -83,14 +79,10 @@ # CHECK-NEXT: $x8 = ADDXri $sp, 2064, 0 # CHECK-NEXT: STR_PXI $p0, killed $x8, 18 :: (store () into %stack.1) # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg +# 
CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.4) # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 @@ -100,38 +92,26 @@ # ASM: str x29, [sp, #-16]! # ASM-NEXT: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM-NEXT: sub sp, sp, #1024 -# ASM-NEXT: .cfi_def_cfa_offset 1040 -# ASM-NEXT: addvl sp, sp, #-1 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG -# ASM-NEXT: sub sp, sp, #1040 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT: addvl sp, sp, #-2 +# ASM-NEXT: sub sp, sp, #2064 +# ASM-NEXT: .cfi_def_cfa_offset 2080 +# ASM-NEXT: addvl sp, sp, #-3 # ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG # -# ASM: addvl sp, sp, #2 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT: add sp, sp, #1024 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG -# ASM-NEXT: addvl sp, sp, #1 -# ASM-NEXT: .cfi_def_cfa wsp, 1056 -# ASM-NEXT: add sp, sp, #1040 -# ASM-NEXT: .cfi_def_cfa_offset 16 +# ASM: add sp, sp, #2064 +# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM-NEXT: addvl sp, sp, #3 +# ASM-NEXT: .cfi_def_cfa wsp, 16 # ASM-NEXT: ldr x29, [sp], #16 # ASM-NEXT: .cfi_def_cfa_offset 0 # ASM-NEXT: .cfi_restore w29 # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +1040 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 
+2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +2080 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa: reg31 +1056 -# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 # UNWINDINFO: DW_CFA_def_cfa_offset: +0 # UNWINDINFO-NEXT: DW_CFA_restore: reg29 @@ -270,13 +250,9 @@ body: | # CHECK-NEXT: early-clobber $sp = frame-setup STRXpre killed $fp, $sp, -16 :: (store (s64) into %stack.5) # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 2080 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg # CHECK-NEXT: frame-setup CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 # # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 1040, 0 @@ -286,14 +262,10 @@ body: | # CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 2064, 0 # 
CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 23 # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1056 -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: early-clobber $sp, $fp = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.5) # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 @@ -303,38 +275,27 @@ body: | # ASM: str x29, [sp, #-16]! 
# ASM-NEXT: .cfi_def_cfa_offset 16 # ASM-NEXT: .cfi_offset w29, -16 -# ASM-NEXT: sub sp, sp, #1024 -# ASM-NEXT: .cfi_def_cfa_offset 1040 -# ASM-NEXT: addvl sp, sp, #-1 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1040 + 8 * VG -# ASM-NEXT: sub sp, sp, #1040 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT: addvl sp, sp, #-2 +# ASM-NEXT: sub sp, sp, #2064 +# ASM-NEXT: .cfi_def_cfa_offset 2080 +# ASM-NEXT: addvl sp, sp, #-3 # ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 2080 + 24 * VG # -# ASM: addvl sp, sp, #2 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -# ASM-NEXT: add sp, sp, #1024 -# ASM-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x08, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 1056 + 8 * VG -# ASM-NEXT: addvl sp, sp, #1 -# ASM-NEXT: .cfi_def_cfa wsp, 1056 -# ASM-NEXT: add sp, sp, #1040 -# ASM-NEXT: .cfi_def_cfa_offset 16 +# ASM: add sp, sp, #2064 +# ASM-NEXT: .cfi_escape 0x0f, 0x08, 0x8f, 0x10, 0x92, 0x2e, 0x00, 0x48, 0x1e, 0x22 // sp + 16 + 24 * VG +# ASM-NEXT: addvl sp, sp, #3 +# ASM-NEXT: .cfi_def_cfa wsp, 16 # ASM-NEXT: ldr x29, [sp], #16 # ASM-NEXT: .cfi_def_cfa_offset 0 # ASM-NEXT: .cfi_restore w29 +# ASM-NEXT: ret # UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 -# UNWINDINFO: DW_CFA_def_cfa_offset: +1040 -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1040, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa_offset: +2080 # UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus # -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +2080, DW_OP_bregx 0x2e +0, 
DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +1056, DW_OP_bregx 0x2e +0, DW_OP_lit8, DW_OP_mul, DW_OP_plus -# UNWINDINFO: DW_CFA_def_cfa: reg31 +1056 -# UNWINDINFO: DW_CFA_def_cfa_offset: +16 +# UNWINDINFO: DW_CFA_def_cfa_expression: DW_OP_breg31 +16, DW_OP_bregx 0x2e +0, DW_OP_lit24, DW_OP_mul, DW_OP_plus +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 # UNWINDINFO: DW_CFA_def_cfa_offset: +0 # UNWINDINFO-NEXT: DW_CFA_restore: reg29 @@ -385,10 +346,8 @@ body: | # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg -# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2, implicit $vg +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 2064, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3, implicit $vg # # CHECK-NEXT: $[[TMP:x[0-9]+]] = SUBXri $fp, 1024, 0 # CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], -2 @@ -396,10 +355,8 @@ body: | # CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], -3 # CHECK-NEXT: STR_PXI $p0, $fp, -1 # -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2, implicit $vg -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1024, 0 -# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1, implicit $vg -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 +# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 2064, 0 +# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3, implicit $vg # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 # CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.6), (load (s64) from %stack.5) # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 @@ -414,15 +371,11 @@ body: | # ASM-NEXT: .cfi_def_cfa w29, 16 # ASM-NEXT: .cfi_offset w30, -8 # ASM-NEXT: .cfi_offset w29, -16 -# 
ASM-NEXT: sub sp, sp, #1024 -# ASM-NEXT: addvl sp, sp, #-1 -# ASM-NEXT: sub sp, sp, #1040 -# ASM-NEXT: addvl sp, sp, #-2 +# ASM-NEXT: sub sp, sp, #2064 +# ASM-NEXT: addvl sp, sp, #-3 # -# ASM: addvl sp, sp, #2 -# ASM-NEXT: add sp, sp, #1024 -# ASM-NEXT: addvl sp, sp, #1 -# ASM-NEXT: add sp, sp, #1040 +# ASM: add sp, sp, #2064 +# ASM-NEXT: addvl sp, sp, #3 # ASM-NEXT: .cfi_def_cfa wsp, 16 # ASM-NEXT: ldp x29, x30, [sp], #16 # ASM-NEXT: .cfi_def_cfa_offset 0 diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll index 690a39d12e6f1..c13dd33865c37 100644 --- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll @@ -19,20 +19,16 @@ define void @zpr_and_ppr_local( %pred, %vec ; CHECK-LABEL: zpr_and_ppr_local: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: add x8, sp, #2048 ; CHECK-NEXT: str p0, [x8, #15, mul vl] ; CHECK-NEXT: add x8, sp, #1024 ; CHECK-NEXT: str z0, [x8] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca @@ -62,20 +58,16 @@ define void @zpr_and_ppr_local_fp( %pred, % ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #-2 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: sub x8, x29, #1024 ; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str z0, [x8, #-2, mul vl] -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 -; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2048 +; CHECK-NEXT: addvl sp, sp, #2 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca @@ -103,17 +95,15 @@ define void @fpr_and_ppr_local( %pred, double %double) "aarch6 ; CHECK-LABEL: fpr_and_ppr_local: ; CHECK: // %bb.0: ; CHECK-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill -; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1040 ; CHECK-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: add x8, sp, #2064 ; CHECK-NEXT: str p0, [x8, #7, mul vl] ; CHECK-NEXT: str d0, [sp, #1032] -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1040 ; CHECK-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca @@ -144,17 +134,15 @@ define void @fpr_and_ppr_local_fp( %pred, double %double) "aar ; CHECK: // %bb.0: ; CHECK-NEXT: stp x29, x30, [sp, #-16]! 
// 16-byte Folded Spill ; CHECK-NEXT: mov x29, sp -; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: sub sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #-1 -; CHECK-NEXT: sub sp, sp, #1040 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str d0, [sp, #1032] -; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: add sp, sp, #2064 ; CHECK-NEXT: addvl sp, sp, #1 -; CHECK-NEXT: add sp, sp, #1040 ; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca @@ -793,11 +781,8 @@ define void @zpr_and_ppr_local_stack_probing( %pred, %pred,