diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index c76689f47d91c..115d7b1b6de8f 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2373,13 +2373,6 @@ void AArch64FrameLowering::determineStackHazardSlot( return; } - const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); - if (MFI.hasVarSizedObjects() || TRI->hasStackRealignment(MF)) { - LLVM_DEBUG(dbgs() << "SplitSVEObjects is not supported with variable " - "sized objects or realignment\n"); - return; - } - // If another calling convention is explicitly set FPRs can't be promoted to // ZPR callee-saves. if (!is_contained({CallingConv::C, CallingConv::Fast, @@ -2395,6 +2388,7 @@ void AArch64FrameLowering::determineStackHazardSlot( assert(Subtarget.isSVEorStreamingSVEAvailable() && "Expected SVE to be available for PPRs"); + const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo(); // With SplitSVEObjects the CS hazard padding is placed between the // PPRs and ZPRs. If there are any FPR CS there would be a hazard between // them and the CS GRPs. Avoid this by promoting all FPR CS to ZPRs. diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp index 7e03b97584fe1..8a06f1aa826e7 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp @@ -793,10 +793,9 @@ void AArch64PrologueEmitter::emitPrologue() { CFAOffset += AllocateBeforePPRs; assert(PPRRange.End == ZPRRange.Begin && "Expected ZPR callee saves after PPR locals"); - allocateStackSpace(PPRRange.End, RealignmentPadding, AllocateAfterPPRs, - EmitAsyncCFI && !HasFP, CFAOffset, - MFI.hasVarSizedObjects() || ZPR.LocalsSize || - NonSVELocalsSize); + allocateStackSpace( + PPRRange.End, 0, AllocateAfterPPRs, EmitAsyncCFI && !HasFP, CFAOffset, + MFI.hasVarSizedObjects() || ZPR.LocalsSize || NonSVELocalsSize); CFAOffset += AllocateAfterPPRs; } else { assert(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord); @@ -1308,6 +1307,26 @@ AArch64EpilogueEmitter::AArch64EpilogueEmitter(MachineFunction &MF, SEHEpilogueStartI = MBB.end(); } +void AArch64EpilogueEmitter::moveSPBelowFP(MachineBasicBlock::iterator MBBI, + StackOffset Offset) { + // Other combinations could be supported, but are not currently needed. + assert(Offset.getScalable() < 0 && Offset.getFixed() <= 0 && + "expected negative offset (with optional fixed portion)"); + Register Base = AArch64::FP; + if (int64_t FixedOffset = Offset.getFixed()) { + // If we have a negative fixed offset, we need to first subtract it in a + // temporary register first (to avoid briefly deallocating the scalable + // portion of the offset). + Base = MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass); + emitFrameOffset(MBB, MBBI, DL, Base, AArch64::FP, + StackOffset::getFixed(FixedOffset), TII, + MachineInstr::FrameDestroy); + } + emitFrameOffset(MBB, MBBI, DL, AArch64::SP, Base, + StackOffset::getScalable(Offset.getScalable()), TII, + MachineInstr::FrameDestroy); +} + void AArch64EpilogueEmitter::emitEpilogue() { MachineBasicBlock::iterator EpilogueEndI = MBB.getLastNonDebugInstr(); if (MBB.end() != EpilogueEndI) { @@ -1408,6 +1427,7 @@ void AArch64EpilogueEmitter::emitEpilogue() { AfterCSRPopSize += ProloguePopSize; } } + // Move past the restores of the callee-saved registers. // If we plan on combining the sp bump of the local stack size and the callee // save stack size, we might need to adjust the CSR save and restore offsets. @@ -1474,8 +1494,6 @@ void AArch64EpilogueEmitter::emitEpilogue() { StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize; StackOffset SVEStackSize = SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize; - MachineBasicBlock::iterator RestoreBegin = ZPRRange.Begin; - MachineBasicBlock::iterator RestoreEnd = PPRRange.End; // Deallocate the SVE area. if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) { @@ -1490,7 +1508,7 @@ void AArch64EpilogueEmitter::emitEpilogue() { } // Deallocate callee-save SVE registers. - emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP, + emitFrameOffset(MBB, PPRRange.End, DL, AArch64::SP, AArch64::SP, SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI); } else if (AFI->hasSVEStackSize()) { @@ -1501,28 +1519,26 @@ void AArch64EpilogueEmitter::emitEpilogue() { (AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP : AArch64::SP; if (SVECalleeSavesSize && BaseForSVEDealloc == AArch64::FP) { - // TODO: Support stack realigment and variable-sized objects. - assert( - SVELayout != SVEStackLayout::Split && - "unexpected stack realignment or variable sized objects with split " - "SVE stack objects"); - - Register CalleeSaveBase = AArch64::FP; - if (int64_t CalleeSaveBaseOffset = - AFI->getCalleeSaveBaseToFrameRecordOffset()) { - // If we have have an non-zero offset to the non-SVE CS base we need to - // compute the base address by subtracting the offest in a temporary - // register first (to avoid briefly deallocating the SVE CS). - CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister( - &AArch64::GPR64RegClass); - emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP, - StackOffset::getFixed(-CalleeSaveBaseOffset), TII, + // The offset from the frame-pointer to the start of the ZPR saves. + StackOffset FPOffsetZPR = + -SVECalleeSavesSize - + StackOffset::getFixed(AFI->getCalleeSaveBaseToFrameRecordOffset()); + + // With split SVE, the PPR locals are above the ZPR callee-saves. + if (ZPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) + FPOffsetZPR -= PPR.LocalsSize; + + // Deallocate the stack space space by moving the SP to the start of the + // ZPR/PPR callee-save area. + moveSPBelowFP(ZPRRange.Begin, FPOffsetZPR); + + if (PPR.CalleeSavesSize && SVELayout == SVEStackLayout::Split) { + // Move to the start of the PPR area (this offset may be zero). + StackOffset FPOffsetPPR = -PPR.CalleeSavesSize; + emitFrameOffset(MBB, ZPRRange.End, DL, AArch64::SP, AArch64::SP, + FPOffsetPPR - FPOffsetZPR, TII, MachineInstr::FrameDestroy); } - // The code below will deallocate the stack space space by moving the SP - // to the start of the SVE callee-save area. - emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase, - -SVECalleeSavesSize, TII, MachineInstr::FrameDestroy); } else if (BaseForSVEDealloc == AArch64::SP) { auto CFAOffset = SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize); diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h index bccaddaad9eec..029e607253b7f 100644 --- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h +++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.h @@ -174,6 +174,10 @@ class AArch64EpilogueEmitter final : public AArch64PrologueEpilogueCommon { private: bool shouldCombineCSRLocalStackBump(uint64_t StackBumpBytes) const; + /// A helper for moving the SP to a negative offset from the FP, without + /// deallocating any stack in the range FP to FP + Offset. + void moveSPBelowFP(MachineBasicBlock::iterator MBBI, StackOffset Offset); + void emitSwiftAsyncContextFramePointer(MachineBasicBlock::iterator MBBI, const DebugLoc &DL) const; diff --git a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir index 35eafe8b7d99c..318710ccc80a0 100644 --- a/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir +++ b/llvm/test/CodeGen/AArch64/framelayout-split-sve.mir @@ -182,63 +182,56 @@ body: | RET_ReallyLR # CHECK-LABEL: name: test_allocate_split_sve_realigned -# CHECK: stackSize: 2080 +# CHECK: stackSize: 1056 # CHECK: bb.0.entry: # CHECK: liveins: $z0, $p0, $lr -# CHECK: $sp = frame-setup SUBXri $sp, 1040, 0 -# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 1040 -# CHECK-NEXT: frame-setup STRXui killed $fp, $sp, 128 :: (store (s64) into %stack.5) -# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 129 :: (store (s64) into %stack.4) -# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 1024, 0 +# CHECK: early-clobber $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2 :: (store (s64) into %stack.5), (store (s64) into %stack.4) +# CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa_offset 16 +# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0 # CHECK-NEXT: frame-setup CFI_INSTRUCTION def_cfa $w29, 16 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w30, -8 # CHECK-NEXT: frame-setup CFI_INSTRUCTION offset $w29, -16 -# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0 +# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 1024, 0 +# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1, implicit $vg +# CHECK-NEXT: $[[TMP:x[0-9]+]] = frame-setup SUBXri $sp, 1040, 0 # CHECK-NEXT: $[[TMP]] = frame-setup ADDVL_XXI $[[TMP]], -2, implicit $vg -# CHECK-NEXT: $sp = frame-setup ANDXri killed $x9, 7930 +# CHECK-NEXT: $sp = frame-setup ANDXri killed $[[TMP]], 7930 # # CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0 # CHECK-NEXT: $x8 = ADDPL_XXI $x8, -1, implicit $vg -# CHECK-NEXT: STR_ZXI $z0, killed $x8, -1 :: (store () into %stack.0) -# CHECK-NEXT: $x8 = SUBXri $fp, 1024, 0 -# CHECK-NEXT: STR_PXI $p0, killed $x8, -15 :: (store () into %stack.1) +# CHECK-NEXT: STR_ZXI $z0, killed $x8, -2 :: (store () into %stack.0) +# CHECK-NEXT: STR_PXI $p0, $fp, -6 :: (store () into %stack.1) # -# CHECK-NEXT: $sp = frame-destroy SUBXri $fp, 1024, 0 -# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 1040 -# CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 129 :: (load (s64) from %stack.4) -# CHECK-NEXT: $fp = frame-destroy LDRXui $sp, 128 :: (load (s64) from %stack.5) -# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 1040, 0 +# CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0 +# CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa $wsp, 16 +# CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.5), (load (s64) from %stack.4) # CHECK-NEXT: frame-destroy CFI_INSTRUCTION def_cfa_offset 0 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w30 # CHECK-NEXT: frame-destroy CFI_INSTRUCTION restore $w29 # CHECK-NEXT: RET_ReallyLR # ASM-LABEL: test_allocate_split_sve_realigned -# ASM: sub sp, sp, #1040 -# ASM-NEXT: .cfi_def_cfa_offset 1040 -# ASM-NEXT: str x29, [sp, #1024] -# ASM-NEXT: str x30, [sp, #1032] -# ASM-NEXT: add x29, sp, #1024 +# ASM: stp x29, x30, [sp, #-16]! +# ASM-NEXT: .cfi_def_cfa_offset 16 +# ASM-NEXT: mov x29, sp # ASM-NEXT: .cfi_def_cfa w29, 16 # ASM-NEXT: .cfi_offset w30, -8 # ASM-NEXT: .cfi_offset w29, -16 # -# ASM: sub sp, x29, #1024 -# ASM-NEXT: .cfi_def_cfa wsp, 1040 -# ASM-NEXT: ldr x30, [sp, #1032] -# ASM-NEXT: ldr x29, [sp, #1024] -# ASM-NEXT: add sp, sp, #1040 +# ASM: mov sp, x29 +# ASM-NEXT: .cfi_def_cfa wsp, 16 +# ASM-NEXT: ldp x29, x30, [sp], #16 # ASM-NEXT: .cfi_def_cfa_offset 0 # ASM-NEXT: .cfi_restore w30 # ASM-NEXT: .cfi_restore w29 -# UNWINDINFO: DW_CFA_def_cfa_offset: +1040 +# UNWINDINFO: DW_CFA_def_cfa_offset: +16 # UNWINDINFO: DW_CFA_def_cfa: reg29 +16 # UNWINDINFO-NEXT: DW_CFA_offset: reg30 -8 # UNWINDINFO-NEXT: DW_CFA_offset: reg29 -16 # -# UNWINDINFO: DW_CFA_def_cfa: reg31 +1040 +# UNWINDINFO: DW_CFA_def_cfa: reg31 +16 # UNWINDINFO: DW_CFA_def_cfa_offset: +0 # UNWINDINFO-NEXT: DW_CFA_restore: reg30 # UNWINDINFO-NEXT: DW_CFA_restore: reg29 diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll index 690a39d12e6f1..ec4666206359e 100644 --- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll @@ -749,36 +749,25 @@ entry: } declare ptr @memset(ptr, i32, i32) -; FIXME: aarch64-split-sve-objects is currently not supported in this function -; as it requires stack reealignment (for the 32-byte aligned alloca). -; GPR CSRs -; -; FPR CSRs -; -; <--- hazard between PPRs and ZPRs here! -; -; -> sp define void @zpr_and_ppr_local_realignment( %pred, %vector, i64 %gpr) "aarch64_pstate_sm_compatible" { ; CHECK-LABEL: zpr_and_ppr_local_realignment: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #1040 +; CHECK-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 ; CHECK-NEXT: sub x9, sp, #1040 -; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK-NEXT: add x29, sp, #1024 -; CHECK-NEXT: addvl x9, x9, #-2 -; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK-NEXT: addvl x9, x9, #-1 ; CHECK-NEXT: and sp, x9, #0xffffffffffffffe0 ; CHECK-NEXT: .cfi_def_cfa w29, 16 ; CHECK-NEXT: .cfi_offset w30, -8 ; CHECK-NEXT: .cfi_offset w29, -16 ; CHECK-NEXT: sub x8, x29, #1024 -; CHECK-NEXT: str p0, [x8, #-1, mul vl] +; CHECK-NEXT: str p0, [x29, #-1, mul vl] ; CHECK-NEXT: str z0, [x8, #-2, mul vl] ; CHECK-NEXT: str x0, [sp] -; CHECK-NEXT: sub sp, x29, #1024 -; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #1040 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload ; CHECK-NEXT: ret %ppr_local = alloca %zpr_local = alloca @@ -822,3 +811,301 @@ define void @zpr_and_ppr_local_stack_probing( %pred, %pred, %vector) { +; CHECK-LABEL: sve_locals_only_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-9, mul vl] +; CHECK-NEXT: str z0, [x8, #-3, mul vl] +; CHECK-NEXT: addvl sp, x29, #-1 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca + %zpr_local = alloca + tail call void asm sideeffect "", "~{p4},~{p5},~{p6}"() + call void (...) @llvm.fake.use(ptr %alloc) + store volatile %pred, ptr %ppr_local + store volatile %vector, ptr %zpr_local + ret void +} + +; Only ZPR callee-saves (and ZPR/PPR locals) + a VLA +define void @sve_locals_only_zpr_csr_vla(i64 %n, %pred, %vector) { +; CHECK-LABEL: sve_locals_only_zpr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 16 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 32 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-1, mul vl] +; CHECK-NEXT: str z0, [x8, #-5, mul vl] +; CHECK-NEXT: addvl sp, x8, #-4 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca + %zpr_local = alloca + tail call void asm sideeffect "", "~{z8},~{z9},~{z10}"() + call void (...) @llvm.fake.use(ptr %alloc) + store volatile %pred, ptr %ppr_local + store volatile %vector, ptr %zpr_local + ret void +} + +; PPR+ZPR callee-saves (and ZPR/PPR locals) + a VLA +define void @sve_locals_zpr_ppr_csr_vla(i64 %n, %pred, %vector) { +; CHECK-LABEL: sve_locals_zpr_ppr_csr_vla: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: str p6, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p5, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-4 +; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill +; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: str z8, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK-NEXT: sub sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #-1 +; CHECK-NEXT: mov x19, sp +; CHECK-NEXT: .cfi_def_cfa w29, 32 +; CHECK-NEXT: .cfi_offset w19, -16 +; CHECK-NEXT: .cfi_offset w30, -24 +; CHECK-NEXT: .cfi_offset w29, -32 +; CHECK-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d8 @ cfa - 24 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d9 @ cfa - 32 * VG - 1056 +; CHECK-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xe0, 0x77, 0x22 // $d10 @ cfa - 40 * VG - 1056 +; CHECK-NEXT: add x9, x0, #15 +; CHECK-NEXT: mov x8, sp +; CHECK-NEXT: and x9, x9, #0xfffffffffffffff0 +; CHECK-NEXT: sub x8, x8, x9 +; CHECK-NEXT: mov sp, x8 +; CHECK-NEXT: // fake_use: $x8 +; CHECK-NEXT: sub x8, x29, #1024 +; CHECK-NEXT: //APP +; CHECK-NEXT: //NO_APP +; CHECK-NEXT: str p0, [x29, #-9, mul vl] +; CHECK-NEXT: str z0, [x8, #-6, mul vl] +; CHECK-NEXT: addvl sp, x8, #-5 +; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload +; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK-NEXT: add sp, sp, #1024 +; CHECK-NEXT: addvl sp, sp, #4 +; CHECK-NEXT: ldr p6, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p5, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret + %alloc = alloca i8, i64 %n, align 1 + %ppr_local = alloca + %zpr_local = alloca + tail call void asm sideeffect "", "~{p4},~{p5},~{p6},~{z8},~{z9},~{z10}"() + call void (...) @llvm.fake.use(ptr %alloc) + store volatile %pred, ptr %ppr_local + store volatile %vector, ptr %zpr_local + ret void +} diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index bdee359487ce6..555b164b1fd62 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -3512,14 +3512,13 @@ define i32 @svecc_call_dynamic_alloca(<4 x i16> %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { -; CHECK0-LABEL: svecc_call_realign: -; CHECK0: // %bb.0: // %entry -; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill -; CHECK0-NEXT: .cfi_def_cfa_offset 64 -; CHECK0-NEXT: cntd x9 -; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill -; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill -; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill -; CHECK0-NEXT: mov x29, sp -; CHECK0-NEXT: .cfi_def_cfa w29, 64 -; CHECK0-NEXT: .cfi_offset w19, -8 -; CHECK0-NEXT: .cfi_offset w26, -16 -; CHECK0-NEXT: .cfi_offset w27, -24 -; CHECK0-NEXT: .cfi_offset w28, -32 -; CHECK0-NEXT: .cfi_offset vg, -48 -; CHECK0-NEXT: .cfi_offset w30, -56 -; CHECK0-NEXT: .cfi_offset w29, -64 -; CHECK0-NEXT: addvl sp, sp, #-18 -; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill -; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64 -; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64 -; CHECK0-NEXT: sub x9, sp, #1024 -; CHECK0-NEXT: and sp, x9, #0xffffffffffffffe0 -; CHECK0-NEXT: mov w2, w1 -; CHECK0-NEXT: bl __arm_sme_state -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: //APP -; CHECK0-NEXT: //NO_APP -; CHECK0-NEXT: tbz w19, #0, .LBB36_2 -; CHECK0-NEXT: // %bb.1: // %entry -; CHECK0-NEXT: smstop sm -; CHECK0-NEXT: .LBB36_2: // %entry -; CHECK0-NEXT: mov x0, sp -; CHECK0-NEXT: mov w1, #45 // =0x2d -; CHECK0-NEXT: bl memset -; CHECK0-NEXT: tbz w19, #0, .LBB36_4 -; CHECK0-NEXT: // %bb.3: // %entry -; CHECK0-NEXT: smstart sm -; CHECK0-NEXT: .LBB36_4: // %entry -; CHECK0-NEXT: mov w0, #22647 // =0x5877 -; CHECK0-NEXT: movk w0, #59491, lsl #16 -; CHECK0-NEXT: addvl sp, x29, #-18 -; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload -; CHECK0-NEXT: .cfi_restore z8 -; CHECK0-NEXT: .cfi_restore z9 -; CHECK0-NEXT: .cfi_restore z10 -; CHECK0-NEXT: .cfi_restore z11 -; CHECK0-NEXT: .cfi_restore z12 -; CHECK0-NEXT: .cfi_restore z13 -; CHECK0-NEXT: .cfi_restore z14 -; CHECK0-NEXT: .cfi_restore z15 -; CHECK0-NEXT: mov sp, x29 -; CHECK0-NEXT: .cfi_def_cfa wsp, 64 -; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload -; CHECK0-NEXT: .cfi_def_cfa_offset 0 -; CHECK0-NEXT: .cfi_restore w19 -; CHECK0-NEXT: .cfi_restore w26 -; CHECK0-NEXT: .cfi_restore w27 -; CHECK0-NEXT: .cfi_restore w28 -; CHECK0-NEXT: .cfi_restore vg -; CHECK0-NEXT: .cfi_restore w30 -; CHECK0-NEXT: .cfi_restore w29 -; CHECK0-NEXT: ret -; -; CHECK64-LABEL: svecc_call_realign: -; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #128 -; CHECK64-NEXT: .cfi_def_cfa_offset 128 -; CHECK64-NEXT: cntd x9 -; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK64-NEXT: stp x9, x28, [sp, #80] // 16-byte Folded Spill -; CHECK64-NEXT: stp x27, x26, [sp, #96] // 16-byte Folded Spill -; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill -; CHECK64-NEXT: add x29, sp, #64 -; CHECK64-NEXT: .cfi_def_cfa w29, 64 -; CHECK64-NEXT: .cfi_offset w19, -16 -; CHECK64-NEXT: .cfi_offset w26, -24 -; CHECK64-NEXT: .cfi_offset w27, -32 -; CHECK64-NEXT: .cfi_offset w28, -40 +; CHECK1024-NOSPLITSVE-LABEL: svecc_call_dynamic_alloca: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 1088 +; CHECK1024-NOSPLITSVE-NEXT: cntd x9 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x9, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x20, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1080] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -8 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w20, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -24 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -32 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -40 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: mov x19, sp +; CHECK1024-NOSPLITSVE-NEXT: mov w2, w1 +; CHECK1024-NOSPLITSVE-NEXT: mov w8, w0 +; CHECK1024-NOSPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-NOSPLITSVE-NEXT: mov w8, w8 +; CHECK1024-NOSPLITSVE-NEXT: mov x9, sp +; CHECK1024-NOSPLITSVE-NEXT: mov x20, x0 +; CHECK1024-NOSPLITSVE-NEXT: add x8, x8, #15 +; CHECK1024-NOSPLITSVE-NEXT: and x8, x8, #0x1fffffff0 +; CHECK1024-NOSPLITSVE-NEXT: sub x8, x9, x8 +; CHECK1024-NOSPLITSVE-NEXT: mov sp, x8 +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstop sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB35_2: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov x0, x8 +; CHECK1024-NOSPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-NOSPLITSVE-NEXT: bl memset +; CHECK1024-NOSPLITSVE-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-NOSPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-NOSPLITSVE-NEXT: smstart sm +; CHECK1024-NOSPLITSVE-NEXT: .LBB35_4: // %entry +; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NOSPLITSVE-NEXT: sub x8, x29, #1024 +; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, x8, #-18 +; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-NOSPLITSVE-NEXT: sub sp, x29, #1024 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa wsp, 1088 +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1080] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x20, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w20 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_call_dynamic_alloca: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 64 +; CHECK1024-SPLITSVE-NEXT: cntd x9 +; CHECK1024-SPLITSVE-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: stp x9, x28, [sp, #16] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: mov x29, sp +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w20, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -24 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -32 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -40 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset vg, -48 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 +; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0d, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0e, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * IncomingVG - 1088 +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: mov x19, sp +; CHECK1024-SPLITSVE-NEXT: mov w2, w1 +; CHECK1024-SPLITSVE-NEXT: mov w8, w0 +; CHECK1024-SPLITSVE-NEXT: bl __arm_sme_state +; CHECK1024-SPLITSVE-NEXT: mov w8, w8 +; CHECK1024-SPLITSVE-NEXT: mov x9, sp +; CHECK1024-SPLITSVE-NEXT: mov x20, x0 +; CHECK1024-SPLITSVE-NEXT: add x8, x8, #15 +; CHECK1024-SPLITSVE-NEXT: and x8, x8, #0x1fffffff0 +; CHECK1024-SPLITSVE-NEXT: sub x8, x9, x8 +; CHECK1024-SPLITSVE-NEXT: mov sp, x8 +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: tbz w20, #0, .LBB35_2 +; CHECK1024-SPLITSVE-NEXT: // %bb.1: // %entry +; CHECK1024-SPLITSVE-NEXT: smstop sm +; CHECK1024-SPLITSVE-NEXT: .LBB35_2: // %entry +; CHECK1024-SPLITSVE-NEXT: mov x0, x8 +; CHECK1024-SPLITSVE-NEXT: mov w1, #45 // =0x2d +; CHECK1024-SPLITSVE-NEXT: bl memset +; CHECK1024-SPLITSVE-NEXT: tbz w20, #0, .LBB35_4 +; CHECK1024-SPLITSVE-NEXT: // %bb.3: // %entry +; CHECK1024-SPLITSVE-NEXT: smstart sm +; CHECK1024-SPLITSVE-NEXT: .LBB35_4: // %entry +; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-SPLITSVE-NEXT: sub x8, x29, #1024 +; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-SPLITSVE-NEXT: addvl sp, x8, #-18 +; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z8 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z9 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z10 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z11 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z12 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z13 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z14 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore z15 +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: mov sp, x29 +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa wsp, 64 +; CHECK1024-SPLITSVE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr x28, [sp, #24] // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa_offset 0 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w19 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w20 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w26 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w27 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w28 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore vg +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w30 +; CHECK1024-SPLITSVE-NEXT: .cfi_restore w29 +; CHECK1024-SPLITSVE-NEXT: ret +entry: + %ptr = alloca i8, i32 %P1 + tail call void asm sideeffect "", "~{x0},~{x28},~{x27},~{x3}"() #2 + %call = call ptr @memset(ptr noundef nonnull %ptr, i32 noundef 45, i32 noundef %P2) + ret i32 -396142473 +} + + +define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P3, i16 %P4) "aarch64_pstate_sm_compatible" { +; CHECK0-LABEL: svecc_call_realign: +; CHECK0: // %bb.0: // %entry +; CHECK0-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_def_cfa_offset 64 +; CHECK0-NEXT: cntd x9 +; CHECK0-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK0-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK0-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK0-NEXT: mov x29, sp +; CHECK0-NEXT: .cfi_def_cfa w29, 64 +; CHECK0-NEXT: .cfi_offset w19, -8 +; CHECK0-NEXT: .cfi_offset w26, -16 +; CHECK0-NEXT: .cfi_offset w27, -24 +; CHECK0-NEXT: .cfi_offset w28, -32 +; CHECK0-NEXT: .cfi_offset vg, -48 +; CHECK0-NEXT: .cfi_offset w30, -56 +; CHECK0-NEXT: .cfi_offset w29, -64 +; CHECK0-NEXT: addvl sp, sp, #-18 +; CHECK0-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK0-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK0-NEXT: .cfi_escape 0x10, 0x48, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d8 @ cfa - 8 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x49, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d9 @ cfa - 16 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4a, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d10 @ cfa - 24 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4b, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d11 @ cfa - 32 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4c, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d12 @ cfa - 40 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4d, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d13 @ cfa - 48 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d14 @ cfa - 56 * IncomingVG - 64 +; CHECK0-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x12, 0x11, 0x50, 0x22, 0x06, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x40, 0x22 // $d15 @ cfa - 64 * IncomingVG - 64 +; CHECK0-NEXT: sub x9, sp, #1024 +; CHECK0-NEXT: and sp, x9, #0xffffffffffffffe0 +; CHECK0-NEXT: mov w2, w1 +; CHECK0-NEXT: bl __arm_sme_state +; CHECK0-NEXT: mov x19, x0 +; CHECK0-NEXT: //APP +; CHECK0-NEXT: //NO_APP +; CHECK0-NEXT: tbz w19, #0, .LBB36_2 +; CHECK0-NEXT: // %bb.1: // %entry +; CHECK0-NEXT: smstop sm +; CHECK0-NEXT: .LBB36_2: // %entry +; CHECK0-NEXT: mov x0, sp +; CHECK0-NEXT: mov w1, #45 // =0x2d +; CHECK0-NEXT: bl memset +; CHECK0-NEXT: tbz w19, #0, .LBB36_4 +; CHECK0-NEXT: // %bb.3: // %entry +; CHECK0-NEXT: smstart sm +; CHECK0-NEXT: .LBB36_4: // %entry +; CHECK0-NEXT: mov w0, #22647 // =0x5877 +; CHECK0-NEXT: movk w0, #59491, lsl #16 +; CHECK0-NEXT: addvl sp, x29, #-18 +; CHECK0-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK0-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK0-NEXT: .cfi_restore z8 +; CHECK0-NEXT: .cfi_restore z9 +; CHECK0-NEXT: .cfi_restore z10 +; CHECK0-NEXT: .cfi_restore z11 +; CHECK0-NEXT: .cfi_restore z12 +; CHECK0-NEXT: .cfi_restore z13 +; CHECK0-NEXT: .cfi_restore z14 +; CHECK0-NEXT: .cfi_restore z15 +; CHECK0-NEXT: mov sp, x29 +; CHECK0-NEXT: .cfi_def_cfa wsp, 64 +; CHECK0-NEXT: ldp x26, x19, [sp, #48] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x28, x27, [sp, #32] // 16-byte Folded Reload +; CHECK0-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK0-NEXT: .cfi_def_cfa_offset 0 +; CHECK0-NEXT: .cfi_restore w19 +; CHECK0-NEXT: .cfi_restore w26 +; CHECK0-NEXT: .cfi_restore w27 +; CHECK0-NEXT: .cfi_restore w28 +; CHECK0-NEXT: .cfi_restore vg +; CHECK0-NEXT: .cfi_restore w30 +; CHECK0-NEXT: .cfi_restore w29 +; CHECK0-NEXT: ret +; +; CHECK64-LABEL: svecc_call_realign: +; CHECK64: // %bb.0: // %entry +; CHECK64-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK64-NEXT: .cfi_def_cfa_offset 64 +; CHECK64-NEXT: cntd x9 +; CHECK64-NEXT: stp x28, x27, [sp, #32] // 16-byte Folded Spill +; CHECK64-NEXT: str x9, [sp, #16] // 8-byte Folded Spill +; CHECK64-NEXT: stp x26, x19, [sp, #48] // 16-byte Folded Spill +; CHECK64-NEXT: mov x29, sp +; CHECK64-NEXT: .cfi_def_cfa w29, 64 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w26, -16 +; CHECK64-NEXT: .cfi_offset w27, -24 +; CHECK64-NEXT: .cfi_offset w28, -32 ; CHECK64-NEXT: .cfi_offset vg, -48 ; CHECK64-NEXT: .cfi_offset w30, -56 ; CHECK64-NEXT: .cfi_offset w29, -64 -; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: addvl sp, sp, #-2 ; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill @@ -3960,30 +4102,32 @@ define i32 @svecc_call_realign(<4 x i16> %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 %P2, %P0, i32 %P1, i32 % ; ; CHECK64-LABEL: svecc_call_dynamic_and_scalable_alloca: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #128 -; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill -; CHECK64-NEXT: add x29, sp, #64 -; CHECK64-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill -; CHECK64-NEXT: stp x26, x20, [sp, #96] // 16-byte Folded Spill -; CHECK64-NEXT: str x19, [sp, #112] // 8-byte Folded Spill -; CHECK64-NEXT: addvl sp, sp, #-18 +; CHECK64-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK64-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK64-NEXT: mov x29, sp +; CHECK64-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK64-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK64-NEXT: addvl sp, sp, #-2 ; CHECK64-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill @@ -4330,41 +4605,43 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK64-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill ; CHECK64-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill -; CHECK64-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK64-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 +; CHECK64-NEXT: addvl sp, sp, #-16 +; CHECK64-NEXT: str z23, [sp] // 16-byte Folded Spill +; CHECK64-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK64-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill ; CHECK64-NEXT: sub sp, sp, #112 ; CHECK64-NEXT: addvl sp, sp, #-1 ; CHECK64-NEXT: mov x19, sp ; CHECK64-NEXT: .cfi_def_cfa w29, 64 -; CHECK64-NEXT: .cfi_offset w19, -16 -; CHECK64-NEXT: .cfi_offset w20, -24 -; CHECK64-NEXT: .cfi_offset w26, -32 -; CHECK64-NEXT: .cfi_offset w27, -40 +; CHECK64-NEXT: .cfi_offset w19, -8 +; CHECK64-NEXT: .cfi_offset w20, -16 +; CHECK64-NEXT: .cfi_offset w26, -24 +; CHECK64-NEXT: .cfi_offset w27, -32 ; CHECK64-NEXT: .cfi_offset w28, -48 ; CHECK64-NEXT: .cfi_offset w30, -56 ; CHECK64-NEXT: .cfi_offset w29, -64 -; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d8 @ cfa - 8 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d9 @ cfa - 16 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d10 @ cfa - 24 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d11 @ cfa - 32 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d12 @ cfa - 40 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d13 @ cfa - 48 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d14 @ cfa - 56 * VG - 128 -; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d15 @ cfa - 64 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d8 @ cfa - 24 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d9 @ cfa - 32 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d10 @ cfa - 40 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d11 @ cfa - 48 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d12 @ cfa - 56 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d13 @ cfa - 64 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d14 @ cfa - 72 * VG - 128 +; CHECK64-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0x80, 0x7f, 0x22 // $d15 @ cfa - 80 * VG - 128 ; CHECK64-NEXT: // kill: def $w0 killed $w0 def $x0 ; CHECK64-NEXT: ubfiz x8, x0, #2, #32 ; CHECK64-NEXT: mov x9, sp @@ -4385,22 +4662,24 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK64-NEXT: sub x8, x29, #64 ; CHECK64-NEXT: movk w0, #59491, lsl #16 ; CHECK64-NEXT: addvl sp, x8, #-18 -; CHECK64-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK64-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z23, [sp] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 +; CHECK64-NEXT: addvl sp, sp, #16 ; CHECK64-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload @@ -4413,131 +4692,244 @@ define i32 @svecc_call_dynamic_and_scalable_alloca(<4 x i16> %P0, i32 %P1, i32 % ; CHECK64-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload ; CHECK64-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload -; CHECK64-NEXT: sub sp, x29, #64 -; CHECK64-NEXT: ldp x20, x19, [sp, #104] // 16-byte Folded Reload -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: ldp x27, x26, [sp, #88] // 16-byte Folded Reload -; CHECK64-NEXT: ldp x30, x28, [sp, #72] // 16-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #128 +; CHECK64-NEXT: mov sp, x29 +; CHECK64-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK64-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK64-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK64-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_call_dynamic_and_scalable_alloca: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1088 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: add x29, sp, #1024 -; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill -; CHECK1024-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill -; CHECK1024-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill -; CHECK1024-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill -; CHECK1024-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-18 -; CHECK1024-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill -; CHECK1024-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1072 -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: mov x19, sp -; CHECK1024-NEXT: .cfi_def_cfa w29, 64 -; CHECK1024-NEXT: .cfi_offset w19, -16 -; CHECK1024-NEXT: .cfi_offset w20, -24 -; CHECK1024-NEXT: .cfi_offset w26, -32 -; CHECK1024-NEXT: .cfi_offset w27, -40 -; CHECK1024-NEXT: .cfi_offset w28, -48 -; CHECK1024-NEXT: .cfi_offset w30, -56 -; CHECK1024-NEXT: .cfi_offset w29, -64 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * VG - 1088 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * VG - 1088 -; CHECK1024-NEXT: // kill: def $w0 killed $w0 def $x0 -; CHECK1024-NEXT: ubfiz x8, x0, #2, #32 -; CHECK1024-NEXT: mov x9, sp -; CHECK1024-NEXT: add x8, x8, #15 -; CHECK1024-NEXT: and x8, x8, #0x7fffffff0 -; CHECK1024-NEXT: sub x20, x9, x8 -; CHECK1024-NEXT: mov sp, x20 -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: add x0, x19, #8 -; CHECK1024-NEXT: bl bar -; CHECK1024-NEXT: sub x0, x29, #1024 -; CHECK1024-NEXT: addvl x0, x0, #-19 -; CHECK1024-NEXT: bl bar -; CHECK1024-NEXT: mov x0, x20 -; CHECK1024-NEXT: bl bar -; CHECK1024-NEXT: mov w0, #22647 // =0x5877 -; CHECK1024-NEXT: sub x8, x29, #1024 -; CHECK1024-NEXT: movk w0, #59491, lsl #16 -; CHECK1024-NEXT: addvl sp, x8, #-18 -; CHECK1024-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload -; CHECK1024-NEXT: sub sp, x29, #1024 -; CHECK1024-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1088 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: add x29, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x27, [sp, #1048] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x26, [sp, #1056] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x20, [sp, #1064] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str x19, [sp, #1072] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-18 +; CHECK1024-NOSPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z23, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z22, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z21, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z20, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z19, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z18, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z17, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z16, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z15, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z14, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z13, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z12, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z11, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z10, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp, #16, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #17, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1072 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: mov x19, sp +; CHECK1024-NOSPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w19, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w20, -24 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w26, -32 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w27, -40 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w28, -48 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 24 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 32 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 40 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 48 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 56 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 64 * VG - 1088 +; CHECK1024-NOSPLITSVE-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-NOSPLITSVE-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-NOSPLITSVE-NEXT: mov x9, sp +; CHECK1024-NOSPLITSVE-NEXT: add x8, x8, #15 +; CHECK1024-NOSPLITSVE-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-NOSPLITSVE-NEXT: sub x20, x9, x8 +; CHECK1024-NOSPLITSVE-NEXT: mov sp, x20 +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: add x0, x19, #8 +; CHECK1024-NOSPLITSVE-NEXT: bl bar +; CHECK1024-NOSPLITSVE-NEXT: sub x0, x29, #1024 +; CHECK1024-NOSPLITSVE-NEXT: addvl x0, x0, #-19 +; CHECK1024-NOSPLITSVE-NEXT: bl bar +; CHECK1024-NOSPLITSVE-NEXT: mov x0, x20 +; CHECK1024-NOSPLITSVE-NEXT: bl bar +; CHECK1024-NOSPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-NOSPLITSVE-NEXT: sub x8, x29, #1024 +; CHECK1024-NOSPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, x8, #-18 +; CHECK1024-NOSPLITSVE-NEXT: ldr z23, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z22, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z21, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z20, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z19, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z18, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z17, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z16, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z15, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z14, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z13, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z12, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z11, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z10, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp, #16, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #17, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: sub sp, x29, #1024 +; CHECK1024-NOSPLITSVE-NEXT: ldr x19, [sp, #1072] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x20, [sp, #1064] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x26, [sp, #1056] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x27, [sp, #1048] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1088 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_call_dynamic_and_scalable_alloca: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: stp x29, x30, [sp, #-64]! // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str x28, [sp, #16] // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: mov x29, sp +; CHECK1024-SPLITSVE-NEXT: stp x27, x26, [sp, #32] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: stp x20, x19, [sp, #48] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-SPLITSVE-NEXT: str p15, [sp, #4, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p14, [sp, #5, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p13, [sp, #6, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p12, [sp, #7, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p11, [sp, #8, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p10, [sp, #9, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p9, [sp, #10, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p8, [sp, #11, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p7, [sp, #12, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p6, [sp, #13, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p5, [sp, #14, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str p4, [sp, #15, mul vl] // 2-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-16 +; CHECK1024-SPLITSVE-NEXT: str z23, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z22, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z21, [sp, #2, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z20, [sp, #3, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z19, [sp, #4, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z18, [sp, #5, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z17, [sp, #6, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z16, [sp, #7, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z15, [sp, #8, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z14, [sp, #9, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z13, [sp, #10, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z12, [sp, #11, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z11, [sp, #12, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z10, [sp, #13, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z9, [sp, #14, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #15, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1072 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: mov x19, sp +; CHECK1024-SPLITSVE-NEXT: .cfi_def_cfa w29, 64 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w19, -8 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w20, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w26, -24 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w27, -32 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w28, -48 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w30, -56 +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -64 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x68, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d8 @ cfa - 24 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x60, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d9 @ cfa - 32 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4a, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x58, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d10 @ cfa - 40 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4b, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x50, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d11 @ cfa - 48 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4c, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x48, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d12 @ cfa - 56 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4d, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x40, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d13 @ cfa - 64 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4e, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb8, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d14 @ cfa - 72 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x4f, 0x0c, 0x92, 0x2e, 0x00, 0x11, 0xb0, 0x7f, 0x1e, 0x22, 0x11, 0xc0, 0x77, 0x22 // $d15 @ cfa - 80 * VG - 1088 +; CHECK1024-SPLITSVE-NEXT: // kill: def $w0 killed $w0 def $x0 +; CHECK1024-SPLITSVE-NEXT: ubfiz x8, x0, #2, #32 +; CHECK1024-SPLITSVE-NEXT: mov x9, sp +; CHECK1024-SPLITSVE-NEXT: add x8, x8, #15 +; CHECK1024-SPLITSVE-NEXT: and x8, x8, #0x7fffffff0 +; CHECK1024-SPLITSVE-NEXT: sub x20, x9, x8 +; CHECK1024-SPLITSVE-NEXT: mov sp, x20 +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: add x0, x19, #8 +; CHECK1024-SPLITSVE-NEXT: bl bar +; CHECK1024-SPLITSVE-NEXT: sub x0, x29, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl x0, x0, #-19 +; CHECK1024-SPLITSVE-NEXT: bl bar +; CHECK1024-SPLITSVE-NEXT: mov x0, x20 +; CHECK1024-SPLITSVE-NEXT: bl bar +; CHECK1024-SPLITSVE-NEXT: mov w0, #22647 // =0x5877 +; CHECK1024-SPLITSVE-NEXT: sub x8, x29, #1024 +; CHECK1024-SPLITSVE-NEXT: movk w0, #59491, lsl #16 +; CHECK1024-SPLITSVE-NEXT: addvl sp, x8, #-18 +; CHECK1024-SPLITSVE-NEXT: ldr z23, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z22, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z21, [sp, #2, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z20, [sp, #3, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z19, [sp, #4, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z18, [sp, #5, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z17, [sp, #6, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z16, [sp, #7, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z15, [sp, #8, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z14, [sp, #9, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z13, [sp, #10, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z12, [sp, #11, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z11, [sp, #12, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z10, [sp, #13, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp, #14, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #15, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #16 +; CHECK1024-SPLITSVE-NEXT: ldr p15, [sp, #4, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p14, [sp, #5, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p13, [sp, #6, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p12, [sp, #7, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p11, [sp, #8, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p10, [sp, #9, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p9, [sp, #10, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p8, [sp, #11, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p7, [sp, #12, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p6, [sp, #13, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p5, [sp, #14, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr p4, [sp, #15, mul vl] // 2-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: mov sp, x29 +; CHECK1024-SPLITSVE-NEXT: ldp x20, x19, [sp, #48] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr x28, [sp, #16] // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x27, x26, [sp, #32] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldp x29, x30, [sp], #64 // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: %a = alloca i32, i32 10 %b = alloca