diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index de55704a37531..c934d9269ea1e 100644 --- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -2412,9 +2412,31 @@ void AArch64FrameLowering::determineStackHazardSlot( AFI->setStackHazardSlotIndex(ID); } - // Determine if we should use SplitSVEObjects. This should only be used if - // there's a possibility of a stack hazard between PPRs and ZPRs or FPRs. + if (!AFI->hasStackHazardSlotIndex()) + return; + if (SplitSVEObjects) { + CallingConv::ID CC = MF.getFunction().getCallingConv(); + if (AFI->isSVECC() || CC == CallingConv::AArch64_SVE_VectorCall) { + AFI->setSplitSVEObjects(true); + LLVM_DEBUG(dbgs() << "Using SplitSVEObjects for SVE CC function\n"); + return; + } + + // We only use SplitSVEObjects in non-SVE CC functions if there's a + // possibility of a stack hazard between PPRs and ZPRs/FPRs. + LLVM_DEBUG(dbgs() << "Determining if SplitSVEObjects should be used in " + "non-SVE CC function...\n"); + + // If another calling convention is explicitly set FPRs can't be promoted to + // ZPR callee-saves. + if (!is_contained({CallingConv::C, CallingConv::Fast}, CC)) { + LLVM_DEBUG( + dbgs() + << "Calling convention is not supported with SplitSVEObjects\n"); + return; + } + if (!HasPPRCSRs && !HasPPRStackObjects) { LLVM_DEBUG( dbgs() << "Not using SplitSVEObjects as no PPRs are on the stack\n"); @@ -2428,16 +2450,6 @@ void AArch64FrameLowering::determineStackHazardSlot( return; } - // If another calling convention is explicitly set FPRs can't be promoted to - // ZPR callee-saves. 
- if (!is_contained({CallingConv::C, CallingConv::Fast, - CallingConv::AArch64_SVE_VectorCall}, - MF.getFunction().getCallingConv())) { - LLVM_DEBUG( - dbgs() << "Calling convention is not supported with SplitSVEObjects"); - return; - } - [[maybe_unused]] const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>(); assert(Subtarget.isSVEorStreamingSVEAvailable() && diff --git a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll index f65aec6665cec..9d8b077e9268e 100644 --- a/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll +++ b/llvm/test/CodeGen/AArch64/split-sve-stack-frame-layout.ll @@ -839,11 +839,10 @@ define aarch64_sve_vector_pcs void @only_ppr_csr_vla(i64 %n) { define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) { ; CHECK-LABEL: only_zpr_csr_vla: ; CHECK: // %bb.0: -; CHECK-NEXT: sub sp, sp, #1056 -; CHECK-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK-NEXT: add x29, sp, #1024 -; CHECK-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill -; CHECK-NEXT: str x19, [sp, #1040] // 8-byte Folded Spill +; CHECK-NEXT: stp x29, x30, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: str x19, [sp, #16] // 8-byte Folded Spill +; CHECK-NEXT: mov x29, sp +; CHECK-NEXT: sub sp, sp, #1024 ; CHECK-NEXT: addvl sp, sp, #-3 ; CHECK-NEXT: str z10, [sp] // 16-byte Folded Spill ; CHECK-NEXT: str z9, [sp, #1, mul vl] // 16-byte Folded Spill @@ -870,11 +869,9 @@ define aarch64_sve_vector_pcs void @only_zpr_csr_vla(i64 %n) { ; CHECK-NEXT: ldr z10, [sp] // 16-byte Folded Reload ; CHECK-NEXT: ldr z9, [sp, #1, mul vl] // 16-byte Folded Reload ; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload -; CHECK-NEXT: sub sp, x29, #1024 -; CHECK-NEXT: ldr x19, [sp, #1040] // 8-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload -; CHECK-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK-NEXT: add sp, sp, #1056 +; CHECK-NEXT: mov sp, x29 +; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Folded Reload +; CHECK-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret %alloc = alloca i8, i64 %n, align 1 call void (...) @llvm.fake.use(ptr %alloc) diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll index 70874761b82ab..05450468f87a7 100644 --- a/llvm/test/CodeGen/AArch64/stack-hazard.ll +++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll @@ -975,8 +975,8 @@ define i32 @svecc_csr_d8(i32 noundef %num, %vs) "aarch64_psta ; ; CHECK64-LABEL: svecc_csr_d8: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-1 ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK64-NEXT: sub sp, sp, #64 @@ -988,30 +988,50 @@ define i32 @svecc_csr_d8(i32 noundef %num, %vs) "aarch64_psta ; CHECK64-NEXT: //NO_APP ; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #1 -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 +; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_csr_d8: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG -; CHECK1024-NEXT: .cfi_offset w29, -16 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 -; CHECK1024-NEXT: mov w0, wzr -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: add sp, sp, #1024 -; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 
0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2064 + 8 * VG +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: mov w0, wzr +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: tail call void asm sideeffect "", "~{d8}"() #1 ret i32 0 @@ -1039,8 +1059,8 @@ define i32 @svecc_csr_d8d9(i32 noundef %num, %vs) "aarch64_ps ; ; CHECK64-LABEL: svecc_csr_d8d9: ; 
CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-2 ; CHECK64-NEXT: str z9, [sp] // 16-byte Folded Spill ; CHECK64-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill @@ -1055,33 +1075,56 @@ define i32 @svecc_csr_d8d9(i32 noundef %num, %vs) "aarch64_ps ; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: ldr z9, [sp] // 16-byte Folded Reload ; CHECK64-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #2 -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 +; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_csr_d8d9: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-2 -; CHECK1024-NEXT: str z9, [sp] // 16-byte Folded Spill -; CHECK1024-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG -; CHECK1024-NEXT: .cfi_offset w29, -16 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040 -; CHECK1024-NEXT: mov w0, wzr -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: add sp, sp, #1024 -; CHECK1024-NEXT: ldr z9, [sp] // 16-byte Folded Reload -; CHECK1024-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #2 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded 
Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8d9: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-NOSPLITSVE-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: ldr z9, [sp] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #2 +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8d9: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-2 +; CHECK1024-SPLITSVE-NEXT: str z9, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: str z8, [sp, #1, mul vl] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x49, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x70, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d9 @ cfa - 16 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: mov w0, wzr +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: ldr z9, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp, #1, mul vl] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #2 +; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: tail call void asm sideeffect "", "~{d8},~{d9}"() #1 ret i32 0 @@ -1108,8 +1151,8 @@ define i32 @svecc_csr_d8_allocd(double %d, %vs) "aarch64_psta ; ; CHECK64-LABEL: svecc_csr_d8_allocd: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-1 ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK64-NEXT: sub sp, sp, #80 @@ -1122,31 +1165,52 @@ define i32 @svecc_csr_d8_allocd(double %d, %vs) "aarch64_psta ; CHECK64-NEXT: str d0, [sp, #72] ; CHECK64-NEXT: add sp, sp, #80 ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #1 -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 +; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_csr_d8_allocd: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -; CHECK1024-NEXT: .cfi_offset w29, -16 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 -; CHECK1024-NEXT: mov w0, wzr -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: str d0, [sp, #1032] -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_allocd: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; 
CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: str d0, [sp, #1032] +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8_allocd: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: mov w0, wzr +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: str d0, [sp, #1032] +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: %a = 
alloca double tail call void asm sideeffect "", "~{d8}"() #1 @@ -1176,8 +1240,8 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, %vs) "aarch64_pstat ; ; CHECK64-LABEL: svecc_csr_d8_alloci64: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-1 ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK64-NEXT: sub sp, sp, #80 @@ -1191,32 +1255,54 @@ define i32 @svecc_csr_d8_alloci64(i64 %d, %vs) "aarch64_pstat ; CHECK64-NEXT: str x8, [sp, #8] ; CHECK64-NEXT: add sp, sp, #80 ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #1 -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 +; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_csr_d8_alloci64: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG -; CHECK1024-NEXT: .cfi_offset w29, -16 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 -; CHECK1024-NEXT: mov x8, x0 -; CHECK1024-NEXT: mov w0, wzr -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: str x8, [sp, #8] -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; 
CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_alloci64: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: mov x8, x0 +; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: str x8, [sp, #8] +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8_alloci64: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0xa0, 0x10, 0x92, 0x2e, 0x00, 0x38, 0x1e, 0x22 // sp + 2080 + 8 * VG +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: mov x8, x0 +; CHECK1024-SPLITSVE-NEXT: mov w0, wzr +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: str x8, [sp, #8] +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: %a = alloca i64 tail call void asm sideeffect "", "~{d8}"() #1 @@ -1247,8 +1333,8 @@ define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, %vs) "aarch64_p ; ; CHECK64-LABEL: svecc_csr_d8_allocnxv4i32: ; CHECK64: // %bb.0: // %entry -; CHECK64-NEXT: sub sp, sp, #80 -; CHECK64-NEXT: str x29, [sp, #64] // 8-byte Folded Spill +; CHECK64-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK64-NEXT: sub sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #-1 ; CHECK64-NEXT: str z8, [sp] // 16-byte Folded Spill ; CHECK64-NEXT: sub sp, sp, #64 @@ -1265,35 +1351,60 @@ define i32 @svecc_csr_d8_allocnxv4i32(i64 %d, %vs) "aarch64_p ; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #1 ; CHECK64-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK64-NEXT: add sp, sp, #64 ; CHECK64-NEXT: addvl sp, sp, #1 -; CHECK64-NEXT: ldr x29, [sp, #64] // 8-byte Folded Reload -; CHECK64-NEXT: add sp, sp, #80 +; CHECK64-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload ; CHECK64-NEXT: ret ; -; CHECK1024-LABEL: svecc_csr_d8_allocnxv4i32: -; CHECK1024: // %bb.0: // %entry -; CHECK1024-NEXT: sub sp, sp, #1040 -; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: str z8, [sp] // 16-byte Folded Spill -; CHECK1024-NEXT: sub sp, sp, #1024 -; CHECK1024-NEXT: addvl sp, sp, #-1 -; CHECK1024-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG -; CHECK1024-NEXT: .cfi_offset w29, -16 -; CHECK1024-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 -; CHECK1024-NEXT: mov z0.s, #0 // =0x0 -; CHECK1024-NEXT: add x8, sp, #1024 -; CHECK1024-NEXT: mov w0, wzr -; CHECK1024-NEXT: //APP -; CHECK1024-NEXT: //NO_APP -; CHECK1024-NEXT: str z0, [x8] -; CHECK1024-NEXT: add sp, sp, #1024 -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr z8, [sp] // 16-byte Folded Reload -; CHECK1024-NEXT: addvl sp, sp, #1 -; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload -; CHECK1024-NEXT: add sp, sp, #1040 -; CHECK1024-NEXT: ret +; CHECK1024-NOSPLITSVE-LABEL: svecc_csr_d8_allocnxv4i32: +; CHECK1024-NOSPLITSVE: // %bb.0: // %entry +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill +; 
CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-NOSPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG +; CHECK1024-NOSPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-NOSPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-NOSPLITSVE-NEXT: mov z0.s, #0 // =0x0 +; CHECK1024-NOSPLITSVE-NEXT: add x8, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: mov w0, wzr +; CHECK1024-NOSPLITSVE-NEXT: //APP +; CHECK1024-NOSPLITSVE-NEXT: //NO_APP +; CHECK1024-NOSPLITSVE-NEXT: str z0, [x8] +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-NOSPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-NOSPLITSVE-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload +; CHECK1024-NOSPLITSVE-NEXT: add sp, sp, #1040 +; CHECK1024-NOSPLITSVE-NEXT: ret +; +; CHECK1024-SPLITSVE-LABEL: svecc_csr_d8_allocnxv4i32: +; CHECK1024-SPLITSVE: // %bb.0: // %entry +; CHECK1024-SPLITSVE-NEXT: str x29, [sp, #-16]! 
// 8-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: str z8, [sp] // 16-byte Folded Spill +; CHECK1024-SPLITSVE-NEXT: sub sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #-1 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x0f, 0x09, 0x8f, 0x90, 0x10, 0x92, 0x2e, 0x00, 0x40, 0x1e, 0x22 // sp + 2064 + 16 * VG +; CHECK1024-SPLITSVE-NEXT: .cfi_offset w29, -16 +; CHECK1024-SPLITSVE-NEXT: .cfi_escape 0x10, 0x48, 0x0b, 0x92, 0x2e, 0x00, 0x11, 0x78, 0x1e, 0x22, 0x11, 0xf0, 0x77, 0x22 // $d8 @ cfa - 8 * VG - 1040 +; CHECK1024-SPLITSVE-NEXT: mov z0.s, #0 // =0x0 +; CHECK1024-SPLITSVE-NEXT: add x8, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: mov w0, wzr +; CHECK1024-SPLITSVE-NEXT: //APP +; CHECK1024-SPLITSVE-NEXT: //NO_APP +; CHECK1024-SPLITSVE-NEXT: str z0, [x8] +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-SPLITSVE-NEXT: ldr z8, [sp] // 16-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: add sp, sp, #1024 +; CHECK1024-SPLITSVE-NEXT: addvl sp, sp, #1 +; CHECK1024-SPLITSVE-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload +; CHECK1024-SPLITSVE-NEXT: ret entry: %a = alloca <vscale x 4 x i32> tail call void asm sideeffect "", "~{d8}"() #1 @@ -1360,11 +1471,11 @@ define i32 @svecc_csr_x18_25_d8_15_allocdi64(i64 %d, double %e,