diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll new file mode 100644 index 00000000000000..be5c1cdeecf43b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/vararg.ll @@ -0,0 +1,1008 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV32,ILP32 %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV32,RV32D-ILP32 %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d -target-abi ilp32f \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV32,RV32D-ILP32F %s +; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -global-isel -mattr=+d -target-abi ilp32d \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV32,RV32D-ILP32D %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV64,LP64 %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d -target-abi lp64f \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV64,LP64F %s +; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -global-isel -mattr=+d -target-abi lp64d \ +; RUN: -verify-machineinstrs \ +; RUN: | FileCheck -check-prefixes=RV64,LP64D %s + +; The same vararg calling convention is used for ilp32/ilp32f/ilp32d and for +; lp64/lp64f/lp64d. Different CHECK lines are required due to slight +; codegen differences due to the way the f64 load operations are lowered and +; because the PseudoCALL specifies the calling convention. +; The nounwind attribute is omitted for some of the tests, to check that CFI +; directives are correctly generated. 
+ +declare void @llvm.va_start(ptr) +declare void @llvm.va_end(ptr) + +declare void @notdead(ptr) + +; Although frontends are recommended to not generate va_arg due to the lack of +; support for aggregate types, we test simple cases here to ensure they are +; lowered correctly + +define i32 @va1(ptr %fmt, ...) { +; RV32-LABEL: va1: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: .cfi_def_cfa_offset 48 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va1: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: .cfi_def_cfa_offset 80 +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: sd a2, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: sd a5, 56(sp) +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: sd a6, 64(sp) +; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a2, 12(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %va, align 4 + %1 = load i32, ptr %argp.cur, align 4 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +define i32 @va1_va_arg(ptr %fmt, ...) 
nounwind { +; RV32-LABEL: va1_va_arg: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va1_va_arg: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: sd a2, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: sd a5, 56(sp) +; RV64-NEXT: sd a6, 64(sp) +; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +; Ensure the adjustment when restoring the stack pointer using the frame +; pointer is correct +define i32 @va1_va_arg_alloca(ptr %fmt, ...) 
nounwind { +; RV32-LABEL: va1_va_arg_alloca: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s1, 4(sp) # 4-byte Folded Spill +; RV32-NEXT: addi s0, sp, 16 +; RV32-NEXT: sw a1, 4(s0) +; RV32-NEXT: sw a2, 8(s0) +; RV32-NEXT: sw a3, 12(s0) +; RV32-NEXT: sw a4, 16(s0) +; RV32-NEXT: sw a5, 20(s0) +; RV32-NEXT: sw a6, 24(s0) +; RV32-NEXT: sw a7, 28(s0) +; RV32-NEXT: addi a0, s0, 4 +; RV32-NEXT: sw a0, -16(s0) +; RV32-NEXT: lw a0, -16(s0) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, -16(s0) +; RV32-NEXT: lw s1, 0(a0) +; RV32-NEXT: addi a0, s1, 15 +; RV32-NEXT: andi a0, a0, -16 +; RV32-NEXT: sub a0, sp, a0 +; RV32-NEXT: mv sp, a0 +; RV32-NEXT: call notdead@plt +; RV32-NEXT: mv a0, s1 +; RV32-NEXT: addi sp, s0, -16 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s1, 4(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va1_va_arg_alloca: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s1, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: addi s0, sp, 32 +; RV64-NEXT: sd a1, 8(s0) +; RV64-NEXT: sd a2, 16(s0) +; RV64-NEXT: sd a3, 24(s0) +; RV64-NEXT: sd a4, 32(s0) +; RV64-NEXT: sd a5, 40(s0) +; RV64-NEXT: sd a6, 48(s0) +; RV64-NEXT: sd a7, 56(s0) +; RV64-NEXT: addi a0, s0, 8 +; RV64-NEXT: sd a0, -32(s0) +; RV64-NEXT: ld a0, -32(s0) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, -32(s0) +; RV64-NEXT: lw s1, 0(a0) +; RV64-NEXT: slli a0, s1, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: addi a0, a0, 15 +; RV64-NEXT: andi a0, a0, -16 +; RV64-NEXT: sub a0, sp, a0 +; RV64-NEXT: mv sp, a0 +; RV64-NEXT: call notdead@plt +; 
RV64-NEXT: mv a0, s1 +; RV64-NEXT: addi sp, s0, -32 +; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s1, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + %2 = alloca i8, i32 %1 + call void @notdead(ptr %2) + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +define void @va1_caller() nounwind { +; RV32-LABEL: va1_caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a3, 261888 +; RV32-NEXT: li a4, 2 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: call va1@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; LP64-LABEL: va1_caller: +; LP64: # %bb.0: +; LP64-NEXT: addi sp, sp, -16 +; LP64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64-NEXT: lui a0, %hi(.LCPI3_0) +; LP64-NEXT: ld a1, %lo(.LCPI3_0)(a0) +; LP64-NEXT: li a2, 2 +; LP64-NEXT: call va1@plt +; LP64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64-NEXT: addi sp, sp, 16 +; LP64-NEXT: ret +; +; LP64F-LABEL: va1_caller: +; LP64F: # %bb.0: +; LP64F-NEXT: addi sp, sp, -16 +; LP64F-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64F-NEXT: li a0, 1023 +; LP64F-NEXT: slli a0, a0, 52 +; LP64F-NEXT: fmv.d.x fa5, a0 +; LP64F-NEXT: li a2, 2 +; LP64F-NEXT: fmv.x.d a1, fa5 +; LP64F-NEXT: call va1@plt +; LP64F-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64F-NEXT: addi sp, sp, 16 +; LP64F-NEXT: ret +; +; LP64D-LABEL: va1_caller: +; LP64D: # %bb.0: +; LP64D-NEXT: addi sp, sp, -16 +; LP64D-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; LP64D-NEXT: li a0, 1023 +; LP64D-NEXT: slli a0, a0, 52 +; LP64D-NEXT: fmv.d.x fa5, a0 +; LP64D-NEXT: li a2, 2 +; LP64D-NEXT: fmv.x.d a1, fa5 +; LP64D-NEXT: call va1@plt +; LP64D-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; LP64D-NEXT: addi sp, sp, 16 +; LP64D-NEXT: ret + %1 = call i32 (ptr, ...) 
@va1(ptr undef, double 1.0, i32 2) + ret void +} + +; Ensure that 2x xlen size+alignment varargs are accessed via an "aligned" +; register pair (where the first register is even-numbered). + +define i64 @va2(ptr %fmt, ...) nounwind { +; ILP32-LABEL: va2: +; ILP32: # %bb.0: +; ILP32-NEXT: addi sp, sp, -48 +; ILP32-NEXT: sw a1, 20(sp) +; ILP32-NEXT: sw a2, 24(sp) +; ILP32-NEXT: sw a3, 28(sp) +; ILP32-NEXT: sw a4, 32(sp) +; ILP32-NEXT: addi a0, sp, 20 +; ILP32-NEXT: sw a0, 12(sp) +; ILP32-NEXT: lw a0, 12(sp) +; ILP32-NEXT: sw a5, 36(sp) +; ILP32-NEXT: sw a6, 40(sp) +; ILP32-NEXT: sw a7, 44(sp) +; ILP32-NEXT: addi a1, a0, 7 +; ILP32-NEXT: andi a1, a1, -8 +; ILP32-NEXT: addi a0, a0, 15 +; ILP32-NEXT: sw a0, 12(sp) +; ILP32-NEXT: lw a0, 0(a1) +; ILP32-NEXT: lw a1, 4(a1) +; ILP32-NEXT: addi sp, sp, 48 +; ILP32-NEXT: ret +; +; RV32D-ILP32-LABEL: va2: +; RV32D-ILP32: # %bb.0: +; RV32D-ILP32-NEXT: addi sp, sp, -48 +; RV32D-ILP32-NEXT: sw a1, 20(sp) +; RV32D-ILP32-NEXT: sw a2, 24(sp) +; RV32D-ILP32-NEXT: sw a3, 28(sp) +; RV32D-ILP32-NEXT: sw a4, 32(sp) +; RV32D-ILP32-NEXT: addi a0, sp, 20 +; RV32D-ILP32-NEXT: sw a0, 12(sp) +; RV32D-ILP32-NEXT: lw a0, 12(sp) +; RV32D-ILP32-NEXT: sw a5, 36(sp) +; RV32D-ILP32-NEXT: sw a6, 40(sp) +; RV32D-ILP32-NEXT: sw a7, 44(sp) +; RV32D-ILP32-NEXT: addi a1, a0, 7 +; RV32D-ILP32-NEXT: andi a1, a1, -8 +; RV32D-ILP32-NEXT: fld fa5, 0(a1) +; RV32D-ILP32-NEXT: addi a0, a0, 15 +; RV32D-ILP32-NEXT: sw a0, 12(sp) +; RV32D-ILP32-NEXT: fsd fa5, 0(sp) +; RV32D-ILP32-NEXT: lw a0, 0(sp) +; RV32D-ILP32-NEXT: lw a1, 4(sp) +; RV32D-ILP32-NEXT: addi sp, sp, 48 +; RV32D-ILP32-NEXT: ret +; +; RV32D-ILP32F-LABEL: va2: +; RV32D-ILP32F: # %bb.0: +; RV32D-ILP32F-NEXT: addi sp, sp, -48 +; RV32D-ILP32F-NEXT: sw a1, 20(sp) +; RV32D-ILP32F-NEXT: sw a2, 24(sp) +; RV32D-ILP32F-NEXT: sw a3, 28(sp) +; RV32D-ILP32F-NEXT: sw a4, 32(sp) +; RV32D-ILP32F-NEXT: addi a0, sp, 20 +; RV32D-ILP32F-NEXT: sw a0, 12(sp) +; RV32D-ILP32F-NEXT: lw a0, 12(sp) +; RV32D-ILP32F-NEXT: sw a5, 
36(sp) +; RV32D-ILP32F-NEXT: sw a6, 40(sp) +; RV32D-ILP32F-NEXT: sw a7, 44(sp) +; RV32D-ILP32F-NEXT: addi a1, a0, 7 +; RV32D-ILP32F-NEXT: andi a1, a1, -8 +; RV32D-ILP32F-NEXT: fld fa5, 0(a1) +; RV32D-ILP32F-NEXT: addi a0, a0, 15 +; RV32D-ILP32F-NEXT: sw a0, 12(sp) +; RV32D-ILP32F-NEXT: fsd fa5, 0(sp) +; RV32D-ILP32F-NEXT: lw a0, 0(sp) +; RV32D-ILP32F-NEXT: lw a1, 4(sp) +; RV32D-ILP32F-NEXT: addi sp, sp, 48 +; RV32D-ILP32F-NEXT: ret +; +; RV32D-ILP32D-LABEL: va2: +; RV32D-ILP32D: # %bb.0: +; RV32D-ILP32D-NEXT: addi sp, sp, -48 +; RV32D-ILP32D-NEXT: sw a1, 20(sp) +; RV32D-ILP32D-NEXT: sw a2, 24(sp) +; RV32D-ILP32D-NEXT: sw a3, 28(sp) +; RV32D-ILP32D-NEXT: sw a4, 32(sp) +; RV32D-ILP32D-NEXT: addi a0, sp, 20 +; RV32D-ILP32D-NEXT: sw a0, 12(sp) +; RV32D-ILP32D-NEXT: lw a0, 12(sp) +; RV32D-ILP32D-NEXT: sw a5, 36(sp) +; RV32D-ILP32D-NEXT: sw a6, 40(sp) +; RV32D-ILP32D-NEXT: sw a7, 44(sp) +; RV32D-ILP32D-NEXT: addi a1, a0, 7 +; RV32D-ILP32D-NEXT: andi a1, a1, -8 +; RV32D-ILP32D-NEXT: fld fa5, 0(a1) +; RV32D-ILP32D-NEXT: addi a0, a0, 15 +; RV32D-ILP32D-NEXT: sw a0, 12(sp) +; RV32D-ILP32D-NEXT: fsd fa5, 0(sp) +; RV32D-ILP32D-NEXT: lw a0, 0(sp) +; RV32D-ILP32D-NEXT: lw a1, 4(sp) +; RV32D-ILP32D-NEXT: addi sp, sp, 48 +; RV32D-ILP32D-NEXT: ret +; +; RV64-LABEL: va2: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: sd a2, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: sd a5, 56(sp) +; RV64-NEXT: sd a6, 64(sp) +; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: addi a1, a0, 7 +; RV64-NEXT: andi a1, a1, -8 +; RV64-NEXT: addi a0, a0, 15 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 0(a1) +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va + %ptrint = ptrtoint ptr %argp.cur to iXLen + %1 = add iXLen %ptrint, 7 + %2 = and iXLen %1, -8 + %argp.cur.aligned = 
inttoptr iXLen %1 to ptr + %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i32 8 + store ptr %argp.next, ptr %va + %3 = inttoptr iXLen %2 to ptr + %4 = load double, ptr %3, align 8 + %5 = bitcast double %4 to i64 + call void @llvm.va_end(ptr %va) + ret i64 %5 +} + +; This test is slightly different than the SelectionDAG counterpart because +; narrowScalar and widenScalar for G_VAARG on types outside of [s32, sXLen] +; are not implemented yet. +define i64 @va2_va_arg(ptr %fmt, ...) nounwind { +; RV32-LABEL: va2_va_arg: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: li a1, 0 +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va2_va_arg: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: sd a2, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: sd a5, 56(sp) +; RV64-NEXT: sd a6, 64(sp) +; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: addi a0, sp, 24 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + %2 = zext i32 %1 to i64 + ret i64 %2 +} + +define void @va2_caller() nounwind { +; RV32-LABEL: va2_caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 
4-byte Folded Spill +; RV32-NEXT: li a1, 1 +; RV32-NEXT: call va2@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: va2_caller: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: li a1, 1 +; RV64-NEXT: call va2@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %1 = call i64 (ptr, ...) @va2(ptr undef, i32 1) + ret void +} + +; On RV32, Ensure a named 2*xlen argument is passed in a1 and a2, while the +; vararg double is passed in a4 and a5 (rather than a3 and a4) + +define i64 @va3(i32 %a, i64 %b, ...) nounwind { +; ILP32-LABEL: va3: +; ILP32: # %bb.0: +; ILP32-NEXT: addi sp, sp, -32 +; ILP32-NEXT: sw a3, 12(sp) +; ILP32-NEXT: sw a4, 16(sp) +; ILP32-NEXT: addi a0, sp, 12 +; ILP32-NEXT: sw a0, 4(sp) +; ILP32-NEXT: lw a0, 4(sp) +; ILP32-NEXT: sw a5, 20(sp) +; ILP32-NEXT: sw a6, 24(sp) +; ILP32-NEXT: sw a7, 28(sp) +; ILP32-NEXT: addi a3, a0, 7 +; ILP32-NEXT: andi a3, a3, -8 +; ILP32-NEXT: addi a0, a0, 15 +; ILP32-NEXT: sw a0, 4(sp) +; ILP32-NEXT: lw a4, 0(a3) +; ILP32-NEXT: lw a3, 4(a3) +; ILP32-NEXT: add a0, a1, a4 +; ILP32-NEXT: sltu a1, a0, a4 +; ILP32-NEXT: add a2, a2, a3 +; ILP32-NEXT: add a1, a2, a1 +; ILP32-NEXT: addi sp, sp, 32 +; ILP32-NEXT: ret +; +; RV32D-ILP32-LABEL: va3: +; RV32D-ILP32: # %bb.0: +; RV32D-ILP32-NEXT: addi sp, sp, -48 +; RV32D-ILP32-NEXT: sw a3, 28(sp) +; RV32D-ILP32-NEXT: sw a4, 32(sp) +; RV32D-ILP32-NEXT: addi a0, sp, 28 +; RV32D-ILP32-NEXT: sw a0, 20(sp) +; RV32D-ILP32-NEXT: lw a0, 20(sp) +; RV32D-ILP32-NEXT: sw a5, 36(sp) +; RV32D-ILP32-NEXT: sw a6, 40(sp) +; RV32D-ILP32-NEXT: sw a7, 44(sp) +; RV32D-ILP32-NEXT: addi a3, a0, 7 +; RV32D-ILP32-NEXT: andi a3, a3, -8 +; RV32D-ILP32-NEXT: fld fa5, 0(a3) +; RV32D-ILP32-NEXT: addi a0, a0, 15 +; RV32D-ILP32-NEXT: sw a0, 20(sp) +; RV32D-ILP32-NEXT: fsd fa5, 8(sp) +; RV32D-ILP32-NEXT: lw a3, 8(sp) +; 
RV32D-ILP32-NEXT: lw a4, 12(sp) +; RV32D-ILP32-NEXT: add a0, a1, a3 +; RV32D-ILP32-NEXT: sltu a1, a0, a3 +; RV32D-ILP32-NEXT: add a2, a2, a4 +; RV32D-ILP32-NEXT: add a1, a2, a1 +; RV32D-ILP32-NEXT: addi sp, sp, 48 +; RV32D-ILP32-NEXT: ret +; +; RV32D-ILP32F-LABEL: va3: +; RV32D-ILP32F: # %bb.0: +; RV32D-ILP32F-NEXT: addi sp, sp, -48 +; RV32D-ILP32F-NEXT: sw a3, 28(sp) +; RV32D-ILP32F-NEXT: sw a4, 32(sp) +; RV32D-ILP32F-NEXT: addi a0, sp, 28 +; RV32D-ILP32F-NEXT: sw a0, 20(sp) +; RV32D-ILP32F-NEXT: lw a0, 20(sp) +; RV32D-ILP32F-NEXT: sw a5, 36(sp) +; RV32D-ILP32F-NEXT: sw a6, 40(sp) +; RV32D-ILP32F-NEXT: sw a7, 44(sp) +; RV32D-ILP32F-NEXT: addi a3, a0, 7 +; RV32D-ILP32F-NEXT: andi a3, a3, -8 +; RV32D-ILP32F-NEXT: fld fa5, 0(a3) +; RV32D-ILP32F-NEXT: addi a0, a0, 15 +; RV32D-ILP32F-NEXT: sw a0, 20(sp) +; RV32D-ILP32F-NEXT: fsd fa5, 8(sp) +; RV32D-ILP32F-NEXT: lw a3, 8(sp) +; RV32D-ILP32F-NEXT: lw a4, 12(sp) +; RV32D-ILP32F-NEXT: add a0, a1, a3 +; RV32D-ILP32F-NEXT: sltu a1, a0, a3 +; RV32D-ILP32F-NEXT: add a2, a2, a4 +; RV32D-ILP32F-NEXT: add a1, a2, a1 +; RV32D-ILP32F-NEXT: addi sp, sp, 48 +; RV32D-ILP32F-NEXT: ret +; +; RV32D-ILP32D-LABEL: va3: +; RV32D-ILP32D: # %bb.0: +; RV32D-ILP32D-NEXT: addi sp, sp, -48 +; RV32D-ILP32D-NEXT: sw a3, 28(sp) +; RV32D-ILP32D-NEXT: sw a4, 32(sp) +; RV32D-ILP32D-NEXT: addi a0, sp, 28 +; RV32D-ILP32D-NEXT: sw a0, 20(sp) +; RV32D-ILP32D-NEXT: lw a0, 20(sp) +; RV32D-ILP32D-NEXT: sw a5, 36(sp) +; RV32D-ILP32D-NEXT: sw a6, 40(sp) +; RV32D-ILP32D-NEXT: sw a7, 44(sp) +; RV32D-ILP32D-NEXT: addi a3, a0, 7 +; RV32D-ILP32D-NEXT: andi a3, a3, -8 +; RV32D-ILP32D-NEXT: fld fa5, 0(a3) +; RV32D-ILP32D-NEXT: addi a0, a0, 15 +; RV32D-ILP32D-NEXT: sw a0, 20(sp) +; RV32D-ILP32D-NEXT: fsd fa5, 8(sp) +; RV32D-ILP32D-NEXT: lw a3, 8(sp) +; RV32D-ILP32D-NEXT: lw a4, 12(sp) +; RV32D-ILP32D-NEXT: add a0, a1, a3 +; RV32D-ILP32D-NEXT: sltu a1, a0, a3 +; RV32D-ILP32D-NEXT: add a2, a2, a4 +; RV32D-ILP32D-NEXT: add a1, a2, a1 +; RV32D-ILP32D-NEXT: addi sp, sp, 48 
+; RV32D-ILP32D-NEXT: ret +; +; RV64-LABEL: va3: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd a2, 16(sp) +; RV64-NEXT: sd a3, 24(sp) +; RV64-NEXT: sd a4, 32(sp) +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: sd a5, 40(sp) +; RV64-NEXT: sd a6, 48(sp) +; RV64-NEXT: sd a7, 56(sp) +; RV64-NEXT: addi a2, a0, 7 +; RV64-NEXT: andi a2, a2, -8 +; RV64-NEXT: addi a0, a0, 15 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 0(a2) +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va + %ptrint = ptrtoint ptr %argp.cur to iXLen + %1 = add iXLen %ptrint, 7 + %2 = and iXLen %1, -8 + %argp.cur.aligned = inttoptr iXLen %1 to ptr + %argp.next = getelementptr inbounds i8, ptr %argp.cur.aligned, i32 8 + store ptr %argp.next, ptr %va + %3 = inttoptr iXLen %2 to ptr + %4 = load double, ptr %3, align 8 + call void @llvm.va_end(ptr %va) + %5 = bitcast double %4 to i64 + %6 = add i64 %b, %5 + ret i64 %6 +} + +; This test is slightly different than the SelectionDAG counterpart because +; narrowScalar and widenScalar for G_VAARG on types outside of [s32, sXLen] +; are not implemented yet. +define i64 @va3_va_arg(i32 %a, i64 %b, ...) 
nounwind { +; RV32-LABEL: va3_va_arg: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -32 +; RV32-NEXT: sw a3, 12(sp) +; RV32-NEXT: sw a4, 16(sp) +; RV32-NEXT: sw a5, 20(sp) +; RV32-NEXT: sw a6, 24(sp) +; RV32-NEXT: sw a7, 28(sp) +; RV32-NEXT: addi a0, sp, 12 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lw a0, 4(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a3, a0, 4 +; RV32-NEXT: sw a3, 4(sp) +; RV32-NEXT: lw a3, 0(a0) +; RV32-NEXT: add a0, a1, a3 +; RV32-NEXT: sltu a1, a0, a3 +; RV32-NEXT: add a1, a2, a1 +; RV32-NEXT: addi sp, sp, 32 +; RV32-NEXT: ret +; +; RV64-LABEL: va3_va_arg: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -64 +; RV64-NEXT: sd a2, 16(sp) +; RV64-NEXT: sd a3, 24(sp) +; RV64-NEXT: sd a4, 32(sp) +; RV64-NEXT: sd a5, 40(sp) +; RV64-NEXT: sd a6, 48(sp) +; RV64-NEXT: sd a7, 56(sp) +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a2, a0, 4 +; RV64-NEXT: sd a2, 8(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: srli a0, a0, 32 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: addi sp, sp, 64 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + %2 = zext i32 %1 to i64 + %3 = add i64 %b, %2 + ret i64 %3 +} + +define void @va3_caller() nounwind { +; RV32-LABEL: va3_caller: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: lui a0, 5 +; RV32-NEXT: addi a3, a0, -480 +; RV32-NEXT: li a0, 2 +; RV32-NEXT: li a1, 1111 +; RV32-NEXT: li a2, 0 +; RV32-NEXT: call va3@plt +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: ret +; +; RV64-LABEL: va3_caller: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: lui a0, 5 +; RV64-NEXT: addiw a2, a0, -480 +; RV64-NEXT: 
li a0, 2 +; RV64-NEXT: li a1, 1111 +; RV64-NEXT: call va3@plt +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: ret + %1 = call i64 (i32, i64, ...) @va3(i32 2, i64 1111, i32 20000) + ret void +} + +declare void @llvm.va_copy(ptr, ptr) + +define i32 @va4_va_copy(i32 %argno, ...) nounwind { +; RV32-LABEL: va4_va_copy: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: sw s0, 8(sp) # 4-byte Folded Spill +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a0, sp, 20 +; RV32-NEXT: sw a0, 4(sp) +; RV32-NEXT: lw a0, 4(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lw a1, 4(sp) +; RV32-NEXT: mv a2, sp +; RV32-NEXT: lw s0, 0(a0) +; RV32-NEXT: sw a2, 0(a1) +; RV32-NEXT: lw a0, 0(sp) +; RV32-NEXT: call notdead@plt +; RV32-NEXT: lw a0, 4(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 4(sp) +; RV32-NEXT: lw a1, 4(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: addi a1, a1, 3 +; RV32-NEXT: andi a1, a1, -4 +; RV32-NEXT: addi a2, a1, 4 +; RV32-NEXT: sw a2, 4(sp) +; RV32-NEXT: lw a2, 4(sp) +; RV32-NEXT: lw a1, 0(a1) +; RV32-NEXT: addi a2, a2, 3 +; RV32-NEXT: andi a2, a2, -4 +; RV32-NEXT: addi a3, a2, 4 +; RV32-NEXT: sw a3, 4(sp) +; RV32-NEXT: lw a2, 0(a2) +; RV32-NEXT: add a0, a0, s0 +; RV32-NEXT: add a1, a1, a2 +; RV32-NEXT: add a0, a0, a1 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: lw s0, 8(sp) # 4-byte Folded Reload +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va4_va_copy: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -96 +; RV64-NEXT: sd ra, 24(sp) # 8-byte Folded Spill +; RV64-NEXT: sd s0, 16(sp) # 8-byte Folded Spill +; 
RV64-NEXT: sd a1, 40(sp) +; RV64-NEXT: sd a2, 48(sp) +; RV64-NEXT: sd a3, 56(sp) +; RV64-NEXT: sd a4, 64(sp) +; RV64-NEXT: sd a5, 72(sp) +; RV64-NEXT: sd a6, 80(sp) +; RV64-NEXT: sd a7, 88(sp) +; RV64-NEXT: addi a0, sp, 40 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: ld a1, 8(sp) +; RV64-NEXT: mv a2, sp +; RV64-NEXT: lw s0, 0(a0) +; RV64-NEXT: sd a2, 0(a1) +; RV64-NEXT: lw a0, 4(sp) +; RV64-NEXT: lwu a1, 0(sp) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: call notdead@plt +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: ld a1, 8(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: addi a1, a1, 3 +; RV64-NEXT: andi a1, a1, -4 +; RV64-NEXT: addi a2, a1, 4 +; RV64-NEXT: sd a2, 8(sp) +; RV64-NEXT: ld a2, 8(sp) +; RV64-NEXT: lw a1, 0(a1) +; RV64-NEXT: addi a2, a2, 3 +; RV64-NEXT: andi a2, a2, -4 +; RV64-NEXT: addi a3, a2, 4 +; RV64-NEXT: sd a3, 8(sp) +; RV64-NEXT: lw a2, 0(a2) +; RV64-NEXT: add a0, a0, s0 +; RV64-NEXT: add a1, a1, a2 +; RV64-NEXT: addw a0, a0, a1 +; RV64-NEXT: ld ra, 24(sp) # 8-byte Folded Reload +; RV64-NEXT: ld s0, 16(sp) # 8-byte Folded Reload +; RV64-NEXT: addi sp, sp, 96 +; RV64-NEXT: ret + %vargs = alloca ptr + %wargs = alloca ptr + call void @llvm.va_start(ptr %vargs) + %1 = va_arg ptr %vargs, i32 + call void @llvm.va_copy(ptr %wargs, ptr %vargs) + %2 = load ptr, ptr %wargs, align 4 + call void @notdead(ptr %2) + %3 = va_arg ptr %vargs, i32 + %4 = va_arg ptr %vargs, i32 + %5 = va_arg ptr %vargs, i32 + call void @llvm.va_end(ptr %vargs) + call void @llvm.va_end(ptr %wargs) + %add1 = add i32 %3, %1 + %add2 = add i32 %add1, %4 + %add3 = add i32 %add2, %5 + ret i32 %add3 +} + +; The va5_aligned_stack_callee and caller functions are omitted from this file +; since they were not included in the 
IR lowering test when vararg calls were +; initially added. + +; A function with no fixed arguments is not valid C, but can be +; specified in LLVM IR. We must ensure the vararg save area is +; still set up correctly. + +define i32 @va6_no_fixed_args(...) nounwind { +; RV32-LABEL: va6_no_fixed_args: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -48 +; RV32-NEXT: sw a0, 16(sp) +; RV32-NEXT: sw a1, 20(sp) +; RV32-NEXT: sw a2, 24(sp) +; RV32-NEXT: sw a3, 28(sp) +; RV32-NEXT: sw a4, 32(sp) +; RV32-NEXT: sw a5, 36(sp) +; RV32-NEXT: sw a6, 40(sp) +; RV32-NEXT: sw a7, 44(sp) +; RV32-NEXT: addi a0, sp, 16 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: addi a0, a0, 3 +; RV32-NEXT: andi a0, a0, -4 +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: addi sp, sp, 48 +; RV32-NEXT: ret +; +; RV64-LABEL: va6_no_fixed_args: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -80 +; RV64-NEXT: sd a0, 16(sp) +; RV64-NEXT: sd a1, 24(sp) +; RV64-NEXT: sd a2, 32(sp) +; RV64-NEXT: sd a3, 40(sp) +; RV64-NEXT: sd a4, 48(sp) +; RV64-NEXT: sd a5, 56(sp) +; RV64-NEXT: sd a6, 64(sp) +; RV64-NEXT: sd a7, 72(sp) +; RV64-NEXT: addi a0, sp, 16 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: ld a0, 8(sp) +; RV64-NEXT: addi a0, a0, 3 +; RV64-NEXT: andi a0, a0, -4 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: sd a1, 8(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: addi sp, sp, 80 +; RV64-NEXT: ret + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %1 = va_arg ptr %va, i32 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + +; TODO: improve constant materialization of stack addresses + +define i32 @va_large_stack(ptr %fmt, ...) 
{ +; RV32-LABEL: va_large_stack: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: addi a0, a0, 304 +; RV32-NEXT: sub sp, sp, a0 +; RV32-NEXT: .cfi_def_cfa_offset 100000048 +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a1, 276(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a2, 280(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a3, 284(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a4, 288(a0) +; RV32-NEXT: lui a0, 24414 +; RV32-NEXT: addi a0, a0, 276 +; RV32-NEXT: add a0, sp, a0 +; RV32-NEXT: sw a0, 12(sp) +; RV32-NEXT: lw a0, 12(sp) +; RV32-NEXT: lui a1, 24414 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a5, 292(a1) +; RV32-NEXT: lui a1, 24414 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a6, 296(a1) +; RV32-NEXT: lui a1, 24414 +; RV32-NEXT: add a1, sp, a1 +; RV32-NEXT: sw a7, 300(a1) +; RV32-NEXT: addi a1, a0, 4 +; RV32-NEXT: sw a1, 12(sp) +; RV32-NEXT: lw a0, 0(a0) +; RV32-NEXT: lui a1, 24414 +; RV32-NEXT: addi a1, a1, 304 +; RV32-NEXT: add sp, sp, a1 +; RV32-NEXT: ret +; +; RV64-LABEL: va_large_stack: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: addiw a0, a0, 336 +; RV64-NEXT: sub sp, sp, a0 +; RV64-NEXT: .cfi_def_cfa_offset 100000080 +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a1, 280(a0) +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a2, 288(a0) +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a3, 296(a0) +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a4, 304(a0) +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a5, 312(a0) +; RV64-NEXT: lui a0, 24414 +; RV64-NEXT: addiw a0, a0, 280 +; RV64-NEXT: add a0, sp, a0 +; RV64-NEXT: sd a0, 8(sp) +; RV64-NEXT: lw a0, 12(sp) +; RV64-NEXT: lwu a1, 8(sp) +; RV64-NEXT: lui a2, 24414 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: sd a6, 320(a2) +; 
RV64-NEXT: lui a2, 24414 +; RV64-NEXT: add a2, sp, a2 +; RV64-NEXT: sd a7, 328(a2) +; RV64-NEXT: slli a0, a0, 32 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: addi a1, a0, 4 +; RV64-NEXT: srli a2, a1, 32 +; RV64-NEXT: sw a1, 8(sp) +; RV64-NEXT: sw a2, 12(sp) +; RV64-NEXT: lw a0, 0(a0) +; RV64-NEXT: lui a1, 24414 +; RV64-NEXT: addiw a1, a1, 336 +; RV64-NEXT: add sp, sp, a1 +; RV64-NEXT: ret + %large = alloca [ 100000000 x i8 ] + %va = alloca ptr + call void @llvm.va_start(ptr %va) + %argp.cur = load ptr, ptr %va, align 4 + %argp.next = getelementptr inbounds i8, ptr %argp.cur, i32 4 + store ptr %argp.next, ptr %va, align 4 + %1 = load i32, ptr %argp.cur, align 4 + call void @llvm.va_end(ptr %va) + ret i32 %1 +} + + +