diff --git a/clang/test/CodeGen/expand-variadic-call.c b/clang/test/CodeGen/expand-variadic-call.c new file mode 100644 index 0000000000000..fa2b984bec08a --- /dev/null +++ b/clang/test/CodeGen/expand-variadic-call.c @@ -0,0 +1,273 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py + +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -target-cpu x86-64-v4 -std=c23 -O1 -ffreestanding -emit-llvm -o - %s | FileCheck %s + +// This test sanity checks calling a variadic function with the expansion transform disabled. +// The IR test cases {arch}/expand-variadic-call-*.ll correspond to IR generated from this test case. + +typedef __builtin_va_list va_list; +#define va_copy(dest, src) __builtin_va_copy(dest, src) +#define va_start(ap, ...) __builtin_va_start(ap, 0) +#define va_end(ap) __builtin_va_end(ap) +#define va_arg(ap, type) __builtin_va_arg(ap, type) + +// 32 bit x86 alignment uses getTypeStackAlign for special cases +// Whitebox testing. 
+// Needs a type >= 16 which is either a simd or a struct containing a simd +// darwinvectorabi should force 4 bytes +// linux vectors with align 16/32/64 return that alignment + + +void wrapped(va_list); + +// CHECK-LABEL: @codegen_for_copy( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CP:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7:[0-9]+]] +// CHECK-NEXT: call void @llvm.va_copy(ptr nonnull [[CP]], ptr [[X:%.*]]) +// CHECK-NEXT: call void @wrapped(ptr noundef nonnull [[CP]]) #[[ATTR8:[0-9]+]] +// CHECK-NEXT: call void @llvm.va_end(ptr [[CP]]) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[CP]]) #[[ATTR7]] +// CHECK-NEXT: ret void +// +void codegen_for_copy(va_list x) +{ + va_list cp; + va_copy(cp, x); + wrapped(cp); + va_end(cp); +} + + +// CHECK-LABEL: @vararg( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VA:%.*]] = alloca [1 x %struct.__va_list_tag], align 16 +// CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]] +// CHECK-NEXT: call void @llvm.va_start(ptr nonnull [[VA]]) +// CHECK-NEXT: call void @wrapped(ptr noundef nonnull [[VA]]) #[[ATTR8]] +// CHECK-NEXT: call void @llvm.va_end(ptr [[VA]]) +// CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull [[VA]]) #[[ATTR7]] +// CHECK-NEXT: ret void +// + void vararg(...) 
+{ + va_list va; + __builtin_va_start(va, 0); + wrapped(va); + va_end(va); +} + +// vectors with alignment 16/32/64 are natively aligned on linux x86 +// v32f32 would be a m1024 type, larger than x64 defines at time of writing +typedef int i32; +typedef float v4f32 __attribute__((__vector_size__(16), __aligned__(16))); +typedef float v8f32 __attribute__((__vector_size__(32), __aligned__(32))); +typedef float v16f32 __attribute__((__vector_size__(64), __aligned__(64))); +typedef float v32f32 __attribute__((__vector_size__(128), __aligned__(128))); + + +// Pass a single value to wrapped() via vararg(...) +// CHECK-LABEL: @single_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]]) #[[ATTR9:[0-9]+]] +// CHECK-NEXT: ret void +// +void single_i32(i32 x) +{ + vararg(x); +} + +// CHECK-LABEL: @single_double( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(double noundef [[X:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void single_double(double x) +{ + vararg(x); +} + +// CHECK-LABEL: @single_v4f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(<4 x float> noundef [[X:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void single_v4f32(v4f32 x) +{ + vararg(x); +} + +// CHECK-LABEL: @single_v8f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(<8 x float> noundef [[X:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void single_v8f32(v8f32 x) +{ + vararg(x); +} + +// CHECK-LABEL: @single_v16f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) 
@vararg(<16 x float> noundef [[X:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void single_v16f32(v16f32 x) +{ + vararg(x); +} + +// CHECK-LABEL: @single_v32f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128 +// CHECK-NEXT: [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2:![0-9]+]] +// CHECK-NEXT: store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void single_v32f32(v32f32 x) +{ + vararg(x); +} + + + +// CHECK-LABEL: @i32_double( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], double noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_double(i32 x, double y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @double_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(double noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void double_i32(double x, i32 y) +{ + vararg(x, y); +} + + +// A struct used by libc variadic tests + +typedef struct { + char c; + short s; + int i; + long l; + float f; + double d; +} libcS; + +// CHECK-LABEL: @i32_libcS( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_libcS(i32 x, libcS y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @libcS_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval([[STRUCT_LIBCS:%.*]]) align 8 [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void libcS_i32(libcS x, i32 y) +{ + vararg(x, y); +} + + +// CHECK-LABEL: @i32_v4f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) 
@vararg(i32 noundef [[X:%.*]], <4 x float> noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_v4f32(i32 x, v4f32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @v4f32_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(<4 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void v4f32_i32(v4f32 x, i32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @i32_v8f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], <8 x float> noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_v8f32(i32 x, v8f32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @v8f32_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(<8 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void v8f32_i32(v8f32 x, i32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @i32_v16f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(i32 noundef [[X:%.*]], <16 x float> noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_v16f32(i32 x, v16f32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @v16f32_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void (...) @vararg(<16 x float> noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void v16f32_i32(v16f32 x, i32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @i32_v32f32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128 +// CHECK-NEXT: [[Y:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x float> [[Y]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void (...) 
@vararg(i32 noundef [[X:%.*]], ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void i32_v32f32(i32 x, v32f32 y) +{ + vararg(x, y); +} + +// CHECK-LABEL: @v32f32_i32( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[INDIRECT_ARG_TEMP:%.*]] = alloca <32 x float>, align 128 +// CHECK-NEXT: [[X:%.*]] = load <32 x float>, ptr [[TMP0:%.*]], align 128, !tbaa [[TBAA2]] +// CHECK-NEXT: store <32 x float> [[X]], ptr [[INDIRECT_ARG_TEMP]], align 128, !tbaa [[TBAA2]] +// CHECK-NEXT: tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 [[INDIRECT_ARG_TEMP]], i32 noundef [[Y:%.*]]) #[[ATTR9]] +// CHECK-NEXT: ret void +// +void v32f32_i32(v32f32 x, i32 y) +{ + vararg(x, y); +} diff --git a/clang/test/CodeGen/variadic-wrapper-removal.c b/clang/test/CodeGen/variadic-wrapper-removal.c new file mode 100644 index 0000000000000..da41dde16f3d7 --- /dev/null +++ b/clang/test/CodeGen/variadic-wrapper-removal.c @@ -0,0 +1,86 @@ +// REQUIRES: x86-registered-target +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s +// RUN: %clang_cc1 -triple=x86_64-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s + +// neither arm arch is implemented yet, leaving it here as a reminder +// armv6 is a ptr as far as the struct is concerned, but possibly also a [1 x i32] passed by value +// that seems insistent, maybe leave 32 bit arm alone for now +// aarch64 is a struct of five things passed using byval memcpy + +// R-N: %clang_cc1 -triple=armv6-none--eabi -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s + +// R-N: %clang_cc1 -triple=aarch64-none-linux-gnu -O1 -emit-llvm -o - %s | opt --passes=expand-variadics -S | FileCheck %s + + + +// expand-variadics rewrites calls to variadic functions into calls to +// equivalent functions that take a va_list argument. 
A property of the +// implementation is that said "equivalent function" may be a pre-existing one. +// This is equivalent to inlining a sufficiently simple variadic wrapper. + +#include + +typedef int FILE; // close enough for this test + +// fprintf is sometimes implemented as a call to vfprintf. That fits the +// pattern the transform pass recognises - given an implementation of fprintf +// in the IR module, calls to it can be rewritten into calls into vfprintf. + +// CHECK-LABEL: define{{.*}} i32 @fprintf( +// CHECK-LABEL: define{{.*}} i32 @call_fprintf( +// CHECK-NOT: @fprintf +// CHECK: @vfprintf +int vfprintf(FILE *restrict f, const char *restrict fmt, va_list ap); +int fprintf(FILE *restrict f, const char *restrict fmt, ...) +{ + int ret; + va_list ap; + va_start(ap, fmt); + ret = vfprintf(f, fmt, ap); + va_end(ap); + return ret; +} +int call_fprintf(FILE *f) +{ + int x = 42; + double y = 3.14; + return fprintf(f, "int %d dbl %g\n", x, y); +} + +// Void return type is also OK + +// CHECK-LABEL: define{{.*}} void @no_result( +// CHECK-LABEL: define{{.*}} void @call_no_result( +// CHECK-NOT: @no_result +// CHECK: @vno_result +void vno_result(const char * fmt, va_list); +void no_result(const char * fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + vno_result(fmt, ap); + va_end(ap); +} +void call_no_result(FILE *f) +{ + int x = 101; + no_result("", x); +} + +// The vaend in the forwarding implementation is optional where it's a no-op + +// CHECK-LABEL: define{{.*}} i32 @no_vaend( +// CHECK-LABEL: define{{.*}} i32 @call_no_vaend( +// CHECK-NOT: @no_vaend +// CHECK: @vno_vaend +int vno_vaend(int x, va_list); +int no_vaend(int x, ...) 
+{ + va_list ap; + va_start(ap, x); + return vno_vaend(x, ap); +} +int call_no_vaend(int x) +{ + return no_vaend(x, 10, 2.5f); +} diff --git a/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp new file mode 100644 index 0000000000000..cf436ead77a2c --- /dev/null +++ b/clang/test/CodeGenCXX/inline-then-fold-variadics.cpp @@ -0,0 +1,117 @@ +// RUN: %clang_cc1 -triple i386-unknown-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK,X86Linux + +// RUN: %clang_cc1 -triple x86_64-linux-gnu -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV + +// RUN: %clang_cc1 -triple i386-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK,X86Darwin + +// RUN: %clang_cc1 -triple x86_64-apple-darwin -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK,X64SystemV + +// RUN: %clang_cc1 -triple i686-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK,X86Windows + +// 64 bit windows va_arg passes most types indirectly but the call instruction uses the types by value +// ___: %clang_cc1 -triple x86_64-pc-windows-msvc -Wno-varargs -O1 -disable-llvm-passes -emit-llvm -o - %s | opt --passes=instcombine | opt -passes="expand-variadics,default" -S | FileCheck %s --check-prefixes=CHECK + +// Checks for consistency between clang and expand-variadics +// 1. Use clang to lower va_arg +// 2. 
Use expand-variadics to lower the rest of the variadic operations +// 3. Use opt -O1 to simplify the result for simpler filecheck patterns +// The simplification will fail when the two are not consistent, modulo bugs elsewhere. + +#include + +// This test can be simplified when expand-variadics is extended to apply to more patterns. +// The first_valist and second_valist functions can then be inlined, either in the test or +// by enabling optimisaton passes in the clang invocation. +// The explicit instcombine pass canonicalises the variadic function IR. + +// More complicated tests need instcombine of ptrmask to land first. + +template +static X first_valist(va_list va) { + return va_arg(va, X); +} + +template +static X first(...) { + va_list va; + __builtin_va_start(va, 0); + return first_valist(va); +} + +template +static Y second_valist(va_list va) { + va_arg(va, X); + Y r = va_arg(va, Y); + return r; +} + + +template +static Y second(...) { + va_list va; + __builtin_va_start(va, 0); + return second_valist(va); +} + +extern "C" +{ +// CHECK-LABEL: define{{.*}} i32 @first_i32_i32(i32{{.*}} %x, i32{{.*}} %y) +// CHECK: entry: +// CHECK: ret i32 %x +int first_i32_i32(int x, int y) +{ + return first(x, y); +} + +// CHECK-LABEL: define{{.*}} i32 @second_i32_i32(i32{{.*}} %x, i32{{.*}} %y) +// CHECK: entry: +// CHECK: ret i32 %y +int second_i32_i32(int x, int y) +{ + return second(x, y); +} +} + +// Permutations of an int and a double +extern "C" +{ +// CHECK-LABEL: define{{.*}} i32 @first_i32_f64(i32{{.*}} %x, double{{.*}} %y) +// CHECK: entry: +// CHECK: ret i32 %x +int first_i32_f64(int x, double y) +{ + return first(x, y); +} + +// CHECK-LABEL: define{{.*}} double @second_i32_f64(i32{{.*}} %x, double{{.*}} %y) +// CHECK: entry: + +// X86Linux: ret double %y +// X64SystemV: ret double %y +// X86Darwin: ret double %y +// X86Windows: [[TMP0:%.*]] = alloca <{ [4 x i8], double }>, align 4 +// X86Windows: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 4 
+// X86Windows: store double %y, ptr [[TMP1]], align 4 +// X86Windows: [[TMP2:%.*]] = load double, ptr [[TMP0]], align 4 +// X86Windows: ret double [[TMP2]] +double second_i32_f64(int x, double y) +{ + return second(x, y); +} + +// CHECK-LABEL: define{{.*}} double @first_f64_i32(double{{.*}} %x, i32{{.*}} %y) +// CHECK: entry: +// CHECK: ret double %x +double first_f64_i32(double x, int y) +{ + return first(x, y); +} + +// CHECK-LABEL: define{{.*}} i32 @second_f64_i32(double{{.*}} %x, i32{{.*}} %y) +// CHECK: entry: +// CHECK: ret i32 %y +int second_f64_i32(double x, int y) +{ + return second(x, y); +} +} diff --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h index bbfb8a0dbe26a..fe3208df7a23b 100644 --- a/llvm/include/llvm/CodeGen/Passes.h +++ b/llvm/include/llvm/CodeGen/Passes.h @@ -600,6 +600,10 @@ namespace llvm { /// Lowers KCFI operand bundles for indirect calls. FunctionPass *createKCFIPass(); + + // Inline variadic functions and expand variadic intrinsics. 
+ ModulePass *createExpandVariadicsPass(); + } // End llvm namespace #endif diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h index 3db639a687240..6487d0a5e26d1 100644 --- a/llvm/include/llvm/InitializePasses.h +++ b/llvm/include/llvm/InitializePasses.h @@ -106,6 +106,7 @@ void initializeExpandLargeDivRemLegacyPassPass(PassRegistry&); void initializeExpandMemCmpLegacyPassPass(PassRegistry &); void initializeExpandPostRAPass(PassRegistry&); void initializeExpandReductionsPass(PassRegistry&); +void initializeExpandVariadicsPass(PassRegistry &); void initializeExpandVectorPredicationPass(PassRegistry &); void initializeExternalAAWrapperPassPass(PassRegistry&); void initializeFEntryInserterPass(PassRegistry&); diff --git a/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h new file mode 100644 index 0000000000000..cfd37341f4d64 --- /dev/null +++ b/llvm/include/llvm/Transforms/IPO/ExpandVariadics.h @@ -0,0 +1,24 @@ +//===- ExpandVariadics.h - expand variadic functions ------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H +#define LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Module; + +class ExpandVariadicsPass : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); +}; + +} // end namespace llvm + +#endif // LLVM_TRANSFORMS_IPO_EXPANDVARIADICS_H diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index c934ec42f6eb1..624fffd233ce5 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -131,6 +131,7 @@ #include "llvm/Transforms/IPO/DeadArgumentElimination.h" #include "llvm/Transforms/IPO/ElimAvailExtern.h" #include "llvm/Transforms/IPO/EmbedBitcodePass.h" +#include "llvm/Transforms/IPO/ExpandVariadics.h" #include "llvm/Transforms/IPO/ForceFunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/IPO/FunctionImport.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 44511800ccff8..4ea9493208315 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -59,6 +59,7 @@ MODULE_PASS("dot-callgraph", CallGraphDOTPrinterPass()) MODULE_PASS("dxil-upgrade", DXILUpgradePass()) MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass()) MODULE_PASS("extract-blocks", BlockExtractorPass({}, false)) +MODULE_PASS("expand-variadics", ExpandVariadicsPass()) MODULE_PASS("forceattrs", ForceFunctionAttrsPass()) MODULE_PASS("function-import", FunctionImportPass()) MODULE_PASS("globalopt", GlobalOptPass()) diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt index 034f1587ae8df..b8bd0be91d223 100644 --- a/llvm/lib/Transforms/IPO/CMakeLists.txt +++ b/llvm/lib/Transforms/IPO/CMakeLists.txt @@ -12,6 +12,7 @@ 
add_llvm_component_library(LLVMipo DeadArgumentElimination.cpp ElimAvailExtern.cpp EmbedBitcodePass.cpp + ExpandVariadics.cpp ExtractGV.cpp ForceFunctionAttrs.cpp FunctionAttrs.cpp diff --git a/llvm/lib/Transforms/IPO/ExpandVariadics.cpp b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp new file mode 100644 index 0000000000000..623e7aad5ad21 --- /dev/null +++ b/llvm/lib/Transforms/IPO/ExpandVariadics.cpp @@ -0,0 +1,701 @@ +//===-- ExpandVariadicsPass.cpp --------------------------------*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is an optimisation pass for variadic functions. If called from codegen, +// it can serve as the implementation of variadic functions for a given target. +// +// The target-dependent parts are in namespace VariadicABIInfo. Enabling a new +// target means adding a case to VariadicABIInfo::create() along with tests. +// +// The module pass using that information is class ExpandVariadics. +// +// The strategy is: +// 1. Test whether a variadic function is sufficiently simple +// 2. If it was, calls to it can be replaced with calls to a different function +// 3. If it wasn't, try to split it into a simple function and a remainder +// 4. Optionally rewrite the varadic function calling convention as well +// +// This pass considers "sufficiently simple" to mean a variadic function that +// calls into a different function taking a va_list to do the real work. For +// example, libc might implement fprintf as a single basic block calling into +// vfprintf. This pass can then rewrite call to the variadic into some code +// to construct a target-specific value to use for the va_list and a call +// into the non-variadic implementation function. There's a test for that. 
+// +// Most other variadic functions whose definition is known can be converted into +// that form. Create a new internal function taking a va_list where the original +// took a ... parameter. Move the blocks across. Create a new block containing a +// va_start that calls into the new function. This is nearly target independent. +// +// Where this transform is consistent with the ABI, e.g. AMDGPU or NVPTX, or +// where the ABI can be chosen to align with this transform, the function +// interface can be rewritten along with calls to unknown variadic functions. +// +// The aggregate effect is to unblock other transforms, most critically the +// general purpose inliner. Known calls to variadic functions become zero cost. +// +// This pass does define some target specific information which is partially +// redundant with other parts of the compiler. In particular, the call frame +// it builds must be the exact complement of the va_arg lowering performed +// by clang. The va_list construction is similar to work done by the backend +// for targets that lower variadics there, though distinct in that this pass +// constructs the pieces using alloca instead of relative to stack pointers. +// +// Consistency with clang is primarily tested by emitting va_arg using clang +// then expanding the variadic functions using this pass, followed by trying +// to constant fold the functions to no-ops. +// +// Target specific behaviour is tested in IR - mainly checking that values are +// put into positions in call frames that make sense for that particular target. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/IPO/ExpandVariadics.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" +#include "llvm/InitializePasses.h" +#include "llvm/Pass.h" +#include "llvm/TargetParser/Triple.h" + +#define DEBUG_TYPE "expand-variadics" + +using namespace llvm; + +namespace { +namespace VariadicABIInfo { + +// calling convention for passing as valist object, same as it would be in C +// aarch64 uses byval +enum class ValistCc { value, pointer, /*byval*/ }; + +struct Interface { +protected: + Interface(uint32_t MinAlign, uint32_t MaxAlign) + : MinAlign(MinAlign), MaxAlign(MaxAlign) {} + +public: + virtual ~Interface() {} + const uint32_t MinAlign; + const uint32_t MaxAlign; + + // Most ABIs use a void* or char* for va_list, others can specialise + virtual Type *vaListType(LLVMContext &Ctx) { + return PointerType::getUnqual(Ctx); + } + + // Lots of targets use a void* pointed at a buffer for va_list. + // Some use more complicated iterator constructs. + // This interface seeks to express both. + // Ideally it would be a compile time error for a derived class + // to override only one of valistOnStack, initializeVAList. + + // How the vaListType is passed + virtual ValistCc valistCc() { return ValistCc::value; } + + // The valist might need to be stack allocated. + virtual bool valistOnStack() { return false; } + + virtual void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder, + AllocaInst * /*va_list*/, Value * /*buffer*/) { + // Function needs to be implemented iff valist is on the stack. 
+ assert(!valistOnStack()); + llvm_unreachable("Only called if valistOnStack() returns true"); + } + + // All targets currently implemented use a ptr for the valist parameter + Type *vaListParameterType(LLVMContext &Ctx) { + return PointerType::getUnqual(Ctx); + } + + bool vaEndIsNop() { return true; } + + bool vaCopyIsMemcpy() { return true; } +}; + +struct X64SystemV final : public Interface { + // X64 documented behaviour: + // Slots are at least eight byte aligned and at most 16 byte aligned. + // If the type needs more than sixteen byte alignment, it still only gets + // that much alignment on the stack. + // X64 behaviour in clang: + // Slots are at least eight byte aligned and at most naturally aligned + // This matches clang, not the ABI docs. + X64SystemV() : Interface(8, 0) {} + + Type *vaListType(LLVMContext &Ctx) override { + auto I32 = Type::getInt32Ty(Ctx); + auto Ptr = PointerType::getUnqual(Ctx); + return ArrayType::get(StructType::get(Ctx, {I32, I32, Ptr, Ptr}), 1); + } + ValistCc valistCc() override { return ValistCc::pointer; } + + bool valistOnStack() override { return true; } + + void initializeVAList(LLVMContext &Ctx, IRBuilder<> &Builder, + AllocaInst *VaList, Value *VoidBuffer) override { + assert(valistOnStack()); + assert(VaList != nullptr); + assert(VaList->getAllocatedType() == vaListType(Ctx)); + + Type *VaListTy = vaListType(Ctx); + + Type *I32 = Type::getInt32Ty(Ctx); + Type *I64 = Type::getInt64Ty(Ctx); + + Value *Idxs[3] = { + ConstantInt::get(I64, 0), + ConstantInt::get(I32, 0), + nullptr, + }; + + Idxs[2] = ConstantInt::get(I32, 0); + Builder.CreateStore( + ConstantInt::get(I32, 48), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "gp_offset")); + + Idxs[2] = ConstantInt::get(I32, 1); + Builder.CreateStore( + ConstantInt::get(I32, 6 * 8 + 8 * 16), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "fp_offset")); + + Idxs[2] = ConstantInt::get(I32, 2); + Builder.CreateStore( + VoidBuffer, + Builder.CreateInBoundsGEP(VaListTy, 
VaList, Idxs, "overfow_arg_area")); + + Idxs[2] = ConstantInt::get(I32, 3); + Builder.CreateStore( + ConstantPointerNull::get(PointerType::getUnqual(Ctx)), + Builder.CreateInBoundsGEP(VaListTy, VaList, Idxs, "reg_save_area")); + } +}; + +std::unique_ptr create(Module &M) { + llvm::Triple Triple(M.getTargetTriple()); + const bool IsLinuxABI = Triple.isOSLinux() || Triple.isOSCygMing(); + + switch (Triple.getArch()) { + case Triple::x86: { + // These seem to all fall out the same, despite getTypeStackAlign + // implying otherwise. + if (Triple.isOSDarwin()) { + struct X86Darwin final : public Interface { + // X86_32ABIInfo::getTypeStackAlignInBytes is misleading for this. + // The slotSize(4) implies a minimum alignment + // The AllowHigherAlign = true means there is no maximum alignment. + X86Darwin() : Interface(4, 0) {} + }; + + return std::make_unique(); + } + if (Triple.getOS() == llvm::Triple::Win32) { + struct X86Windows final : public Interface { + X86Windows() : Interface(4, 0) {} + }; + return std::make_unique(); + } + + if (IsLinuxABI) { + struct X86Linux final : public Interface { + X86Linux() : Interface(4, 0) {} + }; + return std::make_unique(); + } + break; + } + + case Triple::x86_64: { + if (Triple.isWindowsMSVCEnvironment() || Triple.isOSWindows()) { + struct X64Windows final : public Interface { + X64Windows() : Interface(8, 8) {} + }; + // x64 msvc emit vaarg passes > 8 byte values by pointer + // however the variadic call instruction created does not, e.g. + // a <4 x f32> will be passed as itself, not as a pointer or byval. + // Postponing resolution of that for now. 
+ return nullptr; + } + + if (Triple.isOSDarwin()) { + return std::make_unique(); + } + + if (IsLinuxABI) { + return std::make_unique(); + } + + break; + } + + default: + return nullptr; + } + + return nullptr; +} + +} // namespace VariadicABIInfo + +class ExpandVariadics : public ModulePass { +public: + static char ID; + std::unique_ptr ABI; + + ExpandVariadics() : ModulePass(ID) {} + StringRef getPassName() const override { return "Expand variadic functions"; } + + // A predicate in that return nullptr means false + // Returns the function target to use when inlining on success + Function *isFunctionInlinable(Module &M, Function *F); + + // Rewrite a call site. + void expandCall(Module &M, CallInst *CB, Function *VarargF, Function *NF); + + // this could be partially target specific + bool expansionApplicableToFunction(Module &M, Function *F) { + if (F->isIntrinsic() || !F->isVarArg() || + F->hasFnAttribute(Attribute::Naked)) + return false; + + if (F->getCallingConv() != CallingConv::C) + return false; + + if (GlobalValue::isInterposableLinkage(F->getLinkage())) + return false; + + for (const Use &U : F->uses()) { + const auto *CB = dyn_cast(U.getUser()); + + if (!CB) + return false; + + if (CB->isMustTailCall()) { + return false; + } + + if (!CB->isCallee(&U) || CB->getFunctionType() != F->getFunctionType()) { + return false; + } + } + + // Branch funnels look like variadic functions but aren't: + // + // define hidden void @__typeid_typeid1_0_branch_funnel(ptr nest %0, ...) { + // musttail call void (...) @llvm.icall.branch.funnel(ptr %0, ptr @vt1_1, + // ptr @vf1_1, ...) 
ret void + // } + // + // %1 = call i32 @__typeid_typeid1_0_branch_funnel(ptr nest %vtable, ptr + // %obj, i32 1) + + // TODO: there should be a reasonable way to check for an intrinsic + // without inserting a prototype that then needs to be removed + Function *Funnel = + Intrinsic::getDeclaration(&M, Intrinsic::icall_branch_funnel); + for (const User *U : Funnel->users()) { + if (auto *I = dyn_cast(U)) { + if (F == I->getFunction()) { + return false; + } + } + } + if (Funnel->use_empty()) + Funnel->eraseFromParent(); + + return true; + } + + template + static BasicBlock::iterator + skipIfInstructionIsSpecificIntrinsic(BasicBlock::iterator Iter) { + if (auto *Intrinsic = dyn_cast(&*Iter)) + if (Intrinsic->getIntrinsicID() == ID) + Iter++; + return Iter; + } + + bool callinstRewritable(CallBase *CB, Function *NF) { + if (CallInst *CI = dyn_cast(CB)) + if (CI->isMustTailCall()) + return false; + + return true; + } + + bool runOnFunction(Module &M, Function *F) { + bool Changed = false; + + if (!expansionApplicableToFunction(M, F)) + return false; + + Function *Equivalent = isFunctionInlinable(M, F); + + if (!Equivalent) + return Changed; + + for (User *U : llvm::make_early_inc_range(F->users())) + if (CallInst *CB = dyn_cast(U)) { + Value *calledOperand = CB->getCalledOperand(); + if (F == calledOperand) { + expandCall(M, CB, F, Equivalent); + Changed = true; + } + } + + return Changed; + } + + bool runOnModule(Module &M) override { + ABI = VariadicABIInfo::create(M); + if (!ABI) + return false; + + bool Changed = false; + for (Function &F : llvm::make_early_inc_range(M)) { + Changed |= runOnFunction(M, &F); + } + + return Changed; + } +}; + +Function *ExpandVariadics::isFunctionInlinable(Module &M, Function *F) { + assert(F->isVarArg()); + assert(expansionApplicableToFunction(M, F)); + + if (F->isDeclaration()) + return nullptr; + + // A variadic function is inlinable if it is sufficiently simple. 
+ // Specifically, if it is a single basic block which is functionally + // equivalent to packing the variadic arguments into a va_list which is + // passed to another function. The inlining strategy is to build a va_list + // in the caller and then call said inner function. + + // Single basic block. + BasicBlock &BB = F->getEntryBlock(); + if (!isa(BB.getTerminator())) + return nullptr; + + // Walk the block in order checking for specific instructions, some of them + // optional. + BasicBlock::iterator Iter = BB.begin(); + + AllocaInst *Alloca = dyn_cast(&*Iter++); + if (!Alloca) + return nullptr; + + Value *ValistArgument = Alloca; + + Iter = skipIfInstructionIsSpecificIntrinsic(Iter); + + VAStartInst *Start = dyn_cast(&*Iter++); + if (!Start || Start->getArgList() != ValistArgument) { + return nullptr; + } + + // The va_list instance is stack allocated + // The ... replacement is a va_list passed "by value" + // That involves a load for some ABIs and passing the pointer for others + Value *ValistTrailingArgument = nullptr; + switch (ABI->valistCc()) { + case VariadicABIInfo::ValistCc::value: { + // If it's being passed by value, need a load + // TODO: Check it's loading the right thing + auto *load = dyn_cast(&*Iter); + if (!load) + return nullptr; + ValistTrailingArgument = load; + Iter++; + break; + } + case VariadicABIInfo::ValistCc::pointer: { + // If it's being passed by pointer, going to use the alloca directly + ValistTrailingArgument = ValistArgument; + break; + } + } + + CallInst *Call = dyn_cast(&*Iter++); + if (!Call) + return nullptr; + + if (auto *end = dyn_cast(&*Iter)) { + if (end->getArgList() != ValistArgument) + return nullptr; + Iter++; + } else { + // Only fail on a missing va_end if it wasn't a no-op + if (!ABI->vaEndIsNop()) + return nullptr; + } + + Iter = skipIfInstructionIsSpecificIntrinsic(Iter); + + ReturnInst *Ret = dyn_cast(&*Iter++); + if (!Ret || Iter != BB.end()) + return nullptr; + + // The function call is expected to take the 
fixed arguments then the alloca + // TODO: Drop the vectors here, iterate over them both together instead. + SmallVector FuncArgs; + for (Argument &A : F->args()) + FuncArgs.push_back(&A); + + SmallVector CallArgs; + for (Use &A : Call->args()) + CallArgs.push_back(A); + + size_t Fixed = FuncArgs.size(); + if (Fixed + 1 != CallArgs.size()) + return nullptr; + + for (size_t i = 0; i < Fixed; i++) + if (FuncArgs[i] != CallArgs[i]) + return nullptr; + + if (CallArgs[Fixed] != ValistTrailingArgument) + return nullptr; + + // Check the variadic function returns the result of the inner call + Value *MaybeReturnValue = Ret->getReturnValue(); + if (Call->getType()->isVoidTy()) { + if (MaybeReturnValue != nullptr) + return nullptr; + } else { + if (MaybeReturnValue != Call) + return nullptr; + } + + // All checks passed. Found a va_list taking function we can use. + return Call->getCalledFunction(); +} + +void ExpandVariadics::expandCall(Module &M, CallInst *CB, Function *VarargF, + Function *NF) { + const DataLayout &DL = M.getDataLayout(); + + if (!callinstRewritable(CB, NF)) { + return; + } + + // This is something of a problem because the call instructions' idea of the + // function type doesn't necessarily match reality, before or after this + // pass + // Since the plan here is to build a new instruction there is no + // particular benefit to trying to preserve an incorrect initial type + // If the types don't match and we aren't changing ABI, leave it alone + // in case someone is deliberately doing dubious type punning through a + // varargs + FunctionType *FuncType = CB->getFunctionType(); + if (FuncType != VarargF->getFunctionType()) { + return; + } + + auto &Ctx = CB->getContext(); + + // Align the struct on ABI->MinAlign to start with + Align MaxFieldAlign(ABI->MinAlign ?
ABI->MinAlign : 1); + + // The strategy here is to allocate a call frame containing the variadic + // arguments laid out such that a target specific va_list can be initialised + // with it, such that target specific va_arg instructions will correctly + // iterate over it. Primarily this means getting the alignment right. + + class { + // The awkward memory layout is to allow access to a contiguous array of + // types + enum { N = 4 }; + SmallVector FieldTypes; + SmallVector, N> maybeValueIsByval; + + public: + void append(Type *T, Value *V, bool IsByVal) { + FieldTypes.push_back(T); + maybeValueIsByval.push_back({V, IsByVal}); + } + + void padding(LLVMContext &Ctx, uint64_t By) { + append(ArrayType::get(Type::getInt8Ty(Ctx), By), nullptr, false); + } + + size_t size() { return FieldTypes.size(); } + bool empty() { return FieldTypes.empty(); } + + StructType *asStruct(LLVMContext &Ctx, StringRef Name) { + const bool IsPacked = true; + return StructType::create(Ctx, FieldTypes, + (Twine(Name) + ".vararg").str(), IsPacked); + } + + void initialiseStructAlloca(const DataLayout &DL, IRBuilder<> &Builder, + AllocaInst *Alloced) { + + StructType *VarargsTy = cast(Alloced->getAllocatedType()); + + for (size_t i = 0; i < size(); i++) { + auto [v, IsByVal] = maybeValueIsByval[i]; + if (!v) + continue; + + auto r = Builder.CreateStructGEP(VarargsTy, Alloced, i); + if (IsByVal) { + Type *ByValType = FieldTypes[i]; + Builder.CreateMemCpy(r, {}, v, {}, + DL.getTypeAllocSize(ByValType).getFixedValue()); + } else { + Builder.CreateStore(v, r); + } + } + } + } Frame; + + uint64_t CurrentOffset = 0; + for (unsigned I = FuncType->getNumParams(), E = CB->arg_size(); I < E; ++I) { + Value *ArgVal = CB->getArgOperand(I); + bool IsByVal = CB->paramHasAttr(I, Attribute::ByVal); + Type *ArgType = IsByVal ? 
CB->getParamByValType(I) : ArgVal->getType(); + Align DataAlign = DL.getABITypeAlign(ArgType); + + uint64_t DataAlignV = DataAlign.value(); + + // Currently using 0 as a sentinel to mean ignored + if (ABI->MinAlign && DataAlignV < ABI->MinAlign) + DataAlignV = ABI->MinAlign; + if (ABI->MaxAlign && DataAlignV > ABI->MaxAlign) + DataAlignV = ABI->MaxAlign; + + DataAlign = Align(DataAlignV); + MaxFieldAlign = std::max(MaxFieldAlign, DataAlign); + + if (uint64_t Rem = CurrentOffset % DataAlignV) { + // Inject explicit padding to deal with alignment requirements + uint64_t Padding = DataAlignV - Rem; + Frame.padding(Ctx, Padding); + CurrentOffset += Padding; + } + + Frame.append(ArgType, ArgVal, IsByVal); + CurrentOffset += DL.getTypeAllocSize(ArgType).getFixedValue(); + } + + if (Frame.empty()) { + // Not passing anything, hopefully va_arg won't try to dereference it + // Might be a target specific thing whether one can pass nullptr instead + // of undef i32 + Frame.append(Type::getInt32Ty(Ctx), nullptr, false); + } + + Function *CBF = CB->getParent()->getParent(); + + StructType *VarargsTy = Frame.asStruct(Ctx, CBF->getName()); + + BasicBlock &BB = CBF->getEntryBlock(); + IRBuilder<> Builder(&*BB.getFirstInsertionPt()); + + // Clumsy call here is to set a specific alignment on the struct instance + AllocaInst *Alloced = + Builder.Insert(new AllocaInst(VarargsTy, DL.getAllocaAddrSpace(), nullptr, + MaxFieldAlign), + "vararg_buffer"); + assert(Alloced->getAllocatedType() == VarargsTy); + + // Initialise the fields in the struct + // TODO: Lifetime annotate it and alloca in entry + // Needs to start life shortly before these copies and end immediately after + // the new call instruction + Builder.SetInsertPoint(CB); + + Frame.initialiseStructAlloca(DL, Builder, Alloced); + + unsigned NumArgs = FuncType->getNumParams(); + + SmallVector Args; + Args.assign(CB->arg_begin(), CB->arg_begin() + NumArgs); + + // Initialise a va_list pointing to that struct and pass it as the 
last + // argument + { + PointerType *Voidptr = PointerType::getUnqual(Ctx); + Value *VoidBuffer = + Builder.CreatePointerBitCastOrAddrSpaceCast(Alloced, Voidptr); + + if (ABI->valistOnStack()) { + assert(ABI->valistCc() == VariadicABIInfo::ValistCc::pointer); + Type *VaListTy = ABI->vaListType(Ctx); + + // TODO: one va_list alloca per function, also lifetime annotate + AllocaInst *VaList = Builder.CreateAlloca(VaListTy, nullptr, "va_list"); + + ABI->initializeVAList(Ctx, Builder, VaList, VoidBuffer); + Args.push_back(VaList); + } else { + assert(ABI->valistCc() == VariadicABIInfo::ValistCc::value); + Args.push_back(VoidBuffer); + } + } + + // Attributes excluding any on the vararg arguments + AttributeList PAL = CB->getAttributes(); + if (!PAL.isEmpty()) { + SmallVector ArgAttrs; + for (unsigned ArgNo = 0; ArgNo < NumArgs; ArgNo++) + ArgAttrs.push_back(PAL.getParamAttrs(ArgNo)); + PAL = + AttributeList::get(Ctx, PAL.getFnAttrs(), PAL.getRetAttrs(), ArgAttrs); + } + + SmallVector OpBundles; + CB->getOperandBundlesAsDefs(OpBundles); + + CallInst *NewCB = CallInst::Create(NF, Args, OpBundles, "", CB); + + CallInst::TailCallKind TCK = cast(CB)->getTailCallKind(); + assert(TCK != CallInst::TCK_MustTail); // guarded at prologue + + // It doesn't get to be a tail call any more + // might want to guard this with arch, x64 and aarch64 document that + // varargs can't be tail called anyway + // Not totally convinced this is necessary but dead store elimination + // decides to discard the stores to the alloca and pass uninitialised + // memory along instead when the function is marked tailcall + if (TCK == CallInst::TCK_Tail) { + TCK = CallInst::TCK_None; + } + NewCB->setTailCallKind(TCK); + + NewCB->setAttributes(PAL); + NewCB->takeName(CB); + NewCB->setCallingConv(CB->getCallingConv()); + NewCB->copyMetadata(*CB, {LLVMContext::MD_prof, LLVMContext::MD_dbg}); + + if (!CB->use_empty()) // dead branch? 
+ { + CB->replaceAllUsesWith(NewCB); + } + CB->eraseFromParent(); +} + +} // namespace + +char ExpandVariadics::ID = 0; + +INITIALIZE_PASS(ExpandVariadics, DEBUG_TYPE, "Expand variadic functions", false, + false) + +ModulePass *llvm::createExpandVariadicsPass() { return new ExpandVariadics(); } + +PreservedAnalyses ExpandVariadicsPass::run(Module &M, ModuleAnalysisManager &) { + return ExpandVariadics().runOnModule(M) ? PreservedAnalyses::none() + : PreservedAnalyses::all(); +} diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll new file mode 100644 index 0000000000000..38d2be77c23f2 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-variadic-call-i386-darwin.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s +target datalayout = "e-m:o-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:128-n8:16:32-S128" +target triple = "i386-apple-macosx10.4.0" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ 
<16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +define void @codegen_for_copy(ptr noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %x.addr = alloca ptr, align 4 +; CHECK-NEXT: %cp = alloca ptr, align 4 +; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #9 +; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) +; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !4 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10 +; CHECK-NEXT: call void @llvm.va_end(ptr %cp) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #9 +; CHECK-NEXT: ret void +; +entry: + %x.addr = alloca ptr, align 4 + %cp = alloca ptr, align 4 + store ptr %x, ptr %x.addr, align 4, !tbaa !5 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8 + call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) + %0 = load ptr, ptr %cp, align 4, !tbaa !5 + call void @wrapped(ptr noundef %0) #9 + call void @llvm.va_end(ptr %cp) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare void @llvm.va_copy(ptr, ptr) #2 + +declare void @wrapped(ptr noundef) #3 + +declare void @llvm.va_end(ptr) #2 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +define void @vararg(...) #0 { +; CHECK-LABEL: define {{[^@]+}}@vararg(...) 
#0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #9 +; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va) +; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !4 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10 +; CHECK-NEXT: call void @llvm.va_end(ptr %va) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #9 +; CHECK-NEXT: ret void +; +entry: + %va = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8 + call void @llvm.va_start(ptr nonnull %va) + %0 = load ptr, ptr %va, align 4, !tbaa !5 + call void @wrapped(ptr noundef %0) #9 + call void @llvm.va_end(ptr %va) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8 + ret void +} + +declare void @llvm.va_start(ptr) #2 + +define void @single_i32(i32 noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) #10 + ret void +} + +define void @single_double(double noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x) #10 + ret void +} + +define void @single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> noundef %x) #10 + ret void +} + +define void @single_v8f32(<8 x float> noundef %x) #5 { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> noundef %x) #10 + ret void +} + +define void @single_v16f32(<16 x float> noundef %x) #6 { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> noundef %x) #10 + ret void +} + +define void @single_v32f32(<32 x float> noundef %x) #7 { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<32 x float> noundef %x) #10 + ret void +} + +define void @i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store double %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, double noundef %y) #10 + ret void +} + +define void @double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 24, i1 false) +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %y) #10 + ret void +} + +define void @libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 24, i1 false) +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #10 + ret void +} + +define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) #10 + ret void +} + +define void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<8 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) #10 + ret void +} + +define void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) #7 { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) #10 + ret void +} + +define void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) #7 { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<32 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn } +attributes #3 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #5 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #6 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #7 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="1024" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="yonah" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+sse3,+x87" "tune-cpu"="generic" } +attributes #8 = { nounwind } +attributes #9 = { nobuiltin nounwind "no-builtins" } +attributes #10 = { nobuiltin "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 1, 
!"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"frame-pointer", i32 2} +!5 = !{!6, !6, i64 0} +!6 = !{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll new file mode 100644 index 0000000000000..cd9c42e8a4bd9 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-variadic-call-i386-linux.ll @@ -0,0 +1,387 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i128:128-f64:32:64-f80:32-n8:16:32-S128" +target triple = "i386-unknown-linux-gnu" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> + +%struct.libcS = type { i8, i16, i32, i32, float, double } + 
+define void @codegen_for_copy(ptr noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %x.addr = alloca ptr, align 4 +; CHECK-NEXT: %cp = alloca ptr, align 4 +; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #9 +; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) +; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !4 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10 +; CHECK-NEXT: call void @llvm.va_end(ptr %cp) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #9 +; CHECK-NEXT: ret void +; +entry: + %x.addr = alloca ptr, align 4 + %cp = alloca ptr, align 4 + store ptr %x, ptr %x.addr, align 4, !tbaa !5 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8 + call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) + %0 = load ptr, ptr %cp, align 4, !tbaa !5 + call void @wrapped(ptr noundef %0) #9 + call void @llvm.va_end(ptr %cp) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare void @llvm.va_copy(ptr, ptr) #2 + +declare void @wrapped(ptr noundef) #3 + +declare void @llvm.va_end(ptr) #2 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +define void @vararg(...) #0 { +; CHECK-LABEL: define {{[^@]+}}@vararg(...) 
#0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #9 +; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va) +; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !4 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #10 +; CHECK-NEXT: call void @llvm.va_end(ptr %va) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #9 +; CHECK-NEXT: ret void +; +entry: + %va = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8 + call void @llvm.va_start(ptr nonnull %va) + %0 = load ptr, ptr %va, align 4, !tbaa !5 + call void @wrapped(ptr noundef %0) #9 + call void @llvm.va_end(ptr %va) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8 + ret void +} + +declare void @llvm.va_start(ptr) #2 + +define void @single_i32(i32 noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) #10 + ret void +} + +define void @single_double(double noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x) #10 + ret void +} + +define void @single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> noundef %x) #10 + ret void +} + +define void @single_v8f32(<8 x float> noundef %x) #5 { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> noundef %x) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> noundef %x) #10 + ret void +} + +define void @single_v16f32(<16 x float> noundef %x) #6 { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> noundef %x) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> noundef %x) #10 + ret void +} + +define void @single_v32f32(<32 x float> noundef %x) #7 { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(<32 x float> noundef %x) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<32 x float> noundef %x) #10 + ret void +} + +define void @i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store double %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, double noundef %y) #10 + ret void +} + +define void @double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 4 %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 24, i1 false) +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %y) #10 + ret void +} + +define void @libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 4 %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 24, i1 false) +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <4 x float> noundef %y) #10 + ret void +} + +define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v8f32(i32 noundef %x, <8 x float> noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <8 x float> noundef %y) #10 + ret void +} + +define void @v8f32_i32(<8 x float> noundef %x, i32 noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> noundef %x, i32 noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<8 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v16f32(i32 noundef %x, <16 x float> noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <16 x float> noundef %y) #10 + ret void +} + +define void @v16f32_i32(<16 x float> noundef %x, i32 noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> noundef %x, i32 noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +define void @i32_v32f32(i32 noundef %x, <32 x float> noundef %y) #7 { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, <32 x float> noundef %y) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <32 x float> %y, ptr %1, align 128 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x, <32 x float> noundef %y) #10 + ret void +} + +define void @v32f32_i32(<32 x float> noundef %x, i32 noundef %y) #7 { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(<32 x float> noundef %x, i32 noundef %y) #7 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128 +; CHECK-NEXT: %0 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <32 x float> %x, ptr %0, align 128 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #11 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<32 x float> noundef %x, i32 noundef %y) #10 + ret void +} + +attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn } +attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #5 = { nounwind "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { nounwind "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #7 = { nounwind "min-legal-vector-width"="1024" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #8 = { nounwind } +attributes #9 = { nobuiltin nounwind "no-builtins" } +attributes #10 = { nobuiltin "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2, !3} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"PIE Level", i32 2} +!5 = !{!6, !6, i64 0} +!6 = 
!{!"any pointer", !7, i64 0} +!7 = !{!"omnipotent char", !8, i64 0} +!8 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll b/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll new file mode 100644 index 0000000000000..48ffbb54868be --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-variadic-call-i686-msvc.ll @@ -0,0 +1,404 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s +target datalayout = "e-m:x-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32-a:0:32-S32" +target triple = "i686-unknown-windows-msvc19.33.0" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ ptr }> +; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, ptr }> +; CHECK: %v32f32_i32.vararg = type <{ ptr, i32 }> + +%struct.libcS = type { i8, i16, i32, i32, float, double } + +define void @codegen_for_copy(ptr noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %x.addr = alloca ptr, align 4 +; 
CHECK-NEXT: %cp = alloca ptr, align 4 +; CHECK-NEXT: store ptr %x, ptr %x.addr, align 4, !tbaa !3 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #8 +; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) +; CHECK-NEXT: %0 = load ptr, ptr %cp, align 4, !tbaa !3 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #9 +; CHECK-NEXT: call void @llvm.va_end(ptr %cp) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #8 +; CHECK-NEXT: ret void +; +entry: + %x.addr = alloca ptr, align 4 + %cp = alloca ptr, align 4 + store ptr %x, ptr %x.addr, align 4, !tbaa !4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %cp) #7 + call void @llvm.va_copy(ptr nonnull %cp, ptr nonnull %x.addr) + %0 = load ptr, ptr %cp, align 4, !tbaa !4 + call void @wrapped(ptr noundef %0) #8 + call void @llvm.va_end(ptr %cp) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %cp) #7 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare void @llvm.va_copy(ptr, ptr) #2 + +declare void @wrapped(ptr noundef) #3 + +declare void @llvm.va_end(ptr) #2 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +define void @vararg(...) #0 { +; CHECK-LABEL: define {{[^@]+}}@vararg(...) 
#0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va = alloca ptr, align 4 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #8 +; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va) +; CHECK-NEXT: %0 = load ptr, ptr %va, align 4, !tbaa !3 +; CHECK-NEXT: call void @wrapped(ptr noundef %0) #9 +; CHECK-NEXT: call void @llvm.va_end(ptr %va) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #8 +; CHECK-NEXT: ret void +; +entry: + %va = alloca ptr, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %va) #7 + call void @llvm.va_start(ptr nonnull %va) + %0 = load ptr, ptr %va, align 4, !tbaa !4 + call void @wrapped(ptr noundef %0) #8 + call void @llvm.va_end(ptr %va) + call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %va) #7 + ret void +} + +declare void @llvm.va_start(ptr) #2 + +define void @single_i32(i32 noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 4 +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) #9 + ret void +} + +define void @single_double(double noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x) #9 + ret void +} + +define void @single_v4f32(<4 x float> inreg noundef %x) #4 { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> inreg noundef %x) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> inreg noundef %x) #9 + ret void +} + +define void @single_v8f32(<8 x float> inreg noundef %x) #5 { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(<8 x float> inreg noundef %x) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> inreg noundef %x) #9 + ret void +} + +define void @single_v16f32(<16 x float> inreg noundef %x) #6 { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(<16 x float> inreg noundef %x) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<16 x float> inreg noundef %x) #9 + ret void +} + +define void @single_v32f32(ptr nocapture noundef readonly %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 4 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !7 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !7 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %x = load <32 x float>, ptr %0, align 128, !tbaa !8 + store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !8 + call void (...) @vararg(ptr noundef nonnull %indirect-arg-temp) #9 + ret void +} + +define void @i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store double %y, ptr %1, align 8 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, double noundef %y) #9 + ret void +} + +define void @double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x, i32 noundef %y) #9 + ret void +} + +define void @i32_libcS(i32 noundef %x, ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %0, i64 24, i1 false) +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 4 %0) #9 + ret void +} + +define void @libcS_i32(ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr nocapture noundef readonly byval(%struct.libcS) align 4 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %0, i64 24, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(ptr noundef nonnull byval(%struct.libcS) align 4 %0, i32 noundef %y) #9 + ret void +} + +define void @i32_v4f32(i32 noundef %x, <4 x float> inreg noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> inreg noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <4 x float> inreg noundef %y) #9 + ret void +} + +define void @v4f32_i32(<4 x float> inreg noundef %x, i32 noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> inreg noundef %x, i32 noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<4 x float> inreg noundef %x, i32 noundef %y) #9 + ret void +} + +define void @i32_v8f32(i32 noundef %x, <8 x float> inreg noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, <8 x float> inreg noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <8 x float> %y, ptr %1, align 32 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <8 x float> inreg noundef %y) #9 + ret void +} + +define void @v8f32_i32(<8 x float> inreg noundef %x, i32 noundef %y) #5 { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(<8 x float> inreg noundef %x, i32 noundef %y) #5 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: %0 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <8 x float> %x, ptr %0, align 32 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<8 x float> inreg noundef %x, i32 noundef %y) #9 + ret void +} + +define void @i32_v16f32(i32 noundef %x, <16 x float> inreg noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, <16 x float> inreg noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <16 x float> %y, ptr %1, align 64 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <16 x float> inreg noundef %y) #9 + ret void +} + +define void @v16f32_i32(<16 x float> inreg noundef %x, i32 noundef %y) #6 { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(<16 x float> inreg noundef %x, i32 noundef %y) #6 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: %0 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <16 x float> %x, ptr %0, align 64 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(<16 x float> inreg noundef %x, i32 noundef %y) #9 + ret void +} + +define void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 4 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 128, !tbaa !7 +; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !7 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %2, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %y = load <32 x float>, ptr %0, align 128, !tbaa !8 + store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !8 + call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull %indirect-arg-temp) #9 + ret void +} + +define void @v32f32_i32(ptr nocapture noundef readonly %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 4 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !7 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !7 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store ptr %indirect-arg-temp, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: call void @wrapped(ptr %vararg_buffer) #10 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %x = load <32 x float>, ptr %0, align 128, !tbaa !8 + store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !8 + call void (...) 
@vararg(ptr noundef nonnull %indirect-arg-temp, i32 noundef %y) #9 + ret void +} + +attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn } +attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #5 = { nounwind "min-legal-vector-width"="256" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #6 = { nounwind "min-legal-vector-width"="512" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pentium4" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #7 = { nounwind } +attributes #8 = { nobuiltin nounwind "no-builtins" } +attributes #9 = { nobuiltin "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"NumRegisterParameters", i32 0} +!1 = !{i32 1, !"wchar_size", i32 2} +!2 = !{i32 1, !"MaxTLSAlign", i32 65536} +!4 = !{!5, !5, i64 0} +!5 = !{!"any pointer", !6, i64 0} +!6 = !{!"omnipotent char", !7, i64 0} +!7 = !{!"Simple C/C++ TBAA"} +!8 = !{!6, !6, i64 0} diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll new file 
mode 100644 index 0000000000000..19e9545df000b --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-darwin.ll @@ -0,0 +1,591 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s +target datalayout = "e-m:o-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.4.0" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> + +%struct.__va_list_tag = type { i32, i32, ptr, ptr } +%struct.libcS = type { i8, i16, i32, i64, float, double } + +define void @codegen_for_copy(ptr noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #6 +; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr %x) 
+; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %cp) #7 +; CHECK-NEXT: call void @llvm.va_end(ptr %cp) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #6 +; CHECK-NEXT: ret void +; +entry: + %cp = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #5 + call void @llvm.va_copy(ptr nonnull %cp, ptr %x) + call void @wrapped(ptr noundef nonnull %cp) #6 + call void @llvm.va_end(ptr %cp) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #5 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare void @llvm.va_copy(ptr, ptr) #2 + +declare void @wrapped(ptr noundef) #3 + +declare void @llvm.va_end(ptr) #2 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +define void @vararg(...) #0 { +; CHECK-LABEL: define {{[^@]+}}@vararg(...) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #6 +; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va) +; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %va) #7 +; CHECK-NEXT: call void @llvm.va_end(ptr %va) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #6 +; CHECK-NEXT: ret void +; +entry: + %va = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #5 + call void @llvm.va_start(ptr nonnull %va) + call void @wrapped(ptr noundef nonnull %va) #6 + call void @llvm.va_end(ptr %va) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #5 + ret void +} + +declare void @llvm.va_start(ptr) #2 + +define void @single_i32(i32 noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; 
CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) #7 + ret void +} + +define void @single_double(double noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { 
i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x) #7 + ret void +} + +define void @single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x) #7 + ret void +} + +define void @single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16 +; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 16 + %x = load <8 x float>, ptr %0, align 16, !tbaa !4 + store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16 +; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 16 + %x = load <16 x float>, ptr %0, align 16, !tbaa !4 + store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 16 + %x = load <32 x float>, ptr %0, align 16, !tbaa !4 + store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store double %y, ptr %1, align 8 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, double noundef %y) #7 + ret void +} + +define void @double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x, i32 noundef %y) #7 + ret void +} + +define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) #7 + ret void +} + +define void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 32, i1 false) +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) #7 + ret void +} + +define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <4 x float> noundef %y) #7 + ret void +} + +define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x, i32 noundef %y) #7 + ret void +} + +define void @i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16 +; CHECK-NEXT: %y = load <8 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <8 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 16 + %y = load <8 x float>, ptr %0, align 16, !tbaa !4 + store <8 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 16 +; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 16 + %x = load <8 x float>, ptr %0, align 16, !tbaa !4 + store <8 x float> %x, ptr %indirect-arg-temp, 
align 16, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +define void @i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16 +; CHECK-NEXT: %y = load <16 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <16 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 16 + %y = load <16 x float>, ptr %0, align 16, !tbaa 
!4 + store <16 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 16 +; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 
16 + %x = load <16 x float>, ptr %0, align 16, !tbaa !4 + store <16 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +define void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 16 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16 +; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; 
+entry: + %indirect-arg-temp = alloca <32 x float>, align 16 + %y = load <32 x float>, ptr %0, align 16, !tbaa !4 + store <32 x float> %y, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp) #7 + ret void +} + +define void @v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 16 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 16 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 16, !tbaa !3 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call 
void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 16 + %x = load <32 x float>, ptr %0, align 16, !tbaa !4 + store <32 x float> %x, ptr %indirect-arg-temp, align 16, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 16 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +attributes #0 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn } +attributes #3 = { "frame-pointer"="all" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind "frame-pointer"="all" "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cmov,+cx16,+cx8,+fxsr,+mmx,+sahf,+sse,+sse2,+sse3,+ssse3,+x87" "tune-cpu"="generic" } +attributes #5 = { nounwind } +attributes #6 = { nobuiltin nounwind "no-builtins" } +attributes #7 = { nobuiltin "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"frame-pointer", i32 2} +!4 = !{!5, !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll new file mode 100644 index 0000000000000..122a86c372979 --- /dev/null +++ b/llvm/test/CodeGen/X86/expand-variadic-call-x64-linux.ll @@ 
-0,0 +1,591 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: -p --function-signature +; RUN: opt -S --passes=expand-variadics < %s | FileCheck %s +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Check the variables are lowered to the locations this target expects + +; The types show the call frames +; CHECK: %single_i32.vararg = type <{ i32 }> +; CHECK: %single_double.vararg = type <{ double }> +; CHECK: %single_v4f32.vararg = type <{ <4 x float> }> +; CHECK: %single_v8f32.vararg = type <{ <8 x float> }> +; CHECK: %single_v16f32.vararg = type <{ <16 x float> }> +; CHECK: %single_v32f32.vararg = type <{ <32 x float> }> +; CHECK: %i32_double.vararg = type <{ i32, [4 x i8], double }> +; CHECK: %double_i32.vararg = type <{ double, i32 }> +; CHECK: %i32_v4f32.vararg = type <{ i32, [12 x i8], <4 x float> }> +; CHECK: %v4f32_i32.vararg = type <{ <4 x float>, i32 }> +; CHECK: %i32_v8f32.vararg = type <{ i32, [28 x i8], <8 x float> }> +; CHECK: %v8f32_i32.vararg = type <{ <8 x float>, i32 }> +; CHECK: %i32_v16f32.vararg = type <{ i32, [60 x i8], <16 x float> }> +; CHECK: %v16f32_i32.vararg = type <{ <16 x float>, i32 }> +; CHECK: %i32_v32f32.vararg = type <{ i32, [124 x i8], <32 x float> }> +; CHECK: %v32f32_i32.vararg = type <{ <32 x float>, i32 }> + +%struct.__va_list_tag = type { i32, i32, ptr, ptr } +%struct.libcS = type { i8, i16, i32, i64, float, double } + +define void @codegen_for_copy(ptr noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@codegen_for_copy(ptr noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %cp = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #6 +; CHECK-NEXT: call void @llvm.va_copy(ptr nonnull %cp, ptr %x) +; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %cp) #7 +; CHECK-NEXT: call void @llvm.va_end(ptr %cp) +; CHECK-NEXT: call 
void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #6 +; CHECK-NEXT: ret void +; +entry: + %cp = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %cp) #5 + call void @llvm.va_copy(ptr nonnull %cp, ptr %x) + call void @wrapped(ptr noundef nonnull %cp) #6 + call void @llvm.va_end(ptr %cp) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %cp) #5 + ret void +} + +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare void @llvm.va_copy(ptr, ptr) #2 + +declare void @wrapped(ptr noundef) #3 + +declare void @llvm.va_end(ptr) #2 + +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +define void @vararg(...) #0 { +; CHECK-LABEL: define {{[^@]+}}@vararg(...) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %va = alloca [1 x %struct.__va_list_tag], align 16 +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #6 +; CHECK-NEXT: call void @llvm.va_start(ptr nonnull %va) +; CHECK-NEXT: call void @wrapped(ptr noundef nonnull %va) #7 +; CHECK-NEXT: call void @llvm.va_end(ptr %va) +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #6 +; CHECK-NEXT: ret void +; +entry: + %va = alloca [1 x %struct.__va_list_tag], align 16 + call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %va) #5 + call void @llvm.va_start(ptr nonnull %va) + call void @wrapped(ptr noundef nonnull %va) #6 + call void @llvm.va_end(ptr %va) + call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %va) #5 + ret void +} + +declare void @llvm.va_start(ptr) #2 + +define void @single_i32(i32 noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_i32(i32 noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %single_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset 
= getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(i32 noundef %x) #7 + ret void +} + +define void @single_double(double noundef %x) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_double(double noundef %x) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %single_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call 
void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) @vararg(double noundef %x) #7 + ret void +} + +define void @single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-LABEL: define {{[^@]+}}@single_v4f32(<4 x float> noundef %x) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %single_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x) #7 + ret void +} + +define void @single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v8f32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v8f32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32 +; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 32, !tbaa !3 +; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 32 + %x = load <8 x float>, ptr %0, align 32, !tbaa !4 + store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp) #7 + ret void +} + +define void @single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v16f32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v16f32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64 +; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 64, !tbaa !3 +; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 64 + %x = load <16 x float>, ptr %0, align 64, !tbaa !4 + store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp) #7 + ret void +} + +define void @single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@single_v32f32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %single_v32f32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !3 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %single_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %x = load <32 x float>, ptr %0, align 128, !tbaa !4 + store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !4 + tail call void (...) 
@vararg(ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp) #7 + ret void +} + +define void @i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_double(i32 noundef %x, double noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_double.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_double.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store double %y, ptr %1, align 8 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, double noundef %y) #7 + ret void +} + +define void @double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@double_i32(double noundef %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %double_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store double %x, ptr %0, align 8 +; CHECK-NEXT: %1 = getelementptr inbounds %double_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(double noundef %x, i32 noundef %y) #7 + ret void +} + +define void @i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_libcS(i32 noundef %x, ptr noundef byval(%struct.libcS) align 8 %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_libcS.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_libcS.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %y, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(%struct.libcS) align 8 %y) #7 + ret void +} + +define void @libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@libcS_i32(ptr noundef byval(%struct.libcS) align 8 %x, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %libcS_i32.vararg, align 8 +; CHECK-NEXT: %0 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %0, ptr %x, i64 32, i1 false) +; CHECK-NEXT: %1 = getelementptr inbounds %libcS_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(ptr noundef nonnull byval(%struct.libcS) align 8 %x, i32 noundef %y) #7 + ret void +} + +define void @i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@i32_v4f32(i32 noundef %x, <4 x float> noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v4f32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %0, align 4 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v4f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: store <4 x float> %y, ptr %1, align 16 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(i32 noundef %x, <4 x float> noundef %y) #7 + ret void +} + +define void @v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-LABEL: define {{[^@]+}}@v4f32_i32(<4 x float> noundef %x, i32 noundef %y) #4 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v4f32_i32.vararg, align 16 +; CHECK-NEXT: %0 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store <4 x float> %x, ptr %0, align 16 +; CHECK-NEXT: %1 = getelementptr inbounds %v4f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %1, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + tail call void (...) 
@vararg(<4 x float> noundef %x, i32 noundef %y) #7 + ret void +} + +define void @i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v8f32(i32 noundef %x, ptr nocapture noundef readonly byval(<8 x float>) align 32 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v8f32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32 +; CHECK-NEXT: %y = load <8 x float>, ptr %0, align 32, !tbaa !3 +; CHECK-NEXT: store <8 x float> %y, ptr %indirect-arg-temp, align 32, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v8f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 32 + %y = load <8 x float>, ptr %0, align 32, !tbaa !4 + store <8 x float> %y, ptr %indirect-arg-temp, align 32, !tbaa !4 + tail call void (...) 
@vararg(i32 noundef %x, ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp) #7 + ret void +} + +define void @v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v8f32_i32(ptr nocapture noundef readonly byval(<8 x float>) align 32 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v8f32_i32.vararg, align 32 +; CHECK-NEXT: %indirect-arg-temp = alloca <8 x float>, align 32 +; CHECK-NEXT: %x = load <8 x float>, ptr %0, align 32, !tbaa !3 +; CHECK-NEXT: store <8 x float> %x, ptr %indirect-arg-temp, align 32, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 32, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v8f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <8 x float>, align 32 + %x = load <8 x float>, ptr %0, align 32, !tbaa !4 + store <8 x float> %x, ptr %indirect-arg-temp, 
align 32, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<8 x float>) align 32 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +define void @i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v16f32(i32 noundef %x, ptr nocapture noundef readonly byval(<16 x float>) align 64 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v16f32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64 +; CHECK-NEXT: %y = load <16 x float>, ptr %0, align 64, !tbaa !3 +; CHECK-NEXT: store <16 x float> %y, ptr %indirect-arg-temp, align 64, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v16f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 64 + %y = load <16 x float>, ptr %0, align 64, !tbaa 
!4 + store <16 x float> %y, ptr %indirect-arg-temp, align 64, !tbaa !4 + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp) #7 + ret void +} + +define void @v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v16f32_i32(ptr nocapture noundef readonly byval(<16 x float>) align 64 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v16f32_i32.vararg, align 64 +; CHECK-NEXT: %indirect-arg-temp = alloca <16 x float>, align 64 +; CHECK-NEXT: %x = load <16 x float>, ptr %0, align 64, !tbaa !3 +; CHECK-NEXT: store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 64, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v16f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <16 x float>, align 
64 + %x = load <16 x float>, ptr %0, align 64, !tbaa !4 + store <16 x float> %x, ptr %indirect-arg-temp, align 64, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<16 x float>) align 64 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +define void @i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) #0 { +; CHECK-LABEL: define {{[^@]+}}@i32_v32f32(i32 noundef %x, ptr nocapture noundef readonly byval(<32 x float>) align 128 %0) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %i32_v32f32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %y = load <32 x float>, ptr %0, align 128, !tbaa !3 +; CHECK-NEXT: store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: store i32 %x, ptr %1, align 4 +; CHECK-NEXT: %2 = getelementptr inbounds %i32_v32f32.vararg, ptr %vararg_buffer, i32 0, i32 2 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %2, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void 
+; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %y = load <32 x float>, ptr %0, align 128, !tbaa !4 + store <32 x float> %y, ptr %indirect-arg-temp, align 128, !tbaa !4 + tail call void (...) @vararg(i32 noundef %x, ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp) #7 + ret void +} + +define void @v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0, i32 noundef %y) #0 { +; CHECK-LABEL: define {{[^@]+}}@v32f32_i32(ptr nocapture noundef readonly byval(<32 x float>) align 128 %0, i32 noundef %y) #0 { +; CHECK-NEXT: entry: +; CHECK-NEXT: %vararg_buffer = alloca %v32f32_i32.vararg, align 128 +; CHECK-NEXT: %indirect-arg-temp = alloca <32 x float>, align 128 +; CHECK-NEXT: %x = load <32 x float>, ptr %0, align 128, !tbaa !3 +; CHECK-NEXT: store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !3 +; CHECK-NEXT: %1 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 0 +; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr %1, ptr %indirect-arg-temp, i64 128, i1 false) +; CHECK-NEXT: %2 = getelementptr inbounds %v32f32_i32.vararg, ptr %vararg_buffer, i32 0, i32 1 +; CHECK-NEXT: store i32 %y, ptr %2, align 4 +; CHECK-NEXT: %va_list = alloca [1 x { i32, i32, ptr, ptr }], align 8 +; CHECK-NEXT: %gp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 0 +; CHECK-NEXT: store i32 48, ptr %gp_offset, align 4 +; CHECK-NEXT: %fp_offset = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 1 +; CHECK-NEXT: store i32 176, ptr %fp_offset, align 4 +; CHECK-NEXT: %overfow_arg_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 2 +; CHECK-NEXT: store ptr %vararg_buffer, ptr %overfow_arg_area, align 8 +; CHECK-NEXT: %reg_save_area = getelementptr inbounds [1 x { i32, i32, ptr, ptr }], ptr %va_list, i64 0, i32 0, i32 3 +; CHECK-NEXT: store ptr null, ptr %reg_save_area, align 8 +; 
CHECK-NEXT: call void @wrapped(ptr %va_list) #8 +; CHECK-NEXT: ret void +; +entry: + %indirect-arg-temp = alloca <32 x float>, align 128 + %x = load <32 x float>, ptr %0, align 128, !tbaa !4 + store <32 x float> %x, ptr %indirect-arg-temp, align 128, !tbaa !4 + tail call void (...) @vararg(ptr noundef nonnull byval(<32 x float>) align 128 %indirect-arg-temp, i32 noundef %y) #7 + ret void +} + +attributes #0 = { nounwind "min-legal-vector-width"="0" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn } +attributes #3 = { "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #4 = { nounwind "min-legal-vector-width"="128" "no-builtins" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" } +attributes #5 = { nounwind } +attributes #6 = { nobuiltin nounwind "no-builtins" } +attributes #7 = { nobuiltin "no-builtins" } + +!llvm.module.flags = !{!0, !1, !2} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{i32 8, !"PIC Level", i32 2} +!2 = !{i32 7, !"PIE Level", i32 2} +!4 = !{!5, !5, i64 0} +!5 = !{!"omnipotent char", !6, i64 0} +!6 = !{!"Simple C/C++ TBAA"} diff --git a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn index 2003e86e90b96..fc82a4f97dbb1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Transforms/IPO/BUILD.gn @@ -33,6 +33,7 @@ static_library("IPO") { "DeadArgumentElimination.cpp", 
"ElimAvailExtern.cpp", "EmbedBitcodePass.cpp", + "ExpandVariadics.cpp", "ExtractGV.cpp", "ForceFunctionAttrs.cpp", "FunctionAttrs.cpp",