diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 007b3737f83e6..b6bb5e969e130 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -429,6 +429,8 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0, LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors") +LANGOPT(RegCall4, 1, 0, "Set __regcall4 as a default calling convention to respect __regcall ABI v.4") + LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type") ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2, diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 9e25a5e0b58a5..296fa1fcc38a0 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4505,6 +4505,9 @@ def no_offload_add_rpath: Flag<["--"], "no-offload-add-rpath">, Flags<[NoArgumen Alias; def r : Flag<["-"], "r">, Flags<[LinkerInput,NoArgumentUnused]>, Group; +def regcall4 : Flag<["-"], "regcall4">, Group, Flags<[CC1Option]>, + HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">, + MarshallingInfoFlag>; def save_temps_EQ : Joined<["-", "--"], "save-temps=">, Flags<[CC1Option, FlangOption, FC1Option, NoXarchOption]>, HelpText<"Save intermediate compilation results.">; def save_temps : Flag<["-", "--"], "save-temps">, Flags<[FlangOption, FC1Option, NoXarchOption]>, @@ -7292,6 +7295,8 @@ def _SLASH_Gv : CLFlag<"Gv">, HelpText<"Set __vectorcall as a default calling convention">; def _SLASH_Gregcall : CLFlag<"Gregcall">, HelpText<"Set __regcall as a default calling convention">; +def _SLASH_Gregcall4 : CLFlag<"Gregcall4">, + HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">; // GNU Driver aliases diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 16f0d90451f7a..153f6dc2e9cf1 100644 --- 
a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -1688,8 +1688,12 @@ void CXXNameMangler::mangleRegCallName(const IdentifierInfo *II) { // ::= __regcall3__ // ::= [n] // ::= - Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__" - << II->getName(); + if (getASTContext().getLangOpts().RegCall4) + Out << II->getLength() + sizeof("__regcall4__") - 1 << "__regcall4__" + << II->getName(); + else + Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__" + << II->getName(); } void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) { diff --git a/clang/lib/AST/Mangle.cpp b/clang/lib/AST/Mangle.cpp index 31cdad4c8fdd4..53af9fc4d5189 100644 --- a/clang/lib/AST/Mangle.cpp +++ b/clang/lib/AST/Mangle.cpp @@ -198,8 +198,12 @@ void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) { Out << '_'; else if (CC == CCM_Fast) Out << '@'; - else if (CC == CCM_RegCall) - Out << "__regcall3__"; + else if (CC == CCM_RegCall) { + if (getASTContext().getLangOpts().RegCall4) + Out << "__regcall4__"; + else + Out << "__regcall3__"; + } if (!MCXX) Out << D->getIdentifier()->getName(); diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 3306d90dc8566..91af18d611979 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -2853,6 +2853,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) { // ::= T # __attribute__((__swiftasynccall__)) // // Clang-only // ::= w # __regcall + // ::= x # __regcall4 // The 'export' calling conventions are from a bygone era // (*cough*Win16*cough*) when functions were declared for export with // that keyword. 
(It didn't actually export them, it just made them so @@ -2873,7 +2874,12 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) { case CC_Swift: Out << 'S'; break; case CC_SwiftAsync: Out << 'W'; break; case CC_PreserveMost: Out << 'U'; break; - case CC_X86RegCall: Out << 'w'; break; + case CC_X86RegCall: + if (getASTContext().getLangOpts().RegCall4) + Out << "x"; + else + Out << "w"; + break; } } void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) { diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 8d2abc69c330e..e7cbc748b7a38 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1196,6 +1196,8 @@ void CodeGenModule::Release() { getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); if (getCodeGenOpts().SkipRaxSetup) getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1); + if (getLangOpts().RegCall4) + getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1); if (getContext().getTargetInfo().getMaxTLSAlign()) getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign", @@ -1707,7 +1709,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD, if (FD && FD->getType()->castAs()->getCallConv() == CC_X86RegCall) { - Out << "__regcall3__" << II->getName(); + if (CGM.getLangOpts().RegCall4) + Out << "__regcall4__" << II->getName(); + else + Out << "__regcall3__" << II->getName(); } else if (FD && FD->hasAttr() && GD.getKernelReferenceKind() == KernelReferenceKind::Stub) { Out << "__device_stub__" << II->getName(); diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index eae3643bd4bf5..c515573500049 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -7936,6 +7936,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, CmdArgs.push_back("-fms-memptr-rep=virtual"); } + if 
(Args.hasArg(options::OPT_regcall4)) + CmdArgs.push_back("-regcall4"); + // Parse the default calling convention options. if (Arg *CCArg = Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr, @@ -7972,6 +7975,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, CmdArgs.push_back(DCCFlag); } + if (Args.hasArg(options::OPT__SLASH_Gregcall4)) + CmdArgs.push_back("-regcall4"); + Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ); if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) { diff --git a/clang/test/CodeGen/check-regcall4-moduleflag.c b/clang/test/CodeGen/check-regcall4-moduleflag.c new file mode 100644 index 0000000000000..0b968e3d19d82 --- /dev/null +++ b/clang/test/CodeGen/check-regcall4-moduleflag.c @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=NO-REGCALL4 +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -regcall4 -emit-llvm %s -o - | FileCheck %s -check-prefix=REGCALL4 + +void f(void) {} + +// REGCALL4: !"RegCallv4", i32 1} +// NO-REGCALL4-NOT: "RegCallv4" diff --git a/clang/test/CodeGen/regcall4.c b/clang/test/CodeGen/regcall4.c new file mode 100644 index 0000000000000..5fbe77fbc7d76 --- /dev/null +++ b/clang/test/CodeGen/regcall4.c @@ -0,0 +1,100 @@ +// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefixes=X86,Win32 +// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefixes=X64,Win64 +// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefixes=X86,Lin32 +// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=X64,Lin64 + +#include + +void __regcall v1(int a, int b) {} +// X86: define dso_local x86_regcallcc void @__regcall4__v1(i32 inreg noundef %a, i32 inreg noundef %b) +// X64: define dso_local 
x86_regcallcc void @__regcall4__v1(i32 noundef %a, i32 noundef %b) + +void __attribute__((regcall)) v1b(int a, int b) {} +// X86: define dso_local x86_regcallcc void @__regcall4__v1b(i32 inreg noundef %a, i32 inreg noundef %b) +// X64: define dso_local x86_regcallcc void @__regcall4__v1b(i32 noundef %a, i32 noundef %b) + +void __regcall v2(char a, char b) {} +// X86: define dso_local x86_regcallcc void @__regcall4__v2(i8 inreg noundef signext %a, i8 inreg noundef signext %b) +// Win64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef %a, i8 noundef %b) +// Lin64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef signext %a, i8 noundef signext %b) + +struct Small { int x; }; +void __regcall v3(int a, struct Small b, int c) {} +// Win32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 %b.0, i32 inreg noundef %c) +// Lin32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 inreg %0, i32 %b.0, i32 inreg noundef %c) +// X64: define dso_local x86_regcallcc void @__regcall4__v3(i32 noundef %a, i32 %b.coerce, i32 noundef %c) + +struct Large { int a[5]; }; +void __regcall v4(int a, struct Large b, int c) {} +// Win32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 inreg noundef %c) +// Lin32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 noundef %c) +// Win64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, ptr noundef %b, i32 noundef %c) +// Lin64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, [5 x i32] %b.coerce, i32 noundef %c) + +void __regcall v5(long long a, int b, int c) {} +// X86: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 inreg noundef %b, i32 inreg noundef %c) +// X64: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 noundef %b, i32 
noundef %c) + +struct HFA2 { double x, y; }; +struct HFA4 { double w, x, y, z; }; +struct HFA5 { double v, w, x, y, z; }; + +void __regcall hfa1(int a, struct HFA4 b, int c) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 inreg noundef %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg noundef %c) +// X64: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 noundef %a, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, i32 noundef %c) + +// HFAs that would require more than six total SSE registers are passed +// indirectly. Additional vector arguments can consume the rest of the SSE +// registers. +void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, ptr inreg noundef %0) +// X64: define dso_local x86_regcallcc void @__regcall4__hfa2(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double noundef %c) + +// Ensure that we pass builtin types directly while counting them against the +// SSE register usage. +void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %f.0, double %f.1) +// X64: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %{{.*}}, double %{{.*}}) + +// Aggregates with more than four elements are not HFAs and are passed byval. +// Because they are not classified as homogeneous, they don't get special +// handling to ensure alignment.
+void __regcall hfa4(struct HFA5 a) {} +// X32: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef byval(%struct.HFA5) align 4 %{{.*}}) +// Win64: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef %a) +// Lin64: define dso_local x86_regcallcc void @__regcall4__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4) + +// Return HFAs of 4 or fewer elements in registers. +static struct HFA2 g_hfa2; +struct HFA2 __regcall hfa5(void) { return g_hfa2; } +// X86: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5() +// X64: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5() + +typedef float __attribute__((vector_size(16))) v4f32; +struct HVA2 { v4f32 x, y; }; +struct HVA4 { v4f32 w, x, y, z; }; + +void __regcall hva1(int a, struct HVA4 b, int c) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hva1(i32 inreg noundef %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg noundef %c) +// X64: define dso_local x86_regcallcc void @__regcall4__hva1(i32 noundef %a, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 noundef %c) + +void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %0) +// X64: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> noundef %c) + +void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {} +// X86: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef 
%d, <4 x float> noundef %e, <4 x float> %f.0, <4 x float> %f.1) +// X64: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %{{.*}}, <4 x float> %{{.*}}) + +typedef float __attribute__((ext_vector_type(3))) v3f32; +struct OddSizeHVA { v3f32 x, y; }; + +void __regcall odd_size_hva(struct OddSizeHVA a) {} +// X86: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1) +// X64: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %{{.*}}, <3 x float> %{{.*}}) + +struct HFA6 { __m128 f[4]; }; +struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;} +// X86: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %c, ptr inreg noundef %d) +// Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3) +// Lin64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce) diff --git a/clang/test/CodeGenCXX/regcall4.cpp b/clang/test/CodeGenCXX/regcall4.cpp new file mode 100644 index 0000000000000..7c35db36e1053 --- /dev/null +++ b/clang/test/CodeGenCXX/regcall4.cpp @@ -0,0 +1,120 @@ +// RUN: %clang_cc1 -regcall4 -triple x86_64-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -allow-deprecated-dag-overlap 
-check-prefix=CHECK-LIN -check-prefix=CHECK-LIN64 %s +// RUN: %clang_cc1 -regcall4 -triple i386-linux-gnu -emit-llvm -std=c++11 %s -o - | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-LIN -check-prefix=CHECK-LIN32 %s +// RUN: %clang_cc1 -regcall4 -triple x86_64-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-WIN64 %s +// RUN: %clang_cc1 -regcall4 -triple i386-windows-msvc -emit-llvm -std=c++11 %s -o - -DWIN_TEST | FileCheck -allow-deprecated-dag-overlap -check-prefix=CHECK-WIN32 %s + +int __regcall foo(int i); + +int main() +{ + int p = 0, _data; + auto lambda = [&](int parameter) -> int { + _data = foo(parameter); + return _data; + }; + return lambda(p); +} +// CHECK-LIN: call x86_regcallcc {{.+}} @_Z15__regcall4__foo +// CHECK-WIN64: call x86_regcallcc {{.+}} @"?foo@@YxHH@Z" +// CHECK-WIN32: call x86_regcallcc {{.+}} @"?foo@@YxHH@Z" + +int __regcall foo (int i){ + return i; +} +// CHECK-LIN: define{{.*}} x86_regcallcc noundef {{.+}}@_Z15__regcall4__foo +// CHECK-WIN64: define dso_local x86_regcallcc noundef {{.+}}@"?foo@@YxHH@Z" +// CHECK-WIN32: define dso_local x86_regcallcc noundef {{.+}}@"?foo@@YxHH@Z" + +// used to give a body to test_class functions +static int x = 0; +class test_class { + int a; +public: +#ifndef WIN_TEST + __regcall +#endif + test_class(){++x;} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC1Ev + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classC2Ev + // Windows ignores calling convention on constructor/destructors. 
+ // CHECK-WIN64-DAG: define linkonce_odr dso_local noundef ptr @"??0test_class@@QEAA@XZ" + // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc noundef ptr @"??0test_class@@QAE@XZ" + +#ifndef WIN_TEST + __regcall +#endif + ~test_class(){--x;} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD2Ev + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_classD1Ev + // Windows ignores calling convention on constructor/destructors. + // CHECK-WIN64-DAG: define linkonce_odr dso_local void @"??1test_class@@QEAA@XZ" + // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_thiscallcc void @"??1test_class@@QAE@XZ" + + test_class& __regcall operator+=(const test_class&){ + return *this; + } + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @_ZN10test_classpLERKS_ + // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @"??Ytest_class@@QEAxAEAV0@AEBV0@@Z" + // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc noundef nonnull align 4 dereferenceable(4) ptr @"??Ytest_class@@QAxAAV0@ABV0@@Z" + void __regcall do_thing(){} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall4__do_thingEv + // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"?do_thing@test_class@@QEAxXXZ" + // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"?do_thing@test_class@@QAxXXZ" + + template + void __regcall tempFunc(T i){} + // CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_ZN10test_class20__regcall4__tempFuncIiEEvT_ + // CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z" + // CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z" +}; + +bool __regcall operator ==(const test_class&, const test_class&){ --x; return false;} +// CHECK-LIN-DAG: define{{.*}} x86_regcallcc noundef 
zeroext i1 @_ZeqRK10test_classS1_ +// CHECK-WIN64-DAG: define dso_local x86_regcallcc noundef zeroext i1 @"??8@Yx_NAEBVtest_class@@0@Z" +// CHECK-WIN32-DAG: define dso_local x86_regcallcc noundef zeroext i1 @"??8@Yx_NABVtest_class@@0@Z" + +test_class __regcall operator""_test_class (unsigned long long) { ++x; return test_class{};} +// CHECK-LIN64-DAG: define{{.*}} x86_regcallcc void @_Zli11_test_classy(ptr noalias sret(%class.test_class) align 4 %agg.result, i64 noundef %0) +// CHECK-LIN32-DAG: define{{.*}} x86_regcallcc void @_Zli11_test_classy(ptr inreg noalias sret(%class.test_class) align 4 %agg.result, i64 noundef %0) +// CHECK-WIN64-DAG: ??__K_test_class@@Yx?AVtest_class@@_K@Z" +// CHECK-WIN32-DAG: ??__K_test_class@@Yx?AVtest_class@@_K@Z" + +template +void __regcall freeTempFunc(T i){} +// CHECK-LIN-DAG: define linkonce_odr x86_regcallcc void @_Z24__regcall4__freeTempFuncIiEvT_ +// CHECK-WIN64-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z" +// CHECK-WIN32-DAG: define linkonce_odr dso_local x86_regcallcc void @"??$freeTempFunc@H@@YxXH@Z" + +// class to force generation of functions +void force_gen() { + test_class t; + test_class t2 = 12_test_class; + t += t2; + auto t3 = 100_test_class; + t3.tempFunc(1); + freeTempFunc(1); + t3.do_thing(); +} + +long double _Complex __regcall foo(long double _Complex f) { + return f; +} +// CHECK-LIN64-DAG: define{{.*}} x86_regcallcc void @_Z15__regcall4__fooCe(ptr noalias sret({ x86_fp80, x86_fp80 }) align 16 %agg.result, ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 %f) +// CHECK-LIN32-DAG: define{{.*}} x86_regcallcc void @_Z15__regcall4__fooCe(ptr inreg noalias sret({ x86_fp80, x86_fp80 }) align 4 %agg.result, ptr noundef byval({ x86_fp80, x86_fp80 }) align 4 %f) +// CHECK-WIN64-DAG: define dso_local x86_regcallcc noundef { double, double } @"?foo@@YxU?$_Complex@O@__clang@@U12@@Z"(double noundef %f.0, double noundef %f.1) +// CHECK-WIN32-DAG: define dso_local x86_regcallcc noundef { 
double, double } @"?foo@@YxU?$_Complex@O@__clang@@U12@@Z"(double noundef %f.0, double noundef %f.1) + +// The following caused us to dereference uninitialized memory. The long name +// seems necessary, as does the return types. +float _Complex __regcall callee(float _Complex f); +// CHECK-LIN64-DAG: declare x86_regcallcc noundef <2 x float> @_Z18__regcall4__calleeCf(<2 x float> noundef) +// CHECK-LIN32-DAG: declare x86_regcallcc noundef { float, float } @_Z18__regcall4__calleeCf(float noundef, float noundef) +// CHECK-WIN64-DAG: declare dso_local x86_regcallcc noundef { float, float } @"?callee@@YxU?$_Complex@M@__clang@@U12@@Z"(float noundef, float noundef) +// CHECK-WIN32-DAG: declare dso_local x86_regcallcc noundef { float, float } @"?callee@@YxU?$_Complex@M@__clang@@U12@@Z"(float noundef, float noundef) + +__regcall int +some_really_long_name_that_manages_to_hit_the_right_spot_of_mem(int a) { + float _Complex x[2]; + x[0] = callee(x[0]); + return a; +} diff --git a/clang/test/Driver/cl-cc-flags.c b/clang/test/Driver/cl-cc-flags.c index 6fa0b6bd8e92f..eacaee2c27697 100644 --- a/clang/test/Driver/cl-cc-flags.c +++ b/clang/test/Driver/cl-cc-flags.c @@ -16,6 +16,10 @@ // RUN: %clang_cl --target=i686-windows-msvc /Gregcall -### -- %s 2>&1 | FileCheck --check-prefix=REGCALL %s // REGCALL: -fdefault-calling-conv=regcall +// RUN: %clang_cl --target=i686-windows-msvc /Gregcall /Gregcall4 -### -- %s 2>&1 | FileCheck --check-prefix=REGCALL4 %s +// REGCALL4: -fdefault-calling-conv=regcall +// REGCALL4: -regcall4 + // Last one should win: // RUN: %clang_cl --target=i686-windows-msvc /Gd /Gv -### -- %s 2>&1 | FileCheck --check-prefix=LASTWINS_VECTOR %s diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td index 06cebdc215943..3ce59dc4aa61b 100644 --- a/llvm/lib/Target/X86/X86CallingConv.td +++ b/llvm/lib/Target/X86/X86CallingConv.td @@ -23,6 +23,11 @@ class CCIfNotSubtarget "(State.getMachineFunction().getSubtarget()).", F), A>; +/// 
CCIfRegCallv4 - Match if RegCall ABIv4 is respected. +class CCIfRegCallv4 + : CCIf<"State.getMachineFunction().getFunction().getParent()->getModuleFlag(\"RegCallv4\")!=nullptr", + A>; + /// CCIfIsVarArgOnWin - Match if isVarArg on Windows 32bits. class CCIfIsVarArgOnWin : CCIf<"State.isVarArg() && " @@ -55,6 +60,20 @@ def RC_X86_32_RegCall : RC_X86_RegCall { let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]; } +// RegCall register classes for 32 bits when regcall ABI v.4 is respected. +// Change in __regcall ABI v.4: don't use EAX, as a spare register is +// needed to code the virtual call thunk. +def RC_X86_32_RegCallv4_Win : RC_X86_RegCall { + let GPR_8 = [CL, DL, DIL, SIL]; + let GPR_16 = [CX, DX, DI, SI]; + let GPR_32 = [ECX, EDX, EDI, ESI]; + let GPR_64 = [RAX]; ///< Not actually used, but AssignToReg can't handle [] + ///< \todo Fix AssignToReg to enable empty lists + let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]; + let YMM = [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7]; + let ZMM = [ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]; +} + class RC_X86_64_RegCall : RC_X86_RegCall { let XMM = [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]; @@ -71,6 +90,18 @@ def RC_X86_64_RegCall_Win : RC_X86_64_RegCall { let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R10, R11, R12, R14, R15]; } +// On Windows 64 we don't want to use R13 - it is reserved for +// largely aligned stack. +// Change in __regcall ABI v.4: additionally don't use R10, as +// a spare register is needed to code the virtual call thunk.
+// +def RC_X86_64_RegCallv4_Win : RC_X86_64_RegCall { + let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R11B, R12B, R14B, R15B]; + let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R11W, R12W, R14W, R15W]; + let GPR_32 = [EAX, ECX, EDX, EDI, ESI, R8D, R9D, R11D, R12D, R14D, R15D]; + let GPR_64 = [RAX, RCX, RDX, RDI, RSI, R8, R9, R11, R12, R14, R15]; +} + def RC_X86_64_RegCall_SysV : RC_X86_64_RegCall { let GPR_8 = [AL, CL, DL, DIL, SIL, R8B, R9B, R12B, R13B, R14B, R15B]; let GPR_16 = [AX, CX, DX, DI, SI, R8W, R9W, R12W, R13W, R14W, R15W]; @@ -433,8 +464,12 @@ def RetCC_X86_64_AnyReg : CallingConv<[ defm X86_32_RegCall : X86_RegCall_base; +defm X86_32_RegCallv4_Win : + X86_RegCall_base; defm X86_Win64_RegCall : X86_RegCall_base; +defm X86_Win64_RegCallv4 : + X86_RegCall_base; defm X86_SysV64_RegCall : X86_RegCall_base; @@ -447,6 +482,8 @@ def RetCC_X86_32 : CallingConv<[ // If HiPE, use RetCC_X86_32_HiPE. CCIfCC<"CallingConv::HiPE", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, + CCIfCC<"CallingConv::X86_RegCall", + CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4>>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, // Otherwise, use RetCC_X86_32_C. 
@@ -473,6 +510,9 @@ def RetCC_X86_64 : CallingConv<[ // Handle Vectorcall CC CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, + CCIfCC<"CallingConv::X86_RegCall", + CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4>>>, + CCIfCC<"CallingConv::X86_RegCall", CCIfSubtarget<"isTargetWin64()", CCDelegateTo>>, @@ -1052,6 +1092,8 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::Tail", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + CCIfCC<"CallingConv::X86_RegCall", + CCIfSubtarget<"isTargetWin32()", CCIfRegCallv4>>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, // Otherwise, drop to normal X86-32 CC @@ -1067,6 +1109,8 @@ def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::Win64", CCDelegateTo>, CCIfCC<"CallingConv::X86_64_SysV", CCDelegateTo>, CCIfCC<"CallingConv::X86_VectorCall", CCDelegateTo>, + CCIfCC<"CallingConv::X86_RegCall", + CCIfSubtarget<"isTargetWin64()", CCIfRegCallv4>>>, CCIfCC<"CallingConv::X86_RegCall", CCIfSubtarget<"isTargetWin64()", CCDelegateTo>>, CCIfCC<"CallingConv::X86_RegCall", CCDelegateTo>, diff --git a/llvm/test/CodeGen/X86/sse-regcall4.ll b/llvm/test/CodeGen/X86/sse-regcall4.ll new file mode 100644 index 0000000000000..80eaf0f900066 --- /dev/null +++ b/llvm/test/CodeGen/X86/sse-regcall4.ll @@ -0,0 +1,467 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i386-pc-win32 -mattr=+sse | FileCheck --check-prefix=WIN32 %s +; RUN: llc < %s -mtriple=x86_64-win32 -mattr=+sse | FileCheck --check-prefix=WIN64 %s +; RUN: llc < %s -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck --check-prefix=LINUXOSX %s + +; Test regcall when receiving/returning i1 +define x86_regcallcc i1 @test_argReti1(i1 %a) { +; WIN32-LABEL: test_argReti1: +; WIN32: # %bb.0: +; WIN32-NEXT: incb %cl +; WIN32-NEXT: # kill: def $cl killed $cl killed $ecx +; WIN32-NEXT: retl +; +; WIN64-LABEL: test_argReti1: +; WIN64: # %bb.0: +; WIN64-NEXT: incb %al +; WIN64-NEXT: # 
kill: def $al killed $al killed $eax +; WIN64-NEXT: retq +; +; LINUXOSX-LABEL: test_argReti1: +; LINUXOSX: # %bb.0: +; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: # kill: def $al killed $al killed $eax +; LINUXOSX-NEXT: retq + %add = add i1 %a, 1 + ret i1 %add +} + +; Test regcall when passing/retrieving i1 +define x86_regcallcc i1 @test_CallargReti1(i1 %a) { +; WIN32-LABEL: test_CallargReti1: +; WIN32: # %bb.0: +; WIN32-NEXT: incb %cl +; WIN32-NEXT: movzbl %cl, %ecx +; WIN32-NEXT: calll _test_argReti1 +; WIN32-NEXT: incb %cl +; WIN32-NEXT: retl +; +; WIN64-LABEL: test_CallargReti1: +; WIN64: # %bb.0: +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: .seh_stackalloc 8 +; WIN64-NEXT: .seh_endprologue +; WIN64-NEXT: incb %al +; WIN64-NEXT: movzbl %al, %eax +; WIN64-NEXT: callq test_argReti1 +; WIN64-NEXT: incb %al +; WIN64-NEXT: popq %rcx +; WIN64-NEXT: retq +; WIN64-NEXT: .seh_endproc +; +; LINUXOSX-LABEL: test_CallargReti1: +; LINUXOSX: # %bb.0: +; LINUXOSX-NEXT: pushq %rax +; LINUXOSX-NEXT: .cfi_def_cfa_offset 16 +; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: movzbl %al, %eax +; LINUXOSX-NEXT: callq *test_argReti1@GOTPCREL(%rip) +; LINUXOSX-NEXT: incb %al +; LINUXOSX-NEXT: popq %rcx +; LINUXOSX-NEXT: .cfi_def_cfa_offset 8 +; LINUXOSX-NEXT: retq + %b = add i1 %a, 1 + %c = call x86_regcallcc i1 @test_argReti1(i1 %b) + %d = add i1 %c, 1 + ret i1 %d +} + +;test calling conventions - input parameters, callee saved xmms +define x86_regcallcc <16 x float> @testf32_inp(<16 x float> %a, <16 x float> %b, <16 x float> %c) nounwind { +; WIN32-LABEL: testf32_inp: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: movl %esp, %ebp +; WIN32-NEXT: andl $-16, %esp +; WIN32-NEXT: subl $32, %esp +; WIN32-NEXT: movaps %xmm7, (%esp) # 16-byte Spill +; WIN32-NEXT: movaps %xmm6, %xmm7 +; WIN32-NEXT: movaps %xmm5, %xmm6 +; WIN32-NEXT: movaps %xmm3, %xmm5 +; WIN32-NEXT: movaps %xmm2, %xmm3 +; WIN32-NEXT: movaps %xmm1, %xmm2 +; WIN32-NEXT: movaps %xmm0, %xmm1 +; WIN32-NEXT: addps %xmm4, %xmm0 
+; WIN32-NEXT: mulps %xmm4, %xmm1 +; WIN32-NEXT: subps %xmm1, %xmm0 +; WIN32-NEXT: movups 8(%ebp), %xmm1 +; WIN32-NEXT: addps %xmm1, %xmm0 +; WIN32-NEXT: movaps %xmm2, %xmm4 +; WIN32-NEXT: addps %xmm6, %xmm4 +; WIN32-NEXT: mulps %xmm6, %xmm2 +; WIN32-NEXT: subps %xmm2, %xmm4 +; WIN32-NEXT: movups 24(%ebp), %xmm1 +; WIN32-NEXT: addps %xmm1, %xmm4 +; WIN32-NEXT: movaps %xmm3, %xmm2 +; WIN32-NEXT: addps %xmm7, %xmm2 +; WIN32-NEXT: mulps %xmm7, %xmm3 +; WIN32-NEXT: subps %xmm3, %xmm2 +; WIN32-NEXT: movups 40(%ebp), %xmm1 +; WIN32-NEXT: addps %xmm1, %xmm2 +; WIN32-NEXT: movaps %xmm5, %xmm3 +; WIN32-NEXT: movaps (%esp), %xmm1 # 16-byte Reload +; WIN32-NEXT: addps %xmm1, %xmm3 +; WIN32-NEXT: mulps %xmm1, %xmm5 +; WIN32-NEXT: subps %xmm5, %xmm3 +; WIN32-NEXT: movups 56(%ebp), %xmm1 +; WIN32-NEXT: addps %xmm1, %xmm3 +; WIN32-NEXT: movaps %xmm4, %xmm1 +; WIN32-NEXT: movl %ebp, %esp +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl +; +; WIN64-LABEL: testf32_inp: +; WIN64: # %bb.0: +; WIN64-NEXT: subq $72, %rsp +; WIN64-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; WIN64-NEXT: movaps %xmm12, (%rsp) # 16-byte Spill +; WIN64-NEXT: movaps %xmm0, %xmm12 +; WIN64-NEXT: addps %xmm4, %xmm12 +; WIN64-NEXT: movaps %xmm1, %xmm13 +; WIN64-NEXT: addps %xmm5, %xmm13 +; WIN64-NEXT: movaps %xmm2, %xmm14 +; WIN64-NEXT: addps %xmm6, %xmm14 +; WIN64-NEXT: movaps %xmm3, %xmm15 +; WIN64-NEXT: addps %xmm7, %xmm15 +; WIN64-NEXT: mulps %xmm4, %xmm0 +; WIN64-NEXT: subps %xmm0, %xmm12 +; WIN64-NEXT: mulps %xmm5, %xmm1 +; WIN64-NEXT: subps %xmm1, %xmm13 +; WIN64-NEXT: mulps %xmm6, %xmm2 +; WIN64-NEXT: subps %xmm2, %xmm14 +; WIN64-NEXT: mulps %xmm7, %xmm3 +; WIN64-NEXT: subps %xmm3, %xmm15 +; WIN64-NEXT: addps %xmm8, %xmm12 +; WIN64-NEXT: addps %xmm9, %xmm13 +; WIN64-NEXT: addps %xmm10, %xmm14 +; WIN64-NEXT: addps %xmm11, %xmm15 +; WIN64-NEXT: 
movaps %xmm12, %xmm0 +; WIN64-NEXT: movaps %xmm13, %xmm1 +; WIN64-NEXT: movaps %xmm14, %xmm2 +; WIN64-NEXT: movaps %xmm15, %xmm3 +; WIN64-NEXT: movaps (%rsp), %xmm12 # 16-byte Reload +; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload +; WIN64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm15 # 16-byte Reload +; WIN64-NEXT: addq $72, %rsp +; WIN64-NEXT: retq +; +; LINUXOSX-LABEL: testf32_inp: +; LINUXOSX: # %bb.0: +; LINUXOSX-NEXT: movaps %xmm15, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX-NEXT: movaps %xmm14, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX-NEXT: movaps %xmm13, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX-NEXT: movaps %xmm12, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill +; LINUXOSX-NEXT: movaps %xmm0, %xmm12 +; LINUXOSX-NEXT: addps %xmm4, %xmm12 +; LINUXOSX-NEXT: movaps %xmm1, %xmm13 +; LINUXOSX-NEXT: addps %xmm5, %xmm13 +; LINUXOSX-NEXT: movaps %xmm2, %xmm14 +; LINUXOSX-NEXT: addps %xmm6, %xmm14 +; LINUXOSX-NEXT: movaps %xmm3, %xmm15 +; LINUXOSX-NEXT: addps %xmm7, %xmm15 +; LINUXOSX-NEXT: mulps %xmm4, %xmm0 +; LINUXOSX-NEXT: subps %xmm0, %xmm12 +; LINUXOSX-NEXT: mulps %xmm5, %xmm1 +; LINUXOSX-NEXT: subps %xmm1, %xmm13 +; LINUXOSX-NEXT: mulps %xmm6, %xmm2 +; LINUXOSX-NEXT: subps %xmm2, %xmm14 +; LINUXOSX-NEXT: mulps %xmm7, %xmm3 +; LINUXOSX-NEXT: subps %xmm3, %xmm15 +; LINUXOSX-NEXT: addps %xmm8, %xmm12 +; LINUXOSX-NEXT: addps %xmm9, %xmm13 +; LINUXOSX-NEXT: addps %xmm10, %xmm14 +; LINUXOSX-NEXT: addps %xmm11, %xmm15 +; LINUXOSX-NEXT: movaps %xmm12, %xmm0 +; LINUXOSX-NEXT: movaps %xmm13, %xmm1 +; LINUXOSX-NEXT: movaps %xmm14, %xmm2 +; LINUXOSX-NEXT: movaps %xmm15, %xmm3 +; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm12 # 16-byte Reload +; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm13 # 16-byte Reload +; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm14 # 16-byte Reload +; LINUXOSX-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), 
%xmm15 # 16-byte Reload +; LINUXOSX-NEXT: retq + %x1 = fadd <16 x float> %a, %b + %x2 = fmul <16 x float> %a, %b + %x3 = fsub <16 x float> %x1, %x2 + %x4 = fadd <16 x float> %x3, %c + ret <16 x float> %x4 +} + +;test calling conventions - input parameters, callee saved GPRs +define x86_regcallcc i32 @testi32_inp(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, i32 %a6, +; WIN32-LABEL: testi32_inp: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: pushl %ebx +; WIN32-NEXT: subl $8, %esp +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: movl %edi, %esi +; WIN32-NEXT: movl %edx, (%esp) # 4-byte Spill +; WIN32-NEXT: movl %ecx, %edi +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: leal (%esi,%eax), %ecx +; WIN32-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; WIN32-NEXT: movl %esi, %ecx +; WIN32-NEXT: subl %eax, %ecx +; WIN32-NEXT: movl %edi, %eax +; WIN32-NEXT: subl %edx, %eax +; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: imull %eax, %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: movl %esi, %eax +; WIN32-NEXT: subl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %ecx, %eax +; WIN32-NEXT: addl %ebx, %eax +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: movl %ebp, %ebx +; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ebx +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: movl %edx, %ecx +; WIN32-NEXT: subl {{[0-9]+}}(%esp), %ecx +; WIN32-NEXT: imull %ebx, %ecx +; WIN32-NEXT: addl %eax, %ecx +; WIN32-NEXT: addl (%esp), %edi # 4-byte Folded Reload +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %ebp +; WIN32-NEXT: movl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %eax +; WIN32-NEXT: imull %eax, %edi +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %esi +; WIN32-NEXT: imull {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; WIN32-NEXT: addl %esi, %edi +; WIN32-NEXT: addl {{[0-9]+}}(%esp), %edx +; WIN32-NEXT: imull %ebp, %edx +; WIN32-NEXT: addl %edx, %edi +; WIN32-NEXT: addl %ecx, %edi +; WIN32-NEXT: movl %edi, %ecx +; 
WIN32-NEXT: addl $8, %esp +; WIN32-NEXT: popl %ebx +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl +; +; WIN64-LABEL: testi32_inp: +; WIN64: # %bb.0: +; WIN64-NEXT: pushq %rbx +; WIN64-NEXT: # kill: def $edx killed $edx def $rdx +; WIN64-NEXT: # kill: def $esi killed $esi def $rsi +; WIN64-NEXT: # kill: def $r14d killed $r14d def $r14 +; WIN64-NEXT: # kill: def $r12d killed $r12d def $r12 +; WIN64-NEXT: # kill: def $r11d killed $r11d def $r11 +; WIN64-NEXT: # kill: def $r9d killed $r9d def $r9 +; WIN64-NEXT: # kill: def $r8d killed $r8d def $r8 +; WIN64-NEXT: # kill: def $edi killed $edi def $rdi +; WIN64-NEXT: leal (%rdx,%rdi), %ebx +; WIN64-NEXT: # kill: def $edx killed $edx killed $rdx +; WIN64-NEXT: subl %edi, %edx +; WIN64-NEXT: leal (%rsi,%r8), %edi +; WIN64-NEXT: # kill: def $esi killed $esi killed $rsi +; WIN64-NEXT: subl %r8d, %esi +; WIN64-NEXT: leal (%r9,%r11), %r8d +; WIN64-NEXT: # kill: def $r9d killed $r9d killed $r9 +; WIN64-NEXT: subl %r11d, %r9d +; WIN64-NEXT: movl %eax, %r11d +; WIN64-NEXT: subl %ecx, %r11d +; WIN64-NEXT: imull %r11d, %r9d +; WIN64-NEXT: leal (%r12,%r14), %r11d +; WIN64-NEXT: # kill: def $r12d killed $r12d killed $r12 +; WIN64-NEXT: subl %r14d, %r12d +; WIN64-NEXT: imull %edx, %r12d +; WIN64-NEXT: movl {{[0-9]+}}(%rsp), %edx +; WIN64-NEXT: addl %r9d, %r12d +; WIN64-NEXT: movl %r15d, %r9d +; WIN64-NEXT: subl %edx, %r9d +; WIN64-NEXT: imull %esi, %r9d +; WIN64-NEXT: addl %r12d, %r9d +; WIN64-NEXT: addl %ecx, %eax +; WIN64-NEXT: imull %r8d, %eax +; WIN64-NEXT: imull %ebx, %r11d +; WIN64-NEXT: addl %r11d, %eax +; WIN64-NEXT: addl %r15d, %edx +; WIN64-NEXT: imull %edi, %edx +; WIN64-NEXT: addl %edx, %eax +; WIN64-NEXT: addl %r9d, %eax +; WIN64-NEXT: popq %rbx +; WIN64-NEXT: retq +; +; LINUXOSX-LABEL: testi32_inp: +; LINUXOSX: # %bb.0: +; LINUXOSX-NEXT: # kill: def $edx killed $edx def $rdx +; LINUXOSX-NEXT: # kill: def $esi killed $esi def $rsi +; LINUXOSX-NEXT: # kill: def $r14d killed $r14d def $r14 +; LINUXOSX-NEXT: # kill: def $r13d 
killed $r13d def $r13 +; LINUXOSX-NEXT: # kill: def $r12d killed $r12d def $r12 +; LINUXOSX-NEXT: # kill: def $r9d killed $r9d def $r9 +; LINUXOSX-NEXT: # kill: def $r8d killed $r8d def $r8 +; LINUXOSX-NEXT: # kill: def $edi killed $edi def $rdi +; LINUXOSX-NEXT: leal (%rdx,%rdi), %r10d +; LINUXOSX-NEXT: # kill: def $edx killed $edx killed $rdx +; LINUXOSX-NEXT: subl %edi, %edx +; LINUXOSX-NEXT: leal (%rsi,%r8), %edi +; LINUXOSX-NEXT: # kill: def $esi killed $esi killed $rsi +; LINUXOSX-NEXT: subl %r8d, %esi +; LINUXOSX-NEXT: leal (%r9,%r12), %r8d +; LINUXOSX-NEXT: # kill: def $r9d killed $r9d killed $r9 +; LINUXOSX-NEXT: subl %r12d, %r9d +; LINUXOSX-NEXT: movl %eax, %r11d +; LINUXOSX-NEXT: subl %ecx, %r11d +; LINUXOSX-NEXT: imull %r11d, %r9d +; LINUXOSX-NEXT: leal (%r13,%r14), %r11d +; LINUXOSX-NEXT: movl %r13d, %r12d +; LINUXOSX-NEXT: subl %r14d, %r12d +; LINUXOSX-NEXT: imull %edx, %r12d +; LINUXOSX-NEXT: movl {{[0-9]+}}(%rsp), %edx +; LINUXOSX-NEXT: addl %r9d, %r12d +; LINUXOSX-NEXT: movl %r15d, %r9d +; LINUXOSX-NEXT: subl %edx, %r9d +; LINUXOSX-NEXT: imull %esi, %r9d +; LINUXOSX-NEXT: addl %r12d, %r9d +; LINUXOSX-NEXT: addl %ecx, %eax +; LINUXOSX-NEXT: imull %r8d, %eax +; LINUXOSX-NEXT: imull %r10d, %r11d +; LINUXOSX-NEXT: addl %r11d, %eax +; LINUXOSX-NEXT: addl %r15d, %edx +; LINUXOSX-NEXT: imull %edi, %edx +; LINUXOSX-NEXT: addl %edx, %eax +; LINUXOSX-NEXT: addl %r9d, %eax +; LINUXOSX-NEXT: retq + i32 %b1, i32 %b2, i32 %b3, i32 %b4, i32 %b5, i32 %b6) nounwind { + %x1 = sub i32 %a1, %a2 + %x2 = sub i32 %a3, %a4 + %x3 = sub i32 %a5, %a6 + %y1 = sub i32 %b1, %b2 + %y2 = sub i32 %b3, %b4 + %y3 = sub i32 %b5, %b6 + %v1 = add i32 %a1, %a2 + %v2 = add i32 %a3, %a4 + %v3 = add i32 %a5, %a6 + %w1 = add i32 %b1, %b2 + %w2 = add i32 %b3, %b4 + %w3 = add i32 %b5, %b6 + %s1 = mul i32 %x1, %y1 + %s2 = mul i32 %x2, %y2 + %s3 = mul i32 %x3, %y3 + %t1 = mul i32 %v1, %w1 + %t2 = mul i32 %v2, %w2 + %t3 = mul i32 %v3, %w3 + %m1 = add i32 %s1, %s2 + %m2 = add i32 %m1, %s3 + %n1 = 
add i32 %t1, %t2 + %n2 = add i32 %n1, %t3 + %r1 = add i32 %m2, %n2 + ret i32 %r1 +} + +; Test that parameters, overflowing register capacity, are passed through the stack +define x86_regcallcc <32 x float> @testf32_stack(<32 x float> %a, <32 x float> %b, <32 x float> %c) nounwind { +; WIN32-LABEL: testf32_stack: +; WIN32: # %bb.0: +; WIN32-NEXT: pushl %ebp +; WIN32-NEXT: movl %esp, %ebp +; WIN32-NEXT: andl $-16, %esp +; WIN32-NEXT: subl $48, %esp +; WIN32-NEXT: movaps %xmm7, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; WIN32-NEXT: movaps %xmm6, (%esp) # 16-byte Spill +; WIN32-NEXT: movaps %xmm5, %xmm6 +; WIN32-NEXT: movaps %xmm4, %xmm5 +; WIN32-NEXT: movaps %xmm3, %xmm4 +; WIN32-NEXT: movaps %xmm2, %xmm3 +; WIN32-NEXT: movaps %xmm1, %xmm2 +; WIN32-NEXT: movaps %xmm0, %xmm1 +; WIN32-NEXT: movups 120(%ebp), %xmm7 +; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload +; WIN32-NEXT: addps %xmm7, %xmm0 +; WIN32-NEXT: movups 248(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm0 +; WIN32-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill +; WIN32-NEXT: movups 104(%ebp), %xmm7 +; WIN32-NEXT: movaps (%esp), %xmm0 # 16-byte Reload +; WIN32-NEXT: addps %xmm7, %xmm0 +; WIN32-NEXT: movups 232(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm0 +; WIN32-NEXT: movaps %xmm0, (%esp) # 16-byte Spill +; WIN32-NEXT: movups 88(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm6 +; WIN32-NEXT: movups 216(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm6 +; WIN32-NEXT: movups 72(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm5 +; WIN32-NEXT: movups 200(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm5 +; WIN32-NEXT: movups 56(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm4 +; WIN32-NEXT: movups 184(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm4 +; WIN32-NEXT: movups 40(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm3 +; WIN32-NEXT: movups 168(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm3 +; WIN32-NEXT: movups 24(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm2 +; WIN32-NEXT: movups 
152(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm2 +; WIN32-NEXT: movups 8(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm1 +; WIN32-NEXT: movups 136(%ebp), %xmm7 +; WIN32-NEXT: addps %xmm7, %xmm1 +; WIN32-NEXT: movaps %xmm1, %xmm0 +; WIN32-NEXT: movaps %xmm2, %xmm1 +; WIN32-NEXT: movaps %xmm3, %xmm2 +; WIN32-NEXT: movaps %xmm4, %xmm3 +; WIN32-NEXT: movaps %xmm5, %xmm4 +; WIN32-NEXT: movaps %xmm6, %xmm5 +; WIN32-NEXT: movaps (%esp), %xmm6 # 16-byte Reload +; WIN32-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm7 # 16-byte Reload +; WIN32-NEXT: movl %ebp, %esp +; WIN32-NEXT: popl %ebp +; WIN32-NEXT: retl +; +; WIN64-LABEL: testf32_stack: +; WIN64: # %bb.0: +; WIN64-NEXT: pushq %rax +; WIN64-NEXT: addps %xmm15, %xmm7 +; WIN64-NEXT: addps %xmm14, %xmm6 +; WIN64-NEXT: addps %xmm13, %xmm5 +; WIN64-NEXT: addps %xmm12, %xmm4 +; WIN64-NEXT: addps %xmm11, %xmm3 +; WIN64-NEXT: addps %xmm10, %xmm2 +; WIN64-NEXT: addps %xmm9, %xmm1 +; WIN64-NEXT: addps %xmm8, %xmm0 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm0 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm1 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm2 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm3 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm4 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm5 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm6 +; WIN64-NEXT: addps {{[0-9]+}}(%rsp), %xmm7 +; WIN64-NEXT: popq %rax +; WIN64-NEXT: retq +; +; LINUXOSX-LABEL: testf32_stack: +; LINUXOSX: # %bb.0: +; LINUXOSX-NEXT: addps %xmm15, %xmm7 +; LINUXOSX-NEXT: addps %xmm14, %xmm6 +; LINUXOSX-NEXT: addps %xmm13, %xmm5 +; LINUXOSX-NEXT: addps %xmm12, %xmm4 +; LINUXOSX-NEXT: addps %xmm11, %xmm3 +; LINUXOSX-NEXT: addps %xmm10, %xmm2 +; LINUXOSX-NEXT: addps %xmm9, %xmm1 +; LINUXOSX-NEXT: addps %xmm8, %xmm0 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm0 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm1 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm2 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm3 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm4 +; LINUXOSX-NEXT: addps 
{{[0-9]+}}(%rsp), %xmm5 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm6 +; LINUXOSX-NEXT: addps {{[0-9]+}}(%rsp), %xmm7 +; LINUXOSX-NEXT: retq + %x1 = fadd <32 x float> %a, %b + %x2 = fadd <32 x float> %x1, %c + ret <32 x float> %x2 +} + +!llvm.module.flags = !{!0} +!0 = !{i32 4, !"RegCallv4", i32 1}