Skip to content

Commit

Permalink
[X86][Regcall] Add an option to respect regcall ABI v.4 in win64&win32
Browse files Browse the repository at this point in the history
Reviewed By: pengfei

Differential Revision: https://reviews.llvm.org/D155863
  • Loading branch information
yubingex007-a11y committed Aug 3, 2023
1 parent 2772c26 commit 6ee497a
Show file tree
Hide file tree
Showing 13 changed files with 780 additions and 6 deletions.
2 changes: 2 additions & 0 deletions clang/include/clang/Basic/LangOptions.def
Original file line number Diff line number Diff line change
Expand Up @@ -429,6 +429,8 @@ LANGOPT(PaddingOnUnsignedFixedPoint, 1, 0,

LANGOPT(RegisterStaticDestructors, 1, 1, "Register C++ static destructors")

LANGOPT(RegCall4, 1, 0, "Set __regcall4 as a default calling convention to respect __regcall ABI v.4")

LANGOPT(MatrixTypes, 1, 0, "Enable or disable the builtin matrix type")

ENUM_LANGOPT(StrictFlexArraysLevel, StrictFlexArraysLevelKind, 2,
Expand Down
5 changes: 5 additions & 0 deletions clang/include/clang/Driver/Options.td
Original file line number Diff line number Diff line change
Expand Up @@ -4505,6 +4505,9 @@ def no_offload_add_rpath: Flag<["--"], "no-offload-add-rpath">, Flags<[NoArgumen
Alias<frtlib_add_rpath>;
def r : Flag<["-"], "r">, Flags<[LinkerInput,NoArgumentUnused]>,
Group<Link_Group>;
def regcall4 : Flag<["-"], "regcall4">, Group<m_Group>, Flags<[CC1Option]>,
HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">,
MarshallingInfoFlag<LangOpts<"RegCall4">>;
def save_temps_EQ : Joined<["-", "--"], "save-temps=">, Flags<[CC1Option, FlangOption, FC1Option, NoXarchOption]>,
HelpText<"Save intermediate compilation results.">;
def save_temps : Flag<["-", "--"], "save-temps">, Flags<[FlangOption, FC1Option, NoXarchOption]>,
Expand Down Expand Up @@ -7292,6 +7295,8 @@ def _SLASH_Gv : CLFlag<"Gv">,
HelpText<"Set __vectorcall as a default calling convention">;
def _SLASH_Gregcall : CLFlag<"Gregcall">,
HelpText<"Set __regcall as a default calling convention">;
def _SLASH_Gregcall4 : CLFlag<"Gregcall4">,
HelpText<"Set __regcall4 as a default calling convention to respect __regcall ABI v.4">;

// GNU Driver aliases

Expand Down
8 changes: 6 additions & 2 deletions clang/lib/AST/ItaniumMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1688,8 +1688,12 @@ void CXXNameMangler::mangleRegCallName(const IdentifierInfo *II) {
// <source-name> ::= <positive length number> __regcall3__ <identifier>
// <number> ::= [n] <non-negative decimal integer>
// <identifier> ::= <unqualified source code identifier>
Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__"
<< II->getName();
if (getASTContext().getLangOpts().RegCall4)
Out << II->getLength() + sizeof("__regcall4__") - 1 << "__regcall4__"
<< II->getName();
else
Out << II->getLength() + sizeof("__regcall3__") - 1 << "__regcall3__"
<< II->getName();
}

void CXXNameMangler::mangleDeviceStubName(const IdentifierInfo *II) {
Expand Down
8 changes: 6 additions & 2 deletions clang/lib/AST/Mangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -198,8 +198,12 @@ void MangleContext::mangleName(GlobalDecl GD, raw_ostream &Out) {
Out << '_';
else if (CC == CCM_Fast)
Out << '@';
else if (CC == CCM_RegCall)
Out << "__regcall3__";
else if (CC == CCM_RegCall) {
if (getASTContext().getLangOpts().RegCall4)
Out << "__regcall4__";
else
Out << "__regcall3__";
}

if (!MCXX)
Out << D->getIdentifier()->getName();
Expand Down
8 changes: 7 additions & 1 deletion clang/lib/AST/MicrosoftMangle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2853,6 +2853,7 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
// ::= T # __attribute__((__swiftasynccall__))
// // Clang-only
// ::= w # __regcall
// ::= x # __regcall4
// The 'export' calling conventions are from a bygone era
// (*cough*Win16*cough*) when functions were declared for export with
// that keyword. (It didn't actually export them, it just made them so
Expand All @@ -2873,7 +2874,12 @@ void MicrosoftCXXNameMangler::mangleCallingConvention(CallingConv CC) {
case CC_Swift: Out << 'S'; break;
case CC_SwiftAsync: Out << 'W'; break;
case CC_PreserveMost: Out << 'U'; break;
case CC_X86RegCall: Out << 'w'; break;
case CC_X86RegCall:
if (getASTContext().getLangOpts().RegCall4)
Out << "x";
else
Out << "w";
break;
}
}
void MicrosoftCXXNameMangler::mangleCallingConvention(const FunctionType *T) {
Expand Down
7 changes: 6 additions & 1 deletion clang/lib/CodeGen/CodeGenModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1196,6 +1196,8 @@ void CodeGenModule::Release() {
getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment);
if (getCodeGenOpts().SkipRaxSetup)
getModule().addModuleFlag(llvm::Module::Override, "SkipRaxSetup", 1);
if (getLangOpts().RegCall4)
getModule().addModuleFlag(llvm::Module::Override, "RegCallv4", 1);

if (getContext().getTargetInfo().getMaxTLSAlign())
getModule().addModuleFlag(llvm::Module::Error, "MaxTLSAlign",
Expand Down Expand Up @@ -1707,7 +1709,10 @@ static std::string getMangledNameImpl(CodeGenModule &CGM, GlobalDecl GD,

if (FD &&
FD->getType()->castAs<FunctionType>()->getCallConv() == CC_X86RegCall) {
Out << "__regcall3__" << II->getName();
if (CGM.getLangOpts().RegCall4)
Out << "__regcall4__" << II->getName();
else
Out << "__regcall3__" << II->getName();
} else if (FD && FD->hasAttr<CUDAGlobalAttr>() &&
GD.getKernelReferenceKind() == KernelReferenceKind::Stub) {
Out << "__device_stub__" << II->getName();
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/Driver/ToolChains/Clang.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7936,6 +7936,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
CmdArgs.push_back("-fms-memptr-rep=virtual");
}

if (Args.hasArg(options::OPT_regcall4))
CmdArgs.push_back("-regcall4");

// Parse the default calling convention options.
if (Arg *CCArg =
Args.getLastArg(options::OPT__SLASH_Gd, options::OPT__SLASH_Gr,
Expand Down Expand Up @@ -7972,6 +7975,9 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType,
CmdArgs.push_back(DCCFlag);
}

if (Args.hasArg(options::OPT__SLASH_Gregcall4))
CmdArgs.push_back("-regcall4");

Args.AddLastArg(CmdArgs, options::OPT_vtordisp_mode_EQ);

if (!Args.hasArg(options::OPT_fdiagnostics_format_EQ)) {
Expand Down
7 changes: 7 additions & 0 deletions clang/test/CodeGen/check-regcall4-moduleflag.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm %s -o - | FileCheck %s -check-prefix=NO-REGCALL4
// RUN: %clang_cc1 -triple x86_64-unknown-unknown -regcall4 -emit-llvm %s -o - | FileCheck %s -check-prefix=REGCALL4

void f(void) {}

// REGCALL4: !"RegCallv4", i32 1}
// NO-REGCALL4-NOT: "RegCallv4"
100 changes: 100 additions & 0 deletions clang/test/CodeGen/regcall4.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-win32 | FileCheck %s --check-prefixes=X86,Win32
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-win32 | FileCheck %s --check-prefixes=X64,Win64
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=i386-pc-linux-gnu | FileCheck %s --check-prefixes=X86,Lin32
// RUN: %clang_cc1 -regcall4 -emit-llvm %s -o - -ffreestanding -triple=x86_64-pc-linux-gnu | FileCheck %s --check-prefixes=X64,Lin64

#include <xmmintrin.h>

void __regcall v1(int a, int b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v1(i32 inreg noundef %a, i32 inreg noundef %b)
// X64: define dso_local x86_regcallcc void @__regcall4__v1(i32 noundef %a, i32 noundef %b)

void __attribute__((regcall)) v1b(int a, int b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v1b(i32 inreg noundef %a, i32 inreg noundef %b)
// X64: define dso_local x86_regcallcc void @__regcall4__v1b(i32 noundef %a, i32 noundef %b)

void __regcall v2(char a, char b) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v2(i8 inreg noundef signext %a, i8 inreg noundef signext %b)
// Win64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef %a, i8 noundef %b)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v2(i8 noundef signext %a, i8 noundef signext %b)

struct Small { int x; };
void __regcall v3(int a, struct Small b, int c) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 %b.0, i32 inreg noundef %c)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v3(i32 inreg noundef %a, i32 inreg %0, i32 %b.0, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__v3(i32 noundef %a, i32 %b.coerce, i32 noundef %c)

struct Large { int a[5]; };
void __regcall v4(int a, struct Large b, int c) {}
// Win32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 inreg noundef %c)
// Lin32: define dso_local x86_regcallcc void @__regcall4__v4(i32 inreg noundef %a, ptr noundef byval(%struct.Large) align 4 %b, i32 noundef %c)
// Win64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, ptr noundef %b, i32 noundef %c)
// Lin64: define dso_local x86_regcallcc void @__regcall4__v4(i32 noundef %a, [5 x i32] %b.coerce, i32 noundef %c)

void __regcall v5(long long a, int b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 inreg noundef %b, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__v5(i64 noundef %a, i32 noundef %b, i32 noundef %c)

struct HFA2 { double x, y; };
struct HFA4 { double w, x, y, z; };
struct HFA5 { double v, w, x, y, z; };

void __regcall hfa1(int a, struct HFA4 b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 inreg noundef %a, double %b.0, double %b.1, double %b.2, double %b.3, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa1(i32 noundef %a, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, i32 noundef %c)

// HFAs that would require more than six total SSE registers are passed
// indirectly. Additional vector arguments can consume the rest of the SSE
// registers.
void __regcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa2(double %a.0, double %a.1, double %a.2, double %a.3, double %b.0, double %b.1, double %b.2, double %b.3, ptr inreg noundef %0)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa2(double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double %{{.*}}, double noundef %c)

// Ensure that we pass builtin types directly while counting them against the
// SSE register usage.
void __regcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %f.0, double %f.1)
// X64: define dso_local x86_regcallcc void @__regcall4__hfa3(double noundef %a, double noundef %b, double noundef %c, double noundef %d, double noundef %e, double %{{.*}}, double %{{.*}})

// Aggregates with more than four elements are not HFAs and are passed byval(%b.3, double noundef).
// Because they are not classified as homogeneous, they don't get special
// handling to ensure alignment.
void __regcall hfa4(struct HFA5 a) {}
// X32: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef byval(%struct.HFA5) align 4 %{{.*}})
// Win64: define dso_local x86_regcallcc void @__regcall4__hfa4(ptr noundef %a)
// Lin64: define dso_local x86_regcallcc void @__regcall4__hfa4(double %a.coerce0, double %a.coerce1, double %a.coerce2, double %a.coerce3, double %a.coerce4)

// Return HFAs of 4 or fewer elements in registers.
static struct HFA2 g_hfa2;
struct HFA2 __regcall hfa5(void) { return g_hfa2; }
// X86: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()
// X64: define dso_local x86_regcallcc %struct.HFA2 @__regcall4__hfa5()

typedef float __attribute__((vector_size(16))) v4f32;
struct HVA2 { v4f32 x, y; };
struct HVA4 { v4f32 w, x, y, z; };

void __regcall hva1(int a, struct HVA4 b, int c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva1(i32 inreg noundef %a, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, i32 inreg noundef %c)
// X64: define dso_local x86_regcallcc void @__regcall4__hva1(i32 noundef %a, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, i32 noundef %c)

void __regcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %0)
// X64: define dso_local x86_regcallcc void @__regcall4__hva2(<4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> %{{.*}}, <4 x float> noundef %c)

void __regcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {}
// X86: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %f.0, <4 x float> %f.1)
// X64: define dso_local x86_regcallcc void @__regcall4__hva3(<4 x float> noundef %a, <4 x float> noundef %b, <4 x float> noundef %c, <4 x float> noundef %d, <4 x float> noundef %e, <4 x float> %{{.*}}, <4 x float> %{{.*}})

typedef float __attribute__((ext_vector_type(3))) v3f32;
struct OddSizeHVA { v3f32 x, y; };

void __regcall odd_size_hva(struct OddSizeHVA a) {}
// X86: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %a.0, <3 x float> %a.1)
// X64: define dso_local x86_regcallcc void @__regcall4__odd_size_hva(<3 x float> %{{.*}}, <3 x float> %{{.*}})

struct HFA6 { __m128 f[4]; };
struct HFA6 __regcall ret_reg_reused(struct HFA6 a, struct HFA6 b, struct HFA6 c, struct HFA6 d){ struct HFA6 h; return h;}
// X86: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, ptr inreg noundef %c, ptr inreg noundef %d)
// Win64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused(<4 x float> %a.0, <4 x float> %a.1, <4 x float> %a.2, <4 x float> %a.3, <4 x float> %b.0, <4 x float> %b.1, <4 x float> %b.2, <4 x float> %b.3, <4 x float> %c.0, <4 x float> %c.1, <4 x float> %c.2, <4 x float> %c.3, <4 x float> %d.0, <4 x float> %d.1, <4 x float> %d.2, <4 x float> %d.3)
// Lin64: define dso_local x86_regcallcc %struct.HFA6 @__regcall4__ret_reg_reused([4 x <4 x float>] %a.coerce, [4 x <4 x float>] %b.coerce, [4 x <4 x float>] %c.coerce, [4 x <4 x float>] %d.coerce)
Loading

0 comments on commit 6ee497a

Please sign in to comment.