diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 828dd10e3d6db..89ea2f0930cec 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -651,6 +651,9 @@ Bug Fixes in This Version - Fixed false positive error emitted by clang when performing qualified name lookup and the current class instantiation has dependent bases. Fixes (`#13826 `_) +- Fix the name of the ifunc symbol emitted for multiversion functions declared with the + ``target_clones`` attribute. This addresses a linker error that would otherwise occur + when these functions are referenced from other TUs. - Fixes compile error that double colon operator cannot resolve macro with parentheses. Fixes (`#64467 `_) - Clang's ``-Wchar-subscripts`` no longer warns on chars whose values are known non-negative constants. diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 53c3dff19fc9b..bbe4de94cbabe 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2552,6 +2552,13 @@ example, the following will emit 4 versions of the function: __attribute__((target_clones("arch=atom,avx2","arch=ivybridge","default"))) void foo() {} +For targets that support the GNU indirect function (IFUNC) feature, dispatch +is performed by emitting an indirect function that is resolved to the appropriate +target clone at load time. The indirect function is given the name the +multiversioned function would have if it had been declared without the attribute. +For backward compatibility with earlier Clang releases, a function alias with an +``.ifunc`` suffix is also emitted. The ``.ifunc`` suffixed symbol is a deprecated +feature and support for it may be removed in the future. }]; } diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index dea58a7ff4146..6a20723bf2bca 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -4178,8 +4178,29 @@ void CodeGenModule::emitMultiVersionFunctions() { } llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD); - if (auto *IFunc = dyn_cast(ResolverConstant)) + if (auto *IFunc = dyn_cast(ResolverConstant)) { ResolverConstant = IFunc->getResolver(); + // In Aarch64, default versions of multiversioned functions are mangled to + // their 'normal' assembly name. This deviates from other targets which + // append a '.default' string. As a result we need to continue appending + // .ifunc in Aarch64. + // FIXME: Should Aarch64 mangling for 'default' multiversion function and + // in turn ifunc function match that of other targets? + if (FD->isTargetClonesMultiVersion() && + !getTarget().getTriple().isAArch64()) { + const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD); + llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI); + std::string MangledName = getMangledNameImpl( + *this, GD, FD, /*OmitMultiVersionMangling=*/true); + // In prior versions of Clang, the mangling for ifuncs incorrectly + // included an .ifunc suffix. This alias is generated for backward + // compatibility. It is deprecated, and may be removed in the future. + auto *Alias = llvm::GlobalAlias::create( + DeclTy, 0, getMultiversionLinkage(*this, GD), + MangledName + ".ifunc", IFunc, &getModule()); + SetCommonAttributes(FD, Alias); + } + } llvm::Function *ResolverFunc = cast(ResolverConstant); ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD)); @@ -4346,10 +4367,19 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) { // Holds the name of the resolver, in ifunc mode this is the ifunc (which has // a separate resolver). std::string ResolverName = MangledName; - if (getTarget().supportsIFunc()) - ResolverName += ".ifunc"; - else if (FD->isTargetMultiVersion()) + if (getTarget().supportsIFunc()) { + // In Aarch64, default versions of multiversioned functions are mangled to + // their 'normal' assembly name. This deviates from other targets which + // append a '.default' string. As a result we need to continue appending + // .ifunc in Aarch64. + // FIXME: Should Aarch64 mangling for 'default' multiversion function and + // in turn ifunc function match that of other targets? + if (!FD->isTargetClonesMultiVersion() || + getTarget().getTriple().isAArch64()) + ResolverName += ".ifunc"; + } else if (FD->isTargetMultiVersion()) { ResolverName += ".resolver"; + } // If the resolver has already been created, just return it. if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName)) diff --git a/clang/test/CodeGen/attr-target-clones.c b/clang/test/CodeGen/attr-target-clones.c index 98ffea40f56d8..4b99914031b10 100644 --- a/clang/test/CodeGen/attr-target-clones.c +++ b/clang/test/CodeGen/attr-target-clones.c @@ -16,13 +16,23 @@ // LINUX: @__cpu_model = external dso_local global { i32, i32, i32, [1 x i32] } // LINUX: @__cpu_features2 = external dso_local global [3 x i32] -// LINUX: @internal.ifunc = internal ifunc i32 (), ptr @internal.resolver -// LINUX: @foo.ifunc = weak_odr ifunc i32 (), ptr @foo.resolver -// LINUX: @foo_dupes.ifunc = weak_odr ifunc void (), ptr @foo_dupes.resolver -// LINUX: @unused.ifunc = weak_odr ifunc void (), ptr @unused.resolver -// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), ptr @foo_inline.resolver -// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), ptr @foo_inline2.resolver -// LINUX: @foo_used_no_defn.ifunc = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver +// LINUX: @internal.ifunc = internal alias i32 (), ptr @internal +// LINUX: @foo.ifunc = weak_odr alias i32 (), ptr @foo +// LINUX: @foo_dupes.ifunc = weak_odr alias void (), ptr @foo_dupes +// LINUX: @unused.ifunc = weak_odr alias void (), ptr @unused +// LINUX: @foo_inline.ifunc = weak_odr alias i32 (), ptr @foo_inline +// LINUX: @foo_inline2.ifunc = weak_odr alias i32 (), ptr @foo_inline2 +// LINUX: @foo_used_no_defn.ifunc = weak_odr alias i32 (), ptr @foo_used_no_defn +// LINUX: @isa_level.ifunc = weak_odr alias i32 (i32), ptr @isa_level + +// LINUX: @internal = internal ifunc i32 (), ptr @internal.resolver +// LINUX: @foo = weak_odr ifunc i32 (), ptr @foo.resolver +// LINUX: @foo_dupes = weak_odr ifunc void (), ptr @foo_dupes.resolver +// LINUX: @unused = weak_odr ifunc void (), ptr @unused.resolver +// LINUX: @foo_inline = weak_odr ifunc i32 (), ptr @foo_inline.resolver +// LINUX: @foo_inline2 = weak_odr ifunc i32 (), ptr @foo_inline2.resolver +// LINUX: @foo_used_no_defn = weak_odr ifunc i32 (), ptr @foo_used_no_defn.resolver +// LINUX: @isa_level = weak_odr ifunc i32 (i32), ptr @isa_level.resolver static int __attribute__((target_clones("sse4.2, default"))) internal(void) { return 0; } int use(void) { return internal(); } @@ -60,7 +70,7 @@ void bar2(void) { // LINUX: define {{.*}}void @bar2() // WINDOWS: define dso_local void @bar2() foo_dupes(); - // LINUX: call void @foo_dupes.ifunc() + // LINUX: call void @foo_dupes() // WINDOWS: call void @foo_dupes() } @@ -68,7 +78,7 @@ int bar(void) { // LINUX: define {{.*}}i32 @bar() #[[DEF:[0-9]+]] // WINDOWS: define dso_local i32 @bar() #[[DEF:[0-9]+]] return foo(); - // LINUX: call i32 @foo.ifunc() + // LINUX: call i32 @foo() // WINDOWS: call i32 @foo() } @@ -95,8 +105,8 @@ int bar3(void) { // LINUX: define {{.*}}i32 @bar3() // WINDOWS: define dso_local i32 @bar3() return foo_inline() + foo_inline2(); - // LINUX: call i32 @foo_inline.ifunc() - // LINUX: call i32 @foo_inline2.ifunc() + // LINUX: call i32 @foo_inline() + // LINUX: call i32 @foo_inline2() // WINDOWS: call i32 @foo_inline() // WINDOWS: call i32 @foo_inline2() } @@ -134,7 +144,7 @@ int test_foo_used_no_defn(void) { // LINUX: define {{.*}}i32 @test_foo_used_no_defn() // WINDOWS: define dso_local i32 @test_foo_used_no_defn() return foo_used_no_defn(); - // LINUX: call i32 @foo_used_no_defn.ifunc() + // LINUX: call i32 @foo_used_no_defn() // WINDOWS: call i32 @foo_used_no_defn() } diff --git a/clang/test/CodeGenCXX/attr-target-clones.cpp b/clang/test/CodeGenCXX/attr-target-clones.cpp index 86293b98dbbd3..fd2d38062a71e 100644 --- a/clang/test/CodeGenCXX/attr-target-clones.cpp +++ b/clang/test/CodeGenCXX/attr-target-clones.cpp @@ -1,13 +1,20 @@ // RUN: %clang_cc1 -std=c++11 -triple x86_64-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=LINUX // RUN: %clang_cc1 -std=c++11 -triple x86_64-windows-pc -emit-llvm %s -o - | FileCheck %s --check-prefix=WINDOWS +// Aliases for ifuncs +// LINUX: @_Z10overloadedi.ifunc = weak_odr alias i32 (i32), ptr @_Z10overloadedi +// LINUX: @_Z10overloadedPKc.ifunc = weak_odr alias i32 (ptr), ptr @_Z10overloadedPKc +// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIssE3fooEv +// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIisE3fooEv +// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr alias i32 (ptr), ptr @_ZN1CIdfE3fooEv + // Overloaded ifuncs -// LINUX: @_Z10overloadedi.ifunc = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver -// LINUX: @_Z10overloadedPKc.ifunc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver +// LINUX: @_Z10overloadedi = weak_odr ifunc i32 (i32), ptr @_Z10overloadedi.resolver +// LINUX: @_Z10overloadedPKc = weak_odr ifunc i32 (ptr), ptr @_Z10overloadedPKc.resolver // struct 'C' ifuncs, note the 'float, U' one doesn't get one. -// LINUX: @_ZN1CIssE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver -// LINUX: @_ZN1CIisE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver -// LINUX: @_ZN1CIdfE3fooEv.ifunc = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver +// LINUX: @_ZN1CIssE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIssE3fooEv.resolver +// LINUX: @_ZN1CIisE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIisE3fooEv.resolver +// LINUX: @_ZN1CIdfE3fooEv = weak_odr ifunc i32 (ptr), ptr @_ZN1CIdfE3fooEv.resolver int __attribute__((target_clones("sse4.2", "default"))) overloaded(int) { return 1; } // LINUX: define {{.*}}i32 @_Z10overloadedi.sse4.2.0(i32{{.+}}) @@ -37,10 +44,10 @@ int __attribute__((target_clones("arch=ivybridge", "default"))) overloaded(const void use_overloaded() { overloaded(1); - // LINUX: call noundef i32 @_Z10overloadedi.ifunc + // LINUX: call noundef i32 @_Z10overloadedi // WINDOWS: call noundef i32 @"?overloaded@@YAHH@Z" overloaded(nullptr); - // LINUX: call noundef i32 @_Z10overloadedPKc.ifunc + // LINUX: call noundef i32 @_Z10overloadedPKc // WINDOWS: call noundef i32 @"?overloaded@@YAHPEBD@Z" } @@ -64,11 +71,11 @@ int __attribute__((target_clones("sse4.2", "default"))) foo(){ return 3;} void uses_specialized() { C c; c.foo(); - // LINUX: call noundef i32 @_ZN1CIssE3fooEv.ifunc(ptr + // LINUX: call noundef i32 @_ZN1CIssE3fooEv(ptr // WINDOWS: call noundef i32 @"?foo@?$C@FF@@QEAAHXZ"(ptr C c2; c2.foo(); - // LINUX: call noundef i32 @_ZN1CIisE3fooEv.ifunc(ptr + // LINUX: call noundef i32 @_ZN1CIisE3fooEv(ptr // WINDOWS: call noundef i32 @"?foo@?$C@HF@@QEAAHXZ"(ptr C c3; c3.foo(); @@ -77,7 +84,7 @@ void uses_specialized() { // WINDOWS: call noundef i32 @"?foo@?$C@MF@@QEAAHXZ"(ptr C c4; c4.foo(); - // LINUX: call noundef i32 @_ZN1CIdfE3fooEv.ifunc(ptr + // LINUX: call noundef i32 @_ZN1CIdfE3fooEv(ptr // WINDOWS: call noundef i32 @"?foo@?$C@NM@@QEAAHXZ"(ptr }