Skip to content

Commit

Permalink
[clang] Corrections for target_clones multiversion functions.
Browse files Browse the repository at this point in the history
This change merges the code that emits target and target_clones multiversion
resolver functions and, in doing so, corrects the handling of target_clones
functions that are declared but not defined. Previously, a use of such
a target_clones function would result in an attempt to emit an ifunc
that referenced an undefined resolver function. Ifunc references to
undefined resolver functions are not allowed and, when the LLVM verifier
is not disabled (via '-disable-llvm-verifier'), resulted in the verifier
issuing an "IFunc resolver must be a definition" error and aborting the
compilation. With this change, ifuncs and resolver function definitions
are always emitted for used target_clones functions regardless of whether
the target_clones function is defined (if the function is defined, then
the ifunc and resolver are emitted regardless of whether the function is
used).

This change has the side effect of causing target_clones variants and
resolver functions to be emitted in a different order than they were
previously. This is harmless and is reflected in the updated tests.

Reviewed By: erichkeane

Differential Revision: https://reviews.llvm.org/D122958
  • Loading branch information
tahonermann committed Apr 5, 2022
1 parent 40af8df commit 5531aba
Show file tree
Hide file tree
Showing 4 changed files with 119 additions and 118 deletions.
133 changes: 48 additions & 85 deletions clang/lib/CodeGen/CodeGenModule.cpp
Expand Up @@ -3320,13 +3320,13 @@ void CodeGenModule::EmitMultiVersionFunctionDefinition(GlobalDecl GD,
auto *Spec = FD->getAttr<CPUSpecificAttr>();
for (unsigned I = 0; I < Spec->cpus_size(); ++I)
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
// Requires multiple emits.
} else if (FD->isTargetClonesMultiVersion()) {
auto *Clone = FD->getAttr<TargetClonesAttr>();
for (unsigned I = 0; I < Clone->featuresStrs_size(); ++I)
if (Clone->isFirstOfVersion(I))
EmitGlobalFunctionDefinition(GD.getWithMultiVersionIndex(I), nullptr);
EmitTargetClonesResolver(GD);
// Ensure that the resolver function is also emitted.
GetOrCreateMultiVersionResolver(GD);
} else
EmitGlobalFunctionDefinition(GD, GV);
}
Expand Down Expand Up @@ -3408,57 +3408,6 @@ llvm::GlobalValue::LinkageTypes getMultiversionLinkage(CodeGenModule &CGM,
return llvm::GlobalValue::WeakODRLinkage;
}

/// Emit the body of the resolver for a target_clones multiversion function.
///
/// Collects one resolver option per distinct version listed in the
/// function's TargetClonesAttr, orders the options by TargetMVPriority, and
/// hands them to CodeGenFunction::EmitMultiVersionResolver.
///
/// \param GD the target_clones function; its declaration must be a
///        FunctionDecl carrying a TargetClonesAttr (both are asserted).
///
/// Every version's llvm::Function must already exist in the module when this
/// is called; a missing one trips the assertion inside the loop.
void CodeGenModule::EmitTargetClonesResolver(GlobalDecl GD) {
const auto *FD = cast<FunctionDecl>(GD.getDecl());
assert(FD && "Not a FunctionDecl?");
const auto *TC = FD->getAttr<TargetClonesAttr>();
assert(TC && "Not a target_clones Function?");

// Locate the function whose body will be filled in. On ifunc-capable targets
// GetOrCreateMultiVersionResolver is expected to yield a GlobalIFunc, and the
// function to emit into is that ifunc's resolver; otherwise the returned
// value is expected to be the function itself.
llvm::Function *ResolverFunc;
if (getTarget().supportsIFunc()) {
auto *IFunc = cast<llvm::GlobalIFunc>(GetOrCreateMultiVersionResolver(GD));
ResolverFunc = cast<llvm::Function>(IFunc->getResolver());
} else
ResolverFunc = cast<llvm::Function>(GetOrCreateMultiVersionResolver(GD));

SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size();
++VersionIndex) {
// Only indices for which isFirstOfVersion() holds contribute an option;
// the remaining indices are skipped.
if (!TC->isFirstOfVersion(VersionIndex))
continue;
StringRef Version = TC->getFeatureStr(VersionIndex);
// The variant is looked up by the mangled name of the version-indexed
// GlobalDecl.
StringRef MangledName =
getMangledName(GD.getWithMultiVersionIndex(VersionIndex));
llvm::Constant *Func = GetGlobalValue(MangledName);
assert(Func &&
"Should have already been created before calling resolver emit");

StringRef Architecture;
llvm::SmallVector<StringRef, 1> Feature;

// "arch=<name>" sets the architecture to <name>; any other string except
// "default" is recorded as a single feature. "default" leaves both the
// architecture and the feature list empty.
if (Version.startswith("arch="))
Architecture = Version.drop_front(sizeof("arch=") - 1);
else if (Version != "default")
Feature.push_back(Version);

Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
}

// Where COMDATs are supported, place the resolver in a COMDAT named after
// the resolver itself.
if (supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));

// Order options by descending TargetMVPriority; stable_sort keeps the
// attribute order among options of equal priority.
const TargetInfo &TI = getTarget();
std::stable_sort(
Options.begin(), Options.end(),
[&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
const CodeGenFunction::MultiVersionResolverOption &RHS) {
return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
});
CodeGenFunction CGF(*this);
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
}

void CodeGenModule::emitMultiVersionFunctions() {
std::vector<GlobalDecl> MVFuncsToEmit;
MultiVersionFuncs.swap(MVFuncsToEmit);
Expand Down Expand Up @@ -3495,26 +3444,58 @@ void CodeGenModule::emitMultiVersionFunctions() {
Options.emplace_back(cast<llvm::Function>(Func),
TA->getArchitecture(), Feats);
});
} else if (FD->isTargetClonesMultiVersion()) {
const auto *TC = FD->getAttr<TargetClonesAttr>();
for (unsigned VersionIndex = 0; VersionIndex < TC->featuresStrs_size();
++VersionIndex) {
if (!TC->isFirstOfVersion(VersionIndex))
continue;
GlobalDecl CurGD{(FD->isDefined() ? FD->getDefinition() : FD),
VersionIndex};
StringRef Version = TC->getFeatureStr(VersionIndex);
StringRef MangledName = getMangledName(CurGD);
llvm::Constant *Func = GetGlobalValue(MangledName);
if (!Func) {
if (FD->isDefined()) {
EmitGlobalFunctionDefinition(CurGD, nullptr);
Func = GetGlobalValue(MangledName);
} else {
const CGFunctionInfo &FI =
getTypes().arrangeGlobalDeclaration(CurGD);
llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
Func = GetAddrOfFunction(CurGD, Ty, /*ForVTable=*/false,
/*DontDefer=*/false, ForDefinition);
}
assert(Func && "This should have just been created");
}

StringRef Architecture;
llvm::SmallVector<StringRef, 1> Feature;

if (Version.startswith("arch="))
Architecture = Version.drop_front(sizeof("arch=") - 1);
else if (Version != "default")
Feature.push_back(Version);

Options.emplace_back(cast<llvm::Function>(Func), Architecture, Feature);
}
} else {
assert(0 && "Expected a target multiversion function");
assert(0 && "Expected a target or target_clones multiversion function");
continue;
}

llvm::Function *ResolverFunc;
const TargetInfo &TI = getTarget();
llvm::Constant *ResolverConstant = GetOrCreateMultiVersionResolver(GD);
if (auto *IFunc = dyn_cast<llvm::GlobalIFunc>(ResolverConstant))
ResolverConstant = IFunc->getResolver();
llvm::Function *ResolverFunc = cast<llvm::Function>(ResolverConstant);

if (TI.supportsIFunc() || FD->isTargetMultiVersion()) {
ResolverFunc = cast<llvm::Function>(
GetGlobalValue((getMangledName(GD) + ".resolver").str()));
ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));
} else {
ResolverFunc = cast<llvm::Function>(GetGlobalValue(getMangledName(GD)));
}
ResolverFunc->setLinkage(getMultiversionLinkage(*this, GD));

if (supportsCOMDAT())
ResolverFunc->setComdat(
getModule().getOrInsertComdat(ResolverFunc->getName()));

const TargetInfo &TI = getTarget();
llvm::stable_sort(
Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
const CodeGenFunction::MultiVersionResolverOption &RHS) {
Expand Down Expand Up @@ -3676,35 +3657,17 @@ llvm::Constant *CodeGenModule::GetOrCreateMultiVersionResolver(GlobalDecl GD) {
else if (FD->isTargetMultiVersion())
ResolverName += ".resolver";

// If this already exists, just return that one.
// If the resolver has already been created, just return it.
if (llvm::GlobalValue *ResolverGV = GetGlobalValue(ResolverName))
return ResolverGV;

const CGFunctionInfo &FI = getTypes().arrangeGlobalDeclaration(GD);
llvm::FunctionType *DeclTy = getTypes().GetFunctionType(FI);

// Since this is the first time we've created this IFunc, make sure
// that we put this multiversioned function into the list to be
// replaced later if necessary (target multiversioning only).
if (FD->isTargetMultiVersion())
// The resolver needs to be created. For target and target_clones, defer
// creation until the end of the TU.
if (FD->isTargetMultiVersion() || FD->isTargetClonesMultiVersion())
MultiVersionFuncs.push_back(GD);
else if (FD->isTargetClonesMultiVersion()) {
// In target_clones multiversioning, make sure we emit this if used.
auto DDI =
DeferredDecls.find(getMangledName(GD.getWithMultiVersionIndex(0)));
if (DDI != DeferredDecls.end()) {
addDeferredDeclToEmit(GD);
DeferredDecls.erase(DDI);
} else {
// Emit the symbol of the 1st variant, so that the deferred decls know we
// need it, otherwise the only global value will be the resolver/ifunc,
// which end up getting broken if we search for them with GetGlobalValue'.
GetOrCreateLLVMFunction(
getMangledName(GD.getWithMultiVersionIndex(0)), DeclTy, FD,
/*ForVTable=*/false, /*DontDefer=*/true,
/*IsThunk=*/false, llvm::AttributeList(), ForDefinition);
}
}

// For cpu_specific, don't create an ifunc yet because we don't know if the
// cpu_dispatch will be emitted in this translation unit.
Expand Down
1 change: 0 additions & 1 deletion clang/lib/CodeGen/CodeGenModule.h
Expand Up @@ -1505,7 +1505,6 @@ class CodeGenModule : public CodeGenTypeCache {
void EmitAliasDefinition(GlobalDecl GD);
void emitIFuncDefinition(GlobalDecl GD);
void emitCPUDispatchDefinition(GlobalDecl GD);
void EmitTargetClonesResolver(GlobalDecl GD);
void EmitObjCPropertyImplementations(const ObjCImplementationDecl *D);
void EmitObjCIvarInitializations(ObjCImplementationDecl *D);

Expand Down
67 changes: 51 additions & 16 deletions clang/test/CodeGen/attr-target-clones.c
Expand Up @@ -18,30 +18,31 @@
// LINUX: @unused.ifunc = weak_odr ifunc void (), void ()* ()* @unused.resolver
// LINUX: @foo_inline.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline.resolver
// LINUX: @foo_inline2.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_inline2.resolver
// LINUX: @foo_used_no_defn.ifunc = weak_odr ifunc i32 (), i32 ()* ()* @foo_used_no_defn.resolver

int __attribute__((target_clones("sse4.2, default"))) foo(void) { return 0; }
// LINUX: define {{.*}}i32 @foo.sse4.2.0()
// LINUX: define {{.*}}i32 @foo.default.1()
// LINUX: define i32 ()* @foo.resolver() comdat
// LINUX: define weak_odr i32 ()* @foo.resolver() comdat
// LINUX: ret i32 ()* @foo.sse4.2.0
// LINUX: ret i32 ()* @foo.default.1

// WINDOWS: define dso_local i32 @foo.sse4.2.0()
// WINDOWS: define dso_local i32 @foo.default.1()
// WINDOWS: define dso_local i32 @foo() comdat
// WINDOWS: define weak_odr dso_local i32 @foo() comdat
// WINDOWS: musttail call i32 @foo.sse4.2.0
// WINDOWS: musttail call i32 @foo.default.1

__attribute__((target_clones("default,default ,sse4.2"))) void foo_dupes(void) {}
// LINUX: define {{.*}}void @foo_dupes.default.1()
// LINUX: define {{.*}}void @foo_dupes.sse4.2.0()
// LINUX: define void ()* @foo_dupes.resolver() comdat
// LINUX: define weak_odr void ()* @foo_dupes.resolver() comdat
// LINUX: ret void ()* @foo_dupes.sse4.2.0
// LINUX: ret void ()* @foo_dupes.default.1

// WINDOWS: define dso_local void @foo_dupes.default.1()
// WINDOWS: define dso_local void @foo_dupes.sse4.2.0()
// WINDOWS: define dso_local void @foo_dupes() comdat
// WINDOWS: define weak_odr dso_local void @foo_dupes() comdat
// WINDOWS: musttail call void @foo_dupes.sse4.2.0
// WINDOWS: musttail call void @foo_dupes.default.1

Expand All @@ -64,13 +65,13 @@ int bar(void) {
void __attribute__((target_clones("default, arch=ivybridge"))) unused(void) {}
// LINUX: define {{.*}}void @unused.default.1()
// LINUX: define {{.*}}void @unused.arch_ivybridge.0()
// LINUX: define void ()* @unused.resolver() comdat
// LINUX: define weak_odr void ()* @unused.resolver() comdat
// LINUX: ret void ()* @unused.arch_ivybridge.0
// LINUX: ret void ()* @unused.default.1

// WINDOWS: define dso_local void @unused.default.1()
// WINDOWS: define dso_local void @unused.arch_ivybridge.0()
// WINDOWS: define dso_local void @unused() comdat
// WINDOWS: define weak_odr dso_local void @unused() comdat
// WINDOWS: musttail call void @unused.arch_ivybridge.0
// WINDOWS: musttail call void @unused.default.1

Expand All @@ -90,46 +91,80 @@ int bar3(void) {
// WINDOWS: call i32 @foo_inline2()
}

// Deferred emission of foo_inline, which got delayed because it is inline.
// LINUX: define i32 ()* @foo_inline.resolver() comdat
// LINUX: define weak_odr i32 ()* @foo_inline.resolver() comdat
// LINUX: ret i32 ()* @foo_inline.arch_sandybridge.0
// LINUX: ret i32 ()* @foo_inline.sse4.2.1
// LINUX: ret i32 ()* @foo_inline.default.2

// WINDOWS: define dso_local i32 @foo_inline() comdat
// WINDOWS: define weak_odr dso_local i32 @foo_inline() comdat
// WINDOWS: musttail call i32 @foo_inline.arch_sandybridge.0
// WINDOWS: musttail call i32 @foo_inline.sse4.2.1
// WINDOWS: musttail call i32 @foo_inline.default.2

inline int __attribute__((target_clones("arch=sandybridge,default,sse4.2")))
foo_inline2(void){ return 0; }
// LINUX: define linkonce i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
// LINUX: define i32 ()* @foo_inline2.resolver() comdat
// LINUX: define weak_odr i32 ()* @foo_inline2.resolver() comdat
// LINUX: ret i32 ()* @foo_inline2.arch_sandybridge.0
// LINUX: ret i32 ()* @foo_inline2.sse4.2.1
// LINUX: ret i32 ()* @foo_inline2.default.2

// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.arch_sandybridge.0() #[[SB:[0-9]+]]
// WINDOWS: define dso_local i32 @foo_inline2() comdat
// WINDOWS: define weak_odr dso_local i32 @foo_inline2() comdat
// WINDOWS: musttail call i32 @foo_inline2.arch_sandybridge.0
// WINDOWS: musttail call i32 @foo_inline2.sse4.2.1
// WINDOWS: musttail call i32 @foo_inline2.default.2

// LINUX: define linkonce i32 @foo_inline.arch_sandybridge.0() #[[SB]]
// LINUX: define linkonce i32 @foo_inline.default.2() #[[DEF]]

int __attribute__((target_clones("default", "sse4.2")))
foo_unused_no_defn(void);

int __attribute__((target_clones("default", "sse4.2")))
foo_used_no_defn(void);

int test_foo_used_no_defn(void) {
// LINUX: define {{.*}}i32 @test_foo_used_no_defn()
// WINDOWS: define dso_local i32 @test_foo_used_no_defn()
return foo_used_no_defn();
// LINUX: call i32 @foo_used_no_defn.ifunc()
// WINDOWS: call i32 @foo_used_no_defn()
}


// LINUX: define weak_odr i32 ()* @foo_used_no_defn.resolver() comdat
// LINUX: ret i32 ()* @foo_used_no_defn.sse4.2.0
// LINUX: ret i32 ()* @foo_used_no_defn.default.1

// WINDOWS: define weak_odr dso_local i32 @foo_used_no_defn() comdat
// WINDOWS: musttail call i32 @foo_used_no_defn.sse4.2.0
// WINDOWS: musttail call i32 @foo_used_no_defn.default.1


// Deferred emission of inline definitions.

// LINUX: define linkonce i32 @foo_inline.arch_sandybridge.0() #[[SB:[0-9]+]]
// LINUX: define linkonce i32 @foo_inline.default.2() #[[DEF:[0-9]+]]
// LINUX: define linkonce i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]

// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_sandybridge.0() #[[SB]]
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.arch_sandybridge.0() #[[SB:[0-9]+]]
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.default.2() #[[DEF]]
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline.sse4.2.1() #[[SSE42:[0-9]+]]


// LINUX: define linkonce i32 @foo_inline2.arch_sandybridge.0() #[[SB]]
// LINUX: define linkonce i32 @foo_inline2.default.2() #[[DEF]]
// LINUX: define linkonce i32 @foo_inline2.sse4.2.1() #[[SSE42]]

// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.arch_sandybridge.0() #[[SB]]
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.default.2() #[[DEF]]
// WINDOWS: define linkonce_odr dso_local i32 @foo_inline2.sse4.2.1() #[[SSE42]]


// LINUX: declare i32 @foo_used_no_defn.default.1()
// LINUX: declare i32 @foo_used_no_defn.sse4.2.0()

// WINDOWS: declare dso_local i32 @foo_used_no_defn.default.1()
// WINDOWS: declare dso_local i32 @foo_used_no_defn.sse4.2.0()


// CHECK: attributes #[[SSE42]] =
// CHECK-SAME: "target-features"="+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87"
// CHECK: attributes #[[DEF]] =
Expand Down

0 comments on commit 5531aba

Please sign in to comment.