Skip to content

Commit 0ed6136

Browse files
author
Erich Keane
committed
Ensure target-multiversioning emits deferred declarations
As reported in PR50025, we would sometimes fail to emit functions needed by inline multiversioned variants. Such functions are normally emitted through the 'deferred decl' mechanism, but the variants are typically emitted only after the deferred decls have already been processed. This fixes that by re-running the deferred decl emission after the multiversion functions are emitted. Also, the multiversion emission is now done recursively, so that MV functions that require other MV functions to be emitted get emitted as well.
1 parent 83a25a1 commit 0ed6136

File tree

2 files changed

+37
-1
lines changed

2 files changed

+37
-1
lines changed

clang/lib/CodeGen/CodeGenModule.cpp

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3189,7 +3189,9 @@ TargetMVPriority(const TargetInfo &TI,
31893189
}
31903190

31913191
void CodeGenModule::emitMultiVersionFunctions() {
3192-
for (GlobalDecl GD : MultiVersionFuncs) {
3192+
std::vector<GlobalDecl> MVFuncsToEmit;
3193+
MultiVersionFuncs.swap(MVFuncsToEmit);
3194+
for (GlobalDecl GD : MVFuncsToEmit) {
31933195
SmallVector<CodeGenFunction::MultiVersionResolverOption, 10> Options;
31943196
const FunctionDecl *FD = cast<FunctionDecl>(GD.getDecl());
31953197
getContext().forEachMultiversionedFunctionVersion(
@@ -3243,6 +3245,17 @@ void CodeGenModule::emitMultiVersionFunctions() {
32433245
CodeGenFunction CGF(*this);
32443246
CGF.EmitMultiVersionResolver(ResolverFunc, Options);
32453247
}
3248+
3249+
// Ensure that any additions to the deferred decls list caused by emitting a
3250+
// variant are emitted. This can happen when the variant itself is inline and
3251+
// calls a function without linkage.
3252+
if (!MVFuncsToEmit.empty())
3253+
EmitDeferred();
3254+
3255+
// Ensure that any additions to the multiversion funcs list from either the
3256+
// deferred decls or the multiversion functions themselves are emitted.
3257+
if (!MultiVersionFuncs.empty())
3258+
emitMultiVersionFunctions();
32463259
}
32473260

32483261
void CodeGenModule::emitCPUDispatchDefinition(GlobalDecl GD) {

clang/test/CodeGen/attr-target-mv.c

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,16 @@ __attribute__((target("avx,sse4.2"))) inline void foo_used(int i, double d) {}
6666
__attribute__((target("default"))) inline void foo_used2(int i, double d) {}
6767
__attribute__((target("avx,sse4.2"), used)) inline void foo_used2(int i, double d) {}
6868

69+
// PR50025:
70+
static void must_be_emitted(void) {}
71+
inline __attribute__((target("default"))) void pr50025(void) { must_be_emitted(); }
72+
void calls_pr50025() { pr50025(); }
73+
74+
// Also need to make sure we get other multiversion functions.
75+
inline __attribute__((target("default"))) void pr50025b(void) { must_be_emitted(); }
76+
inline __attribute__((target("default"))) void pr50025c(void) { pr50025b(); }
77+
void calls_pr50025c() { pr50025c(); }
78+
6979
// LINUX: @llvm.compiler.used = appending global [2 x i8*] [i8* bitcast (void (i32, double)* @foo_used to i8*), i8* bitcast (void (i32, double)* @foo_used2.avx_sse4.2 to i8*)], section "llvm.metadata"
7080
// WINDOWS: @llvm.used = appending global [2 x i8*] [i8* bitcast (void (i32, double)* @foo_used to i8*), i8* bitcast (void (i32, double)* @foo_used2.avx_sse4.2 to i8*)], section "llvm.metadata"
7181

@@ -300,3 +310,16 @@ __attribute__((target("avx,sse4.2"), used)) inline void foo_used2(int i, double
300310
// WINDOWS: define linkonce_odr dso_local void @foo_multi.avx_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
301311
// WINDOWS: define linkonce_odr dso_local void @foo_multi.fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
302312
// WINDOWS: define linkonce_odr dso_local void @foo_multi.arch_ivybridge_fma4_sse4.2(i32 %{{[^,]+}}, double %{{[^\)]+}})
313+
314+
// Ensure that we emit the 'static' function here.
315+
// LINUX: define linkonce void @pr50025()
316+
// LINUX: call void @must_be_emitted
317+
// LINUX: define internal void @must_be_emitted()
318+
// WINDOWS: define linkonce_odr dso_local void @pr50025()
319+
// WINDOWS: call void @must_be_emitted
320+
// WINDOWS: define internal void @must_be_emitted()
321+
322+
// LINUX: define linkonce void @pr50025c()
323+
// LINUX: define linkonce void @pr50025b()
324+
// WINDOWS: define linkonce_odr dso_local void @pr50025c()
325+
// WINDOWS: define linkonce_odr dso_local void @pr50025b()

0 commit comments

Comments
 (0)