diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 28c211aa631e4..3f6e171c67345 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5437,6 +5437,94 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
   const CGCallee &ConcreteCallee = Callee.prepareConcreteCallee(*this);
   llvm::Value *CalleePtr = ConcreteCallee.getFunctionPointer();
 
+  // If a multi-versioned caller calls a multi-versioned callee, skip the
+  // resolver when there is a precise match on the feature sets, and no
+  // possibility of a better match at runtime.
+  if (const auto *CallerFD = dyn_cast_or_null<FunctionDecl>(CurGD.getDecl()))
+    if (CGM.getCodeGenOpts().OptimizationLevel > 0 &&
+        !CallerFD->hasAttr<OptimizeNoneAttr>())
+      if (const auto *CallerTVA = CallerFD->getAttr<TargetVersionAttr>())
+        if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(TargetDecl))
+          // FIXME: do the same where either the caller or callee are
+          // target_clones.
+          if (FD->isTargetMultiVersion()) {
+            // Sorted so that feature sets compare equal regardless of the
+            // order in which the attribute spelled them.
+            llvm::SmallVector<StringRef, 8> CallerFeats;
+            CallerTVA->getFeatures(CallerFeats);
+            llvm::sort(CallerFeats);
+            MultiVersionResolverOption CallerMVRO(nullptr, "", CallerFeats);
+
+            // The caller's priority does not depend on the callee version, so
+            // compute it once instead of on every callback invocation.
+            const TargetInfo &TI = getTarget();
+            const unsigned CallerPriority = CallerMVRO.priority(TI);
+
+            bool HasHigherPriorityCallee = false;
+            llvm::Constant *FoundMatchingCallee = nullptr;
+            getContext().forEachMultiversionedFunctionVersion(
+                FD, [this, &TI, &CallerFeats, CallerPriority,
+                     &HasHigherPriorityCallee,
+                     &FoundMatchingCallee](const FunctionDecl *CurFD) {
+                  // A version we cannot rank might be preferred at runtime, so
+                  // treat it as blocking and keep going through the resolver.
+                  const auto *CalleeTVA = CurFD->getAttr<TargetVersionAttr>();
+                  if (!CalleeTVA) {
+                    HasHigherPriorityCallee = true;
+                    return;
+                  }
+
+                  // Deliberately not named CurGD: that would shadow
+                  // CodeGenFunction::CurGD, which the enclosing check reads.
+                  GlobalDecl CalleeGD{
+                      (CurFD->isDefined() ? CurFD->getDefinition() : CurFD)};
+                  StringRef MangledName = CGM.getMangledName(CurFD);
+
+                  llvm::SmallVector<StringRef, 8> CalleeFeats;
+                  CalleeTVA->getFeatures(CalleeFeats);
+                  llvm::sort(CalleeFeats);
+                  MultiVersionResolverOption CalleeMVRO(nullptr, "",
+                                                        CalleeFeats);
+                  const unsigned CalleePriority = CalleeMVRO.priority(TI);
+
+                  // FIXME: we could allow a lower-priority match when the
+                  // features are a proper subset. But for now, to keep things
+                  // simpler, we only care about a precise match.
+                  if (CalleePriority < CallerPriority)
+                    return;
+
+                  // If there is a higher priority callee, we can't do the
+                  // optimization at all, as it would be a valid choice at
+                  // runtime. Equal priority does not imply equal feature sets
+                  // (priority only records the best feature and the feature
+                  // count), so a tie with different features blocks it too.
+                  if (CalleePriority > CallerPriority ||
+                      CalleeFeats != CallerFeats) {
+                    HasHigherPriorityCallee = true;
+                    return;
+                  }
+
+                  if (llvm::Constant *Func = CGM.GetGlobalValue(MangledName)) {
+                    FoundMatchingCallee = Func;
+                    return;
+                  }
+
+                  if (CurFD->isDefined()) {
+                    // FIXME: not sure how to get the address
+                  } else {
+                    const CGFunctionInfo &FI =
+                        getTypes().arrangeGlobalDeclaration(CalleeGD);
+                    llvm::FunctionType *Ty = getTypes().GetFunctionType(FI);
+                    FoundMatchingCallee = CGM.GetAddrOfFunction(
+                        CalleeGD, Ty, /*ForVTable=*/false,
+                        /*DontDefer=*/false, ForDefinition);
+                  }
+                });
+
+            if (FoundMatchingCallee && !HasHigherPriorityCallee)
+              CalleePtr = FoundMatchingCallee;
+          }
+
   // If we're using inalloca, set up that argument.
   if (ArgMemory.isValid()) {
     llvm::Value *Arg = ArgMemory.getPointer();
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp
index 1ad905078d349..f2c93b5e5398b 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -2697,6 +2697,24 @@ void CodeGenFunction::EmitSanitizerStatReport(llvm::SanitizerStatKind SSK) {
   CGM.getSanStats().create(IRB, SSK);
 }
 
+unsigned CodeGenFunction::MultiVersionResolverOption::priority(
+    const TargetInfo &TI) const {
+  unsigned Priority = 0;
+  unsigned NumFeatures = 0;
+  for (StringRef Feat : Conditions.Features) {
+    Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
+    NumFeatures++;
+  }
+
+  if (!Conditions.Architecture.empty())
+    Priority = std::max(Priority,
+                        TI.multiVersionSortPriority(Conditions.Architecture));
+
+  Priority += TI.multiVersionFeatureCost() * NumFeatures;
+
+  return Priority;
+}
+
 void CodeGenFunction::EmitKCFIOperandBundle(
     const CGCallee &Callee, SmallVectorImpl<llvm::OperandBundleDef> &Bundles) {
   const FunctionProtoType *FP =
diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h
index 143ad64e8816b..525852437dbb8 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -4965,6 +4965,11 @@ class CodeGenFunction : public CodeGenTypeCache {
     MultiVersionResolverOption(llvm::Function *F, StringRef Arch,
                                ArrayRef<StringRef> Feats)
         : Function(F), Conditions(Arch, Feats) {}
+
+    /// Ranks this option against others for the same function: the highest
+    /// multiVersionSortPriority among its features/architecture, plus
+    /// multiVersionFeatureCost for each feature. Larger values sort first.
+    unsigned priority(const TargetInfo &TI) const;
   };
 
   // Emits the body of a multiversion function's resolver. Assumes that the
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 6ec54cc01c923..d6abd4cc9454d 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -4092,25 +4092,6 @@ void CodeGenModule::EmitGlobalDefinition(GlobalDecl GD, llvm::GlobalValue *GV) {
 static void ReplaceUsesOfNonProtoTypeWithRealFunction(llvm::GlobalValue *Old,
                                                       llvm::Function *NewFn);
 
-static unsigned
-TargetMVPriority(const TargetInfo &TI,
-                 const CodeGenFunction::MultiVersionResolverOption &RO) {
-  unsigned Priority = 0;
-  unsigned NumFeatures = 0;
-  for (StringRef Feat : RO.Conditions.Features) {
-    Priority = std::max(Priority, TI.multiVersionSortPriority(Feat));
-    NumFeatures++;
-  }
-
-  if (!RO.Conditions.Architecture.empty())
-    Priority = std::max(
-        Priority, TI.multiVersionSortPriority(RO.Conditions.Architecture));
-
-  Priority += TI.multiVersionFeatureCost() * NumFeatures;
-
-  return Priority;
-}
-
 // Multiversion functions should be at most 'WeakODRLinkage' so that a different
 // TU can forward declare the function without causing problems. Particularly
 // in the cases of CPUDispatch, this causes issues. This also makes sure we
@@ -4244,7 +4225,7 @@ void CodeGenModule::emitMultiVersionFunctions() {
     llvm::stable_sort(
         Options, [&TI](const CodeGenFunction::MultiVersionResolverOption &LHS,
                        const CodeGenFunction::MultiVersionResolverOption &RHS) {
-          return TargetMVPriority(TI, LHS) > TargetMVPriority(TI, RHS);
+          return LHS.priority(TI) > RHS.priority(TI);
         });
     CodeGenFunction CGF(*this);
     CGF.EmitMultiVersionResolver(ResolverFunc, Options);
diff --git a/clang/test/CodeGen/attr-target-mv-direct-call.c b/clang/test/CodeGen/attr-target-mv-direct-call.c
new file mode 100644
index 0000000000000..c856ae0d4f8cb
--- /dev/null
+++ b/clang/test/CodeGen/attr-target-mv-direct-call.c
@@ -0,0 +1,57 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O0 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O0
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -O2 -S -emit-llvm -disable-llvm-optzns -o - %s | FileCheck %s --check-prefixes=CHECK,O2
+
+
+// Check that we make a direct call from direct_caller._Msimd to
+// direct_callee._Msimd when there is no better option.
+__attribute__((target_version("simd"))) int direct_callee(void) { return 1; }
+__attribute__((target_version("default"))) int direct_callee(void) { return 2; }
+__attribute__((target_version("simd"))) int direct_caller(void) { return direct_callee(); }
+__attribute__((target_version("default"))) int direct_caller(void) { return direct_callee(); }
+// O0-LABEL: @direct_caller._Msimd(
+// O0: = call i32 @direct_callee.ifunc()
+// O2-LABEL: @direct_caller._Msimd(
+// O2: = call i32 @direct_callee._Msimd()
+
+
+__attribute__((target_version("simd"), optnone)) int optnone_caller(void) { return direct_callee(); }
+__attribute__((target_version("default"), optnone)) int optnone_caller(void) { return direct_callee(); }
+// CHECK-LABEL: @optnone_caller._Msimd(
+// CHECK: = call i32 @direct_callee.ifunc()
+
+
+// ... and that we go through the ifunc+resolver when there is a better option
+// that might be chosen at runtime.
+__attribute__((target_version("simd"))) int resolved_callee1(void) { return 3; }
+__attribute__((target_version("fcma"))) int resolved_callee1(void) { return 4; }
+__attribute__((target_version("default"))) int resolved_callee1(void) { return 5; }
+__attribute__((target_version("simd"))) int resolved_caller1(void) { return resolved_callee1(); }
+__attribute__((target_version("default"))) int resolved_caller1(void) { return resolved_callee1(); }
+// CHECK-LABEL: @resolved_caller1._Msimd(
+// CHECK: = call i32 @resolved_callee1.ifunc()
+
+
+// FIXME: we could direct call in cases like this:
+__attribute__((target_version("fp"))) int resolved_callee2(void) { return 6; }
+__attribute__((target_version("default"))) int resolved_callee2(void) { return 7; }
+__attribute__((target_version("simd+fp"))) int resolved_caller2(void) { return resolved_callee2(); }
+__attribute__((target_version("default"))) int resolved_caller2(void) { return resolved_callee2(); }
+// CHECK-LABEL: @resolved_caller2._MfpMsimd(
+// CHECK: = call i32 @resolved_callee2.ifunc()
+
+
+// CHECK: @direct_caller.default(
+// CHECK: = call i32 @direct_callee.ifunc()
+// CHECK-LABEL: @optnone_caller.default(
+// CHECK: = call i32 @direct_callee.ifunc()
+// CHECK-LABEL: @resolved_caller1.default(
+// CHECK: = call i32 @resolved_callee1.ifunc()
+// CHECK-LABEL: @resolved_caller2.default(
+// CHECK: = call i32 @resolved_callee2.ifunc()
+
+int source(void) {
+  return direct_caller() +
+         optnone_caller() +
+         resolved_caller1() +
+         resolved_caller2();
+}