diff --git a/clang/include/clang/Basic/TargetInfo.h b/clang/include/clang/Basic/TargetInfo.h index 34884f621e770..94da7ac593a07 100644 --- a/clang/include/clang/Basic/TargetInfo.h +++ b/clang/include/clang/Basic/TargetInfo.h @@ -1375,6 +1375,13 @@ class TargetInfo : public virtual TransferrableTargetInfo, "cpu_specific Multiversioning not implemented on this target"); } + // Get the value for the 'tune-cpu' flag for a cpu_specific variant with the + // programmer-specified 'Name'. + virtual StringRef getCPUSpecificTuneName(StringRef Name) const { + llvm_unreachable( + "cpu_specific Multiversioning not implemented on this target"); + } + // Get a list of the features that make up the CPU option for // cpu_specific/cpu_dispatch so that it can be passed to llvm as optimization // options. diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index 5c4bd364b06a3..1ec2bb9c249f0 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1095,22 +1095,22 @@ unsigned X86TargetInfo::multiVersionSortPriority(StringRef Name) const { bool X86TargetInfo::validateCPUSpecificCPUDispatch(StringRef Name) const { return llvm::StringSwitch(Name) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, true) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, true) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, true) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } static StringRef CPUSpecificCPUDispatchNameDealias(StringRef Name) { return llvm::StringSwitch(Name) -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) .Case(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, NAME) #include "llvm/Support/X86TargetParser.def" .Default(Name); } char X86TargetInfo::CPUSpecificManglingCharacter(StringRef Name) const { return llvm::StringSwitch(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, MANGLING) #include "llvm/Support/X86TargetParser.def" .Default(0); } @@ -1119,12 +1119,20 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures( StringRef Name, llvm::SmallVectorImpl &Features) const { StringRef WholeList = llvm::StringSwitch(CPUSpecificCPUDispatchNameDealias(Name)) -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, FEATURES) #include "llvm/Support/X86TargetParser.def" .Default(""); WholeList.split(Features, ',', /*MaxSplit=*/-1, /*KeepEmpty=*/false); } +StringRef X86TargetInfo::getCPUSpecificTuneName(StringRef Name) const { + return llvm::StringSwitch(Name) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) .Case(NAME, TUNE_NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) .Case(NEW_NAME, TUNE_NAME) +#include "llvm/Support/X86TargetParser.def" + .Default(""); +} + // We can't use a generic validation scheme for the cpus accepted here // versus subtarget cpus accepted in the target attribute because the // variables intitialized by the runtime only support the below currently diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h index e0bb3c344c5b6..6b49b0f28bfc4 100644 --- a/clang/lib/Basic/Targets/X86.h +++ b/clang/lib/Basic/Targets/X86.h @@ -201,6 +201,8 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo { StringRef Name, llvm::SmallVectorImpl &Features) const override; + StringRef getCPUSpecificTuneName(StringRef Name) const override; + Optional getCPUCacheLineSize() const override; bool validateAsmConstraint(const char *&Name, diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 9c76648f5f193..ad52c0df9b7ed 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -2060,6 +2060,13 @@ bool CodeGenModule::GetCPUAndFeaturesAttributes(GlobalDecl GD, getTarget().isValidCPUName(ParsedAttr.Tune)) TuneCPU = ParsedAttr.Tune; } + + if (SD) { + // Apply the given CPU name as the 'tune-cpu' so that the optimizer can + // favor this processor. + TuneCPU = getTarget().getCPUSpecificTuneName( + SD->getCPUName(GD.getMultiVersionIndex())->getName()); + } } else { // Otherwise just add the existing target cpu and target features to the // function. diff --git a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c index ad9c82b5dbc3b..9089035a70995 100644 --- a/clang/test/CodeGen/attr-cpuspecific-avx-abi.c +++ b/clang/test/CodeGen/attr-cpuspecific-avx-abi.c @@ -23,4 +23,6 @@ __m256d foo(void) { return bar_avx2(); } // CHECK: define{{.*}} @foo.V() #[[V:[0-9]+]] // CHECK: attributes #[[A]] = {{.*}}"target-features"="+avx,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="generic" // CHECK: attributes #[[V]] = {{.*}}"target-features"="+avx,+avx2,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="haswell" diff --git a/clang/test/CodeGen/attr-cpuspecific.c b/clang/test/CodeGen/attr-cpuspecific.c index 15ecd77de2a64..90fb1307523ec 100644 --- a/clang/test/CodeGen/attr-cpuspecific.c +++ b/clang/test/CodeGen/attr-cpuspecific.c @@ -340,5 +340,8 @@ ATTR(cpu_specific(knl)) void OrderDispatchUsageSpecific(void) {} // CHECK: attributes #[[S]] = {{.*}}"target-features"="+avx,+cmov,+crc32,+cx8,+f16c,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="ivybridge" // CHECK: attributes #[[K]] = {{.*}}"target-features"="+adx,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+cmov,+crc32,+cx8,+f16c,+fma,+lzcnt,+mmx,+movbe,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" +// CHECK-SAME: "tune-cpu"="knl" // CHECK: attributes #[[O]] = {{.*}}"target-features"="+cmov,+cx8,+mmx,+movbe,+sse,+sse2,+sse3,+ssse3,+x87" +// CHECK-SAME: "tune-cpu"="atom" diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 4443d822d3e8c..58fa3b3842e7a 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -211,47 +211,47 @@ X86_FEATURE (LVI_LOAD_HARDENING, "lvi-load-hardening") #undef X86_FEATURE #ifndef CPU_SPECIFIC -#define CPU_SPECIFIC(NAME, MANGLING, FEATURES) +#define CPU_SPECIFIC(NAME, TUNE_NAME, MANGLING, FEATURES) #endif #ifndef CPU_SPECIFIC_ALIAS -#define CPU_SPECIFIC_ALIAS(NEW_NAME, NAME) +#define CPU_SPECIFIC_ALIAS(NEW_NAME, TUNE_NAME, NAME) #endif -CPU_SPECIFIC("generic", 'A', "") -CPU_SPECIFIC("pentium", 'B', "") -CPU_SPECIFIC("pentium_pro", 'C', "+cmov") -CPU_SPECIFIC("pentium_mmx", 'D', "+mmx") -CPU_SPECIFIC("pentium_ii", 'E', "+cmov,+mmx") -CPU_SPECIFIC("pentium_iii", 'H', "+cmov,+mmx,+sse") -CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium_iii") -CPU_SPECIFIC("pentium_4", 'J', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_m", 'K', "+cmov,+mmx,+sse,+sse2") -CPU_SPECIFIC("pentium_4_sse3", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") -CPU_SPECIFIC("core_2_duo_ssse3", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") -CPU_SPECIFIC("core_2_duo_sse4_1", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") -CPU_SPECIFIC("atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") -CPU_SPECIFIC("atom_sse4_2", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_i7_sse4_2", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("core_aes_pclmulqdq", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") -CPU_SPECIFIC("atom_sse4_2_movbe", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") -CPU_SPECIFIC("sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") -CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge") -CPU_SPECIFIC("ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") -CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge") -CPU_SPECIFIC("haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell") -CPU_SPECIFIC("core_4th_gen_avx_tsx", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") -CPU_SPECIFIC("broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell") -CPU_SPECIFIC("core_5th_gen_avx_tsx", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") -CPU_SPECIFIC("knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") -CPU_SPECIFIC_ALIAS("mic_avx512", "knl") -CPU_SPECIFIC("skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") -CPU_SPECIFIC( "skylake_avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") -CPU_SPECIFIC("cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") -CPU_SPECIFIC("knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") +CPU_SPECIFIC("generic", "generic", 'A', "") +CPU_SPECIFIC("pentium", "pentium", 'B', "") +CPU_SPECIFIC("pentium_pro", "pentiumpro", 'C', "+cmov") +CPU_SPECIFIC("pentium_mmx", "pentium-mmx", 'D', "+mmx") +CPU_SPECIFIC("pentium_ii", "pentium2", 'E', "+cmov,+mmx") +CPU_SPECIFIC("pentium_iii", "pentium3", 'H', "+cmov,+mmx,+sse") +CPU_SPECIFIC_ALIAS("pentium_iii_no_xmm_regs", "pentium3", "pentium_iii") +CPU_SPECIFIC("pentium_4", "pentium4", 'J', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_m", "pentium-m", 'K', "+cmov,+mmx,+sse,+sse2") +CPU_SPECIFIC("pentium_4_sse3", "prescott", 'L', "+cmov,+mmx,+sse,+sse2,+sse3") +CPU_SPECIFIC("core_2_duo_ssse3", "core2", 'M', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3") +CPU_SPECIFIC("core_2_duo_sse4_1", "penryn", 'N', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1") +CPU_SPECIFIC("atom", "atom", 'O', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+movbe") +CPU_SPECIFIC("atom_sse4_2", "silvermont", 'c', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_i7_sse4_2", "nehalem", 'P', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("core_aes_pclmulqdq", "westmere", 'Q', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt") +CPU_SPECIFIC("atom_sse4_2_movbe", "silvermont", 'd', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("goldmont", "goldmont", 'i', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt") +CPU_SPECIFIC("sandybridge", "sandybridge", 'R', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+avx") +CPU_SPECIFIC_ALIAS("core_2nd_gen_avx", "sandybridge", "sandybridge") +CPU_SPECIFIC("ivybridge", "ivybridge", 'S', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+popcnt,+f16c,+avx") +CPU_SPECIFIC_ALIAS("core_3rd_gen_avx", "ivybridge", "ivybridge") +CPU_SPECIFIC("haswell", "haswell", 'V', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC_ALIAS("core_4th_gen_avx", "haswell", "haswell") +CPU_SPECIFIC("core_4th_gen_avx_tsx", "haswell", 'W', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2") +CPU_SPECIFIC("broadwell", "broadwell", 'X', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC_ALIAS("core_5th_gen_avx", "broadwell", "broadwell") +CPU_SPECIFIC("core_5th_gen_avx_tsx", "broadwell", 'Y', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx") +CPU_SPECIFIC("knl", "knl", 'Z', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd") +CPU_SPECIFIC_ALIAS("mic_avx512", "knl", "knl") +CPU_SPECIFIC("skylake", "skylake", 'b', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+adx,+mpx") +CPU_SPECIFIC( "skylake_avx512", "skylake-avx512", 'a', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512cd,+avx512bw,+avx512vl,+clwb") +CPU_SPECIFIC("cannonlake", "cannonlake", 'e', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512dq,+avx512f,+adx,+avx512ifma,+avx512cd,+avx512bw,+avx512vl,+avx512vbmi") +CPU_SPECIFIC("knm", "knm", 'j', "+cmov,+mmx,+sse,+sse2,+sse3,+ssse3,+sse4.1,+sse4.2,+movbe,+popcnt,+f16c,+avx,+fma,+bmi,+lzcnt,+avx2,+avx512f,+adx,+avx512er,+avx512pf,+avx512cd,+avx5124fmaps,+avx5124vnniw,+avx512vpopcntdq") #undef CPU_SPECIFIC_ALIAS #undef CPU_SPECIFIC