diff --git a/clang/test/CodeGen/target-builtin-noerror.c b/clang/test/CodeGen/target-builtin-noerror.c index 505f4a3e94565..06bb40a2b71ea 100644 --- a/clang/test/CodeGen/target-builtin-noerror.c +++ b/clang/test/CodeGen/target-builtin-noerror.c @@ -82,6 +82,8 @@ void verifyfeaturestrings(void) { (void)__builtin_cpu_supports("avx512bitalg"); (void)__builtin_cpu_supports("avx512bf16"); (void)__builtin_cpu_supports("avx512vp2intersect"); + (void)__builtin_cpu_supports("f16c"); + (void)__builtin_cpu_supports("avx512fp16"); } void verifycpustrings(void) { diff --git a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp index ef2498bd7e14c..183eb4fb6ac61 100644 --- a/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp +++ b/clang/test/CodeGenCXX/attr-cpuspecific-outoflinedefs.cpp @@ -80,8 +80,8 @@ OutOfLineDefs::foo(int, int, int) { // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.S // LINUX: define dso_local noundef i32 @_ZN13OutOfLineDefs3fooEiii.R // LINUX: define weak_odr ptr @_ZN13OutOfLineDefs3fooEiii.resolver() -// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.S +// LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.R // LINUX: ret ptr @_ZN13OutOfLineDefs3fooEiii.O // LINUX: call void @llvm.trap // LINUX: define linkonce_odr noundef i32 @_ZN13OutOfLineDefs3fooEiii.O @@ -89,8 +89,8 @@ OutOfLineDefs::foo(int, int, int) { // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S" // WINDOWS: define dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R" // WINDOWS: define weak_odr dso_local i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z"(ptr %0, i32 %1, i32 %2, i32 %3) -// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, i32 %1, i32 %2, i32 %3) // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.S"(ptr %0, i32 %1, i32 %2, i32 %3) +// WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.R"(ptr %0, 
i32 %1, i32 %2, i32 %3) // WINDOWS: musttail call i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O"(ptr %0, i32 %1, i32 %2, i32 %3) // WINDOWS: call void @llvm.trap // WINDOWS: define linkonce_odr dso_local noundef i32 @"?foo@OutOfLineDefs@@QEAAHHHH@Z.O" diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index 9d9a5d3f1542c..0750e29f989a8 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -148,7 +148,8 @@ enum ProcessorFeatures { FEATURE_LZCNT, FEATURE_MOVBE, - FEATURE_X86_64_BASELINE = 95, + FEATURE_AVX512FP16 = 94, + FEATURE_X86_64_BASELINE, FEATURE_X86_64_V2, FEATURE_X86_64_V3, FEATURE_X86_64_V4, @@ -812,6 +813,8 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, setFeature(FEATURE_AVX5124FMAPS); if (HasLeaf7 && ((EDX >> 8) & 1) && HasAVX512Save) setFeature(FEATURE_AVX512VP2INTERSECT); + if (HasLeaf7 && ((EDX >> 23) & 1) && HasAVX512Save) + setFeature(FEATURE_AVX512FP16); // EAX from subleaf 0 is the maximum subleaf supported. Some CPUs don't // return all 0s for invalid subleaves so check the limit. diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index b58feafe4e8c2..43162f2b52eba 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -122,6 +122,7 @@ X86_CPU_SUBTYPE_ALIAS(INTEL_COREI7_ALDERLAKE, "gracemont") // // We cannot just re-sort the list though because its order is dictated by the // order of bits in CodeGenFunction::GetX86CpuSupportsMask. +// Likewise, X86_FEATURE_COMPAT entries must keep their position in the list. 
#ifndef X86_FEATURE_COMPAT #define X86_FEATURE_COMPAT(ENUM, STR, PRIORITY) X86_FEATURE(ENUM, STR) #endif @@ -184,12 +185,12 @@ X86_FEATURE (AMX_TILE, "amx-tile") X86_FEATURE (CLDEMOTE, "cldemote") X86_FEATURE (CLFLUSHOPT, "clflushopt") X86_FEATURE (CLWB, "clwb") +X86_FEATURE_COMPAT(F16C, "f16c", 38) X86_FEATURE (CLZERO, "clzero") X86_FEATURE (CMPXCHG16B, "cx16") X86_FEATURE (CMPXCHG8B, "cx8") X86_FEATURE (CRC32, "crc32") X86_FEATURE (ENQCMD, "enqcmd") -X86_FEATURE (F16C, "f16c") X86_FEATURE (FSGSBASE, "fsgsbase") X86_FEATURE (FXSR, "fxsr") X86_FEATURE (INVPCID, "invpcid") @@ -229,9 +230,9 @@ X86_FEATURE (XSAVE, "xsave") X86_FEATURE (XSAVEC, "xsavec") X86_FEATURE (XSAVEOPT, "xsaveopt") X86_FEATURE (XSAVES, "xsaves") +X86_FEATURE_COMPAT(AVX512FP16, "avx512fp16", 39) X86_FEATURE (HRESET, "hreset") X86_FEATURE (RAOINT, "raoint") -X86_FEATURE (AVX512FP16, "avx512fp16") X86_FEATURE (AMX_FP16, "amx-fp16") X86_FEATURE (CMPCCXADD, "cmpccxadd") X86_FEATURE (AVXNECONVERT, "avxneconvert") diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index d46ff07ec7340..518fb9d892164 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -347,7 +347,7 @@ constexpr ProcInfo Processors[] = { // Tigerlake microarchitecture based processors. { {"tigerlake"}, CK_Tigerlake, FEATURE_AVX512VP2INTERSECT, FeaturesTigerlake, 'l', false }, // Sapphire Rapids microarchitecture based processors. - { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false }, + { {"sapphirerapids"}, CK_SapphireRapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false }, // Alderlake microarchitecture based processors. { {"alderlake"}, CK_Alderlake, FEATURE_AVX2, FeaturesAlderlake, 'p', false }, // Raptorlake microarchitecture based processors. @@ -369,12 +369,12 @@ constexpr ProcInfo Processors[] = { // Grandridge microarchitecture based processors. 
{ {"grandridge"}, CK_Grandridge, FEATURE_AVX2, FeaturesSierraforest, 'p', false }, // Granite Rapids microarchitecture based processors. - { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512BF16, FeaturesGraniteRapids, 'n', false }, + { {"graniterapids"}, CK_Graniterapids, FEATURE_AVX512FP16, FeaturesGraniteRapids, 'n', false }, // Granite Rapids D microarchitecture based processors. - { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false }, - { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512BF16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true }, + { {"graniterapids-d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, '\0', false }, + { {"graniterapids_d"}, CK_GraniterapidsD, FEATURE_AVX512FP16, FeaturesGraniteRapids | FeatureAMX_COMPLEX, 'n', true }, // Emerald Rapids microarchitecture based processors. - { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512BF16, FeaturesSapphireRapids, 'n', false }, + { {"emeraldrapids"}, CK_Emeraldrapids, FEATURE_AVX512FP16, FeaturesSapphireRapids, 'n', false }, // Clearwaterforest microarchitecture based processors. { {"clearwaterforest"}, CK_Lunarlake, FEATURE_AVX2, FeaturesClearwaterforest, 'p', false }, // Knights Landing processor.