diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h index 45700c635831d..432747ba1f6ad 100644 --- a/clang/lib/Headers/cpuid.h +++ b/clang/lib/Headers/cpuid.h @@ -253,10 +253,6 @@ #define bit_RDPRU 0x00000010 #define bit_WBNOINVD 0x00000200 -/* Features in %ebx for leaf 0x24 */ -#define bit_AVX10_256 0x00020000 -#define bit_AVX10_512 0x00040000 - #ifdef __i386__ #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \ __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \ diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c index d89e386061702..474fa93629d89 100644 --- a/clang/test/CodeGen/attr-target-x86.c +++ b/clang/test/CodeGen/attr-target-x86.c @@ -33,7 +33,7 @@ __attribute__((target("fpmath=387"))) void f_fpmath_387(void) {} // CHECK-NOT: tune-cpu -// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" +// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686" __attribute__((target("no-sse2"))) void f_no_sse2(void) {} @@ -41,7 +41,7 @@ void f_no_sse2(void) {} __attribute__((target("sse4"))) void f_sse4(void) {} -// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" +// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686" __attribute__((target("no-sse4"))) void f_no_sse4(void) {} diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c index 45b7055abf454..b4b60986022d4 100644 --- a/compiler-rt/lib/builtins/cpu_model/x86.c +++ b/compiler-rt/lib/builtins/cpu_model/x86.c @@ -229,10 +229,8 @@ enum ProcessorFeatures { FEATURE_SM4, FEATURE_APXF, FEATURE_USERMSR, - FEATURE_AVX10_1_256, - FEATURE_AVX10_1_512, - FEATURE_AVX10_2_256, - FEATURE_AVX10_2_512, + FEATURE_AVX10_1 = 114, + FEATURE_AVX10_2 = 116, FEATURE_MOVRS, CPU_FEATURE_MAX }; @@ -1093,18 +1091,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, bool HasLeaf24 = MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX); if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) { - bool Has512Len = (EBX >> 18) & 1; int AVX10Ver = EBX & 0xff; - if (AVX10Ver >= 2) { - setFeature(FEATURE_AVX10_2_256); - if (Has512Len) - setFeature(FEATURE_AVX10_2_512); - } - if (AVX10Ver >= 1) { - setFeature(FEATURE_AVX10_1_256); - if (Has512Len) - setFeature(FEATURE_AVX10_1_512); - } + if (AVX10Ver >= 1) + setFeature(FEATURE_AVX10_1); + if (AVX10Ver >= 2) + setFeature(FEATURE_AVX10_2); } unsigned MaxExtLevel = 0; diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def index 78cf46406192e..826752b088bcd 100644 --- a/llvm/include/llvm/TargetParser/X86TargetParser.def +++ b/llvm/include/llvm/TargetParser/X86TargetParser.def @@ -261,9 +261,9 @@ X86_FEATURE_COMPAT(SM4, "sm4", 0) X86_FEATURE (EGPR, "egpr") X86_FEATURE_COMPAT(USERMSR, "usermsr", 0) X86_FEATURE_COMPAT(AVX10_1, "avx10.1", 36) -X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37) -X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 0) -X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0) +X86_FEATURE (DUMMYFEATURE3, "__dummyfeature3") +X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 37) +X86_FEATURE (DUMMYFEATURE4, "__dummyfeature4") //FIXME: make MOVRS _COMPAT defined when gcc landed relate patch. X86_FEATURE (MOVRS, "movrs") X86_FEATURE (ZU, "zu") diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp index 37e8ad986aa55..293cc42ab81c1 100644 --- a/llvm/lib/TargetParser/X86TargetParser.cpp +++ b/llvm/lib/TargetParser/X86TargetParser.cpp @@ -544,6 +544,8 @@ constexpr FeatureBitset ImpliedFeaturesX87 = {}; constexpr FeatureBitset ImpliedFeaturesXSAVE = {}; constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {}; constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {}; +constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE3 = {}; +constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE4 = {}; // Not really CPU features, but need to be in the table because clang uses // target features to communicate them to the backend. @@ -644,8 +646,6 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 = FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 | FeatureAVX512DQ | FeatureAVX512VL; constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1; -constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1; -constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2; // APX Features constexpr FeatureBitset ImpliedFeaturesEGPR = {}; diff --git a/llvm/test/CodeGen/X86/llc-accept-avx10-512.ll b/llvm/test/CodeGen/X86/llc-accept-avx10-512.ll new file mode 100644 index 0000000000000..b5c9895fefd98 --- /dev/null +++ b/llvm/test/CodeGen/X86/llc-accept-avx10-512.ll @@ -0,0 +1,97 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 + +; avx10.x-512 is just avx10.x -- 512 is kept for compatibility purposes. + +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_1 %s + +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_2 %s + +; CHECK-AVX10_1-NOT: is not recognizable +; CHECK-AVX10_2-NOT: is not recognizable + +define <32 x bfloat> @foo_avx10.1(<16 x float> %a, <16 x float> %b) { +; CHECK-AVX10_1-LABEL: foo_avx10.1: +; CHECK-AVX10_1: # %bb.0: +; CHECK-AVX10_1-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 +; CHECK-AVX10_1-NEXT: retq +; +; CHECK-AVX10_2-LABEL: foo_avx10.1: +; CHECK-AVX10_2: # %bb.0: +; CHECK-AVX10_2-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0 +; CHECK-AVX10_2-NEXT: retq + %ret = call <32 x bfloat> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a, <16 x float> %b) + ret <32 x bfloat> %ret +} + +define <8 x i32> @foo_avx10.2(<8 x double> %f) { +; CHECK-AVX10_1-LABEL: foo_avx10.2: +; CHECK-AVX10_1: # %bb.0: +; CHECK-AVX10_1-NEXT: vextractf32x4 $2, %zmm0, %xmm1 +; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0] +; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm3 = [-2.147483648E+9,0.0E+0] +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4 +; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm5 = [2.147483647E+9,0.0E+0] +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx +; CHECK-AVX10_1-NEXT: xorl %eax, %eax +; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm1, %xmm2 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx +; CHECK-AVX10_1-NEXT: vucomisd %xmm1, %xmm1 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx +; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm1 +; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 +; CHECK-AVX10_1-NEXT: vextractf32x4 $3, %zmm0, %xmm2 +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx +; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 +; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0] +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx +; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1 +; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0] +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx +; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm2 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx +; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx +; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm2 +; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: vextractf128 $1, %ymm0, %xmm0 +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm4 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx +; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2 +; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0] +; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm3 +; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm3, %xmm3 +; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm3, %ecx +; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0 +; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx +; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0 +; CHECK-AVX10_1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; CHECK-AVX10_1-NEXT: retq +; +; CHECK-AVX10_2-LABEL: foo_avx10.2: +; CHECK-AVX10_2: # %bb.0: +; CHECK-AVX10_2-NEXT: vcvttpd2dqs %zmm0, %ymm0 +; CHECK-AVX10_2-NEXT: retq + %x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f) + ret <8 x i32> %x +} +