Skip to content

Conversation

@mikolaj-pirog
Copy link
Member

As in title. AVX10.x doesn't distinguish between available vector lengths.

The bit positions of the avx10.1/2 features in compiler-rt and X86TargetParser are synced to match those in GCC.

@llvmbot llvmbot added clang Clang issues not falling into any other category compiler-rt backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics compiler-rt:builtins labels Nov 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 12, 2025

@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-x86

Author: Mikołaj Piróg (mikolaj-pirog)

Changes

As in title. AVX10.x doesn't distinguish between available vector lengths.

The bit positions of the avx10.1/2 features in compiler-rt and X86TargetParser are synced to match those in GCC.


Full diff: https://github.com/llvm/llvm-project/pull/167736.diff

5 Files Affected:

  • (modified) clang/lib/Headers/cpuid.h (-4)
  • (modified) clang/test/CodeGen/attr-target-x86.c (+2-2)
  • (modified) compiler-rt/lib/builtins/cpu_model/x86.c (+6-15)
  • (modified) llvm/include/llvm/TargetParser/X86TargetParser.def (+3-3)
  • (modified) llvm/lib/TargetParser/X86TargetParser.cpp (+2-2)
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 45700c635831d..432747ba1f6ad 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -253,10 +253,6 @@
 #define bit_RDPRU       0x00000010
 #define bit_WBNOINVD    0x00000200
 
-/* Features in %ebx for leaf 0x24 */
-#define bit_AVX10_256   0x00020000
-#define bit_AVX10_512   0x00040000
-
 #ifdef __i386__
 #define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
     __asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index d89e386061702..474fa93629d89 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -33,7 +33,7 @@ __attribute__((target("fpmath=387")))
 void f_fpmath_387(void) {}
 
 // CHECK-NOT: tune-cpu
-// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
 __attribute__((target("no-sse2")))
 void f_no_sse2(void) {}
 
@@ -41,7 +41,7 @@ void f_no_sse2(void) {}
 __attribute__((target("sse4")))
 void f_sse4(void) {}
 
-// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
 __attribute__((target("no-sse4")))
 void f_no_sse4(void) {}
 
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 45b7055abf454..b4b60986022d4 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -229,10 +229,8 @@ enum ProcessorFeatures {
   FEATURE_SM4,
   FEATURE_APXF,
   FEATURE_USERMSR,
-  FEATURE_AVX10_1_256,
-  FEATURE_AVX10_1_512,
-  FEATURE_AVX10_2_256,
-  FEATURE_AVX10_2_512,
+  FEATURE_AVX10_1 = 114,
+  FEATURE_AVX10_2 = 116,
   FEATURE_MOVRS,
   CPU_FEATURE_MAX
 };
@@ -1093,18 +1091,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
   bool HasLeaf24 =
       MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
   if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) {
-    bool Has512Len = (EBX >> 18) & 1;
     int AVX10Ver = EBX & 0xff;
-    if (AVX10Ver >= 2) {
-      setFeature(FEATURE_AVX10_2_256);
-      if (Has512Len)
-        setFeature(FEATURE_AVX10_2_512);
-    }
-    if (AVX10Ver >= 1) {
-      setFeature(FEATURE_AVX10_1_256);
-      if (Has512Len)
-        setFeature(FEATURE_AVX10_1_512);
-    }
+    if (AVX10Ver >= 1)
+      setFeature(FEATURE_AVX10_1);
+    if (AVX10Ver >= 2)
+      setFeature(FEATURE_AVX10_2);
   }
 
   unsigned MaxExtLevel = 0;
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 78cf46406192e..826752b088bcd 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -261,9 +261,9 @@ X86_FEATURE_COMPAT(SM4,             "sm4",                    0)
 X86_FEATURE       (EGPR,            "egpr")
 X86_FEATURE_COMPAT(USERMSR,         "usermsr",                0)
 X86_FEATURE_COMPAT(AVX10_1,         "avx10.1",               36)
-X86_FEATURE_COMPAT(AVX10_1_512,     "avx10.1-512",           37)
-X86_FEATURE_COMPAT(AVX10_2,         "avx10.2",                0)
-X86_FEATURE_COMPAT(AVX10_2_512,     "avx10.2-512",            0)
+X86_FEATURE       (DUMMYFEATURE3,   "__dummyfeature3")
+X86_FEATURE_COMPAT(AVX10_2,         "avx10.2",               37)
+X86_FEATURE       (DUMMYFEATURE4,   "__dummyfeature4")
 //FIXME: make MOVRS _COMPAT defined when gcc landed relate patch.
 X86_FEATURE       (MOVRS,           "movrs")
 X86_FEATURE       (ZU,              "zu")
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 37e8ad986aa55..293cc42ab81c1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -544,6 +544,8 @@ constexpr FeatureBitset ImpliedFeaturesX87 = {};
 constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
 constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
 constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE3 = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE4 = {};
 
 // Not really CPU features, but need to be in the table because clang uses
 // target features to communicate them to the backend.
@@ -644,8 +646,6 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
     FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
     FeatureAVX512DQ | FeatureAVX512VL;
 constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
-constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1;
-constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2;
 
 // APX Features
 constexpr FeatureBitset ImpliedFeaturesEGPR = {};

@mikolaj-pirog
Copy link
Member Author

There are still remnants of the 256 vs. 512 distinction, like defining __AVX10_1_512__ and FeatureAVX10_1_512 in X86.td -- but I believe those should stay, for compatibility purposes, right?

@phoebewang
Copy link
Contributor

There are still remnants of the 256 vs. 512 distinction, like defining __AVX10_1_512__ and FeatureAVX10_1_512 in X86.td -- but I believe those should stay, for compatibility purposes, right?

Yes, please add tests for -mattr=+avx10.1/2-512 so that we don't remove them by accident.

@mikolaj-pirog
Copy link
Member Author

There are still remnants of the 256 vs. 512 distinction, like defining __AVX10_1_512__ and FeatureAVX10_1_512 in X86.td -- but I believe those should stay, for compatibility purposes, right?

Yes, please add tests for -mattr=+avx10.1/2-512 so that we don't remove them by accident.

I've added a test

@@ -0,0 +1,10 @@
; avx10.x-512 is just avx10.x -- 512 is kept for compatibility purposes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The simple way is to add a RUN line with avx10.1/2-512 to any files that test avx10.1/2.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's clearer to have a separate test for this

Comment on lines 3 to 5
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.1-512 2>&1 | grep -v "is not a recognized feature"

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 2>&1 | grep -v "is not a recognized feature"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use FileCheck and CHECK-NOT for it.

Comment on lines 7 to 9
define float @foo(float %f) {
ret float %f
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's better to use a test that generates ZMM and AVX10.2 instructions if you prefer a separate test.

Comment on lines +7 to +8
; CHECK-AVX10_1-NOT: is not recognizable
; CHECK-AVX10_2-NOT: is not recognizable
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can generate a full assembly test with the script. If avx10.x-512 is not recognized, the test will fail due to missing avx10.x features.

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@mikolaj-pirog mikolaj-pirog merged commit 8f6c7aa into llvm:main Nov 15, 2025
10 checks passed
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category compiler-rt:builtins compiler-rt

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants