-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86] Remove vector length (256 vs 512) distinction of AVX10 #167736
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86] Remove vector length (256 vs 512) distinction of AVX10 #167736
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Mikołaj Piróg (mikolaj-pirog) Changes: As in the title. AVX10.x doesn't distinguish between available vector lengths. The bit positions of the avx10.1/2 features in compiler-rt and X86TargetParser are synced to match those in GCC. Full diff: https://github.com/llvm/llvm-project/pull/167736.diff 5 Files Affected:
diff --git a/clang/lib/Headers/cpuid.h b/clang/lib/Headers/cpuid.h
index 45700c635831d..432747ba1f6ad 100644
--- a/clang/lib/Headers/cpuid.h
+++ b/clang/lib/Headers/cpuid.h
@@ -253,10 +253,6 @@
#define bit_RDPRU 0x00000010
#define bit_WBNOINVD 0x00000200
-/* Features in %ebx for leaf 0x24 */
-#define bit_AVX10_256 0x00020000
-#define bit_AVX10_512 0x00040000
-
#ifdef __i386__
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index d89e386061702..474fa93629d89 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -33,7 +33,7 @@ __attribute__((target("fpmath=387")))
void f_fpmath_387(void) {}
// CHECK-NOT: tune-cpu
-// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
__attribute__((target("no-sse2")))
void f_no_sse2(void) {}
@@ -41,7 +41,7 @@ void f_no_sse2(void) {}
__attribute__((target("sse4")))
void f_sse4(void) {}
-// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
__attribute__((target("no-sse4")))
void f_no_sse4(void) {}
diff --git a/compiler-rt/lib/builtins/cpu_model/x86.c b/compiler-rt/lib/builtins/cpu_model/x86.c
index 45b7055abf454..b4b60986022d4 100644
--- a/compiler-rt/lib/builtins/cpu_model/x86.c
+++ b/compiler-rt/lib/builtins/cpu_model/x86.c
@@ -229,10 +229,8 @@ enum ProcessorFeatures {
FEATURE_SM4,
FEATURE_APXF,
FEATURE_USERMSR,
- FEATURE_AVX10_1_256,
- FEATURE_AVX10_1_512,
- FEATURE_AVX10_2_256,
- FEATURE_AVX10_2_512,
+ FEATURE_AVX10_1 = 114,
+ FEATURE_AVX10_2 = 116,
FEATURE_MOVRS,
CPU_FEATURE_MAX
};
@@ -1093,18 +1091,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) {
- bool Has512Len = (EBX >> 18) & 1;
int AVX10Ver = EBX & 0xff;
- if (AVX10Ver >= 2) {
- setFeature(FEATURE_AVX10_2_256);
- if (Has512Len)
- setFeature(FEATURE_AVX10_2_512);
- }
- if (AVX10Ver >= 1) {
- setFeature(FEATURE_AVX10_1_256);
- if (Has512Len)
- setFeature(FEATURE_AVX10_1_512);
- }
+ if (AVX10Ver >= 1)
+ setFeature(FEATURE_AVX10_1);
+ if (AVX10Ver >= 2)
+ setFeature(FEATURE_AVX10_2);
}
unsigned MaxExtLevel = 0;
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index 78cf46406192e..826752b088bcd 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -261,9 +261,9 @@ X86_FEATURE_COMPAT(SM4, "sm4", 0)
X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1", 36)
-X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
-X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 0)
-X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
+X86_FEATURE (DUMMYFEATURE3, "__dummyfeature3")
+X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 37)
+X86_FEATURE (DUMMYFEATURE4, "__dummyfeature4")
//FIXME: make MOVRS _COMPAT defined when gcc landed relate patch.
X86_FEATURE (MOVRS, "movrs")
X86_FEATURE (ZU, "zu")
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index 37e8ad986aa55..293cc42ab81c1 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -544,6 +544,8 @@ constexpr FeatureBitset ImpliedFeaturesX87 = {};
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE3 = {};
+constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE4 = {};
// Not really CPU features, but need to be in the table because clang uses
// target features to communicate them to the backend.
@@ -644,8 +646,6 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
-constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1;
-constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2;
// APX Features
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
|
|
There are still remnants of 256 vs 512 distinction, like defining |
Yes, please add tests for |
I've added a test |
| @@ -0,0 +1,10 @@ | |||
| ; avx10.x-512 is just avx10.x -- 512 is kept for compatibility purposes. | |||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The simple way is to add a RUN line with avx10.1/2-512 to any files that test avx10.1/2.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it's clearer to have a separate test for this
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.1-512 2>&1 | grep -v "is not a recognized feature" | ||
|
|
||
| ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx10.2-512 2>&1 | grep -v "is not a recognized feature" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use FileCheck and CHECK-NOT for it.
| define float @foo(float %f) { | ||
| ret float %f | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to use a test that generates ZMM and AVX10.2 instructions if you prefer a separate test.
| ; CHECK-AVX10_1-NOT: is not recognizable | ||
| ; CHECK-AVX10_2-NOT: is not recognizable |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think we can generate a full assembly test with the script. If avx10.x-512 is not recognized, the test will fail due to missing avx10.x features.
phoebewang
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM.
As in the title. AVX10.x doesn't distinguish between available vector lengths.
The bit positions of the avx10.1/2 features in compiler-rt and X86TargetParser are synced to match those in GCC.