Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions clang/lib/Headers/cpuid.h
Original file line number Diff line number Diff line change
Expand Up @@ -253,10 +253,6 @@
#define bit_RDPRU 0x00000010
#define bit_WBNOINVD 0x00000200

/* Features in %ebx for leaf 0x24 */
#define bit_AVX10_256 0x00020000
#define bit_AVX10_512 0x00040000

#ifdef __i386__
#define __cpuid(__leaf, __eax, __ebx, __ecx, __edx) \
__asm("cpuid" : "=a"(__eax), "=b" (__ebx), "=c"(__ecx), "=d"(__edx) \
Expand Down
4 changes: 2 additions & 2 deletions clang/test/CodeGen/attr-target-x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@ __attribute__((target("fpmath=387")))
void f_fpmath_387(void) {}

// CHECK-NOT: tune-cpu
// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: [[f_no_sse2]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sm3,-sm4,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// Intentionally empty: only the attribute group emitted for target("no-sse2")
// is inspected, via the [[f_no_sse2]] CHECK line above.
__attribute__((target("no-sse2")))
void f_no_sse2(void) {}

// CHECK: [[f_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
// Intentionally empty: target("sse4") is expected to enable both +sse4.1 and
// +sse4.2 (plus the features listed in the [[f_sse4]] CHECK line above).
__attribute__((target("sse4")))
void f_sse4(void) {}

// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.1-512,-avx10.2,-avx10.2-512,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: [[f_no_sse4]] = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-amx-avx512,-avx,-avx10.1,-avx10.2,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512f,-avx512fp16,-avx512ifma,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint16,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sm3,-sm4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// Intentionally empty: target("no-sse4") is expected to disable -sse4.1 and
// -sse4.2 along with the dependent features listed in the [[f_no_sse4]] CHECK
// line above.
__attribute__((target("no-sse4")))
void f_no_sse4(void) {}

Expand Down
21 changes: 6 additions & 15 deletions compiler-rt/lib/builtins/cpu_model/x86.c
Original file line number Diff line number Diff line change
Expand Up @@ -229,10 +229,8 @@ enum ProcessorFeatures {
FEATURE_SM4,
FEATURE_APXF,
FEATURE_USERMSR,
FEATURE_AVX10_1_256,
FEATURE_AVX10_1_512,
FEATURE_AVX10_2_256,
FEATURE_AVX10_2_512,
FEATURE_AVX10_1 = 114,
FEATURE_AVX10_2 = 116,
FEATURE_MOVRS,
CPU_FEATURE_MAX
};
Expand Down Expand Up @@ -1093,18 +1091,11 @@ static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf,
bool HasLeaf24 =
MaxLevel >= 0x24 && !getX86CpuIDAndInfo(0x24, &EAX, &EBX, &ECX, &EDX);
if (HasLeaf7Subleaf1 && ((EDX >> 19) & 1) && HasLeaf24) {
bool Has512Len = (EBX >> 18) & 1;
int AVX10Ver = EBX & 0xff;
if (AVX10Ver >= 2) {
setFeature(FEATURE_AVX10_2_256);
if (Has512Len)
setFeature(FEATURE_AVX10_2_512);
}
if (AVX10Ver >= 1) {
setFeature(FEATURE_AVX10_1_256);
if (Has512Len)
setFeature(FEATURE_AVX10_1_512);
}
if (AVX10Ver >= 1)
setFeature(FEATURE_AVX10_1);
if (AVX10Ver >= 2)
setFeature(FEATURE_AVX10_2);
}

unsigned MaxExtLevel = 0;
Expand Down
6 changes: 3 additions & 3 deletions llvm/include/llvm/TargetParser/X86TargetParser.def
Original file line number Diff line number Diff line change
Expand Up @@ -261,9 +261,9 @@ X86_FEATURE_COMPAT(SM4, "sm4", 0)
X86_FEATURE (EGPR, "egpr")
X86_FEATURE_COMPAT(USERMSR, "usermsr", 0)
X86_FEATURE_COMPAT(AVX10_1, "avx10.1", 36)
X86_FEATURE_COMPAT(AVX10_1_512, "avx10.1-512", 37)
X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 0)
X86_FEATURE_COMPAT(AVX10_2_512, "avx10.2-512", 0)
X86_FEATURE (DUMMYFEATURE3, "__dummyfeature3")
X86_FEATURE_COMPAT(AVX10_2, "avx10.2", 37)
X86_FEATURE (DUMMYFEATURE4, "__dummyfeature4")
// FIXME: Define MOVRS as _COMPAT once GCC lands the related patch.
X86_FEATURE (MOVRS, "movrs")
X86_FEATURE (ZU, "zu")
Expand Down
4 changes: 2 additions & 2 deletions llvm/lib/TargetParser/X86TargetParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,6 +544,8 @@ constexpr FeatureBitset ImpliedFeaturesX87 = {};
constexpr FeatureBitset ImpliedFeaturesXSAVE = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE1 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE2 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE3 = {};
constexpr FeatureBitset ImpliedFeaturesDUMMYFEATURE4 = {};

// Not really CPU features, but need to be in the table because clang uses
// target features to communicate them to the backend.
Expand Down Expand Up @@ -644,8 +646,6 @@ constexpr FeatureBitset ImpliedFeaturesAVX10_1 =
FeatureAVX512VBMI2 | FeatureAVX512BITALG | FeatureAVX512FP16 |
FeatureAVX512DQ | FeatureAVX512VL;
constexpr FeatureBitset ImpliedFeaturesAVX10_2 = FeatureAVX10_1;
constexpr FeatureBitset ImpliedFeaturesAVX10_1_512 = FeatureAVX10_1;
constexpr FeatureBitset ImpliedFeaturesAVX10_2_512 = FeatureAVX10_2;

// APX Features
constexpr FeatureBitset ImpliedFeaturesEGPR = {};
Expand Down
97 changes: 97 additions & 0 deletions llvm/test/CodeGen/X86/llc-accept-avx10-512.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6

; avx10.x-512 is just avx10.x -- 512 is kept for compatibility purposes.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The simple way is to add a RUN line with avx10.1/2-512 to any file that tests avx10.1/2.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's clearer to have a separate test for this


; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.1-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_1 %s

; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx10.2-512 2>&1 | FileCheck --check-prefixes=CHECK-AVX10_2 %s

; CHECK-AVX10_1-NOT: is not recognizable
; CHECK-AVX10_2-NOT: is not recognizable
Comment on lines +9 to +10
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we can generate the full assembly test with the script. If avx10.x-512 is not recognized, the test will fail due to the missing avx10.x features.


; Calls the 512-bit AVX512-BF16 cvtne2ps2bf16 intrinsic. Both the +avx10.1-512
; and the +avx10.2-512 runs are expected to select a single vcvtne2ps2bf16 on
; zmm registers, showing that the legacy "-512" spellings still enable the
; feature.
define <32 x bfloat> @foo_avx10.1(<16 x float> %a, <16 x float> %b) {
; CHECK-AVX10_1-LABEL: foo_avx10.1:
; CHECK-AVX10_1: # %bb.0:
; CHECK-AVX10_1-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0
; CHECK-AVX10_1-NEXT: retq
;
; CHECK-AVX10_2-LABEL: foo_avx10.1:
; CHECK-AVX10_2: # %bb.0:
; CHECK-AVX10_2-NEXT: vcvtne2ps2bf16 %zmm1, %zmm0, %zmm0
; CHECK-AVX10_2-NEXT: retq
%ret = call <32 x bfloat> @llvm.x86.avx512bf16.cvtne2ps2bf16.512(<16 x float> %a, <16 x float> %b)
ret <32 x bfloat> %ret
}

; Saturating fptosi from <8 x double> to <8 x i32>, used to tell the two runs
; apart. With +avx10.1-512 the expected output is a per-lane scalar expansion
; (vmaxsd/vminsd clamp, vcvttsd2si convert, vucomisd + cmovp to substitute the
; zero held in %eax). With +avx10.2-512 the expected output is a single
; vcvttpd2dqs.
define <8 x i32> @foo_avx10.2(<8 x double> %f) {
; CHECK-AVX10_1-LABEL: foo_avx10.2:
; CHECK-AVX10_1: # %bb.0:
; CHECK-AVX10_1-NEXT: vextractf32x4 $2, %zmm0, %xmm1
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm1[1,0]
; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm3 = [-2.147483648E+9,0.0E+0]
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
; CHECK-AVX10_1-NEXT: vmovsd {{.*#+}} xmm5 = [2.147483647E+9,0.0E+0]
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
; CHECK-AVX10_1-NEXT: xorl %eax, %eax
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm1, %xmm2
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx
; CHECK-AVX10_1-NEXT: vucomisd %xmm1, %xmm1
; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx
; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm1
; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1
; CHECK-AVX10_1-NEXT: vextractf32x4 $3, %zmm0, %xmm2
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm2[1,0]
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm1, %xmm1
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm2 = xmm0[1,0]
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm2, %xmm4
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
; CHECK-AVX10_1-NEXT: vucomisd %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm2
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm2, %edx
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
; CHECK-AVX10_1-NEXT: cmovpl %eax, %edx
; CHECK-AVX10_1-NEXT: vmovd %edx, %xmm2
; CHECK-AVX10_1-NEXT: vpinsrd $1, %ecx, %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: vextractf128 $1, %ymm0, %xmm0
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm4
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm4, %xmm4
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm4, %ecx
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vpinsrd $2, %ecx, %xmm2, %xmm2
; CHECK-AVX10_1-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1,0]
; CHECK-AVX10_1-NEXT: vmaxsd %xmm3, %xmm0, %xmm3
; CHECK-AVX10_1-NEXT: vminsd %xmm5, %xmm3, %xmm3
; CHECK-AVX10_1-NEXT: vcvttsd2si %xmm3, %ecx
; CHECK-AVX10_1-NEXT: vucomisd %xmm0, %xmm0
; CHECK-AVX10_1-NEXT: cmovpl %eax, %ecx
; CHECK-AVX10_1-NEXT: vpinsrd $3, %ecx, %xmm2, %xmm0
; CHECK-AVX10_1-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; CHECK-AVX10_1-NEXT: retq
;
; CHECK-AVX10_2-LABEL: foo_avx10.2:
; CHECK-AVX10_2: # %bb.0:
; CHECK-AVX10_2-NEXT: vcvttpd2dqs %zmm0, %ymm0
; CHECK-AVX10_2-NEXT: retq
%x = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f64(<8 x double> %f)
ret <8 x i32> %x
}

Loading