-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[X86][AVX10] Allow 64-bit mask register used without EVEX512 #75571
Conversation
This is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets. Latest documents can be found in: https://cdrdv2.intel.com/v1/dl/getContent/784267 https://cdrdv2.intel.com/v1/dl/getContent/784343
@llvm/pr-subscribers-clang Author: Phoebe Wang (phoebewang) ChangesThis is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets. Latest documents can be found in: Full diff: https://github.com/llvm/llvm-project/pull/75571.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4802f8ab1c156..60b752ad48548f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -979,7 +979,7 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512p
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1349,7 +1349,7 @@ TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "a
TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
@@ -1665,56 +1665,56 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 51dba5427b0fc0..e90e3ce74477c5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -19,7 +19,6 @@ typedef unsigned long long __mmask64;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,no-evex512")))
@@ -30,7 +29,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}
-static __inline __mmask64 __DEFAULT_FN_ATTRS64
+static __inline __mmask64 __DEFAULT_FN_ATTRS
_knot_mask64(__mmask64 __M)
{
return __builtin_ia32_knotdi(__M);
@@ -42,7 +41,7 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
@@ -54,7 +53,7 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
@@ -66,7 +65,7 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
@@ -78,7 +77,7 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
@@ -90,7 +89,7 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
@@ -114,19 +113,19 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
@@ -150,19 +149,19 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
@@ -174,7 +173,7 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kadd_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
@@ -197,7 +196,7 @@ _cvtmask32_u32(__mmask32 __A) {
return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
}
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_cvtmask64_u64(__mmask64 __A) {
return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
}
@@ -207,7 +206,7 @@ _cvtu32_mask32(unsigned int __A) {
return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_cvtu64_mask64(unsigned long long __A) {
return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
}
@@ -217,7 +216,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}
@@ -227,7 +226,7 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}
-static __inline__ void __DEFAULT_FN_ATTRS64
+static __inline__ void __DEFAULT_FN_ATTRS
_store_mask64(__mmask64 *__A, __mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}
@@ -1717,7 +1716,7 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
diff --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
index 99824a373e8ebf..422cc7a8679dcf 100644
--- a/clang/test/CodeGen/X86/avx512-error.c
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -1,21 +1,24 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify
#include <immintrin.h>
-#if FEATURE_TEST & 3
-// expected-no-diagnostics
-#endif
+// No error emitted whether we have "evex512" feature or not.
+__attribute__((target("avx512bw,no-evex512")))
+__mmask64 k64_verify_1(__mmask64 a) {
+ return _knot_mask64(a); // expected-no-diagnostics
+}
+
+__mmask64 k64_verify_2(__mmask64 a) {
+ return _knot_mask64(a); // expected-no-diagnostic
+}
-#if FEATURE_TEST & 1
__attribute__((target("avx512bw,evex512")))
__m512d zmm_verify_ok(__m512d a) {
// No error emitted if we have "evex512" feature.
- return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
+ return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic
}
__m512d zmm_error(__m512d a) {
@@ -27,24 +30,3 @@ __m512d zmm_error(__m512d a) {
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
#endif
-#endif
-
-#if FEATURE_TEST & 2
-__attribute__((target("avx512bw,evex512")))
-__mmask64 k64_verify_ok(__mmask64 a) {
- // No error emitted if we have "evex512" feature.
- return _knot_mask64(a);
-}
-#if defined(__AVX10_1__) && !defined(__AVX10_1_512__)
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-
-__mmask64 test_knot_mask64(__mmask64 a) {
- return _knot_mask64(a); // noevex-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
-}
-#endif
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5c32519dab3714..e76d0d7bf50e19 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1077,12 +1077,6 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
VirtReg, Order, Hints, MF, VRM, Matrix);
unsigned ID = RC.getID();
- const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
- if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
- Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
- report_fatal_error(
- "64-bit mask registers are not supported without EVEX512");
-
if (ID != X86::TILERegClassID)
return BaseImplRetVal;
|
@llvm/pr-subscribers-backend-x86 Author: Phoebe Wang (phoebewang) ChangesThis is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets. Latest documents can be found in: Full diff: https://github.com/llvm/llvm-project/pull/75571.diff 4 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4802f8ab1c156..60b752ad48548f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -979,7 +979,7 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512p
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1349,7 +1349,7 @@ TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "a
TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
@@ -1665,56 +1665,56 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 51dba5427b0fc0..e90e3ce74477c5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -19,7 +19,6 @@ typedef unsigned long long __mmask64;
/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,no-evex512")))
@@ -30,7 +29,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}
-static __inline __mmask64 __DEFAULT_FN_ATTRS64
+static __inline __mmask64 __DEFAULT_FN_ATTRS
_knot_mask64(__mmask64 __M)
{
return __builtin_ia32_knotdi(__M);
@@ -42,7 +41,7 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
@@ -54,7 +53,7 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
@@ -66,7 +65,7 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
@@ -78,7 +77,7 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
@@ -90,7 +89,7 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
@@ -114,19 +113,19 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
@@ -150,19 +149,19 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
@@ -174,7 +173,7 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_kadd_mask64(__mmask64 __A, __mmask64 __B)
{
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
@@ -197,7 +196,7 @@ _cvtmask32_u32(__mmask32 __A) {
return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
}
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_cvtmask64_u64(__mmask64 __A) {
return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
}
@@ -207,7 +206,7 @@ _cvtu32_mask32(unsigned int __A) {
return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_cvtu64_mask64(unsigned long long __A) {
return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
}
@@ -217,7 +216,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}
@@ -227,7 +226,7 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}
-static __inline__ void __DEFAULT_FN_ATTRS64
+static __inline__ void __DEFAULT_FN_ATTRS
_store_mask64(__mmask64 *__A, __mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}
@@ -1717,7 +1716,7 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
diff --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
index 99824a373e8ebf..422cc7a8679dcf 100644
--- a/clang/test/CodeGen/X86/avx512-error.c
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -1,21 +1,24 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify
#include <immintrin.h>
-#if FEATURE_TEST & 3
-// expected-no-diagnostics
-#endif
+// No error emitted whether we have "evex512" feature or not.
+__attribute__((target("avx512bw,no-evex512")))
+__mmask64 k64_verify_1(__mmask64 a) {
+ return _knot_mask64(a); // expected-no-diagnostics
+}
+
+__mmask64 k64_verify_2(__mmask64 a) {
+ return _knot_mask64(a); // expected-no-diagnostic
+}
-#if FEATURE_TEST & 1
__attribute__((target("avx512bw,evex512")))
__m512d zmm_verify_ok(__m512d a) {
// No error emitted if we have "evex512" feature.
- return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
+ return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic
}
__m512d zmm_error(__m512d a) {
@@ -27,24 +30,3 @@ __m512d zmm_error(__m512d a) {
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
#endif
-#endif
-
-#if FEATURE_TEST & 2
-__attribute__((target("avx512bw,evex512")))
-__mmask64 k64_verify_ok(__mmask64 a) {
- // No error emitted if we have "evex512" feature.
- return _knot_mask64(a);
-}
-#if defined(__AVX10_1__) && !defined(__AVX10_1_512__)
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-
-__mmask64 test_knot_mask64(__mmask64 a) {
- return _knot_mask64(a); // noevex-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
-}
-#endif
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5c32519dab3714..e76d0d7bf50e19 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1077,12 +1077,6 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
VirtReg, Order, Hints, MF, VRM, Matrix);
unsigned ID = RC.getID();
- const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
- if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
- Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
- report_fatal_error(
- "64-bit mask registers are not supported without EVEX512");
-
if (ID != X86::TILERegClassID)
return BaseImplRetVal;
|
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
LGTM. Thanks! |
This is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets.
Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343