Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[X86][AVX10] Allow 64-bit mask register used without EVEX512 #75571

Merged
merged 2 commits into from
Dec 15, 2023

Conversation

phoebewang
Copy link
Contributor

This is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets.

Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343

This is to reflect new document change that 64-bit mask is support by
AVX10 256-bit targets.

Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics labels Dec 15, 2023
@llvmbot
Copy link
Collaborator

llvmbot commented Dec 15, 2023

@llvm/pr-subscribers-clang

Author: Phoebe Wang (phoebewang)

Changes

This is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets.

Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343


Full diff: https://github.com/llvm/llvm-project/pull/75571.diff

4 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+15-15)
  • (modified) clang/lib/Headers/avx512bwintrin.h (+18-19)
  • (modified) clang/test/CodeGen/X86/avx512-error.c (+14-32)
  • (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (-6)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4802f8ab1c156..60b752ad48548f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -979,7 +979,7 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512p
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1349,7 +1349,7 @@ TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "a
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
@@ -1665,56 +1665,56 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
 TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 51dba5427b0fc0..e90e3ce74477c5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -19,7 +19,6 @@ typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512bw,no-evex512")))
@@ -30,7 +29,7 @@ _knot_mask32(__mmask32 __M)
   return __builtin_ia32_knotsi(__M);
 }
 
-static __inline __mmask64 __DEFAULT_FN_ATTRS64
+static __inline __mmask64 __DEFAULT_FN_ATTRS
 _knot_mask64(__mmask64 __M)
 {
   return __builtin_ia32_knotdi(__M);
@@ -42,7 +41,7 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kand_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
@@ -54,7 +53,7 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kandn_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
@@ -66,7 +65,7 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
@@ -78,7 +77,7 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kxnor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
@@ -90,7 +89,7 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kxor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
@@ -114,19 +113,19 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
@@ -150,19 +149,19 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
@@ -174,7 +173,7 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kadd_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
@@ -197,7 +196,7 @@ _cvtmask32_u32(__mmask32 __A) {
   return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _cvtmask64_u64(__mmask64 __A) {
   return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -207,7 +206,7 @@ _cvtu32_mask32(unsigned int __A) {
   return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _cvtu64_mask64(unsigned long long __A) {
   return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -217,7 +216,7 @@ _load_mask32(__mmask32 *__A) {
   return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _load_mask64(__mmask64 *__A) {
   return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
 }
@@ -227,7 +226,7 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
   *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS64
+static __inline__ void __DEFAULT_FN_ATTRS
 _store_mask64(__mmask64 *__A, __mmask64 __B) {
   *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
 }
@@ -1717,7 +1716,7 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
                                               (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
diff --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
index 99824a373e8ebf..422cc7a8679dcf 100644
--- a/clang/test/CodeGen/X86/avx512-error.c
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -1,21 +1,24 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify
 
 #include <immintrin.h>
 
-#if FEATURE_TEST & 3
-// expected-no-diagnostics
-#endif
+// No error emitted whether we have "evex512" feature or not.
+__attribute__((target("avx512bw,no-evex512")))
+__mmask64 k64_verify_1(__mmask64 a) {
+  return _knot_mask64(a); // expected-no-diagnostics
+}
+
+__mmask64 k64_verify_2(__mmask64 a) {
+  return _knot_mask64(a); // expected-no-diagnostic
+}
 
-#if FEATURE_TEST & 1
 __attribute__((target("avx512bw,evex512")))
 __m512d zmm_verify_ok(__m512d a) {
   // No error emitted if we have "evex512" feature.
-  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic
 }
 
 __m512d zmm_error(__m512d a) {
@@ -27,24 +30,3 @@ __m512d zmm_error(__m512d a) {
 // noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
 // noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
 #endif
-#endif
-
-#if FEATURE_TEST & 2
-__attribute__((target("avx512bw,evex512")))
-__mmask64 k64_verify_ok(__mmask64 a) {
-  // No error emitted if we have "evex512" feature.
-  return _knot_mask64(a);
-}
-#if defined(__AVX10_1__) && !defined(__AVX10_1_512__)
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-
-__mmask64 test_knot_mask64(__mmask64 a) {
-  return _knot_mask64(a); // noevex-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
-}
-#endif
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5c32519dab3714..e76d0d7bf50e19 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1077,12 +1077,6 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
   unsigned ID = RC.getID();
-  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
-  if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
-      Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
-    report_fatal_error(
-        "64-bit mask registers are not supported without EVEX512");
-
   if (ID != X86::TILERegClassID)
     return BaseImplRetVal;
 

@llvmbot
Copy link
Collaborator

llvmbot commented Dec 15, 2023

@llvm/pr-subscribers-backend-x86

Author: Phoebe Wang (phoebewang)

Changes

This is to reflect new document change that 64-bit mask is support by AVX10 256-bit targets.

Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343


Full diff: https://github.com/llvm/llvm-project/pull/75571.diff

4 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+15-15)
  • (modified) clang/lib/Headers/avx512bwintrin.h (+18-19)
  • (modified) clang/test/CodeGen/X86/avx512-error.c (+14-32)
  • (modified) llvm/lib/Target/X86/X86RegisterInfo.cpp (-6)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index e4802f8ab1c156..60b752ad48548f 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -979,7 +979,7 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512p
 TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")
 
 TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
 TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
@@ -1349,7 +1349,7 @@ TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "a
 TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
 TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
@@ -1665,56 +1665,56 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
 TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
-TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
+TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
 TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 51dba5427b0fc0..e90e3ce74477c5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -19,7 +19,6 @@ typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
-#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
 #define __DEFAULT_FN_ATTRS                                                     \
   __attribute__((__always_inline__, __nodebug__,                               \
                  __target__("avx512bw,no-evex512")))
@@ -30,7 +29,7 @@ _knot_mask32(__mmask32 __M)
   return __builtin_ia32_knotsi(__M);
 }
 
-static __inline __mmask64 __DEFAULT_FN_ATTRS64
+static __inline __mmask64 __DEFAULT_FN_ATTRS
 _knot_mask64(__mmask64 __M)
 {
   return __builtin_ia32_knotdi(__M);
@@ -42,7 +41,7 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kand_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
@@ -54,7 +53,7 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kandn_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
@@ -66,7 +65,7 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
@@ -78,7 +77,7 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kxnor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
@@ -90,7 +89,7 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kxor_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
@@ -114,19 +113,19 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
@@ -150,19 +149,19 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
 {
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
 }
 
-static __inline__ unsigned char __DEFAULT_FN_ATTRS64
+static __inline__ unsigned char __DEFAULT_FN_ATTRS
 _ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
   *__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
   return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
@@ -174,7 +173,7 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
   return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _kadd_mask64(__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
@@ -197,7 +196,7 @@ _cvtmask32_u32(__mmask32 __A) {
   return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS
 _cvtmask64_u64(__mmask64 __A) {
   return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -207,7 +206,7 @@ _cvtu32_mask32(unsigned int __A) {
   return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _cvtu64_mask64(unsigned long long __A) {
   return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
 }
@@ -217,7 +216,7 @@ _load_mask32(__mmask32 *__A) {
   return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _load_mask64(__mmask64 *__A) {
   return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
 }
@@ -227,7 +226,7 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
   *(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS64
+static __inline__ void __DEFAULT_FN_ATTRS
 _store_mask64(__mmask64 *__A, __mmask64 __B) {
   *(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
 }
@@ -1717,7 +1716,7 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
                                               (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS
 _mm512_kunpackd (__mmask64 __A, __mmask64 __B)
 {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
diff --git a/clang/test/CodeGen/X86/avx512-error.c b/clang/test/CodeGen/X86/avx512-error.c
index 99824a373e8ebf..422cc7a8679dcf 100644
--- a/clang/test/CodeGen/X86/avx512-error.c
+++ b/clang/test/CodeGen/X86/avx512-error.c
@@ -1,21 +1,24 @@
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
-// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify
 
 #include <immintrin.h>
 
-#if FEATURE_TEST & 3
-// expected-no-diagnostics
-#endif
+// No error emitted whether we have "evex512" feature or not.
+__attribute__((target("avx512bw,no-evex512")))
+__mmask64 k64_verify_1(__mmask64 a) {
+  return _knot_mask64(a); // expected-no-diagnostics
+}
+
+__mmask64 k64_verify_2(__mmask64 a) {
+  return _knot_mask64(a); // expected-no-diagnostic
+}
 
-#if FEATURE_TEST & 1
 __attribute__((target("avx512bw,evex512")))
 __m512d zmm_verify_ok(__m512d a) {
   // No error emitted if we have "evex512" feature.
-  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
+  return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic
 }
 
 __m512d zmm_error(__m512d a) {
@@ -27,24 +30,3 @@ __m512d zmm_error(__m512d a) {
 // noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
 // noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
 #endif
-#endif
-
-#if FEATURE_TEST & 2
-__attribute__((target("avx512bw,evex512")))
-__mmask64 k64_verify_ok(__mmask64 a) {
-  // No error emitted if we have "evex512" feature.
-  return _knot_mask64(a);
-}
-#if defined(__AVX10_1__) && !defined(__AVX10_1_512__)
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
-#endif
-
-__mmask64 test_knot_mask64(__mmask64 a) {
-  return _knot_mask64(a); // noevex-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
-}
-#endif
diff --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 5c32519dab3714..e76d0d7bf50e19 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -1077,12 +1077,6 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
       VirtReg, Order, Hints, MF, VRM, Matrix);
 
   unsigned ID = RC.getID();
-  const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
-  if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
-      Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
-    report_fatal_error(
-        "64-bit mask registers are not supported without EVEX512");
-
   if (ID != X86::TILERegClassID)
     return BaseImplRetVal;
 

Copy link

github-actions bot commented Dec 15, 2023

:white_check_mark: With the latest revision this PR passed the C/C++ code formatter.

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@phoebewang phoebewang merged commit f5e48fe into llvm:main Dec 15, 2023
4 checks passed
@phoebewang phoebewang deleted the mask64 branch December 15, 2023 12:41
@e-kud
Copy link
Contributor

e-kud commented Dec 15, 2023

LGTM. Thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category
Projects
None yet
Development

Successfully merging this pull request may close these issues.

None yet

4 participants