Skip to content

Commit

Permalink
[X86][AVX10] Allow 64-bit mask register used without EVEX512 (#75571)
Browse files Browse the repository at this point in the history
This is to reflect new document change that 64-bit mask is support by
AVX10 256-bit targets.

Latest documents can be found in:
https://cdrdv2.intel.com/v1/dl/getContent/784267
https://cdrdv2.intel.com/v1/dl/getContent/784343
  • Loading branch information
phoebewang committed Dec 15, 2023
1 parent 681eacc commit f5e48fe
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 99 deletions.
30 changes: 15 additions & 15 deletions clang/include/clang/Basic/BuiltinsX86.def
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8Oiv*IiIi", "nV:512:", "avx512p
TARGET_BUILTIN(__builtin_ia32_knotqi, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_knotsi, "UiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_knotdi, "UOiUOi", "nc", "avx512bw")

TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
Expand Down Expand Up @@ -1349,7 +1349,7 @@ TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2OiV2OiV2OiV2Oi", "ncV:128:", "a
TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4OiV4OiV4OiV4Oi", "ncV:256:", "avx512ifma,avx512vl|avxifma")
TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kunpckdi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64cC*V64cUOi", "nV:512:", "avx512bw,evex512")
Expand Down Expand Up @@ -1665,56 +1665,56 @@ TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq
TARGET_BUILTIN(__builtin_ia32_kaddqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddhi, "UsUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kaddsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kadddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kanddi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandnqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kandnsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kandndi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_korqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_korsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kortestchi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestzhi, "iUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kortestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kortestcdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kortestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzqi, "iUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestchi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestzhi, "iUsUs", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_ktestcsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzsi, "iUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_ktestcdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_ktestzdi, "iUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxnorsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kxnordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxorqi, "UcUcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kxorsi, "UiUiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kxordi, "UOiUOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftliqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftlihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftlisi, "UiUiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kshiftlidi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftriqi, "UcUcIUi", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kshiftrihi, "UsUsIUi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kshiftrisi, "UiUiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kshiftridi, "UOiUOiIUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kmovb, "UcUc", "nc", "avx512dq")
TARGET_BUILTIN(__builtin_ia32_kmovw, "UsUs", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_kmovd, "UiUi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_kmovq, "UOiUOi", "nc", "avx512bw")
TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw,evex512")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
Expand Down
79 changes: 33 additions & 46 deletions clang/lib/Headers/avx512bwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@ typedef unsigned int __mmask32;
typedef unsigned long long __mmask64;

/* Define the default attributes for the functions in this file. */
#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS64 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,evex512")))
#define __DEFAULT_FN_ATTRS512 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,evex512"), __min_vector_width__(512)))
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512bw,no-evex512")))
Expand All @@ -30,9 +31,7 @@ _knot_mask32(__mmask32 __M)
return __builtin_ia32_knotsi(__M);
}

static __inline __mmask64 __DEFAULT_FN_ATTRS64
_knot_mask64(__mmask64 __M)
{
static __inline __mmask64 __DEFAULT_FN_ATTRS _knot_mask64(__mmask64 __M) {
return __builtin_ia32_knotdi(__M);
}

Expand All @@ -42,9 +41,8 @@ _kand_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kand_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kand_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kanddi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -54,9 +52,8 @@ _kandn_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kandnsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kandn_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kandn_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kandndi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -66,9 +63,8 @@ _kor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_korsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kordi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -78,9 +74,8 @@ _kxnor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxnorsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kxnor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxnor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxnordi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -90,9 +85,8 @@ _kxor_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kxorsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kxor_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kxor_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kxordi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -114,19 +108,17 @@ _kortest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_kortestzsi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_kortest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_kortestcdi(__A, __B);
return (unsigned char)__builtin_ia32_kortestzdi(__A, __B);
Expand All @@ -150,19 +142,17 @@ _ktest_mask32_u8(__mmask32 __A, __mmask32 __B, unsigned char *__C) {
return (unsigned char)__builtin_ia32_ktestzsi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B)
{
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestc_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B)
{
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktestz_mask64_u8(__mmask64 __A, __mmask64 __B) {
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
}

static __inline__ unsigned char __DEFAULT_FN_ATTRS64
static __inline__ unsigned char __DEFAULT_FN_ATTRS
_ktest_mask64_u8(__mmask64 __A, __mmask64 __B, unsigned char *__C) {
*__C = (unsigned char)__builtin_ia32_ktestcdi(__A, __B);
return (unsigned char)__builtin_ia32_ktestzdi(__A, __B);
Expand All @@ -174,9 +164,8 @@ _kadd_mask32(__mmask32 __A, __mmask32 __B)
return (__mmask32)__builtin_ia32_kaddsi((__mmask32)__A, (__mmask32)__B);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_kadd_mask64(__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _kadd_mask64(__mmask64 __A,
__mmask64 __B) {
return (__mmask64)__builtin_ia32_kadddi((__mmask64)__A, (__mmask64)__B);
}

Expand All @@ -197,7 +186,7 @@ _cvtmask32_u32(__mmask32 __A) {
return (unsigned int)__builtin_ia32_kmovd((__mmask32)__A);
}

static __inline__ unsigned long long __DEFAULT_FN_ATTRS64
static __inline__ unsigned long long __DEFAULT_FN_ATTRS
_cvtmask64_u64(__mmask64 __A) {
return (unsigned long long)__builtin_ia32_kmovq((__mmask64)__A);
}
Expand All @@ -207,7 +196,7 @@ _cvtu32_mask32(unsigned int __A) {
return (__mmask32)__builtin_ia32_kmovd((__mmask32)__A);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
static __inline__ __mmask64 __DEFAULT_FN_ATTRS
_cvtu64_mask64(unsigned long long __A) {
return (__mmask64)__builtin_ia32_kmovq((__mmask64)__A);
}
Expand All @@ -217,8 +206,7 @@ _load_mask32(__mmask32 *__A) {
return (__mmask32)__builtin_ia32_kmovd(*(__mmask32 *)__A);
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_load_mask64(__mmask64 *__A) {
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _load_mask64(__mmask64 *__A) {
return (__mmask64)__builtin_ia32_kmovq(*(__mmask64 *)__A);
}

Expand All @@ -227,8 +215,8 @@ _store_mask32(__mmask32 *__A, __mmask32 __B) {
*(__mmask32 *)__A = __builtin_ia32_kmovd((__mmask32)__B);
}

static __inline__ void __DEFAULT_FN_ATTRS64
_store_mask64(__mmask64 *__A, __mmask64 __B) {
static __inline__ void __DEFAULT_FN_ATTRS _store_mask64(__mmask64 *__A,
__mmask64 __B) {
*(__mmask64 *)__A = __builtin_ia32_kmovq((__mmask64)__B);
}

Expand Down Expand Up @@ -1717,9 +1705,8 @@ _mm512_maskz_set1_epi8 (__mmask64 __M, char __A)
(__v64qi) _mm512_setzero_si512());
}

static __inline__ __mmask64 __DEFAULT_FN_ATTRS64
_mm512_kunpackd (__mmask64 __A, __mmask64 __B)
{
static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
__mmask64 __B) {
return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
(__mmask64) __B);
}
Expand Down
46 changes: 14 additions & 32 deletions clang/test/CodeGen/X86/avx512-error.c
Original file line number Diff line number Diff line change
@@ -1,21 +1,24 @@
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=1
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex -DFEATURE_TEST=2
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify -DFEATURE_TEST=3
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -target-feature -evex512 -emit-llvm -o /dev/null -verify=noevex
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx512bw -emit-llvm -o /dev/null -verify
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-256 -emit-llvm -o /dev/null -verify=noevex
// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +avx10.1-512 -emit-llvm -o /dev/null -verify

#include <immintrin.h>

#if FEATURE_TEST & 3
// expected-no-diagnostics
#endif
// No error emitted whether we have "evex512" feature or not.
__attribute__((target("avx512bw,no-evex512")))
__mmask64 k64_verify_1(__mmask64 a) {
return _knot_mask64(a); // expected-no-diagnostics
}

__mmask64 k64_verify_2(__mmask64 a) {
return _knot_mask64(a); // expected-no-diagnostic
}

#if FEATURE_TEST & 1
__attribute__((target("avx512bw,evex512")))
__m512d zmm_verify_ok(__m512d a) {
// No error emitted if we have "evex512" feature.
return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION);
return __builtin_ia32_sqrtpd512(a, _MM_FROUND_CUR_DIRECTION); // expected-no-diagnostic
}

__m512d zmm_error(__m512d a) {
Expand All @@ -27,24 +30,3 @@ __m512d zmm_error(__m512d a) {
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
#endif
#endif

#if FEATURE_TEST & 2
__attribute__((target("avx512bw,evex512")))
__mmask64 k64_verify_ok(__mmask64 a) {
// No error emitted if we have "evex512" feature.
return _knot_mask64(a);
}
#if defined(__AVX10_1__) && !defined(__AVX10_1_512__)
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
// noevex-warning@*:* {{invalid feature combination: +avx512bw +avx10.1-256; will be promoted to avx10.1-512}}
#endif

__mmask64 test_knot_mask64(__mmask64 a) {
return _knot_mask64(a); // noevex-error {{always_inline function '_knot_mask64' requires target feature 'evex512', but would be inlined into function 'test_knot_mask64' that is compiled without support for 'evex512'}}
}
#endif
6 changes: 0 additions & 6 deletions llvm/lib/Target/X86/X86RegisterInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1077,12 +1077,6 @@ bool X86RegisterInfo::getRegAllocationHints(Register VirtReg,
VirtReg, Order, Hints, MF, VRM, Matrix);

unsigned ID = RC.getID();
const X86Subtarget &Subtarget = MF.getSubtarget<X86Subtarget>();
if ((ID == X86::VK64RegClassID || ID == X86::VK64WMRegClassID) &&
Subtarget.hasAVX512() && !Subtarget.hasEVEX512())
report_fatal_error(
"64-bit mask registers are not supported without EVEX512");

if (ID != X86::TILERegClassID)
return BaseImplRetVal;

Expand Down

0 comments on commit f5e48fe

Please sign in to comment.