From 9e2b1aab5e48c69b7ad7c0e5c10d8809c218b71e Mon Sep 17 00:00:00 2001 From: NagaChaitanya Vellanki Date: Sun, 12 Oct 2025 02:19:26 -0700 Subject: [PATCH] [Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr Resolves #160524 --- clang/include/clang/Basic/BuiltinsX86.td | 14 +- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 35 ++++- clang/lib/AST/ExprConstant.cpp | 31 ++++ clang/lib/Headers/avx512cdintrin.h | 81 +++++----- clang/lib/Headers/avx512vlcdintrin.h | 149 ++++++++----------- clang/test/CodeGen/X86/avx512cd-builtins.c | 28 +++- clang/test/CodeGen/X86/avx512vlcd-builtins.c | 55 +++++-- 7 files changed, 231 insertions(+), 162 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 279c0c7935e36..62c70fba946be 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1359,23 +1359,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">; } -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">; -} - -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">; -} - -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">; } -let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">; def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">; } -let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">; def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b69f3607e82d6..a0d2c764121d9 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3101,6 +3101,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + QualType Arg0Type = Call->getArg(0)->getType(); + const auto *VecT = Arg0Type->castAs(); + PrimType ElemT = *S.getContext().classify(VecT->getElementType()); + unsigned NumElems = VecT->getNumElements(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + const Pointer &Src = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + APSInt ElemI = Src.elem(I).toAPSInt(); + APInt ConflictMask(ElemI.getBitWidth(), 0); + for (unsigned J = 0; J != I; ++J) { + APSInt ElemJ = Src.elem(J).toAPSInt(); + ConflictMask.setBitVal(J, ElemI == ElemJ); + } + Dst.elem(I) = static_cast(APSInt(ConflictMask, DestUnsigned)); + }); + } + Dst.initializeAllElements(); + return true; +} + bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, uint32_t BuiltinID) { if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID)) @@ -3891,7 +3918,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) { return llvm::APIntOps::fshr(Hi, Lo, Amt); }); - + case X86::BI__builtin_ia32_vpconflictsi_128: + case X86::BI__builtin_ia32_vpconflictsi_256: + case X86::BI__builtin_ia32_vpconflictsi_512: + case X86::BI__builtin_ia32_vpconflictdi_128: + case X86::BI__builtin_ia32_vpconflictdi_256: + case X86::BI__builtin_ia32_vpconflictdi_512: + return interp__builtin_ia32_vpconflict(S, OpPC, Call); case clang::X86::BI__builtin_ia32_blendpd: case clang::X86::BI__builtin_ia32_blendpd256: case clang::X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a07eb2254e13f..16141b27f4ce8 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12179,6 +12179,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_vpconflictsi_128: + case X86::BI__builtin_ia32_vpconflictsi_256: + case X86::BI__builtin_ia32_vpconflictsi_512: + case X86::BI__builtin_ia32_vpconflictdi_128: + case X86::BI__builtin_ia32_vpconflictdi_256: + case X86::BI__builtin_ia32_vpconflictdi_512: { + APValue Source; + + if (!EvaluateAsRValue(Info, E->getArg(0), Source)) + return false; + + unsigned SourceLen = Source.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + + const auto *VecT = E->getType()->castAs(); + bool DestUnsigned = + VecT->getElementType()->isUnsignedIntegerOrEnumerationType(); + + for (unsigned I = 0; I != SourceLen; ++I) { + const APValue &EltI = Source.getVectorElt(I); + + APInt ConflictMask(EltI.getInt().getBitWidth(), 0); + for (unsigned J = 0; J != I; ++J) { + const APValue &EltJ = Source.getVectorElt(J); + ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt()); + } + ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned))); + } + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendpd256: case X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h index b16144044d928..fb6dcb6dd8ad1 100644 --- a/clang/lib/Headers/avx512cdintrin.h +++ b/clang/lib/Headers/avx512cdintrin.h @@ -15,111 +15,98 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512cd"), __min_vector_width__(512))) +#else #define __DEFAULT_FN_ATTRS \ __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \ __min_vector_width__(512))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr -#else -#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS #endif static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi64 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A); +_mm512_conflict_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_conflict_epi64(__A), - (__v8di)__W); +_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A) -{ +_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), - (__v8di)_mm512_setzero_si512 ()); + (__v8di)_mm512_setzero_si512()); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_conflict_epi32 (__m512i __A) -{ - return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A); +_mm512_conflict_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)__W); +_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W); } static __inline__ __m512i __DEFAULT_FN_ATTRS -_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A) -{ - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_conflict_epi32(__A), - (__v16si)_mm512_setzero_si512()); +_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), + (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi32(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) { return (__m512i)__builtin_elementwise_clzg((__v16si)__A, (__v16si)_mm512_set1_epi32(32)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, - (__v16si)_mm512_lzcnt_epi32(__A), - (__v16si)__W); + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR -_mm512_lzcnt_epi64(__m512i __A) { +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) { return (__m512i)__builtin_elementwise_clzg( (__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64)); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { - return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, - (__v8di)_mm512_lzcnt_epi64(__A), - (__v8di)__W); + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmb_epi64(__mmask8 __A) { - return (__m512i) _mm512_set1_epi64((long long) __A); + return (__m512i)_mm512_set1_epi64((long long)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_broadcastmw_epi32(__mmask16 __A) { return (__m512i)_mm512_set1_epi32((int)__A); } #undef __DEFAULT_FN_ATTRS -#undef __DEFAULT_FN_ATTRS_CONSTEXPR #endif diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h index cb98e7c514bde..7719680faf93a 100644 --- a/clang/lib/Headers/avx512vlcdintrin.h +++ b/clang/lib/Headers/avx512vlcdintrin.h @@ -14,203 +14,182 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ +#if defined(__cplusplus) && (__cplusplus >= 201103L) +#define __DEFAULT_FN_ATTRS128 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + constexpr __attribute__((__always_inline__, __nodebug__, \ + __target__("avx512vl,avx512cd"), \ + __min_vector_width__(256))) +#else #define __DEFAULT_FN_ATTRS128 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(128))) #define __DEFAULT_FN_ATTRS256 \ __attribute__((__always_inline__, __nodebug__, \ __target__("avx512vl,avx512cd"), __min_vector_width__(256))) - -#if defined(__cplusplus) && (__cplusplus >= 201103L) -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr -#else -#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 -#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 #endif -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64(__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64(__mmask8 __A) { - return (__m256i) _mm256_set1_epi64x((long long)__A); + return (__m256i)_mm256_set1_epi64x((long long)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32(__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32(__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi64 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A); +_mm_conflict_epi64(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_conflict_epi64(__A), - (__v2di)__W); +_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi64 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A); +_mm256_conflict_epi64(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_conflict_epi64(__A), - (__v4di)__W); +_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)_mm256_setzero_si256()); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_conflict_epi32 (__m128i __A) -{ - return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A); +_mm_conflict_epi32(__m128i __A) { + return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) -{ - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_conflict_epi32(__A), - (__v4si)__W); +_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W); } static __inline__ __m128i __DEFAULT_FN_ATTRS128 -_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) -{ +_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)_mm_setzero_si128()); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_conflict_epi32 (__m256i __A) -{ - return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A); +_mm256_conflict_epi32(__m256i __A) { + return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) -{ - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_conflict_epi32(__A), - (__v8si)__W); +_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W); } static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) -{ +_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) { return (__m128i)__builtin_elementwise_clzg((__v4si)__A, (__v4si)_mm_set1_epi32(32)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)__W); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, - (__v4si)_mm_lzcnt_epi32(__A), - (__v4si)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectd_128( + (__mmask8)__U, (__v4si)_mm_lzcnt_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32(__m256i __A) { return (__m256i)__builtin_elementwise_clzg((__v8si)__A, (__v8si)_mm256_set1_epi32(32)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, - (__v8si)_mm256_lzcnt_epi32(__A), - (__v8si)__W); + return (__m256i)__builtin_ia32_selectd_256( + (__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_lzcnt_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR -_mm_lzcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64(__m128i __A) { return (__m128i)__builtin_elementwise_clzg( (__v2di)__A, (__v2di)_mm_set1_epi64x((long long)64)); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)__W); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64(__mmask8 __U, __m128i __A) { - return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, - (__v2di)_mm_lzcnt_epi64(__A), - (__v2di)_mm_setzero_si128()); + return (__m128i)__builtin_ia32_selectq_128( + (__mmask8)__U, (__v2di)_mm_lzcnt_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64(__m256i __A) { return (__m256i)__builtin_elementwise_clzg( (__v4di)__A, (__v4di)_mm256_set1_epi64x((long long)64)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { - return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, - (__v4di)_mm256_lzcnt_epi64(__A), - (__v4di)__W); + return (__m256i)__builtin_ia32_selectq_256( + (__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_lzcnt_epi64(__A), @@ -219,7 +198,5 @@ _mm256_maskz_lzcnt_epi64(__mmask8 __U, __m256i __A) { #undef __DEFAULT_FN_ATTRS128 #undef __DEFAULT_FN_ATTRS256 -#undef __DEFAULT_FN_ATTRS128_CONSTEXPR -#undef __DEFAULT_FN_ATTRS256_CONSTEXPR #endif /* __AVX512VLCDINTRIN_H */ diff --git a/clang/test/CodeGen/X86/avx512cd-builtins.c b/clang/test/CodeGen/X86/avx512cd-builtins.c index 2890889348c87..80a20b1244532 100644 --- a/clang/test/CodeGen/X86/avx512cd-builtins.c +++ b/clang/test/CodeGen/X86/avx512cd-builtins.c @@ -14,37 +14,53 @@ __m512i test_mm512_conflict_epi64(__m512i __A) { // CHECK-LABEL: test_mm512_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) - return _mm512_conflict_epi64(__A); + return _mm512_conflict_epi64(__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){5, 5, 5, 5, 5, 5, 5, 5}), 0, 1, 3, 7, 15, 31, 63, 127)); +TEST_CONSTEXPR(match_v8di(_mm512_conflict_epi64((__m512i)(__v8di){1, 2, 3, 4, 5, 6, 7, 8}), 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_mask_conflict_epi64(__W,__U,__A); + return _mm512_mask_conflict_epi64(__W,__U,__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_mask_conflict_epi64((__m512i)(__v8di){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 0x55, (__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF)); __m512i test_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_conflict_epi64 // CHECK: call {{.*}}<8 x i64> @llvm.x86.avx512.conflict.q.512(<8 x i64> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} - return _mm512_maskz_conflict_epi64(__U,__A); + return _mm512_maskz_conflict_epi64(__U,__A); } + +TEST_CONSTEXPR(match_v8di(_mm512_maskz_conflict_epi64(0x55, (__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); __m512i test_mm512_conflict_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) - return _mm512_conflict_epi32(__A); + return _mm512_conflict_epi32(__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0, 1, 0, 2, 0, 5, 0, 0, 0, 256, 0, 512, 0, 1280, 0)); +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}), 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767)); +TEST_CONSTEXPR(match_v16si(_mm512_conflict_epi32((__m512i)(__v16si){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)); __m512i test_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_mask_conflict_epi32(__W,__U,__A); + return _mm512_mask_conflict_epi32(__W,__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_mask_conflict_epi32((__m512i)(__v16si){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, 0x5555, (__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF, 0, 0xFF, 256, 0xFF, 512, 0xFF, 1280, 0xFF)); __m512i test_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_conflict_epi32 // CHECK: call <16 x i32> @llvm.x86.avx512.conflict.d.512(<16 x i32> %{{.*}}) // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} - return _mm512_maskz_conflict_epi32(__U,__A); + return _mm512_maskz_conflict_epi32(__U,__A); } + +TEST_CONSTEXPR(match_v16si(_mm512_maskz_conflict_epi32(0x5555, (__m512i)(__v16si){1, 2, 1, 3, 2, 4, 1, 5, 6, 7, 6, 8, 7, 9, 6, 10}), 0, 0, 1, 0, 2, 0, 5, 0, 0, 0, 256, 0, 512, 0, 1280, 0)); __m512i test_mm512_lzcnt_epi32(__m512i __A) { // CHECK-LABEL: test_mm512_lzcnt_epi32 // CHECK: call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %{{.*}}, i1 true) diff --git a/clang/test/CodeGen/X86/avx512vlcd-builtins.c b/clang/test/CodeGen/X86/avx512vlcd-builtins.c index 56c04a08c6322..29fc6fd2e7fc8 100644 --- a/clang/test/CodeGen/X86/avx512vlcd-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlcd-builtins.c @@ -66,83 +66,114 @@ TEST_CONSTEXPR(match_v8si(_mm256_broadcastmw_epi32((__mmask16)(0xcafe)), 0xcafe, __m128i test_mm_conflict_epi64(__m128i __A) { // CHECK-LABEL: test_mm_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) - return _mm_conflict_epi64(__A); + return _mm_conflict_epi64(__A); } +TEST_CONSTEXPR(match_v2di(_mm_conflict_epi64((__m128i)(__v2di){1, 2}), 0, 0)); +TEST_CONSTEXPR(match_v2di(_mm_conflict_epi64((__m128i)(__v2di){5, 5}), 0, 1)); + __m128i test_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_conflict_epi64(__W, __U, __A); + return _mm_mask_conflict_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_mask_conflict_epi64((__m128i)(__v2di){0xFF, 0xFF}, 0x2, (__m128i)(__v2di){5, 5}), 0xFF, 1)); + __m128i test_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_conflict_epi64 // CHECK: call {{.*}}<2 x i64> @llvm.x86.avx512.conflict.q.128(<2 x i64> %{{.*}}) // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_maskz_conflict_epi64(__U, __A); + return _mm_maskz_conflict_epi64(__U, __A); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_conflict_epi64(0x2, (__m128i)(__v2di){5, 5}), 0, 1)); + __m256i test_mm256_conflict_epi64(__m256i __A) { // CHECK-LABEL: test_mm256_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) - return _mm256_conflict_epi64(__A); + return _mm256_conflict_epi64(__A); } +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){1, 2, 1, 3}), 0, 0, 1, 0)); +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){7, 7, 7, 7}), 0, 1, 3, 7)); +TEST_CONSTEXPR(match_v4di(_mm256_conflict_epi64((__m256i)(__v4di){1, 2, 3, 4}), 0, 0, 0, 0)); + __m256i test_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_conflict_epi64(__W, __U, __A); + return _mm256_mask_conflict_epi64(__W, __U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_conflict_epi64((__m256i)(__v4di){0xFF, 0xFF, 0xFF, 0xFF}, 0x5, (__m256i)(__v4di){1, 2, 1, 3}), 0, 0xFF, 1, 0xFF)); + __m256i test_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_conflict_epi64 // CHECK: call {{.*}}<4 x i64> @llvm.x86.avx512.conflict.q.256(<4 x i64> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_maskz_conflict_epi64(__U, __A); + return _mm256_maskz_conflict_epi64(__U, __A); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_conflict_epi64(0x5, (__m256i)(__v4di){1, 2, 1, 3}), 0, 0, 1, 0)); + __m128i test_mm_conflict_epi32(__m128i __A) { // CHECK-LABEL: test_mm_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %{{.*}}) - return _mm_conflict_epi32(__A); + return _mm_conflict_epi32(__A); } +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){1, 2, 1, 3}), 0, 0, 1, 0)); +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){3, 3, 3, 3}), 0, 1, 3, 7)); +TEST_CONSTEXPR(match_v4si(_mm_conflict_epi32((__m128i)(__v4si){1, 2, 3, 4}), 0, 0, 0, 0)); + __m128i test_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_mask_conflict_epi32(__W, __U, __A); + return _mm_mask_conflict_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_mask_conflict_epi32((__m128i)(__v4si){0xFF, 0xFF, 0xFF, 0xFF}, 0x5, (__m128i)(__v4si){1, 2, 1, 3}), 0, 0xFF, 1, 0xFF)); + __m128i test_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_conflict_epi32 // CHECK: call <4 x i32> @llvm.x86.avx512.conflict.d.128(<4 x i32> %{{.*}}) // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_maskz_conflict_epi32(__U, __A); + return _mm_maskz_conflict_epi32(__U, __A); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_conflict_epi32(0x5, (__m128i)(__v4si){1, 2, 1, 3}), 0, 0, 1, 0)); + __m256i test_mm256_conflict_epi32(__m256i __A) { // CHECK-LABEL: test_mm256_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) - return _mm256_conflict_epi32(__A); + return _mm256_conflict_epi32(__A); } +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){4, 4, 4, 4, 4, 4, 4, 4}), 0, 1, 3, 7, 15, 31, 63, 127)); +TEST_CONSTEXPR(match_v8si(_mm256_conflict_epi32((__m256i)(__v8si){1, 2, 3, 4, 5, 6, 7, 8}), 0, 0, 0, 0, 0, 0, 0, 0)); + __m256i test_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_mask_conflict_epi32(__W, __U, __A); + return _mm256_mask_conflict_epi32(__W, __U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_conflict_epi32((__m256i)(__v8si){0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF}, /*0101 0101=*/0x55, (__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF)); + __m256i test_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_conflict_epi32 // CHECK: call <8 x i32> @llvm.x86.avx512.conflict.d.256(<8 x i32> %{{.*}}) // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_maskz_conflict_epi32(__U, __A); + return _mm256_maskz_conflict_epi32(__U, __A); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_conflict_epi32(0x55, (__m256i)(__v8si){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0, 1, 0, 2, 0, 5, 0)); + __m128i test_mm_lzcnt_epi32(__m128i __A) { // CHECK-LABEL: test_mm_lzcnt_epi32 // CHECK: call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %{{.*}}, i1 true)