-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr #163293
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
dce7abe
to
87f09f3
Compare
✅ With the latest revision this PR passed the C/C++ code formatter. |
3e8b31f
to
255b52a
Compare
255b52a
to
5e0a754
Compare
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: NagaChaitanya Vellanki (chaitanyav). Changes: Resolves #160524. Patch is 34.92 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/163293.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 006a45347ff1a..766910b3e4f4e 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1357,23 +1357,17 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
}
-let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpconflictdi_128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>)">;
-}
-
-let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
-}
-
-let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpconflictsi_128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>)">;
}
-let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpconflictdi_256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>)">;
def vpconflictsi_256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>)">;
}
-let Features = "avx512cd", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512cd", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpconflictdi_512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>)">;
def vpconflictsi_512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 2d3cb6a68d7e2..856eec303cf04 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3073,6 +3073,33 @@ static bool interp__builtin_vec_set(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 1);
+
+ QualType Arg0Type = Call->getArg(0)->getType();
+ const auto *VecT = Arg0Type->castAs<VectorType>();
+ PrimType ElemT = *S.getContext().classify(VecT->getElementType());
+ unsigned NumElems = VecT->getNumElements();
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+ const Pointer &Src = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ for (unsigned I = 0; I != NumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ APSInt ElemI = Src.elem<T>(I).toAPSInt();
+ APInt ConflictMask(ElemI.getBitWidth(), 0);
+ for (unsigned J = 0; J != I; ++J) {
+ APSInt ElemJ = Src.elem<T>(J).toAPSInt();
+ ConflictMask.setBitVal(J, ElemI == ElemJ);
+ }
+ Dst.elem<T>(I) = static_cast<T>(APSInt(ConflictMask, DestUnsigned));
+ });
+ }
+ Dst.initializeAllElements();
+ return true;
+}
+
bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -3863,7 +3890,13 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
[](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
return llvm::APIntOps::fshr(Hi, Lo, Amt);
});
-
+ case X86::BI__builtin_ia32_vpconflictsi_128:
+ case X86::BI__builtin_ia32_vpconflictsi_256:
+ case X86::BI__builtin_ia32_vpconflictsi_512:
+ case X86::BI__builtin_ia32_vpconflictdi_128:
+ case X86::BI__builtin_ia32_vpconflictdi_256:
+ case X86::BI__builtin_ia32_vpconflictdi_512:
+ return interp__builtin_ia32_vpconflict(S, OpPC, Call);
case clang::X86::BI__builtin_ia32_blendpd:
case clang::X86::BI__builtin_ia32_blendpd256:
case clang::X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 51c038274fd36..2e679e62950e5 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12142,6 +12142,37 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpconflictsi_128:
+ case X86::BI__builtin_ia32_vpconflictsi_256:
+ case X86::BI__builtin_ia32_vpconflictsi_512:
+ case X86::BI__builtin_ia32_vpconflictdi_128:
+ case X86::BI__builtin_ia32_vpconflictdi_256:
+ case X86::BI__builtin_ia32_vpconflictdi_512: {
+ APValue Source;
+
+ if (!EvaluateAsRValue(Info, E->getArg(0), Source))
+ return false;
+
+ unsigned SourceLen = Source.getVectorLength();
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ const auto *VecT = E->getType()->castAs<VectorType>();
+ bool DestUnsigned =
+ VecT->getElementType()->isUnsignedIntegerOrEnumerationType();
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ const APValue &EltI = Source.getVectorElt(I);
+
+ APInt ConflictMask(EltI.getInt().getBitWidth(), 0);
+ for (unsigned J = 0; J != I; ++J) {
+ const APValue &EltJ = Source.getVectorElt(J);
+ ConflictMask.setBitVal(J, EltI.getInt() == EltJ.getInt());
+ }
+ ResultElements.push_back(APValue(APSInt(ConflictMask, DestUnsigned)));
+ }
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_blendpd256:
case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/Headers/avx512cdintrin.h b/clang/lib/Headers/avx512cdintrin.h
index 88992983cdd89..fb6dcb6dd8ad1 100644
--- a/clang/lib/Headers/avx512cdintrin.h
+++ b/clang/lib/Headers/avx512cdintrin.h
@@ -15,94 +15,82 @@
#define __AVX512CDINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512cd"), __min_vector_width__(512)))
+#else
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), \
__min_vector_width__(512)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
-#else
-#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#endif
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi64 (__m512i __A)
-{
- return (__m512i) __builtin_ia32_vpconflictdi_512 ((__v8di) __A);
+_mm512_conflict_epi64(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpconflictdi_512((__v8di)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
- (__v8di)_mm512_conflict_epi64(__A),
- (__v8di)__W);
+_mm512_mask_conflict_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_conflict_epi64(__A), (__v8di)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi64 (__mmask8 __U, __m512i __A)
-{
+_mm512_maskz_conflict_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_conflict_epi64(__A),
- (__v8di)_mm512_setzero_si512 ());
+ (__v8di)_mm512_setzero_si512());
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_conflict_epi32 (__m512i __A)
-{
- return (__m512i) __builtin_ia32_vpconflictsi_512 ((__v16si) __A);
+_mm512_conflict_epi32(__m512i __A) {
+ return (__m512i)__builtin_ia32_vpconflictsi_512((__v16si)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_mask_conflict_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_conflict_epi32(__A),
- (__v16si)__W);
+_mm512_mask_conflict_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A), (__v16si)__W);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_maskz_conflict_epi32 (__mmask16 __U, __m512i __A)
-{
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_conflict_epi32(__A),
- (__v16si)_mm512_setzero_si512());
+_mm512_maskz_conflict_epi32(__mmask16 __U, __m512i __A) {
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_conflict_epi32(__A),
+ (__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_lzcnt_epi32(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi32(__m512i __A) {
return (__m512i)__builtin_elementwise_clzg((__v16si)__A,
(__v16si)_mm512_set1_epi32(32));
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) {
- return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
- (__v16si)_mm512_lzcnt_epi32(__A),
- (__v16si)__W);
+ return (__m512i)__builtin_ia32_selectd_512(
+ (__mmask16)__U, (__v16si)_mm512_lzcnt_epi32(__A), (__v16si)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi32(__mmask16 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
(__v16si)_mm512_lzcnt_epi32(__A),
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
-_mm512_lzcnt_epi64(__m512i __A) {
+static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_lzcnt_epi64(__m512i __A) {
return (__m512i)__builtin_elementwise_clzg(
(__v8di)__A, (__v8di)_mm512_set1_epi64((long long)64));
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_mask_lzcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) {
- return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
- (__v8di)_mm512_lzcnt_epi64(__A),
- (__v8di)__W);
+ return (__m512i)__builtin_ia32_selectq_512(
+ (__mmask8)__U, (__v8di)_mm512_lzcnt_epi64(__A), (__v8di)__W);
}
-static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
+static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
(__v8di)_mm512_lzcnt_epi64(__A),
@@ -110,19 +98,15 @@ _mm512_maskz_lzcnt_epi64(__mmask8 __U, __m512i __A) {
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmb_epi64 (__mmask8 __A)
-{
- return (__m512i) _mm512_set1_epi64((long long) __A);
+_mm512_broadcastmb_epi64(__mmask8 __A) {
+ return (__m512i)_mm512_set1_epi64((long long)__A);
}
static __inline__ __m512i __DEFAULT_FN_ATTRS
-_mm512_broadcastmw_epi32 (__mmask16 __A)
-{
- return (__m512i) _mm512_set1_epi32((int) __A);
-
+_mm512_broadcastmw_epi32(__mmask16 __A) {
+ return (__m512i)_mm512_set1_epi32((int)__A);
}
#undef __DEFAULT_FN_ATTRS
-#undef __DEFAULT_FN_ATTRS_CONSTEXPR
#endif
diff --git a/clang/lib/Headers/avx512vlcdintrin.h b/clang/lib/Headers/avx512vlcdintrin.h
index 30c9f9017f0bf..dd2996aa78aa4 100644
--- a/clang/lib/Headers/avx512vlcdintrin.h
+++ b/clang/lib/Headers/avx512vlcdintrin.h
@@ -14,208 +14,182 @@
#define __AVX512VLCDINTRIN_H
/* Define the default attributes for the functions in this file. */
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+#define __DEFAULT_FN_ATTRS128 \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd"), \
+ __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+ constexpr __attribute__((__always_inline__, __nodebug__, \
+ __target__("avx512vl,avx512cd"), \
+ __min_vector_width__(256)))
+#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd"), __min_vector_width__(128)))
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vl,avx512cd"), __min_vector_width__(256)))
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
-#else
-#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
-#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#endif
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmb_epi64 (__mmask8 __A)
-{
- return (__m128i) _mm_set1_epi64x((long long) __A);
+_mm_broadcastmb_epi64(__mmask8 __A) {
+ return (__m128i)_mm_set1_epi64x((long long)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmb_epi64 (__mmask8 __A)
-{
- return (__m256i) _mm256_set1_epi64x((long long)__A);
+_mm256_broadcastmb_epi64(__mmask8 __A) {
+ return (__m256i)_mm256_set1_epi64x((long long)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_broadcastmw_epi32 (__mmask16 __A)
-{
- return (__m128i) _mm_set1_epi32((int)__A);
+_mm_broadcastmw_epi32(__mmask16 __A) {
+ return (__m128i)_mm_set1_epi32((int)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_broadcastmw_epi32 (__mmask16 __A)
-{
- return (__m256i) _mm256_set1_epi32((int)__A);
+_mm256_broadcastmw_epi32(__mmask16 __A) {
+ return (__m256i)_mm256_set1_epi32((int)__A);
}
-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi64 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpconflictdi_128 ((__v2di) __A);
+_mm_conflict_epi64(__m128i __A) {
+ return (__m128i)__builtin_ia32_vpconflictdi_128((__v2di)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
-{
- return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
- (__v2di)_mm_conflict_epi64(__A),
- (__v2di)__W);
+_mm_mask_conflict_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i)__builtin_ia32_selectq_128(
+ (__mmask8)__U, (__v2di)_mm_conflict_epi64(__A), (__v2di)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi64(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
(__v2di)_mm_conflict_epi64(__A),
(__v2di)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi64 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpconflictdi_256 ((__v4di) __A);
+_mm256_conflict_epi64(__m256i __A) {
+ return (__m256i)__builtin_ia32_vpconflictdi_256((__v4di)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
-{
- return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
- (__v4di)_mm256_conflict_epi64(__A),
- (__v4di)__W);
+_mm256_mask_conflict_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i)__builtin_ia32_selectq_256(
+ (__mmask8)__U, (__v4di)_mm256_conflict_epi64(__A), (__v4di)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi64(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
(__v4di)_mm256_conflict_epi64(__A),
(__v4di)_mm256_setzero_si256());
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_conflict_epi32 (__m128i __A)
-{
- return (__m128i) __builtin_ia32_vpconflictsi_128 ((__v4si) __A);
+_mm_conflict_epi32(__m128i __A) {
+ return (__m128i)__builtin_ia32_vpconflictsi_128((__v4si)__A);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
-{
- return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
- (__v4si)_mm_conflict_epi32(__A),
- (__v4si)__W);
+_mm_mask_conflict_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+ return (__m128i)__builtin_ia32_selectd_128(
+ (__mmask8)__U, (__v4si)_mm_conflict_epi32(__A), (__v4si)__W);
}
static __inline__ __m128i __DEFAULT_FN_ATTRS128
-_mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
-{
+_mm_maskz_conflict_epi32(__mmask8 __U, __m128i __A) {
return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
(__v4si)_mm_conflict_epi32(__A),
(__v4si)_mm_setzero_si128());
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_conflict_epi32 (__m256i __A)
-{
- return (__m256i) __builtin_ia32_vpconflictsi_256 ((__v8si) __A);
+_mm256_conflict_epi32(__m256i __A) {
+ return (__m256i)__builtin_ia32_vpconflictsi_256((__v8si)__A);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
-{
- return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
- (__v8si)_mm256_conflict_epi32(__A),
- (__v8si)__W);
+_mm256_mask_conflict_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+ return (__m256i)__builtin_ia32_selectd_256(
+ (__mmask8)__U, (__v8si)_mm256_conflict_epi32(__A), (__v8si)__W);
}
static __inline__ __m256i __DEFAULT_FN_ATTRS256
-_mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
-{
+_mm256_maskz_conflict_epi32(__mmask8 __U, __m256i __A) {
return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
(__v8si)_mm256_conflict_epi32(__A),
(__v8si)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
-_mm_lzcnt_epi32(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32(__m128i __A) {
return (__m128i)__builtin_elementwise_clzg((__v4si)__A,
(__v4si)_mm_set1_epi32(32));
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_mask_lzcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
- return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
- (__v4si)_mm_lzcnt_epi32(__A),
- ...
[truncated]
|
@chaitanyav Make sure you merge against trunk latest after #163475 drops as it will affect your __DEFAULT_FN_ATTRS cleanups |
5e0a754
to
2cfd2c7
Compare
@RKSimon rebased with latest and resolved the conflicts. |
return _mm512_mask_conflict_epi64(__W,__U,__A); | ||
} | ||
|
||
TEST_CONSTEXPR(match_v8di(_mm512_mask_conflict_epi64(_mm512_set1_epi64((long long)0xFF), 0x55, (__m512i)(__v8di){1, 2, 1, 3, 2, 4, 1, 5}), 0, 0xFF, 1, 0xFF, 2, 0xFF, 5, 0xFF)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) don't use _mm512_set1_epi64 - use raw vector initialization (same for other cases below)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Modified those tests to use raw vector instead of _mm512_set1_epi64
[Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - Allow AVX512 conflict intrinsics to be used in constexpr. Resolves llvm#160524
2cfd2c7
to
9e2b1aa
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
LLVM Buildbot has detected a new failure on builder 65 (the builder name was not preserved in this capture). Full details are available at: https://lab.llvm.org/buildbot/#/builders/65/builds/24076 Here is the relevant piece of the build log for reference:
|
Resolves #160524