[Headers][X86] Allow AVX512 masked arithmetic pd/ps/epi/epu intrinsics to be used in constexpr #168496
Conversation
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang
Author: woruyu (woruyu)
Changes
Summary
This PR resolves #160559 by allowing the remaining pd/ps/epi/epu AVX512 masked arithmetic intrinsics to be used in constexpr.
Patch is 58.08 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/168496.diff
4 Files Affected:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 53b18df764370..aabe05767e416 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1369,7 +1369,7 @@ _mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1377,7 +1377,7 @@ _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)__W);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1390,7 +1390,7 @@ _mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1398,7 +1398,7 @@ _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)__W);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1879,28 +1879,28 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_add_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_add_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_add_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_add_ps(__A, __B),
@@ -1994,28 +1994,28 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_sub_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_sub_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_sub_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_sub_ps(__A, __B),
@@ -2109,28 +2109,28 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_mul_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_mul_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_mul_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_mul_ps(__A, __B),
@@ -2230,14 +2230,14 @@ static __inline __m512d
return (__m512d)((__v8df)__a/(__v8df)__b);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_div_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_div_pd(__A, __B),
@@ -2249,14 +2249,14 @@ _mm512_div_ps(__m512 __a, __m512 __b) {
return (__m512)((__v16sf)__a/(__v16sf)__b);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_div_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_div_ps(__A, __B),
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 1e6e42df6b5fb..5d0d0f6361a0a 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -347,7 +347,7 @@ _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -355,7 +355,7 @@ _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -363,7 +363,7 @@ _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -371,7 +371,7 @@ _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -379,7 +379,7 @@ _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -387,7 +387,7 @@ _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -395,7 +395,7 @@ _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -403,7 +403,7 @@ _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -1426,56 +1426,56 @@ _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
(__v8sf) __C);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_add_pd(__A, __B),
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_add_pd(__A, __B),
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_add_pd(__A, __B),
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_add_pd(__A, __B),
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_add_ps(__A, __B),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_add_ps(__A, __B),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_add_ps(__A, __B),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_add_ps(__A, __B),
@@ -2202,56 +2202,56 @@ _mm256_maskz_cvtepu32_ps(__mmask8 __U, __m256i __A) {
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_div_pd(__A, __B),
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_div_pd(__A, __B),
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_div_pd(__A, __B),
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_div_pd(__A, __B),
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_div_ps(__A, __B),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_div_ps(__A, __B),
(__v4sf)_mm_setzero_ps());
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_div_ps(__A, __B),
(__v8sf)__W);
}
-static __inline__ __m256 __DEFAULT_FN_ATTRS256
+static __inline__ __m256 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
(__v8sf)_mm256_div_ps(__A, __B),
@@ -2717,56 +2717,56 @@ _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
(__v8sf)_mm256_setzero_ps());
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_mul_pd(__A, __B),
(__v2df)__W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
(__v2df)_mm_mul_pd(__A, __B),
(__v2df)_mm_setzero_pd());
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_mul_pd(__A, __B),
(__v4df)__W);
}
-static __inline__ __m256d __DEFAULT_FN_ATTRS256
+static __inline__ __m256d __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
(__v4df)_mm256_mul_pd(__A, __B),
(__v4df)_mm256_setzero_pd());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
(__v4sf)_mm_mul_ps(__A, __B),
(__v4sf)__W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_ps(__mma...
[truncated]
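For reference, here is a minimal sketch (not part of the patch) of what the change enables: once the masked wrappers carry the _CONSTEXPR attribute, they can be folded in constant expressions. The snippet assumes a Clang build that includes this patch, compiled as C++ with -mavx512f; the variable names, mask value, and constants are purely illustrative, and the vector-literal initialization and element access rely on Clang's vector extensions.

#include <immintrin.h>

// Illustrative compile-time inputs (values chosen only for the check below).
constexpr __m512d kA = {1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0};
constexpr __m512d kB = {8.0, 7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0};

// maskz form: lanes whose mask bit is set hold A+B, masked-off lanes are zeroed.
constexpr __m512d kSum = _mm512_maskz_add_pd(/*mask*/ 0x0F, kA, kB);

static_assert(kSum[0] == 9.0, "selected lanes come from the add result");
static_assert(kSum[7] == 0.0, "masked-off lanes are zeroed");

The diff itself only swaps __DEFAULT_FN_ATTRS512 for __DEFAULT_FN_ATTRS512_CONSTEXPR (and the 128/256-bit equivalents); the underlying select builtins presumably already support constant evaluation, so the masked forms simply inherit the constexpr behavior of the unmasked operations they wrap.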
✅ With the latest revision this PR passed the C/C++ code formatter.
🐧 Linux x64 Test Results
RKSimon left a comment:
LGTM - cheers