-
Notifications
You must be signed in to change notification settings - Fork 15k
[Headers][X86] Allow AVX512 masked arithmetic intrinsics to be used in constexpr #162816
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
[Headers][X86] Allow AVX512 masked arithmetic intrinsics to be used in constexpr #162816
Conversation
|
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: woruyu (woruyu) ChangesSummaryThis PR resolves #160559 Patch is 43.48 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162816.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index 217589d7add1d..e7e0a59022206 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -4104,7 +4104,7 @@ let Features = "avx512bf16", Attributes = [NoThrow, Const, RequiredVectorWidth<1
def selectsbf_128 : X86Builtin<"_Vector<8, __bf16>(unsigned char, _Vector<8, __bf16>, _Vector<8, __bf16>)">;
}
-let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def selectss_128 : X86Builtin<"_Vector<4, float>(unsigned char, _Vector<4, float>, _Vector<4, float>)">;
def selectsd_128 : X86Builtin<"_Vector<2, double>(unsigned char, _Vector<2, double>, _Vector<2, double>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 922d67940e22f..30d6c00494506 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3598,6 +3598,26 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128:
+ case clang::X86::BI__builtin_ia32_selectsd_128: {
+ const unsigned N = Call->getArg(1)->getType()->getAs<VectorType>()->getNumElements();
+
+ const Pointer &W = S.Stk.pop<Pointer>();
+ const Pointer &A = S.Stk.pop<Pointer>();
+ APSInt U = popToAPSInt(S, Call->getArg(0));
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+
+ const bool TakeA0 = U.getZExtValue() & 1ULL;
+
+ for (unsigned i = 0; i < N; ++i)
+ Dst.elem<Floating>(i) = W.elem<Floating>(i);
+ if (TakeA0)
+ Dst.elem<Floating>(0) = A.elem<Floating>(0);
+
+ Dst.initializeAllElements();
+ return true;
+ }
+
case clang::X86::BI__builtin_ia32_vprotbi:
case clang::X86::BI__builtin_ia32_vprotdi:
case clang::X86::BI__builtin_ia32_vprotqi:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 35a866ea5010f..aa1d7523c4f8d 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11705,6 +11705,23 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), SourceLen), E);
};
+ auto EvalSelectScalar = [&](unsigned Len) -> std::optional<APValue> {
+ APSInt Mask;
+ APValue AVal, WVal;
+ if (!EvaluateInteger(E->getArg(0), Mask, Info) ||
+ !EvaluateAsRValue(Info, E->getArg(1), AVal) ||
+ !EvaluateAsRValue(Info, E->getArg(2), WVal))
+ return std::nullopt;
+
+ const bool TakeA0 = (Mask.getZExtValue() & 1u) != 0;
+ SmallVector<APValue, 4> Res;
+ Res.reserve(Len);
+ Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0));
+ for (unsigned i = 1; i < Len; ++i)
+ Res.push_back(WVal.getVectorElt(i));
+ return APValue(Res.data(), Res.size());
+ };
+
switch (E->getBuiltinCallee()) {
default:
return false;
@@ -11933,6 +11950,16 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return APInt((Src).trunc(DstBits));
return APInt::getAllOnes(DstBits);
});
+ case clang::X86::BI__builtin_ia32_selectss_128: {
+ if (auto V = EvalSelectScalar(4))
+ return Success(*V, E);
+ return false;
+ }
+ case clang::X86::BI__builtin_ia32_selectsd_128: {
+ if (auto V = EvalSelectScalar(2))
+ return Success(*V, E);
+ return false;
+ }
case clang::X86::BI__builtin_ia32_pmuldq128:
case clang::X86::BI__builtin_ia32_pmuldq256:
case clang::X86::BI__builtin_ia32_pmuldq512:
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 80e58425cdd71..4601f74c130cd 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -1377,7 +1377,7 @@ _mm512_mul_epi32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1385,7 +1385,7 @@ _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)__W);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1398,7 +1398,7 @@ _mm512_mul_epu32(__m512i __X, __m512i __Y) {
return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1406,7 +1406,7 @@ _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
(__v8di)__W);
}
-static __inline __m512i __DEFAULT_FN_ATTRS512
+static __inline __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
{
return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1842,13 +1842,13 @@ _mm512_maskz_abs_epi32(__mmask16 __U, __m512i __A) {
(__v16si)_mm512_setzero_si512());
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_add_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
@@ -1872,13 +1872,13 @@ _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_add_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
@@ -1901,28 +1901,28 @@ _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_add_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_add_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_add_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_add_ps(__A, __B),
@@ -1957,13 +1957,13 @@ _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_add_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_sub_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
@@ -1986,13 +1986,13 @@ _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_sub_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
@@ -2016,28 +2016,28 @@ _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_sub_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_sub_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_sub_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_sub_ps(__A, __B),
@@ -2072,13 +2072,13 @@ _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_mul_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
@@ -2101,13 +2101,13 @@ _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_mul_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
@@ -2131,28 +2131,28 @@ _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
(__v2df)_mm_setzero_pd(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_mul_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_mul_pd(__A, __B),
(__v8df)_mm512_setzero_pd());
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_mul_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_mul_ps(__A, __B),
@@ -2187,13 +2187,13 @@ _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
(__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
(__v16sf)_mm512_setzero_ps()))
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, __W);
}
-static __inline__ __m128 __DEFAULT_FN_ATTRS128
+static __inline__ __m128 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
__A = _mm_div_ss(__A, __B);
return __builtin_ia32_selectss_128(__U, __A, _mm_setzero_ps());
@@ -2217,13 +2217,13 @@ _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
(__v4sf)_mm_setzero_ps(), \
(__mmask8)(U), (int)(R)))
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, __W);
}
-static __inline__ __m128d __DEFAULT_FN_ATTRS128
+static __inline__ __m128d __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
__A = _mm_div_sd(__A, __B);
return __builtin_ia32_selectsd_128(__U, __A, _mm_setzero_pd());
@@ -2252,14 +2252,14 @@ static __inline __m512d
return (__m512d)((__v8df)__a/(__v8df)__b);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_div_pd(__A, __B),
(__v8df)__W);
}
-static __inline__ __m512d __DEFAULT_FN_ATTRS512
+static __inline__ __m512d __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
(__v8df)_mm512_div_pd(__A, __B),
@@ -2271,14 +2271,14 @@ _mm512_div_ps(__m512 __a, __m512 __b) {
return (__m512)((__v16sf)__a/(__v16sf)__b);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_div_ps(__A, __B),
(__v16sf)__W);
}
-static __inline__ __m512 __DEFAULT_FN_ATTRS512
+static __inline__ __m512 __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
(__v16sf)_mm512_div_ps(__A, __B),
diff --git a/clang/lib/Headers/avx512vlintrin.h b/clang/lib/Headers/avx512vlintrin.h
index 965741f0ff944..e098fc880da5c 100644
--- a/clang/lib/Headers/avx512vlintrin.h
+++ b/clang/lib/Headers/avx512vlintrin.h
@@ -347,7 +347,7 @@ _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -355,7 +355,7 @@ _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)__W);
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -363,7 +363,7 @@ _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
(__v4di)_mm256_setzero_si256());
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -371,7 +371,7 @@ _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)__W);
}
-static __inline__ __m128i __DEFAULT_FN_ATTRS128
+static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
{
return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -379,7 +379,7 @@ _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
(__v2di)_mm_setzero_si128());
}
-static __inline__ __m256i __DEFAULT_FN_ATTRS256
+static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
{
return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -...
[truncated]
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
|
I think arithmetic intrinsics is too much to review and commit in one PR, is it a good idea to spilt them into different smaller PR? |
21edc64 to
88d3b99
Compare
|
|
||
| Dst.initializeAllElements(); | ||
| return true; | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use a function for this.
| APSInt U = popToAPSInt(S, Call->getArg(0)); | ||
| const Pointer &Dst = S.Stk.peek<Pointer>(); | ||
|
|
||
| const bool TakeA0 = U.getZExtValue() & 1ULL; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| const bool TakeA0 = U.getZExtValue() & 1ULL; | |
| bool TakeA0 = U.getZExtValue() & 1ULL; |
|
|
||
| case clang::X86::BI__builtin_ia32_selectss_128: | ||
| case clang::X86::BI__builtin_ia32_selectsd_128: { | ||
| const unsigned N = |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| const unsigned N = | |
| unsigned N = |
|
|
||
| const bool TakeA0 = U.getZExtValue() & 1ULL; | ||
|
|
||
| for (unsigned i = 0; i < N; ++i) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
| for (unsigned i = 0; i < N; ++i) | |
| for (unsigned I = 0; I != N; ++I) |
Yes this is always the better approach, and should allow for earlier feedback:
|
| Res.push_back(TakeA0 ? AVal.getVectorElt(0) : WVal.getVectorElt(0)); | ||
| for (unsigned i = 1; i < Len; ++i) | ||
| Res.push_back(WVal.getVectorElt(i)); | ||
| return APValue(Res.data(), Res.size()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
why not just return Success / false?
Summary
This PR resolves #160559