-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add SSE/AVX VPTEST/VTESTPD/VTESTPS intrinsics to be used in constexpr #160428
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
✅ With the latest revision this PR passed the C/C++ code formatter. |
Clang-format seems to break for me whenever I rebase. It just won't pick up diffs to reformat afterwards. |
Found a better pattern and got rid of the boilerplate. |
I clang-formatted the code, not sure why it's not passing that. |
assert(LHS.getFieldDesc()->isPrimitiveArray() && | ||
RHS.getFieldDesc()->isPrimitiveArray()); | ||
|
||
if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is this exercised by a test case?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't Sema catch these - the X86Builtin defs are very type strict.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kimsh02 drop this?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
couple of minors
assert(LHS.getFieldDesc()->isPrimitiveArray() && | ||
RHS.getFieldDesc()->isPrimitiveArray()); | ||
|
||
if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Won't Sema catch these - the X86Builtin defs are very type strict.
Thanks for another round of feedback, I'll try to incorporate all of the changes soon. |
8c0d3cd
to
ce61f47
Compare
I squashed my commits for a rebase, but haven't applied the feedback yet just fyi. |
@llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-x86 Author: Shawn K (kimsh02) ChangesFix #158653 Add handling for:
Add corresponding test cases for:
Patch is 31.89 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160428.diff 7 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e98bee28c15be..877782c0dfdeb 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -317,10 +317,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
- def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
- def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
- def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
+ "_Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
@@ -328,6 +326,16 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def vec_set_v4si : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int, _Constant int)">;
}
+let Features = "sse4.1",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
+ def ptestz128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+ def ptestnzc128
+ : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
+}
+
let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
@@ -513,8 +521,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
}
-
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
@@ -523,7 +531,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}
-let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx",
+ Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -533,6 +542,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
+}
+
+let Features = "avx",
+ Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..b27441f8b73b5 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2773,6 +2773,53 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_test_op(
+ InterpState &S, CodePtr OpPC, const CallExpr *Call,
+ llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
+ const Pointer &RHS = S.Stk.pop<Pointer>();
+ const Pointer &LHS = S.Stk.pop<Pointer>();
+
+ assert(LHS.getNumElems() == RHS.getNumElems());
+
+ assert(LHS.getFieldDesc()->isPrimitiveArray() &&
+ RHS.getFieldDesc()->isPrimitiveArray());
+
+ if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS),
+ getElemType(RHS)))
+ return false;
+
+ unsigned SourceLen = LHS.getNumElems();
+ const QualType ElemQT = getElemType(LHS);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+ APInt SignMask = APInt::getSignMask(LaneWidth);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+
+ if (ElemQT->isIntegerType()) { // Get value
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
+ ALane = LHS.elem<T>(I).toAPSInt();
+ BLane = RHS.elem<T>(I).toAPSInt();
+ });
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit
+ using T = PrimConv<PT_Float>::T;
+ ALane = LHS.elem<T>(I).getAPFloat().bitcastToAPInt() & SignMask;
+ BLane = RHS.elem<T>(I).getAPFloat().bitcastToAPInt() & SignMask;
+ } else { // Must be integer or floating type
+ return false;
+ }
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ pushInteger(S, Fn(AWide, BWide), Call->getType());
+ return true;
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3579,7 +3626,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call,
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256:
+ return interp__builtin_ia32_test_op(
+ S, OpPC, Call, [](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..4c9136f6003c0 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13650,6 +13650,42 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
+ auto EvalTestOp =
+ [&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
+ APValue SourceLHS, SourceRHS;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
+ return false;
+
+ unsigned SourceLen = SourceLHS.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ const QualType ElemQT = VT->getElementType();
+ unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
+ APInt SignMask = APInt::getSignMask(LaneWidth);
+
+ APInt AWide(LaneWidth * SourceLen, 0);
+ APInt BWide(LaneWidth * SourceLen, 0);
+
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt ALane;
+ APInt BLane;
+
+ if (ElemQT->isIntegerType()) { // Get value
+ ALane = SourceLHS.getVectorElt(I).getInt();
+ BLane = SourceRHS.getVectorElt(I).getInt();
+ } else if (ElemQT->isFloatingType()) { // Get only sign bit
+ ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt() &
+ SignMask;
+ BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt() &
+ SignMask;
+ } else { // Must be integer or floating type
+ return false;
+ }
+ AWide.insertBits(ALane, I * LaneWidth);
+ BWide.insertBits(BLane, I * LaneWidth);
+ }
+ return Success(Fn(AWide, BWide), E);
+ };
auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
@@ -14763,7 +14799,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}
-
+ case X86::BI__builtin_ia32_ptestz128:
+ case X86::BI__builtin_ia32_ptestz256:
+ case X86::BI__builtin_ia32_vtestzps:
+ case X86::BI__builtin_ia32_vtestzps256:
+ case X86::BI__builtin_ia32_vtestzpd:
+ case X86::BI__builtin_ia32_vtestzpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestc128:
+ case X86::BI__builtin_ia32_ptestc256:
+ case X86::BI__builtin_ia32_vtestcps:
+ case X86::BI__builtin_ia32_vtestcps256:
+ case X86::BI__builtin_ia32_vtestcpd:
+ case X86::BI__builtin_ia32_vtestcpd256: {
+ return EvalTestOp(
+ [](const APInt &A, const APInt &B) { return (~A & B) == 0; });
+ }
+ case X86::BI__builtin_ia32_ptestnzc128:
+ case X86::BI__builtin_ia32_ptestnzc256:
+ case X86::BI__builtin_ia32_vtestnzcps:
+ case X86::BI__builtin_ia32_vtestnzcps256:
+ case X86::BI__builtin_ia32_vtestnzcpd:
+ case X86::BI__builtin_ia32_vtestnzcpd256: {
+ return EvalTestOp([](const APInt &A, const APInt &B) {
+ return ((A & B) != 0) && ((~A & B) != 0);
+ });
+ }
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index d6ba19a6c78af..123fa7933c4f8 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) {
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
}
@@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a,
+ __m128d __b) {
return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
}
@@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [2 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_pd(__m128d __a, __m128d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR
+_mm_testnzc_pd(__m128d __a, __m128d __b) {
return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
}
@@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testz_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
}
@@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 128-bit vector of [4 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS128
-_mm_testnzc_ps(__m128 __a, __m128 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a,
+ __m128 __b) {
return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
}
@@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b);
}
@@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a,
+ __m256d __b) {
return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [4 x double].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_pd(__m256d __a, __m256d __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_pd(__m256d __a, __m256d __b) {
return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b);
}
@@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit vector of [8 x float].
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_ps(__m256 __a, __m256 __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a,
+ __m256 __b) {
return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b);
}
@@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testz_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testz_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b);
}
@@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b);
}
@@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b)
/// \param __b
/// A 256-bit integer vector.
/// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_testnzc_si256(__m256i __a, __m256i __b)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_testnzc_si256(__m256i __a, __m256i __b) {
return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b);
}
diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h
index 5e63a1ae321bc..4f197d5ecaff9 100644
--- a/clang/lib/Headers/smmintrin.h
+++ b/clang/lib/Headers/smmintrin.h
@@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) {
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all zeros; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testz_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V);
}
@@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M,
/// \param __V
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are all ones; FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V);
}
@@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M,
/// A 128-bit integer vector selecting which bits to test in operand \a __M.
/// \returns TRUE if the specified bits are neither all zeros nor all ones;
/// FALSE otherwise.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M,
- __m128i __V) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_testnzc_si128(__m128i __M, __m128i __V) {
return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V);
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3018bb9719b89..40a3bd68fbae1 100644
--- a/clang/test/Code...
[truncated]
|
Applied feedback and pushed less code. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a few final minors
assert(LHS.getFieldDesc()->isPrimitiveArray() && | ||
RHS.getFieldDesc()->isPrimitiveArray()); | ||
|
||
if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kimsh02 drop this?
def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; | ||
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; | ||
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, " | ||
"_Vector<2,double>, _Constant char)">; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) don't alter lines unrelated to patch
Fix #158653
Add handling for:
Add corresponding test cases for: