From 4da56170ff4b363c97aa98a4f06946173366823d Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Thu, 2 Oct 2025 05:44:38 -0700 Subject: [PATCH 1/5] Squash --- clang/include/clang/Basic/BuiltinsX86.td | 27 +++++-- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 89 +++++++++++++++++++++++- clang/lib/AST/ExprConstant.cpp | 74 +++++++++++++++++++- clang/lib/Headers/avxintrin.h | 75 ++++++++------------ clang/lib/Headers/smmintrin.h | 12 ++-- clang/test/CodeGen/X86/avx-builtins.c | 59 ++++++++++++++++ clang/test/CodeGen/X86/sse41-builtins.c | 20 ++++++ 7 files changed, 296 insertions(+), 60 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a0181b7ae8f9d..e15492ec19f80 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -319,14 +319,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">; def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; - def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">; - def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; - def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; - def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; + def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, " + "_Vector<2,double>, _Constant char)">; def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; } +let Features = "sse4.1", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def ptestz128 + : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; + def ptestc128 + : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; + def ptestnzc128 + : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">; +} + let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">; @@ -516,8 +524,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">; } - -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">; @@ -526,7 +534,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">; } -let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx", + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">; def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">; def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">; @@ -536,6 +545,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; +} + +let Features = "avx", + Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6053237b1a261..48c12104a4a23 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2817,6 +2817,66 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_test_op( + InterpState &S, CodePtr OpPC, const CallExpr *Call, + llvm::function_ref Fn) { + const Pointer &RHS = S.Stk.pop(); + const Pointer &LHS = S.Stk.pop(); + + assert(LHS.getNumElems() == RHS.getNumElems()); + assert(LHS.getFieldDesc()->isPrimitiveArray() && + RHS.getFieldDesc()->isPrimitiveArray()); + + if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS), + getElemType(RHS))) + return false; + + const unsigned SourceLen = LHS.getNumElems(); + const QualType ElemQT = getElemType(LHS); + const OptPrimType ElemPT = S.getContext().classify(ElemQT); + + if (ElemQT->isIntegerType()) { + APInt FirstElem; + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, + { FirstElem = LHS.elem(0).toAPSInt(); }); + const unsigned LaneWidth = FirstElem.getBitWidth(); + + APInt AWide(LaneWidth * SourceLen, 0); + APInt BWide(LaneWidth * SourceLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt ALane; + APInt BLane; + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + ALane = LHS.elem(I).toAPSInt(); + BLane = RHS.elem(I).toAPSInt(); + }); + AWide.insertBits(ALane, I * LaneWidth); + BWide.insertBits(BLane, I * LaneWidth); + } + pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType()); + return true; + } else if (ElemQT->isFloatingType()) { + APInt ASignBits(SourceLen, 0); + APInt BSignBits(SourceLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + using T = PrimConv::T; + APInt ALane = LHS.elem(I).getAPFloat().bitcastToAPInt(); + APInt BLane = RHS.elem(I).getAPFloat().bitcastToAPInt(); + const unsigned SignBit = ALane.getBitWidth() - 1; + const bool ALaneSign = ALane[SignBit]; + const bool BLaneSign = BLane[SignBit]; + ASignBits.setBitVal(I, ALaneSign); + BSignBits.setBitVal(I, BLaneSign); + } + pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType()); + return true; + } else { // Must be integer or float type + return false; + } +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref @@ -3678,7 +3738,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) { return ((APInt)C).isNegative() ? T : F; }); - + case X86::BI__builtin_ia32_ptestz128: + case X86::BI__builtin_ia32_ptestz256: + case X86::BI__builtin_ia32_vtestzps: + case X86::BI__builtin_ia32_vtestzps256: + case X86::BI__builtin_ia32_vtestzpd: + case X86::BI__builtin_ia32_vtestzpd256: + return interp__builtin_ia32_test_op( + S, OpPC, Call, + [](const APInt &A, const APInt &B) { return (A & B) == 0; }); + case X86::BI__builtin_ia32_ptestc128: + case X86::BI__builtin_ia32_ptestc256: + case X86::BI__builtin_ia32_vtestcps: + case X86::BI__builtin_ia32_vtestcps256: + case X86::BI__builtin_ia32_vtestcpd: + case X86::BI__builtin_ia32_vtestcpd256: + return interp__builtin_ia32_test_op( + S, OpPC, Call, + [](const APInt &A, const APInt &B) { return (~A & B) == 0; }); + case X86::BI__builtin_ia32_ptestnzc128: + case X86::BI__builtin_ia32_ptestnzc256: + case X86::BI__builtin_ia32_vtestnzcps: + case X86::BI__builtin_ia32_vtestnzcps256: + case X86::BI__builtin_ia32_vtestnzcpd: + case X86::BI__builtin_ia32_vtestnzcpd256: + return interp__builtin_ia32_test_op( + S, OpPC, Call, [](const APInt &A, const APInt &B) { + return ((A & B) != 0) && ((~A & B) != 0); + }); case X86::BI__builtin_ia32_selectb_128: case X86::BI__builtin_ia32_selectb_256: case X86::BI__builtin_ia32_selectb_512: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7bf28d988f405..0e12677e7307f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13766,6 +13766,51 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { + auto EvalTestOp = + [&](llvm::function_ref Fn) { + APValue SourceLHS, SourceRHS; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) || + !EvaluateAsRValue(Info, E->getArg(1), SourceRHS)) + return false; + + unsigned SourceLen = SourceLHS.getVectorLength(); + + const VectorType *VT = E->getArg(0)->getType()->castAs(); + QualType ElemQT = VT->getElementType(); + + if (ElemQT->isIntegerType()) { + const unsigned LaneWidth = + SourceLHS.getVectorElt(0).getInt().getBitWidth(); + APInt AWide(LaneWidth * SourceLen, 0); + APInt BWide(LaneWidth * SourceLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt ALane = SourceLHS.getVectorElt(I).getInt(); + APInt BLane = SourceRHS.getVectorElt(I).getInt(); + AWide.insertBits(ALane, I * LaneWidth); + BWide.insertBits(BLane, I * LaneWidth); + } + return Success(Fn(AWide, BWide), E); + + } else if (ElemQT->isFloatingType()) { + APInt ASignBits(SourceLen, 0); + APInt BSignBits(SourceLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt(); + APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt(); + const unsigned SignBit = ALane.getBitWidth() - 1; + const bool ALaneSign = ALane[SignBit]; + const bool BLaneSign = BLane[SignBit]; + ASignBits.setBitVal(I, ALaneSign); + BSignBits.setBitVal(I, BLaneSign); + } + return Success(Fn(ASignBits, BSignBits), E); + + } else { // Must be integer or float type + return false; + } + }; auto HandleMaskBinOp = [&](llvm::function_ref Fn) @@ -14879,7 +14924,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, Result.setBitVal(P++, Val[I]); return Success(Result, E); } - + case X86::BI__builtin_ia32_ptestz128: + case X86::BI__builtin_ia32_ptestz256: + case X86::BI__builtin_ia32_vtestzps: + case X86::BI__builtin_ia32_vtestzps256: + case X86::BI__builtin_ia32_vtestzpd: + case X86::BI__builtin_ia32_vtestzpd256: { + return EvalTestOp( + [](const APInt &A, const APInt &B) { return (A & B) == 0; }); + } + case X86::BI__builtin_ia32_ptestc128: + case X86::BI__builtin_ia32_ptestc256: + case X86::BI__builtin_ia32_vtestcps: + case X86::BI__builtin_ia32_vtestcps256: + case X86::BI__builtin_ia32_vtestcpd: + case X86::BI__builtin_ia32_vtestcpd256: { + return EvalTestOp( + [](const APInt &A, const APInt &B) { return (~A & B) == 0; }); + } + case X86::BI__builtin_ia32_ptestnzc128: + case X86::BI__builtin_ia32_ptestnzc256: + case X86::BI__builtin_ia32_vtestnzcps: + case X86::BI__builtin_ia32_vtestnzcps256: + case X86::BI__builtin_ia32_vtestnzcpd: + case X86::BI__builtin_ia32_vtestnzcpd256: { + return EvalTestOp([](const APInt &A, const APInt &B) { + return ((A & B) != 0) && ((~A & B) != 0); + }); + } case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index d6ba19a6c78af..123fa7933c4f8 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2539,9 +2539,8 @@ _mm256_unpacklo_ps(__m256 __a, __m256 __b) { /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); } @@ -2568,9 +2567,8 @@ _mm_testz_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_pd(__m128d __a, + __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); } @@ -2598,9 +2596,8 @@ _mm_testc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_pd(__m128d __a, __m128d __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR +_mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); } @@ -2627,9 +2624,8 @@ _mm_testnzc_pd(__m128d __a, __m128d __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testz_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testz_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); } @@ -2656,9 +2652,8 @@ _mm_testz_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); } @@ -2686,9 +2681,8 @@ _mm_testc_ps(__m128 __a, __m128 __b) /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS128 -_mm_testnzc_ps(__m128 __a, __m128 __b) -{ +static __inline int __DEFAULT_FN_ATTRS128_CONSTEXPR _mm_testnzc_ps(__m128 __a, + __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); } @@ -2715,9 +2709,8 @@ _mm_testnzc_ps(__m128 __a, __m128 __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestzpd256((__v4df)__a, (__v4df)__b); } @@ -2744,9 +2737,8 @@ _mm256_testz_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_pd(__m256d __a, + __m256d __b) { return __builtin_ia32_vtestcpd256((__v4df)__a, (__v4df)__b); } @@ -2774,9 +2766,8 @@ _mm256_testc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [4 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_pd(__m256d __a, __m256d __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_pd(__m256d __a, __m256d __b) { return __builtin_ia32_vtestnzcpd256((__v4df)__a, (__v4df)__b); } @@ -2803,9 +2794,8 @@ _mm256_testnzc_pd(__m256d __a, __m256d __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testz_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestzps256((__v8sf)__a, (__v8sf)__b); } @@ -2832,9 +2822,8 @@ _mm256_testz_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestcps256((__v8sf)__a, (__v8sf)__b); } @@ -2862,9 +2851,8 @@ _mm256_testc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit vector of [8 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_ps(__m256 __a, __m256 __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_testnzc_ps(__m256 __a, + __m256 __b) { return __builtin_ia32_vtestnzcps256((__v8sf)__a, (__v8sf)__b); } @@ -2888,9 +2876,8 @@ _mm256_testnzc_ps(__m256 __a, __m256 __b) /// \param __b /// A 256-bit integer vector. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testz_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testz_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestz256((__v4di)__a, (__v4di)__b); } @@ -2914,9 +2901,8 @@ _mm256_testz_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestc256((__v4di)__a, (__v4di)__b); } @@ -2941,9 +2927,8 @@ _mm256_testc_si256(__m256i __a, __m256i __b) /// \param __b /// A 256-bit integer vector. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS -_mm256_testnzc_si256(__m256i __a, __m256i __b) -{ +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm256_testnzc_si256(__m256i __a, __m256i __b) { return __builtin_ia32_ptestnzc256((__v4di)__a, (__v4di)__b); } diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 5e63a1ae321bc..4f197d5ecaff9 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1093,8 +1093,8 @@ _mm_max_epu32(__m128i __V1, __m128i __V2) { /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all zeros; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testz_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestz128((__v2di)__M, (__v2di)__V); } @@ -1110,8 +1110,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testz_si128(__m128i __M, /// \param __V /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are all ones; FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestc128((__v2di)__M, (__v2di)__V); } @@ -1128,8 +1128,8 @@ static __inline__ int __DEFAULT_FN_ATTRS _mm_testc_si128(__m128i __M, /// A 128-bit integer vector selecting which bits to test in operand \a __M. /// \returns TRUE if the specified bits are neither all zeros nor all ones; /// FALSE otherwise. -static __inline__ int __DEFAULT_FN_ATTRS _mm_testnzc_si128(__m128i __M, - __m128i __V) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_testnzc_si128(__m128i __M, __m128i __V) { return __builtin_ia32_ptestnzc128((__v2di)__M, (__v2di)__V); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 5f08b6be81ab7..e598b044df14f 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -2009,90 +2009,149 @@ int test_mm_testc_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testc_pd(A, B); } +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0}, + (__m128d)(__v2df){-3.0, 4.0}) == 1); +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0}, + (__m128d)(__v2df){-3.0, 4.0}) == 0); +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0}, + (__m128d)(__v2df){ 0.0, 5.0}) == 1); int test_mm256_testc_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testc_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testc_pd(A, B); } +TEST_CONSTEXPR(_mm256_testc_pd( + (__m256d)(__v4df){-1.0, 2.0, -3.0, 4.0}, + (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 1); +TEST_CONSTEXPR(_mm256_testc_pd( + (__m256d)(__v4df){ 1.0, 2.0, -3.0, 4.0}, + (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0); +TEST_CONSTEXPR(_mm256_testc_pd( + (__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, + (__m256d)(__v4df){ 5.0, 6.0, 7.0, 8.0}) == 1); int test_mm_testc_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testc_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testc_ps(A, B); } +TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-1,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 1); +TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,2384.23,-9001.1009,}, (__m128)(__v4sf){-1.0,-9001,9001,9000}) == 0); +TEST_CONSTEXPR(_mm_testc_ps((__m128)(__v4sf){-1,-2,-9001.1009,-93}, (__m128)(__v4sf){-1.0,-9001,-0.9001,-1000}) == 1); int test_mm256_testc_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testc_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testc_ps(A, B); } +TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0); +TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1); +TEST_CONSTEXPR(_mm256_testc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0); int test_mm256_testc_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testc_si256 // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testc_si256(A, B); } +TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1); +TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,1,0}) == 1); +TEST_CONSTEXPR(_mm256_testc_si256((__m256i)(__v4di){-1,-2,1,3}, (__m256i)(__v4di){0,-1,1,1}) == 0); int test_mm_testnzc_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testnzc_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testnzc_pd(A, B); } +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0}, + (__m128d)(__v2df){-3.0, -4.0}) == 1); +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0}, + (__m128d)(__v2df){+3.0, -4.0}) == 0); +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0}, + (__m128d)(__v2df){-3.0, +4.0}) == 0); int test_mm256_testnzc_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testnzc_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testnzc_pd(A, B); } +TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, 2.0, 3.0, -4.0}, (__m256d)(__v4df){-5.0, -6.0, 7.0, 8.0}) == 1); +TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){1.0, 2.0, 3.0, 4.0}, (__m256d)(__v4df){-1.0, 6.0, 7.0, 8.0}) == 0); +TEST_CONSTEXPR(_mm256_testnzc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0); int test_mm_testnzc_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testnzc_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testnzc_ps(A, B); } +TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-9.9,987,-67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1); +TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){-810.0,-1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0); +TEST_CONSTEXPR(_mm_testnzc_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 0); int test_mm256_testnzc_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testnzc_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testnzc_ps(A, B); } +TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, -2, 3, 4, 5, 6, 7, -8}) == 1); +TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0); +TEST_CONSTEXPR(_mm256_testnzc_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 0); int test_mm256_testnzc_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testnzc_si256 // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestnzc.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testnzc_si256(A, B); } +TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,0,0}, (__m256i)(__v4di){478329848,23438,2343,-3483}) == 1); +TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){1,0,0,0}, (__m256i)(__v4di){3,0,0,0}) == 1); +TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,239483,-1,0}, (__m256i)(__v4di){3849234,0,-2,0}) == 0); +TEST_CONSTEXPR(_mm256_testnzc_si256((__m256i)(__v4di){-1,0,-1,3}, (__m256i)(__v4di){1,0,9999999,1}) == 0); int test_mm_testz_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_testz_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testz_pd(A, B); } +TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){-1,0}, (__m128d)(__v2df){0,-1}) == 1); +TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,-13.13}, (__m128d)(__v2df){0,-11.1}) == 0); +TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,5.13}, (__m128d)(__v2df){0,-113.1324823}) == 1); + int test_mm256_testz_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testz_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testz_pd(A, B); } +TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,0,-47.47,0.00002}, (__m256d)(__v4df){0,-1,74.0101,-1}) == 1); +TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){-1,3249.9,-47.47,-0.00002}, (__m256d)(__v4df){0,-1,74.0101,-9999900}) == 0); +TEST_CONSTEXPR(_mm256_testz_pd((__m256d)(__v4df){0,0,-8,0}, (__m256d)(__v4df){0,-1,-101,-123}) == 0); int test_mm_testz_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testz_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps(<4 x float> %{{.*}}, <4 x float> %{{.*}}) return _mm_testz_ps(A, B); } +TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){-9.9,987,67,0}, (__m128)(__v4sf){10.0,-1.12,-29.29,0}) == 1); +TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){10.0,1.0,-1.0,-3.0}, (__m128)(__v4sf){-10.0,-1.0,-1.0,-2.0}) == 0); +TEST_CONSTEXPR(_mm_testz_ps((__m128)(__v4sf){0,0,0,0}, (__m128)(__v4sf){0,-1,0,-1}) == 1); int test_mm256_testz_ps(__m256 A, __m256 B) { // CHECK-LABEL: test_mm256_testz_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}) return _mm256_testz_ps(A, B); } +TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){-1, -2, -3, -4, -5, -6, -7, 8},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 1); +TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,0,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, 4, 5, 6, 7, -8}) == 0); +TEST_CONSTEXPR(_mm256_testz_ps((__m256)(__v8sf){0,-0.00002,0,0,0,0,0,-1.00001},(__m256)(__v8sf){1, 2, 3, -4, 5, -6, -7, 8}) == 1); int test_mm256_testz_si256(__m256i A, __m256i B) { // CHECK-LABEL: test_mm256_testz_si256 // CHECK: call {{.*}}i32 @llvm.x86.avx.ptestz.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) return _mm256_testz_si256(A, B); } +TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,0,0}, (__m256i)(__v4di){0,0,0,0}) == 1); +TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){0,0,-1,0}, (__m256i)(__v4di){0,0,-1,0}) == 0); +TEST_CONSTEXPR(_mm256_testz_si256((__m256i)(__v4di){-1,0,1,0}, (__m256i)(__v4di){0,-1,0,1}) == 1); __m256 test_mm256_undefined_ps(void) { // X64-LABEL: test_mm256_undefined_ps diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index eee479a755ab4..89a7ac29e7db7 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -449,33 +449,53 @@ int test_mm_test_all_ones(__m128i x) { // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_test_all_ones(x); } +TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, -1})) == 1); +TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v2di){-1, 0})) == 0); +TEST_CONSTEXPR(_mm_test_all_ones(((__m128i)(__v4si){-1, -1, -1, 0x7FFFFFFF})) == 0); int test_mm_test_all_zeros(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_test_all_zeros // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_test_all_zeros(x, y); } +TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,0}), ((__m128i)(__v2di){0,0})) == 1); +TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0xFF00,0}), ((__m128i)(__v2di){0x00FF,0})) == 1); +TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){1,0}), ((__m128i)(__v2di){-1,0})) == 0); +TEST_CONSTEXPR(_mm_test_all_zeros(((__m128i)(__v2di){0,1}), ((__m128i)(__v2di){0,-1})) == 0); int test_mm_test_mix_ones_zeros(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_test_mix_ones_zeros // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_test_mix_ones_zeros(x, y); } +TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xFF, 0}), ((__m128i)(__v2di){0xF0, 1})) == 1); +TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0xF0, 0}), ((__m128i)(__v2di){0x0F, 0})) == 0); +TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){-1, -1}), ((__m128i)(__v2di){1, 0})) == 0); +TEST_CONSTEXPR(_mm_test_mix_ones_zeros(((__m128i)(__v2di){0, 0}), ((__m128i)(__v2di){0, 0})) == 0); int test_mm_testc_si128(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_testc_si128 // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testc_si128(x, y); } +TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1); +TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0); +TEST_CONSTEXPR(_mm_testc_si128((__m128i)(__v2di){0,-1}, (__m128i)(__v2di){0,1}) == 1); int test_mm_testnzc_si128(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_testnzc_si128 // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestnzc(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testnzc_si128(x, y); } +TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){3,0}, (__m128i)(__v2di){1,1}) == 1); +TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){32,-1}, (__m128i)(__v2di){15,0}) == 0); +TEST_CONSTEXPR(_mm_testnzc_si128((__m128i)(__v2di){0,999}, (__m128i)(__v2di){0,999}) == 0); int test_mm_testz_si128(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_testz_si128 // CHECK: call {{.*}}i32 @llvm.x86.sse41.ptestz(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) return _mm_testz_si128(x, y); } +TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){0,0}, (__m128i)(__v2di){0,0}) == 1); +TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){-1,0}) == 0); +TEST_CONSTEXPR(_mm_testz_si128((__m128i)(__v2di){1,0}, (__m128i)(__v2di){0,1}) == 1); From de0da0921bd9b89e4574b65f0cd3346edbe6d983 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Thu, 2 Oct 2025 09:53:47 -0700 Subject: [PATCH 2/5] Apply feedback from Simon and Timm --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 51 ++++++++------------- clang/lib/AST/ExprConstant.cpp | 57 ++++++++++-------------- 2 files changed, 43 insertions(+), 65 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 48c12104a4a23..5e9ea011b498b 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2824,6 +2824,7 @@ static bool interp__builtin_ia32_test_op( const Pointer &LHS = S.Stk.pop(); assert(LHS.getNumElems() == RHS.getNumElems()); + assert(LHS.getFieldDesc()->isPrimitiveArray() && RHS.getFieldDesc()->isPrimitiveArray()); @@ -2831,50 +2832,36 @@ static bool interp__builtin_ia32_test_op( getElemType(RHS))) return false; - const unsigned SourceLen = LHS.getNumElems(); + unsigned SourceLen = LHS.getNumElems(); const QualType ElemQT = getElemType(LHS); const OptPrimType ElemPT = S.getContext().classify(ElemQT); + unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); + APInt SignMask = APInt::getSignMask(LaneWidth); - if (ElemQT->isIntegerType()) { - APInt FirstElem; - INT_TYPE_SWITCH_NO_BOOL(*ElemPT, - { FirstElem = LHS.elem(0).toAPSInt(); }); - const unsigned LaneWidth = FirstElem.getBitWidth(); + APInt AWide(LaneWidth * SourceLen, 0); + APInt BWide(LaneWidth * SourceLen, 0); - APInt AWide(LaneWidth * SourceLen, 0); - APInt BWide(LaneWidth * SourceLen, 0); + for (unsigned I = 0; I != SourceLen; ++I) { + APInt ALane; + APInt BLane; - for (unsigned I = 0; I != SourceLen; ++I) { - APInt ALane; - APInt BLane; + if (ElemQT->isIntegerType()) { // Get value INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { ALane = LHS.elem(I).toAPSInt(); BLane = RHS.elem(I).toAPSInt(); }); - AWide.insertBits(ALane, I * LaneWidth); - BWide.insertBits(BLane, I * LaneWidth); - } - pushInteger(S, Fn(AWide, BWide) ? 1 : 0, Call->getType()); - return true; - } else if (ElemQT->isFloatingType()) { - APInt ASignBits(SourceLen, 0); - APInt BSignBits(SourceLen, 0); - - for (unsigned I = 0; I != SourceLen; ++I) { + } else if (ElemQT->isFloatingType()) { // Get only sign bit using T = PrimConv::T; - APInt ALane = LHS.elem(I).getAPFloat().bitcastToAPInt(); - APInt BLane = RHS.elem(I).getAPFloat().bitcastToAPInt(); - const unsigned SignBit = ALane.getBitWidth() - 1; - const bool ALaneSign = ALane[SignBit]; - const bool BLaneSign = BLane[SignBit]; - ASignBits.setBitVal(I, ALaneSign); - BSignBits.setBitVal(I, BLaneSign); + ALane = LHS.elem(I).getAPFloat().bitcastToAPInt() & SignMask; + BLane = RHS.elem(I).getAPFloat().bitcastToAPInt() & SignMask; + } else { // Must be integer or floating type + return false; } - pushInteger(S, Fn(ASignBits, BSignBits) ? 1 : 0, Call->getType()); - return true; - } else { // Must be integer or float type - return false; + AWide.insertBits(ALane, I * LaneWidth); + BWide.insertBits(BLane, I * LaneWidth); } + pushInteger(S, Fn(AWide, BWide), Call->getType()); + return true; } static bool interp__builtin_elementwise_triop( diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0e12677e7307f..c0ca11f77c9cf 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13774,42 +13774,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; unsigned SourceLen = SourceLHS.getVectorLength(); - const VectorType *VT = E->getArg(0)->getType()->castAs(); - QualType ElemQT = VT->getElementType(); - - if (ElemQT->isIntegerType()) { - const unsigned LaneWidth = - SourceLHS.getVectorElt(0).getInt().getBitWidth(); - APInt AWide(LaneWidth * SourceLen, 0); - APInt BWide(LaneWidth * SourceLen, 0); - - for (unsigned I = 0; I != SourceLen; ++I) { - APInt ALane = SourceLHS.getVectorElt(I).getInt(); - APInt BLane = SourceRHS.getVectorElt(I).getInt(); - AWide.insertBits(ALane, I * LaneWidth); - BWide.insertBits(BLane, I * LaneWidth); - } - return Success(Fn(AWide, BWide), E); - - } else if (ElemQT->isFloatingType()) { - APInt ASignBits(SourceLen, 0); - APInt BSignBits(SourceLen, 0); - - for (unsigned I = 0; I != SourceLen; ++I) { - APInt ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt(); - APInt BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt(); - const unsigned SignBit = ALane.getBitWidth() - 1; - const bool ALaneSign = ALane[SignBit]; - const bool BLaneSign = BLane[SignBit]; - ASignBits.setBitVal(I, ALaneSign); - BSignBits.setBitVal(I, BLaneSign); + const QualType ElemQT = VT->getElementType(); + unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); + APInt SignMask = APInt::getSignMask(LaneWidth); + + APInt AWide(LaneWidth * SourceLen, 0); + APInt BWide(LaneWidth * SourceLen, 0); + + for (unsigned I = 0; I != SourceLen; ++I) { + APInt ALane; + APInt BLane; + + if (ElemQT->isIntegerType()) { // Get value + ALane = SourceLHS.getVectorElt(I).getInt(); + BLane = SourceRHS.getVectorElt(I).getInt(); + } else if (ElemQT->isFloatingType()) { // Get only sign bit + ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt() & + SignMask; + BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt() & + SignMask; + } else { // Must be integer or floating type + return false; } - return Success(Fn(ASignBits, BSignBits), E); - - } else { // Must be integer or float type - return false; + AWide.insertBits(ALane, I * LaneWidth); + BWide.insertBits(BLane, I * LaneWidth); } + return Success(Fn(AWide, BWide), E); }; auto HandleMaskBinOp = From 56a0df0a7dce50e96ee906910d0ad78ac03e87a9 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 15:07:17 -0700 Subject: [PATCH 3/5] Clang-format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 ------ clang/lib/Headers/xmmintrin.h | 5 ++-- clang/test/CodeGen/X86/avx-builtins.c | 31 +++++++----------------- clang/test/CodeGen/X86/mmx-builtins.c | 12 +++------ 4 files changed, 15 insertions(+), 40 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5e9ea011b498b..8bdbde8849454 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2825,13 +2825,6 @@ static bool interp__builtin_ia32_test_op( assert(LHS.getNumElems() == RHS.getNumElems()); - assert(LHS.getFieldDesc()->isPrimitiveArray() && - RHS.getFieldDesc()->isPrimitiveArray()); - - if (!S.getASTContext().hasSameUnqualifiedType(getElemType(LHS), - getElemType(RHS))) - return false; - unsigned SourceLen = LHS.getNumElems(); const QualType ElemQT = getElemType(LHS); const OptPrimType ElemPT = S.getContext().classify(ElemQT); diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d876b4735a7d2..605409c1f43b9 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2416,9 +2416,8 @@ _mm_min_pu8(__m64 __a, __m64 __b) { /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 -_mm_movemask_pi8(__m64 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR +_mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); } diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index e598b044df14f..02141d3144ab7 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -2009,27 +2009,18 @@ int test_mm_testc_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testc_pd(A, B); } -TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0}, - (__m128d)(__v2df){-3.0, 4.0}) == 1); -TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0}, - (__m128d)(__v2df){-3.0, 4.0}) == 0); -TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0}, - (__m128d)(__v2df){ 0.0, 5.0}) == 1); +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){-1.0, -2.0},(__m128d)(__v2df){-3.0, 4.0}) == 1); +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},(__m128d)(__v2df){-3.0, 4.0}) == 0); +TEST_CONSTEXPR(_mm_testc_pd((__m128d)(__v2df){ 1.0, -2.0},(__m128d)(__v2df){ 0.0, 5.0}) == 1); int test_mm256_testc_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testc_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) return _mm256_testc_pd(A, B); } -TEST_CONSTEXPR(_mm256_testc_pd( - (__m256d)(__v4df){-1.0, 2.0, -3.0, 4.0}, - (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 1); -TEST_CONSTEXPR(_mm256_testc_pd( - (__m256d)(__v4df){ 1.0, 2.0, -3.0, 4.0}, - (__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0); -TEST_CONSTEXPR(_mm256_testc_pd( - (__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0}, - (__m256d)(__v4df){ 5.0, 6.0, 7.0, 8.0}) == 1); +TEST_CONSTEXPR(_mm256_testc_pd((__m256d)(__v4df){-1.0, 2.0, -3.0, 4.0},(__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 1); +TEST_CONSTEXPR(_mm256_testc_pd((__m256d)(__v4df){ 1.0, 2.0, -3.0, 4.0},(__m256d)(__v4df){-5.0, 6.0, 7.0, 8.0}) == 0); +TEST_CONSTEXPR(_mm256_testc_pd((__m256d)(__v4df){-1.0, -2.0, -3.0, -4.0},(__m256d)(__v4df){ 5.0, 6.0, 7.0, 8.0}) == 1); int test_mm_testc_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_testc_ps @@ -2063,12 +2054,9 @@ int test_mm_testnzc_pd(__m128d A, __m128d B) { // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestnzc.pd(<2 x double> %{{.*}}, <2 x double> %{{.*}}) return _mm_testnzc_pd(A, B); } -TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0}, - (__m128d)(__v2df){-3.0, -4.0}) == 1); -TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0}, - (__m128d)(__v2df){+3.0, -4.0}) == 0); -TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0}, - (__m128d)(__v2df){-3.0, +4.0}) == 0); +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, +2.0},(__m128d)(__v2df){-3.0, -4.0}) == 1); +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){+1.0, +2.0},(__m128d)(__v2df){+3.0, -4.0}) == 0); +TEST_CONSTEXPR(_mm_testnzc_pd((__m128d)(__v2df){-1.0, -2.0},(__m128d)(__v2df){-3.0, +4.0}) == 0); int test_mm256_testnzc_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testnzc_pd @@ -2116,7 +2104,6 @@ TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){-1,0}, (__m128d)(__v2df){0,-1}) == TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,-13.13}, (__m128d)(__v2df){0,-11.1}) == 0); TEST_CONSTEXPR(_mm_testz_pd((__m128d)(__v2df){0,5.13}, (__m128d)(__v2df){0,-113.1324823}) == 1); - int test_mm256_testz_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_testz_pd // CHECK: call {{.*}}i32 @llvm.x86.avx.vtestz.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}) diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index a4494b69219da..fa8d77bf73e9a 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -7,14 +7,7 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx -// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx + #include @@ -401,6 +394,9 @@ int test_mm_movemask_pi8(__m64 a) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128( return _mm_movemask_pi8(a); } +TEST_CONSTEXPR(match_m64(_mm_movemask_pi8((__m64)((__v8qi){(signed char)0x7F,(signed char)0x80,(signed char)0x01,(signed char)0xFF,(signed char)0x00,(signed char)0xAA,(signed char)0x55,(signed char)0xC3})),0xAA)); +// TEST_CONSTEXPR(match_i32(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})),0x3D)); +// TEST_CONSTEXPR(match_i32(_mm_movemask_pi8((__m64)((__v1){(long long)0xE110837A00924DB0ULL})),0xA5)); __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32 From 8ec97e148a4d7af67bef151d42fec2d37ce53beb Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 15:12:12 -0700 Subject: [PATCH 4/5] Add back removed lit tests --- clang/test/CodeGen/X86/mmx-builtins.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index fa8d77bf73e9a..4694a34db1e68 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -7,7 +7,14 @@ // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s --implicit-check-not=x86mmx - +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +ssse3 -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s --implicit-check-not=x86mmx #include From 0a21761dd4390b78f13978e7ed2453235081a132 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 15:17:15 -0700 Subject: [PATCH 5/5] Remove stray test from another branch --- clang/test/CodeGen/X86/mmx-builtins.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 4694a34db1e68..a4494b69219da 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -401,9 +401,6 @@ int test_mm_movemask_pi8(__m64 a) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128( return _mm_movemask_pi8(a); } -TEST_CONSTEXPR(match_m64(_mm_movemask_pi8((__m64)((__v8qi){(signed char)0x7F,(signed char)0x80,(signed char)0x01,(signed char)0xFF,(signed char)0x00,(signed char)0xAA,(signed char)0x55,(signed char)0xC3})),0xAA)); -// TEST_CONSTEXPR(match_i32(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})),0x3D)); -// TEST_CONSTEXPR(match_i32(_mm_movemask_pi8((__m64)((__v1){(long long)0xE110837A00924DB0ULL})),0xA5)); __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32