From 79006fe91ed7d53329a956a65fb753dad2c8c6e2 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 09:30:23 -0700 Subject: [PATCH 1/7] Stash --- clang/include/clang/Basic/BuiltinsX86.td | 16 ++++++--- clang/lib/AST/ExprConstant.cpp | 41 ++++++++++++++++++++++++ clang/lib/Headers/avx2intrin.h | 2 +- clang/lib/Headers/avxintrin.h | 4 +-- clang/lib/Headers/emmintrin.h | 4 +-- clang/lib/Headers/xmmintrin.h | 4 +-- clang/test/CodeGen/X86/mmx-builtins.c | 1 + 7 files changed, 60 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index a0181b7ae8f9d..7c14a54ed4c68 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -181,7 +181,7 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def cvttss2si : X86Builtin<"int(_Vector<4, float>)">; } -let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in { +let Features = "sse", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { def movmskps : X86Builtin<"int(_Vector<4, float>)">; } @@ -207,7 +207,7 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "sse2", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { def movmskpd : X86Builtin<"int(_Vector<2, double>)">; def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; } @@ -526,6 +526,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">; } +let Features = "avx", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { + def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; + def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; +} + let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">; def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">; @@ -536,8 +541,6 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; - def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; - def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } let Features = "avx", Attributes = [NoThrow] in { @@ -572,6 +575,10 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } +let Features = "avx2", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { + def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; +} + let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">; @@ -583,7 +590,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">; def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; - def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 7bf28d988f405..1c553e7780b1d 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13767,6 +13767,38 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { + auto EvalMoveMaskOp = [&]() { + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs(); + const QualType ElemQT = VT->getElementType(); + unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); + + if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane + unsigned ByteLen = 8; + unsigned ResultLen = (LaneWidth * SourceLen) / ByteLen; + APInt Result(ResultLen, 0); + unsigned ResultIdx = 0; + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.getVectorElt(I).getInt(); + for (unsigned J = 0; J != LaneWidth; J=J+ByteLen) { + Result.setBitVal(ResultIdx++, Lane[J]); + } + } + return Success(Result, E); + } + if (ElemQT->isFloatingType()) { // Get sign bit of every lane + APInt Result(SourceLen, 0); + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt(); + Result.setBitVal(I, Lane[LaneWidth-1]); + } + return Success(Result, E); + } + return false; + }; + auto HandleMaskBinOp = [&](llvm::function_ref Fn) -> bool { @@ -14795,6 +14827,15 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success(CarryOut, E); } + case clang::X86::BI__builtin_ia32_movmskps: + case clang::X86::BI__builtin_ia32_movmskpd: + case clang::X86::BI__builtin_ia32_pmovmskb128: + case clang::X86::BI__builtin_ia32_pmovmskb256: + case clang::X86::BI__builtin_ia32_movmskps256: + case clang::X86::BI__builtin_ia32_movmskpd256: { + return EvalMoveMaskOp(); + } + case clang::X86::BI__builtin_ia32_bextr_u32: case clang::X86::BI__builtin_ia32_bextr_u64: case clang::X86::BI__builtin_ia32_bextri_u32: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 31759c5386d9f..08e99328fd1a0 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1306,7 +1306,7 @@ _mm256_min_epu32(__m256i __a, __m256i __b) { /// \param __a /// A 256-bit integer vector containing the source bytes. /// \returns The 32-bit integer mask. -static __inline__ int __DEFAULT_FN_ATTRS256 +static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR _mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index d6ba19a6c78af..356992aa66c75 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2960,7 +2960,7 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) /// A 256-bit vector of [4 x double] containing the double-precision /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); @@ -2978,7 +2978,7 @@ _mm256_movemask_pd(__m256d __a) /// A 256-bit vector of [8 x float] containing the single-precision floating /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR _mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index 6597e7e7d4030..c5436cd591549 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4280,7 +4280,7 @@ _mm_packus_epi16(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. -static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); } @@ -4699,7 +4699,7 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) { /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. -static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d876b4735a7d2..d89ed350edf71 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2416,7 +2416,7 @@ _mm_min_pu8(__m64 __a, __m64 __b) { /// A 64-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. -static __inline__ int __DEFAULT_FN_ATTRS_SSE2 +static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR _mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); @@ -3015,7 +3015,7 @@ _mm_cvtps_pi8(__m128 __a) /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index a4494b69219da..6d41a935e9572 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -402,6 +402,7 @@ int test_mm_movemask_pi8(__m64 a) { return _mm_movemask_pi8(a); } + __m64 test_mm_mul_su32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_mul_su32 // CHECK: and <2 x i64> {{%.*}}, splat (i64 4294967295) From baf0d69b15dbe57ac4c823e37a2ed1d7782b0822 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 09:30:57 -0700 Subject: [PATCH 2/7] Clang-format --- clang/include/clang/Basic/BuiltinsX86.td | 18 ++++++++++++------ clang/lib/AST/ExprConstant.cpp | 7 ++++--- clang/lib/Headers/avx2intrin.h | 3 +-- clang/lib/Headers/avxintrin.h | 6 ++---- clang/lib/Headers/emmintrin.h | 6 ++++-- clang/lib/Headers/xmmintrin.h | 7 ++----- 6 files changed, 25 insertions(+), 22 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 7c14a54ed4c68..6d95fb95978f5 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -181,7 +181,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def cvttss2si : X86Builtin<"int(_Vector<4, float>)">; } -let Features = "sse", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { +let Features = "sse", + Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { def movmskps : X86Builtin<"int(_Vector<4, float>)">; } @@ -207,7 +208,8 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">; } -let Features = "sse2", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { +let Features = "sse2", + Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { def movmskpd : X86Builtin<"int(_Vector<2, double>)">; def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; } @@ -526,7 +528,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">; } -let Features = "avx", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx", + Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } @@ -540,7 +543,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">; def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; - def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; + def ptestnzc256 + : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">; } let Features = "avx", Attributes = [NoThrow] in { @@ -575,7 +579,8 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } -let Features = "avx2", Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { +let Features = "avx2", + Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; } @@ -589,7 +594,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">; - def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; + def pmaddwd256 + : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">; def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1c553e7780b1d..1efbe095f79db 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13769,7 +13769,8 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, auto EvalMoveMaskOp = [&]() { APValue Source; - if (!Evaluate(Source, Info, E->getArg(0))) return false; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; unsigned SourceLen = Source.getVectorLength(); const VectorType *VT = E->getArg(0)->getType()->castAs(); const QualType ElemQT = VT->getElementType(); @@ -13782,7 +13783,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned ResultIdx = 0; for (unsigned I = 0; I != SourceLen; ++I) { APInt Lane = Source.getVectorElt(I).getInt(); - for (unsigned J = 0; J != LaneWidth; J=J+ByteLen) { + for (unsigned J = 0; J != LaneWidth; J = J + ByteLen) { Result.setBitVal(ResultIdx++, Lane[J]); } } @@ -13792,7 +13793,7 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, APInt Result(SourceLen, 0); for (unsigned I = 0; I != SourceLen; ++I) { APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt(); - Result.setBitVal(I, Lane[LaneWidth-1]); + Result.setBitVal(I, Lane[LaneWidth - 1]); } return Success(Result, E); } diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index 08e99328fd1a0..133def7b496ec 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1307,8 +1307,7 @@ _mm256_min_epu32(__m256i __a, __m256i __b) { /// A 256-bit integer vector containing the source bytes. /// \returns The 32-bit integer mask. static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR -_mm256_movemask_epi8(__m256i __a) -{ +_mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h index 356992aa66c75..60c6f7a44a323 100644 --- a/clang/lib/Headers/avxintrin.h +++ b/clang/lib/Headers/avxintrin.h @@ -2961,8 +2961,7 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b) /// floating point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [3:0]. static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_movemask_pd(__m256d __a) -{ +_mm256_movemask_pd(__m256d __a) { return __builtin_ia32_movmskpd256((__v4df)__a); } @@ -2979,8 +2978,7 @@ _mm256_movemask_pd(__m256d __a) /// point values with sign bits to be extracted. /// \returns The sign bits from the operand, written to bits [7:0]. static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR -_mm256_movemask_ps(__m256 __a) -{ +_mm256_movemask_ps(__m256 __a) { return __builtin_ia32_movmskps256((__v8sf)__a); } diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h index c5436cd591549..11ba0919152e8 100644 --- a/clang/lib/Headers/emmintrin.h +++ b/clang/lib/Headers/emmintrin.h @@ -4280,7 +4280,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b) { /// A 128-bit integer vector containing the values with bits to be extracted. /// \returns The most significant bits from each 8-bit element in \a __a, /// written to bits [15:0]. The other bits are assigned zeros. -static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_epi8(__m128i __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_epi8(__m128i __a) { return __builtin_ia32_pmovmskb128((__v16qi)__a); } @@ -4699,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) { /// be extracted. /// \returns The sign bits from each of the double-precision elements in \a __a, /// written to bits [1:0]. The remaining bits are assigned values of zero. -static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_pd(__m128d __a) { +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_movemask_pd(__m128d __a) { return __builtin_ia32_movmskpd((__v2df)__a); } diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h index d89ed350edf71..fe6afdcfc3fdb 100644 --- a/clang/lib/Headers/xmmintrin.h +++ b/clang/lib/Headers/xmmintrin.h @@ -2417,8 +2417,7 @@ _mm_min_pu8(__m64 __a, __m64 __b) { /// \returns The most significant bit from each 8-bit element in \a __a, /// written to bits [7:0]. static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR -_mm_movemask_pi8(__m64 __a) -{ +_mm_movemask_pi8(__m64 __a) { return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a)); } @@ -3015,9 +3014,7 @@ _mm_cvtps_pi8(__m128 __a) /// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each /// single-precision floating-point element of the parameter. Bits [31:4] are /// set to zero. -static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR -_mm_movemask_ps(__m128 __a) -{ +static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) { return __builtin_ia32_movmskps((__v4sf)__a); } From 79c747eaba378215817956a5570f10507f7fff00 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 3 Oct 2025 14:46:43 -0700 Subject: [PATCH 3/7] Add testcases and handling in new evaluator --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 49 ++++++++++++++++++ clang/lib/AST/ExprConstant.cpp | 65 +++++++++++------------- clang/test/CodeGen/X86/avx-builtins.c | 4 ++ clang/test/CodeGen/X86/avx2-builtins.c | 3 ++ clang/test/CodeGen/X86/mmx-builtins.c | 3 ++ clang/test/CodeGen/X86/sse-builtins.c | 2 + clang/test/CodeGen/X86/sse2-builtins.c | 5 ++ 7 files changed, 96 insertions(+), 35 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6053237b1a261..7a7375fcb7974 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2817,6 +2817,46 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + const Pointer &Source = S.Stk.pop(); + + unsigned SourceLen = Source.getNumElems(); + const QualType ElemQT = getElemType(Source); + const OptPrimType ElemPT = S.getContext().classify(ElemQT); + unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); + + if (ElemQT->isIntegerType()) { + unsigned Byte = 8; + unsigned ResultLen = (LaneWidth * SourceLen) / Byte; + APInt Result(ResultLen, 0); + unsigned ResultIdx = 0; + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane; + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, + { Lane = Source.elem(I).toAPSInt(); }); + for (unsigned J = 0; J != LaneWidth; J += Byte) { + Result.setBitVal(ResultIdx++, Lane[J + 7]); + } + } + pushInteger(S, Result.getZExtValue(), Call->getType()); + return true; + } + if (ElemQT->isFloatingType()) { + APInt Result(SourceLen, 0); + using T = PrimConv::T; + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.elem(I).getAPFloat().bitcastToAPInt(); + Result.setBitVal(I, Lane[LaneWidth - 1]); + } + pushInteger(S, Result.getZExtValue(), Call->getType()); + return true; + } + return false; +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref @@ -3454,6 +3494,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS); }); + case clang::X86::BI__builtin_ia32_movmskps: + case clang::X86::BI__builtin_ia32_movmskpd: + case clang::X86::BI__builtin_ia32_pmovmskb128: + case clang::X86::BI__builtin_ia32_pmovmskb256: + case clang::X86::BI__builtin_ia32_movmskps256: + case clang::X86::BI__builtin_ia32_movmskpd256: { + return interp__builtin_ia32_movmsk_op(S, OpPC, Call); + } + case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: case clang::X86::BI__builtin_ia32_pavgb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 1efbe095f79db..389d23e60a812 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -13766,40 +13766,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info, bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, unsigned BuiltinOp) { - - auto EvalMoveMaskOp = [&]() { - APValue Source; - if (!Evaluate(Source, Info, E->getArg(0))) - return false; - unsigned SourceLen = Source.getVectorLength(); - const VectorType *VT = E->getArg(0)->getType()->castAs(); - const QualType ElemQT = VT->getElementType(); - unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); - - if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane - unsigned ByteLen = 8; - unsigned ResultLen = (LaneWidth * SourceLen) / ByteLen; - APInt Result(ResultLen, 0); - unsigned ResultIdx = 0; - for (unsigned I = 0; I != SourceLen; ++I) { - APInt Lane = Source.getVectorElt(I).getInt(); - for (unsigned J = 0; J != LaneWidth; J = J + ByteLen) { - Result.setBitVal(ResultIdx++, Lane[J]); - } - } - return Success(Result, E); - } - if (ElemQT->isFloatingType()) { // Get sign bit of every lane - APInt Result(SourceLen, 0); - for (unsigned I = 0; I != SourceLen; ++I) { - APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt(); - Result.setBitVal(I, Lane[LaneWidth - 1]); - } - return Success(Result, E); - } - return false; - }; - auto HandleMaskBinOp = [&](llvm::function_ref Fn) -> bool { @@ -14834,7 +14800,36 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, case clang::X86::BI__builtin_ia32_pmovmskb256: case clang::X86::BI__builtin_ia32_movmskps256: case clang::X86::BI__builtin_ia32_movmskpd256: { - return EvalMoveMaskOp(); + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs(); + const QualType ElemQT = VT->getElementType(); + unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); + + if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane + unsigned Byte = 8; + unsigned ResultLen = (LaneWidth * SourceLen) / Byte; + APInt Result(ResultLen, 0); + unsigned ResultIdx = 0; + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.getVectorElt(I).getInt(); + for (unsigned J = 0; J != LaneWidth; J += Byte) { + Result.setBitVal(ResultIdx++, Lane[J + 7]); + } + } + return Success(Result.getZExtValue(), E); + } + if (ElemQT->isFloatingType()) { // Get sign bit of every lane + APInt Result(SourceLen, 0); + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt(); + Result.setBitVal(I, Lane[LaneWidth - 1]); + } + return Success(Result.getZExtValue(), E); + } + return false; } case clang::X86::BI__builtin_ia32_bextr_u32: diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c index 5f08b6be81ab7..222a3a48bae62 100644 --- a/clang/test/CodeGen/X86/avx-builtins.c +++ b/clang/test/CodeGen/X86/avx-builtins.c @@ -1337,12 +1337,16 @@ int test_mm256_movemask_pd(__m256d A) { // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}}) return _mm256_movemask_pd(A); } +TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9); +TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3); int test_mm256_movemask_ps(__m256 A) { // CHECK-LABEL: test_mm256_movemask_ps // CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}}) return _mm256_movemask_ps(A); } +TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95); +TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA); __m256d test_mm256_mul_pd(__m256d A, __m256d B) { // CHECK-LABEL: test_mm256_mul_pd diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 4299b18243f21..c27291c010962 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -966,6 +966,9 @@ int test_mm256_movemask_epi8(__m256i a) { // CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}}) return _mm256_movemask_epi8(a); } +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA); +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D); +TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181); __m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) { // CHECK-LABEL: test_mm256_mpsadbw_epu8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 6d41a935e9572..0d1bce3214a06 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -401,6 +401,9 @@ int test_mm_movemask_pi8(__m64 a) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128( return _mm_movemask_pi8(a); } +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA); +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D); +TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5); __m64 test_mm_mul_su32(__m64 a, __m64 b) { diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c index 3bad3426b1586..f5c1d00d1b851 100644 --- a/clang/test/CodeGen/X86/sse-builtins.c +++ b/clang/test/CodeGen/X86/sse-builtins.c @@ -561,6 +561,8 @@ int test_mm_movemask_ps(__m128 A) { // CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}}) return _mm_movemask_ps(A); } +TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-2.0f, 3.0f, -5.5f, -0.0f}) == 0xD); +TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-7.348215e5, 0.00314159, -12.789, 2.7182818}) == 0x5); __m128 test_mm_mul_ps(__m128 A, __m128 B) { // CHECK-LABEL: test_mm_mul_ps diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 8428fd6540ac9..8a54b2bc8bbd0 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -955,12 +955,17 @@ int test_mm_movemask_epi8(__m128i A) { // CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}}) return _mm_movemask_epi8(A); } +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v16qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01}) == 0x6AAA); +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v4si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001}) == 0x8F3D); +TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v2du){0xFF00000000000080ULL,0x7F010203040506C3ULL}) == 0x181); int test_mm_movemask_pd(__m128d A) { // CHECK-LABEL: test_mm_movemask_pd // CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}}) return _mm_movemask_pd(A); } +TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){-12345.67890123, 4567.89012345}) == 0x1); +TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){0.0000987654321, 09876.5432109876}) == 0x0); __m128i test_mm_mul_epu32(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_mul_epu32 From 32f0a7c88b0cd1b37ea067806e4de285367e783c Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 7 Oct 2025 11:02:40 -0700 Subject: [PATCH 4/7] Stash --- clang/include/clang/Basic/BuiltinsX86.td | 14 +++++--------- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 4 ++-- 2 files changed, 7 insertions(+), 11 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 6d95fb95978f5..3c51352ce6107 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -182,7 +182,7 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in } let Features = "sse", - Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def movmskps : X86Builtin<"int(_Vector<4, float>)">; } @@ -208,12 +208,6 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in { def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">; } -let Features = "sse2", - Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in { - def movmskpd : X86Builtin<"int(_Vector<2, double>)">; - def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; -} - let Features = "sse2", Attributes = [NoThrow] in { def movnti : X86Builtin<"void(int *, int)">; } @@ -222,6 +216,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; + def movmskpd : X86Builtin<"int(_Vector<2, double>)">; + def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">; } let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { @@ -529,7 +525,7 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in } let Features = "avx", - Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">; def movmskps256 : X86Builtin<"int(_Vector<8, float>)">; } @@ -580,7 +576,7 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid } let Features = "avx2", - Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in { + Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 7a7375fcb7974..91f7f8b8c8b15 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2824,8 +2824,8 @@ static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC, const Pointer &Source = S.Stk.pop(); unsigned SourceLen = Source.getNumElems(); - const QualType ElemQT = getElemType(Source); - const OptPrimType ElemPT = S.getContext().classify(ElemQT); + QualType ElemQT = getElemType(Source); + OptPrimType ElemPT = S.getContext().classify(ElemQT); unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); if (ElemQT->isIntegerType()) { From 15076445a4c60561e6d865c22d83c9d5b3647c55 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 7 Oct 2025 11:32:05 -0700 Subject: [PATCH 5/7] Apply feedback for InterpBuiltin.cpp --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 32 ++++++++++++------------ 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 91f7f8b8c8b15..eb647f95713ce 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2826,32 +2826,32 @@ static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC, unsigned SourceLen = Source.getNumElems(); QualType ElemQT = getElemType(Source); OptPrimType ElemPT = S.getContext().classify(ElemQT); - unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); + unsigned ResultLen = + S.getASTContext().getTypeSize(Call->getType()); // Always 32-bit integer. + APInt Result(ResultLen, 0); if (ElemQT->isIntegerType()) { - unsigned Byte = 8; - unsigned ResultLen = (LaneWidth * SourceLen) / Byte; - APInt Result(ResultLen, 0); + unsigned BitsInAByte = 8; + unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT); unsigned ResultIdx = 0; - for (unsigned I = 0; I != SourceLen; ++I) { - APInt Lane; - INT_TYPE_SWITCH_NO_BOOL(*ElemPT, - { Lane = Source.elem(I).toAPSInt(); }); - for (unsigned J = 0; J != LaneWidth; J += Byte) { - Result.setBitVal(ResultIdx++, Lane[J + 7]); + INT_TYPE_SWITCH_NO_BOOL(*ElemPT, { + for (unsigned I = 0; I != SourceLen; ++I) { + APInt Lane = Source.elem(I).toAPSInt(); + for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) { + Result.setBitVal(ResultIdx++, Lane[J + 7]); + } } - } - pushInteger(S, Result.getZExtValue(), Call->getType()); + }); + pushInteger(S, Result, Call->getType()); return true; } - if (ElemQT->isFloatingType()) { - APInt Result(SourceLen, 0); + if (ElemQT->isRealFloatingType()) { using T = PrimConv::T; for (unsigned I = 0; I != SourceLen; ++I) { APInt Lane = Source.elem(I).getAPFloat().bitcastToAPInt(); - Result.setBitVal(I, Lane[LaneWidth - 1]); + Result.setBitVal(I, Lane.isNegative()); } - pushInteger(S, Result.getZExtValue(), Call->getType()); + pushInteger(S, Result, Call->getType()); return true; } return false; From f9b8e745d9a72df7527ff3c36c9b0aee59d70131 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 7 Oct 2025 11:45:22 -0700 Subject: [PATCH 6/7] Apply feedback for ExprConstant.cpp --- clang/lib/AST/ExprConstant.cpp | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 389d23e60a812..f0b37ff891d21 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14805,29 +14805,29 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return false; unsigned SourceLen = Source.getVectorLength(); const VectorType *VT = E->getArg(0)->getType()->castAs(); - const QualType ElemQT = VT->getElementType(); - unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); - - if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane - unsigned Byte = 8; - unsigned ResultLen = (LaneWidth * SourceLen) / Byte; - APInt Result(ResultLen, 0); + QualType ElemQT = VT->getElementType(); + unsigned ResultLen = Info.Ctx.getTypeSize( + E->getCallReturnType(Info.Ctx)); // Always 32-bit integer. + APInt Result(ResultLen, 0); + + if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane. + unsigned BitsInAByte = 8; + unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT); unsigned ResultIdx = 0; for (unsigned I = 0; I != SourceLen; ++I) { APInt Lane = Source.getVectorElt(I).getInt(); - for (unsigned J = 0; J != LaneWidth; J += Byte) { + for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) { Result.setBitVal(ResultIdx++, Lane[J + 7]); } } - return Success(Result.getZExtValue(), E); + return Success(Result, E); } - if (ElemQT->isFloatingType()) { // Get sign bit of every lane - APInt Result(SourceLen, 0); + if (ElemQT->isRealFloatingType()) { // Get sign bit of every lane. for (unsigned I = 0; I != SourceLen; ++I) { APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt(); - Result.setBitVal(I, Lane[LaneWidth - 1]); + Result.setBitVal(I, Lane.isNegative()); } - return Success(Result.getZExtValue(), E); + return Success(Result, E); } return false; } From d460cbf3c0104f07564bace1e02c59b392c461b4 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Fri, 10 Oct 2025 08:06:41 -0700 Subject: [PATCH 7/7] Rebase --- clang/include/clang/Basic/BuiltinsX86.td | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index fc7a2a8f80059..33e54ba07b4ce 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -579,11 +579,6 @@ let Features = "avx", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWid def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">; } -let Features = "avx2", - Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { - def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">; -} - let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;