From 71542e71f09e2bcbcedc3c54b6b8d4234645624f Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Mon, 20 Oct 2025 11:32:46 -0700 Subject: [PATCH 1/6] Squash --- clang/include/clang/Basic/BuiltinsX86.td | 22 +++++++---- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 13 +++++++ clang/lib/AST/ExprConstant.cpp | 13 +++++++ clang/lib/Headers/avx2intrin.h | 21 +++++------ clang/lib/Headers/tmmintrin.h | 48 +++++++++++------------- clang/test/CodeGen/X86/avx2-builtins.c | 7 ++++ clang/test/CodeGen/X86/mmx-builtins.c | 5 +++ clang/test/CodeGen/X86/ssse3-builtins.c | 3 ++ 8 files changed, 86 insertions(+), 46 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 54b3ce08f5625..0b49bd0f7f253 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -123,13 +123,13 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in { } } - let Features = "ssse3" in { - def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; - def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; - def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; - } - let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { + def psignb128 + : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; + def psignw128 + : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; + def psignd128 + : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">; def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">; def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">; @@ -677,7 +677,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - + + def psignb256 + : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; + def psignw256 + : X86Builtin< + "_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; + def psignd256 + : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 2d5ad4a7a92cf..df4bbee61b109 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3809,6 +3809,19 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return interp__builtin_ia32_movmsk_op(S, OpPC, Call); } + case X86::BI__builtin_ia32_psignb128: + case X86::BI__builtin_ia32_psignb256: + case X86::BI__builtin_ia32_psignw128: + case X86::BI__builtin_ia32_psignw256: + case X86::BI__builtin_ia32_psignd128: + case X86::BI__builtin_ia32_psignd256: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt { + return BElem.isNegative() ? static_cast(-AElem) + : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) + : static_cast(AElem); + }); + case clang::X86::BI__builtin_ia32_pavgb128: case clang::X86::BI__builtin_ia32_pavgw128: case clang::X86::BI__builtin_ia32_pavgb256: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 00aaaab957591..0743248507b00 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12312,6 +12312,19 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_psignb128: + case X86::BI__builtin_ia32_psignb256: + case X86::BI__builtin_ia32_psignw128: + case X86::BI__builtin_ia32_psignw256: + case X86::BI__builtin_ia32_psignd128: + case X86::BI__builtin_ia32_psignd256: + return EvaluateBinOpExpr( + [](const APSInt &AElem, const APSInt &BElem) -> APInt { + return BElem.isNegative() ? static_cast(-AElem) + : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) + : static_cast(AElem); + }); + case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h index fdb825fbbd134..3cbaaece7b38e 100644 --- a/clang/lib/Headers/avx2intrin.h +++ b/clang/lib/Headers/avx2intrin.h @@ -1975,10 +1975,9 @@ _mm256_shuffle_epi8(__m256i __a, __m256i __b) { /// \param __b /// A 256-bit integer vector]. /// \returns A 256-bit integer vector containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi8(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi8(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } /// Sets each element of the result to the corresponding element of the @@ -1996,10 +1995,9 @@ _mm256_sign_epi8(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [16 x i16]. /// \returns A 256-bit vector of [16 x i16] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi16(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi16(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } /// Sets each element of the result to the corresponding element of the @@ -2017,10 +2015,9 @@ _mm256_sign_epi16(__m256i __a, __m256i __b) /// \param __b /// A 256-bit vector of [8 x i32]. /// \returns A 256-bit vector of [8 x i32] containing the result. -static __inline__ __m256i __DEFAULT_FN_ATTRS256 -_mm256_sign_epi32(__m256i __a, __m256i __b) -{ - return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); +static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR +_mm256_sign_epi32(__m256i __a, __m256i __b) { + return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); } /// Shifts each 128-bit half of the 256-bit integer vector \a a left by diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 5d0f20f4d527d..95b32c0087ecb 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -641,10 +641,9 @@ _mm_shuffle_pi8(__m64 __a, __m64 __b) { /// A 128-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi8(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi8(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignb128((__v16qi)__a, (__v16qi)__b); } /// For each 16-bit integer in the first source operand, perform one of @@ -667,10 +666,9 @@ _mm_sign_epi8(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi16(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi16(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignw128((__v8hi)__a, (__v8hi)__b); } /// For each 32-bit integer in the first source operand, perform one of @@ -693,10 +691,9 @@ _mm_sign_epi16(__m128i __a, __m128i __b) /// A 128-bit integer vector containing control doublewords corresponding to /// positions in the destination. /// \returns A 128-bit integer vector containing the resultant values. -static __inline__ __m128i __DEFAULT_FN_ATTRS -_mm_sign_epi32(__m128i __a, __m128i __b) -{ - return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_sign_epi32(__m128i __a, __m128i __b) { + return (__m128i)__builtin_ia32_psignd128((__v4si)__a, (__v4si)__b); } /// For each 8-bit integer in the first source operand, perform one of @@ -719,11 +716,10 @@ _mm_sign_epi32(__m128i __a, __m128i __b) /// A 64-bit integer vector containing control bytes corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi8(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignb128((__v16qi)__anyext128(__a), - (__v16qi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi8(__m64 __a, + __m64 __b) { + return __trunc64(__builtin_ia32_psignb128((__v16qi)__zext128(__a), + (__v16qi)__zext128(__b))); } /// For each 16-bit integer in the first source operand, perform one of @@ -746,11 +742,10 @@ _mm_sign_pi8(__m64 __a, __m64 __b) /// A 64-bit integer vector containing control words corresponding to /// positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi16(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignw128((__v8hi)__anyext128(__a), - (__v8hi)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi16(__m64 __a, + __m64 __b) { + return __trunc64( + __builtin_ia32_psignw128((__v8hi)__zext128(__a), (__v8hi)__zext128(__b))); } /// For each 32-bit integer in the first source operand, perform one of @@ -773,11 +768,10 @@ _mm_sign_pi16(__m64 __a, __m64 __b) /// A 64-bit integer vector containing two control doublewords corresponding /// to positions in the destination. /// \returns A 64-bit integer vector containing the resultant values. -static __inline__ __m64 __DEFAULT_FN_ATTRS -_mm_sign_pi32(__m64 __a, __m64 __b) -{ - return __trunc64(__builtin_ia32_psignd128((__v4si)__anyext128(__a), - (__v4si)__anyext128(__b))); +static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, + __m64 __b) { + return __trunc64( + __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b))); } #undef __anyext128 diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 03b1bdeb55d7c..557ba26e50331 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1155,23 +1155,30 @@ __m256i test_mm256_shufflelo_epi16(__m256i a) { return _mm256_shufflelo_epi16(a, 83); } TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) ); + __m256i test_mm256_sign_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi8 // CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) return _mm256_sign_epi8(a, b); } +TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8( + (__m256i)(__v32qi){'B','r','i','g','h','t','n','e','o','n','f','o','x','j','u','m','p','s','o','v','e','r','p','r','o','g','r','a','m','m','e','r'}, + (__m256i)(__v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'t','h','i','s'}), + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'m','m','e','r')); __m256i test_mm256_sign_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi16 // CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_sign_epi16(a, b); } +TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,'h','i', 'b','y','e','!'}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0)); __m256i test_mm256_sign_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_sign_epi32(a, b); } +TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed,'o','o','p','s'}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, -'o',-'o',-'p',-'s')); __m256i test_mm256_slli_epi16(__m256i a) { // CHECK-LABEL: test_mm256_slli_epi16 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index 767425577a130..273138063a1b1 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -602,23 +602,28 @@ __m64 test_mm_shuffle_pi16(__m64 a) { return _mm_shuffle_pi16(a, 3); } TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0)); + __m64 test_mm_sign_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi8 // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128( return _mm_sign_pi8(a, b); } +TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){0,0,0,0, 0,0,0,0}, (__m64)(__v8qi){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8qi(_mm_sign_pi8((__m64)(__v8qi){6,7,6,7, 6,7,6,7}, (__m64)(__v8qi){1,1,1,1, 0,0,0,0}), 6,7,6,7, 0,0,0,0)); __m64 test_mm_sign_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128( return _mm_sign_pi16(a, b); } +TEST_CONSTEXPR(match_v4hi(_mm_sign_pi16((__m64)(__v4hi){-1,0,1,0}, (__m64)(__v4hi){1,0,-1,0}), -1,0,-1,0)); __m64 test_mm_sign_pi32(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128( return _mm_sign_pi32(a, b); } +TEST_CONSTEXPR(match_v2si(_mm_sign_pi32((__m64)(__v2si){0x7FFF, -1}, (__m64)(__v2si){-1, 0x7FFF}), -0x7FFF, -1)); __m64 test_mm_sll_pi16(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sll_pi16 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index f70afc01a1963..d3fde1e2de653 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -125,15 +125,18 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) { // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_sign_epi8(a, b); } +TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qi){'g','r','i','n','d','i','n','g', 'l','e','e','t','c','o','d','e'}, (__m128i)(__v16qi){0,1,0,1, 1,1,0,0, 0,0,1,1, 1,0,1,0}), 0,'r',0,'n', 'd','i',0,0, 0,0,'e','t', 'c',0,'d',0)); __m128i test_mm_sign_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sign_epi16 // CHECK: call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}) return _mm_sign_epi16(a, b); } +TEST_CONSTEXPR(match_v8hi(_mm_sign_epi16((__m128i)(__v8hi){0,-2,0,-4,0,-6,0,-8}, (__m128i)(__v8hi){-1,-2,-3,-4,-5,-6,7,-8}), 0,2,0,4,0,6,0,8)); __m128i test_mm_sign_epi32(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sign_epi32 // CHECK: call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) return _mm_sign_epi32(a, b); } +TEST_CONSTEXPR(match_v4si(_mm_sign_epi32((__m128i)(__v4si){-1,-2,-3,-4}, (__m128i)(__v4si){-4,-3,-2,-1}), 1,2,3,4)); From d996020029f60abba27e9dedbe2d2f9846b4b94e Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Mon, 20 Oct 2025 11:48:01 -0700 Subject: [PATCH 2/6] Fix rebase --- clang/include/clang/Basic/BuiltinsX86.td | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 0b49bd0f7f253..8332eac2a890c 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -603,10 +603,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">; def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, " "_Vector<32, char>, _Constant int)">; - def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; - def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; - def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; + def psadbw256 + : X86Builtin< + "_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">; def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">; def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">; From 29202f4e4cf7368565e16116ded127f15399c5e8 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 21 Oct 2025 02:29:50 -0700 Subject: [PATCH 3/6] Fix testcases --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 7 ++++--- clang/lib/AST/ExprConstant.cpp | 12 ++++++------ clang/test/CodeGen/X86/avx2-builtins.c | 10 +++++----- clang/test/CodeGen/X86/ssse3-builtins.c | 2 +- 4 files changed, 16 insertions(+), 15 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index df4bbee61b109..b539e032ddbbc 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3817,9 +3817,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_psignd256: return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt { - return BElem.isNegative() ? static_cast(-AElem) - : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) - : static_cast(AElem); + return BElem[BElem.getBitWidth() - 1] + ? static_cast(-AElem) + : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) + : static_cast(AElem); }); case clang::X86::BI__builtin_ia32_pavgb128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 0743248507b00..f2047f60c8d62 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12318,12 +12318,12 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_psignw256: case X86::BI__builtin_ia32_psignd128: case X86::BI__builtin_ia32_psignd256: - return EvaluateBinOpExpr( - [](const APSInt &AElem, const APSInt &BElem) -> APInt { - return BElem.isNegative() ? static_cast(-AElem) - : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) - : static_cast(AElem); - }); + return EvaluateBinOpExpr([](const APSInt &AElem, + const APSInt &BElem) -> APInt { + return BElem[BElem.getBitWidth() - 1] ? static_cast(-AElem) + : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) + : static_cast(AElem); + }); case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index 557ba26e50331..5c52d84de56f5 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1162,23 +1162,23 @@ __m256i test_mm256_sign_epi8(__m256i a, __m256i b) { return _mm256_sign_epi8(a, b); } TEST_CONSTEXPR(match_v32qi(_mm256_sign_epi8( - (__m256i)(__v32qi){'B','r','i','g','h','t','n','e','o','n','f','o','x','j','u','m','p','s','o','v','e','r','p','r','o','g','r','a','m','m','e','r'}, - (__m256i)(__v32qi){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'t','h','i','s'}), - 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,'m','m','e','r')); + (__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1}, + (__m256i)(__v32qs){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1}), + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,-1)); __m256i test_mm256_sign_epi16(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi16 // CHECK: call <16 x i16> @llvm.x86.avx2.psign.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}) return _mm256_sign_epi16(a, b); } -TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,'h','i', 'b','y','e','!'}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_sign_epi16((__m256i)(__v16hi){0x77,0x77,0xbe,0xbe, -0x9,-0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0}, (__m256i)(__v16hi){-1,-256,1,256, -512,-1028,512,1028, -2048,-4096,0,0, 0,0,0,0}), -0x77,-0x77,0xbe,0xbe, 0x9,0x9,-0x8,-0x8, 0,0,0,0, 0,0,0,0)); __m256i test_mm256_sign_epi32(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi32 // CHECK: call <8 x i32> @llvm.x86.avx2.psign.d(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) return _mm256_sign_epi32(a, b); } -TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed,'o','o','p','s'}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, -'o',-'o',-'p',-'s')); +TEST_CONSTEXPR(match_v8si(_mm256_sign_epi32((__m256i)(__v8si){0xbeef,0xfeed,0xbead,0xdeed, -1,2,-3,4}, (__m256i)(__v8si){0,0,0,0,-1,-1,-1,-1}), 0,0,0,0, 1,-2,3,-4)); __m256i test_mm256_slli_epi16(__m256i a) { // CHECK-LABEL: test_mm256_slli_epi16 diff --git a/clang/test/CodeGen/X86/ssse3-builtins.c b/clang/test/CodeGen/X86/ssse3-builtins.c index d3fde1e2de653..b7a4a2fe7ccd7 100644 --- a/clang/test/CodeGen/X86/ssse3-builtins.c +++ b/clang/test/CodeGen/X86/ssse3-builtins.c @@ -125,7 +125,7 @@ __m128i test_mm_sign_epi8(__m128i a, __m128i b) { // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}) return _mm_sign_epi8(a, b); } -TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qi){'g','r','i','n','d','i','n','g', 'l','e','e','t','c','o','d','e'}, (__m128i)(__v16qi){0,1,0,1, 1,1,0,0, 0,0,1,1, 1,0,1,0}), 0,'r',0,'n', 'd','i',0,0, 0,0,'e','t', 'c',0,'d',0)); +TEST_CONSTEXPR(match_v16qi(_mm_sign_epi8((__m128i)(__v16qs){11,0,13,14, 0,16,17,18, 19,20,21,22, 23,24,25,26}, (__m128i)(__v16qs){0,1,0,1, -1,1,0,0, 0,0,1,1, -1,0,-1,0}), 0,0,0,14, 0,16,0,0, 0,0,21,22, -23,0,-25,0)); __m128i test_mm_sign_epi16(__m128i a, __m128i b) { // CHECK-LABEL: test_mm_sign_epi16 From a64af807b94352403bfedf4e60e6b31db381f7d6 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 21 Oct 2025 20:59:47 -0700 Subject: [PATCH 4/6] Apply feedback --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 11 ++++++----- clang/lib/AST/ExprConstant.cpp | 12 +++++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b539e032ddbbc..b1e4196af956a 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3816,11 +3816,12 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_psignd128: case X86::BI__builtin_ia32_psignd256: return interp__builtin_elementwise_int_binop( - S, OpPC, Call, [](const APSInt &AElem, const APSInt &BElem) -> APInt { - return BElem[BElem.getBitWidth() - 1] - ? static_cast(-AElem) - : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) - : static_cast(AElem); + S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) { + if (BElem.isZero()) + return APInt::getZero(AElem.getBitWidth()); + if (BElem.isNegative()) + return -AElem; + return AElem; }); case clang::X86::BI__builtin_ia32_pavgb128: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index f2047f60c8d62..5c74d508aa50b 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12318,11 +12318,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_psignw256: case X86::BI__builtin_ia32_psignd128: case X86::BI__builtin_ia32_psignd256: - return EvaluateBinOpExpr([](const APSInt &AElem, - const APSInt &BElem) -> APInt { - return BElem[BElem.getBitWidth() - 1] ? static_cast(-AElem) - : BElem.isZero() ? APInt(AElem.getBitWidth(), 0) - : static_cast(AElem); + return EvaluateBinOpExpr([](const APInt &AElem, + const APInt &BElem) -> APInt { + if (BElem.isZero()) + return APInt::getZero(AElem.getBitWidth()); + if (BElem.isNegative()) + return -AElem; + return AElem; }); case X86::BI__builtin_ia32_blendvpd: From e081ff8e85a33b67b827250d1825e60a599cefa1 Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Tue, 21 Oct 2025 21:00:07 -0700 Subject: [PATCH 5/6] Clang-format --- clang/lib/AST/ByteCode/InterpBuiltin.cpp | 2 +- clang/lib/AST/ExprConstant.cpp | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index b1e4196af956a..d932a49dd2935 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3817,7 +3817,7 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_psignd256: return interp__builtin_elementwise_int_binop( S, OpPC, Call, [](const APInt &AElem, const APInt &BElem) { - if (BElem.isZero()) + if (BElem.isZero()) return APInt::getZero(AElem.getBitWidth()); if (BElem.isNegative()) return -AElem; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 5c74d508aa50b..665a1c06b85ed 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12318,14 +12318,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_psignw256: case X86::BI__builtin_ia32_psignd128: case X86::BI__builtin_ia32_psignd256: - return EvaluateBinOpExpr([](const APInt &AElem, - const APInt &BElem) -> APInt { - if (BElem.isZero()) + return EvaluateBinOpExpr( + [](const APInt &AElem, const APInt &BElem) -> APInt { + if (BElem.isZero()) return APInt::getZero(AElem.getBitWidth()); if (BElem.isNegative()) return -AElem; return AElem; - }); + }); case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: From 08b2f3370b7983718fa71bf142c6dd492ffb6bdc Mon Sep 17 00:00:00 2001 From: kimsh02 Date: Thu, 23 Oct 2025 05:49:35 -0700 Subject: [PATCH 6/6] Apply feedback --- clang/lib/AST/ExprConstant.cpp | 15 +++++++-------- clang/lib/Headers/tmmintrin.h | 4 ---- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 665a1c06b85ed..7db7a01998a4a 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12318,14 +12318,13 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { case X86::BI__builtin_ia32_psignw256: case X86::BI__builtin_ia32_psignd128: case X86::BI__builtin_ia32_psignd256: - return EvaluateBinOpExpr( - [](const APInt &AElem, const APInt &BElem) -> APInt { - if (BElem.isZero()) - return APInt::getZero(AElem.getBitWidth()); - if (BElem.isNegative()) - return -AElem; - return AElem; - }); + return EvaluateBinOpExpr([](const APInt &AElem, const APInt &BElem) { + if (BElem.isZero()) + return APInt::getZero(AElem.getBitWidth()); + if (BElem.isNegative()) + return -AElem; + return AElem; + }); case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: diff --git a/clang/lib/Headers/tmmintrin.h b/clang/lib/Headers/tmmintrin.h index 95b32c0087ecb..cb4b36ea7383c 100644 --- a/clang/lib/Headers/tmmintrin.h +++ b/clang/lib/Headers/tmmintrin.h @@ -26,9 +26,6 @@ #define __zext128(x) \ (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ 1, 2, 3) -#define __anyext128(x) \ - (__m128i) __builtin_shufflevector((__v2si)(x), __extension__(__v2si){}, 0, \ - 1, -1, -1) #if defined(__cplusplus) && (__cplusplus >= 201103L) #define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr @@ -774,7 +771,6 @@ static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR _mm_sign_pi32(__m64 __a, __builtin_ia32_psignd128((__v4si)__zext128(__a), (__v4si)__zext128(__b))); } -#undef __anyext128 #undef __zext128 #undef __trunc64 #undef __DEFAULT_FN_ATTRS