-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[Headers][X86] Allow MMX/SSE/AVX MOVMSK intrinsics to be used in constexpr #161914
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
kimsh02
wants to merge
9
commits into
llvm:main
Choose a base branch
from
kimsh02:x86-movemask-constexpr
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Shawn K (kimsh02) ChangesFix #154520 Full diff: https://github.com/llvm/llvm-project/pull/161914.diff 12 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index e98bee28c15be..1387c6a18f500 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -182,7 +182,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def cvttss2si : X86Builtin<"int(_Vector<4, float>)">;
}
-let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
+let Features = "sse",
+ Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in {
def movmskps : X86Builtin<"int(_Vector<4, float>)">;
}
@@ -208,7 +209,8 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
}
-let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "sse2",
+ Attributes = [NoThrow, Constexpr, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
}
@@ -523,6 +525,12 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}
+let Features = "avx",
+ Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in {
+ def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
+ def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
+}
+
let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
@@ -532,9 +540,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
- def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
- def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
- def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
+ def ptestnzc256
+ : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
}
let Features = "avx", Attributes = [NoThrow] in {
@@ -569,6 +576,11 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vec_set_v8si : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int, _Constant int)">;
}
+let Features = "avx2",
+ Attributes = [NoThrow, Constexpr, RequiredVectorWidth<256>] in {
+ def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
+}
+
let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def mpsadbw256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant char)">;
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
@@ -579,8 +591,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pmaddubsw256 : X86Builtin<"_Vector<16, short>(_Vector<32, char>, _Vector<32, char>)">;
- def pmaddwd256 : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
- def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
+ def pmaddwd256
+ : X86Builtin<"_Vector<8, int>(_Vector<16, short>, _Vector<16, short>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a2e97fcafdfef..ed36877d070e3 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2773,6 +2773,46 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
return true;
}
+static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
+ const CallExpr *Call) {
+ assert(Call->getNumArgs() == 1);
+
+ const Pointer &Source = S.Stk.pop<Pointer>();
+
+ unsigned SourceLen = Source.getNumElems();
+ const QualType ElemQT = getElemType(Source);
+ const OptPrimType ElemPT = S.getContext().classify(ElemQT);
+ unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
+
+ if (ElemQT->isIntegerType()) {
+ unsigned Byte = 8;
+ unsigned ResultLen = (LaneWidth * SourceLen) / Byte;
+ APInt Result(ResultLen, 0);
+ unsigned ResultIdx = 0;
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt Lane;
+ INT_TYPE_SWITCH_NO_BOOL(*ElemPT,
+ { Lane = Source.elem<T>(I).toAPSInt(); });
+ for (unsigned J = 0; J != LaneWidth; J += Byte) {
+ Result.setBitVal(ResultIdx++, Lane[J + 7]);
+ }
+ }
+ pushInteger(S, Result.getZExtValue(), Call->getType());
+ return true;
+ }
+ if (ElemQT->isFloatingType()) {
+ APInt Result(SourceLen, 0);
+ using T = PrimConv<PT_Float>::T;
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt Lane = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
+ Result.setBitVal(I, Lane[LaneWidth - 1]);
+ }
+ pushInteger(S, Result.getZExtValue(), Call->getType());
+ return true;
+ }
+ return false;
+}
+
static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
@@ -3355,6 +3395,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});
+ case clang::X86::BI__builtin_ia32_movmskps:
+ case clang::X86::BI__builtin_ia32_movmskpd:
+ case clang::X86::BI__builtin_ia32_pmovmskb128:
+ case clang::X86::BI__builtin_ia32_pmovmskb256:
+ case clang::X86::BI__builtin_ia32_movmskps256:
+ case clang::X86::BI__builtin_ia32_movmskpd256: {
+ return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
+ }
+
case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index b706b14945b6d..0926fbc415ce7 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -13650,7 +13650,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,
bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
-
auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
-> bool {
@@ -14679,6 +14678,44 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(CarryOut, E);
}
+ case clang::X86::BI__builtin_ia32_movmskps:
+ case clang::X86::BI__builtin_ia32_movmskpd:
+ case clang::X86::BI__builtin_ia32_pmovmskb128:
+ case clang::X86::BI__builtin_ia32_pmovmskb256:
+ case clang::X86::BI__builtin_ia32_movmskps256:
+ case clang::X86::BI__builtin_ia32_movmskpd256: {
+ APValue Source;
+ if (!Evaluate(Source, Info, E->getArg(0)))
+ return false;
+ unsigned SourceLen = Source.getVectorLength();
+ const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
+ const QualType ElemQT = VT->getElementType();
+ unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
+
+ if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane
+ unsigned Byte = 8;
+ unsigned ResultLen = (LaneWidth * SourceLen) / Byte;
+ APInt Result(ResultLen, 0);
+ unsigned ResultIdx = 0;
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt Lane = Source.getVectorElt(I).getInt();
+ for (unsigned J = 0; J != LaneWidth; J += Byte) {
+ Result.setBitVal(ResultIdx++, Lane[J + 7]);
+ }
+ }
+ return Success(Result.getZExtValue(), E);
+ }
+ if (ElemQT->isFloatingType()) { // Get sign bit of every lane
+ APInt Result(SourceLen, 0);
+ for (unsigned I = 0; I != SourceLen; ++I) {
+ APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt();
+ Result.setBitVal(I, Lane[LaneWidth - 1]);
+ }
+ return Success(Result.getZExtValue(), E);
+ }
+ return false;
+ }
+
case clang::X86::BI__builtin_ia32_bextr_u32:
case clang::X86::BI__builtin_ia32_bextr_u64:
case clang::X86::BI__builtin_ia32_bextri_u32:
diff --git a/clang/lib/Headers/avx2intrin.h b/clang/lib/Headers/avx2intrin.h
index 31759c5386d9f..133def7b496ec 100644
--- a/clang/lib/Headers/avx2intrin.h
+++ b/clang/lib/Headers/avx2intrin.h
@@ -1306,9 +1306,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b) {
/// \param __a
/// A 256-bit integer vector containing the source bytes.
/// \returns The 32-bit integer mask.
-static __inline__ int __DEFAULT_FN_ATTRS256
-_mm256_movemask_epi8(__m256i __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR
+_mm256_movemask_epi8(__m256i __a) {
return __builtin_ia32_pmovmskb256((__v32qi)__a);
}
diff --git a/clang/lib/Headers/avxintrin.h b/clang/lib/Headers/avxintrin.h
index d6ba19a6c78af..60c6f7a44a323 100644
--- a/clang/lib/Headers/avxintrin.h
+++ b/clang/lib/Headers/avxintrin.h
@@ -2960,9 +2960,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x double] containing the double-precision
/// floating point values with sign bits to be extracted.
/// \returns The sign bits from the operand, written to bits [3:0].
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_movemask_pd(__m256d __a)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_movemask_pd(__m256d __a) {
return __builtin_ia32_movmskpd256((__v4df)__a);
}
@@ -2978,9 +2977,8 @@ _mm256_movemask_pd(__m256d __a)
/// A 256-bit vector of [8 x float] containing the single-precision floating
/// point values with sign bits to be extracted.
/// \returns The sign bits from the operand, written to bits [7:0].
-static __inline int __DEFAULT_FN_ATTRS
-_mm256_movemask_ps(__m256 __a)
-{
+static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm256_movemask_ps(__m256 __a) {
return __builtin_ia32_movmskps256((__v8sf)__a);
}
diff --git a/clang/lib/Headers/emmintrin.h b/clang/lib/Headers/emmintrin.h
index 6597e7e7d4030..11ba0919152e8 100644
--- a/clang/lib/Headers/emmintrin.h
+++ b/clang/lib/Headers/emmintrin.h
@@ -4280,7 +4280,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b) {
/// A 128-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bits from each 8-bit element in \a __a,
/// written to bits [15:0]. The other bits are assigned zeros.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_movemask_epi8(__m128i __a) {
return __builtin_ia32_pmovmskb128((__v16qi)__a);
}
@@ -4699,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) {
/// be extracted.
/// \returns The sign bits from each of the double-precision elements in \a __a,
/// written to bits [1:0]. The remaining bits are assigned values of zero.
-static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm_movemask_pd(__m128d __a) {
return __builtin_ia32_movmskpd((__v2df)__a);
}
diff --git a/clang/lib/Headers/xmmintrin.h b/clang/lib/Headers/xmmintrin.h
index d876b4735a7d2..fe6afdcfc3fdb 100644
--- a/clang/lib/Headers/xmmintrin.h
+++ b/clang/lib/Headers/xmmintrin.h
@@ -2416,9 +2416,8 @@ _mm_min_pu8(__m64 __a, __m64 __b) {
/// A 64-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bit from each 8-bit element in \a __a,
/// written to bits [7:0].
-static __inline__ int __DEFAULT_FN_ATTRS_SSE2
-_mm_movemask_pi8(__m64 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
+_mm_movemask_pi8(__m64 __a) {
return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
}
@@ -3015,9 +3014,7 @@ _mm_cvtps_pi8(__m128 __a)
/// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
/// single-precision floating-point element of the parameter. Bits [31:4] are
/// set to zero.
-static __inline__ int __DEFAULT_FN_ATTRS
-_mm_movemask_ps(__m128 __a)
-{
+static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) {
return __builtin_ia32_movmskps((__v4sf)__a);
}
diff --git a/clang/test/CodeGen/X86/avx-builtins.c b/clang/test/CodeGen/X86/avx-builtins.c
index 3018bb9719b89..13ea5be32a5eb 100644
--- a/clang/test/CodeGen/X86/avx-builtins.c
+++ b/clang/test/CodeGen/X86/avx-builtins.c
@@ -1329,12 +1329,16 @@ int test_mm256_movemask_pd(__m256d A) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}})
return _mm256_movemask_pd(A);
}
+TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9);
+TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3);
int test_mm256_movemask_ps(__m256 A) {
// CHECK-LABEL: test_mm256_movemask_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}})
return _mm256_movemask_ps(A);
}
+TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95);
+TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA);
__m256d test_mm256_mul_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_mul_pd
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index eff2797e87c75..943881b8d599d 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -966,6 +966,9 @@ int test_mm256_movemask_epi8(__m256i a) {
// CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}})
return _mm256_movemask_epi8(a);
}
+TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA);
+TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D);
+TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181);
__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
// CHECK-LABEL: test_mm256_mpsadbw_epu8
diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c
index 26c5f7315457e..23e0b17236966 100644
--- a/clang/test/CodeGen/X86/mmx-builtins.c
+++ b/clang/test/CodeGen/X86/mmx-builtins.c
@@ -399,6 +399,10 @@ int test_mm_movemask_pi8(__m64 a) {
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
return _mm_movemask_pi8(a);
}
+TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA);
+TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D);
+TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5);
+
__m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mul_su32
diff --git a/clang/test/CodeGen/X86/sse-builtins.c b/clang/test/CodeGen/X86/sse-builtins.c
index 3bad3426b1586..f5c1d00d1b851 100644
--- a/clang/test/CodeGen/X86/sse-builtins.c
+++ b/clang/test/CodeGen/X86/sse-builtins.c
@@ -561,6 +561,8 @@ int test_mm_movemask_ps(__m128 A) {
// CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
return _mm_movemask_ps(A);
}
+TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-2.0f, 3.0f, -5.5f, -0.0f}) == 0xD);
+TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-7.348215e5, 0.00314159, -12.789, 2.7182818}) == 0x5);
__m128 test_mm_mul_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_mul_ps
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index 84b90c09444c2..698686038b32f 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -953,12 +953,17 @@ int test_mm_movemask_epi8(__m128i A) {
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
return _mm_movemask_epi8(A);
}
+TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v16qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01}) == 0x6AAA);
+TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v4si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001}) == 0x8F3D);
+TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v2du){0xFF00000000000080ULL,0x7F010203040506C3ULL}) == 0x181);
int test_mm_movemask_pd(__m128d A) {
// CHECK-LABEL: test_mm_movemask_pd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
return _mm_movemask_pd(A);
}
+TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){-12345.67890123, 4567.89012345}) == 0x1);
+TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){0.0000987654321, 09876.5432109876}) == 0x0);
__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_mul_epu32
|
e269538
to
79c747e
Compare
shafik
reviewed
Oct 5, 2025
tbaederr
reviewed
Oct 5, 2025
RKSimon
requested changes
Oct 6, 2025
Apply feedback 👍 |
@RKSimon My PR could take a look 🫡 |
@RKSimon Fixed rebase |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
backend:X86
clang:bytecode
Issues for the clang bytecode constexpr interpreter
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang:headers
Headers provided by Clang, e.g. for intrinsics
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Fix #154520