Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,8 @@ let Features = "sse", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def cvttss2si : X86Builtin<"int(_Vector<4, float>)">;
}

let Features = "sse", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
let Features = "sse",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def movmskps : X86Builtin<"int(_Vector<4, float>)">;
}

Expand All @@ -210,11 +211,6 @@ let Features = "sse2", Attributes = [NoThrow, RequiredVectorWidth<128>] in {
def maskmovdqu : X86Builtin<"void(_Vector<16, char>, _Vector<16, char>, char *)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
}

let Features = "sse2", Attributes = [NoThrow] in {
def movnti : X86Builtin<"void(int *, int)">;
}
Expand All @@ -223,6 +219,8 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">;
def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">;
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -530,6 +528,12 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}

let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
Expand All @@ -539,9 +543,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def vtestnzcps256 : X86Builtin<"int(_Vector<8, float>, _Vector<8, float>)">;
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
def ptestnzc256
: X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
}

let Features = "avx", Attributes = [NoThrow] in {
Expand Down Expand Up @@ -585,7 +588,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def phsubw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def phsubd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def phsubsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
Expand Down Expand Up @@ -626,7 +628,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi

def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;


def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;

def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
Expand Down
49 changes: 49 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2756,6 +2756,46 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_ia32_movmsk_op(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 1);

const Pointer &Source = S.Stk.pop<Pointer>();

unsigned SourceLen = Source.getNumElems();
QualType ElemQT = getElemType(Source);
OptPrimType ElemPT = S.getContext().classify(ElemQT);
unsigned ResultLen =
S.getASTContext().getTypeSize(Call->getType()); // Always 32-bit integer.
APInt Result(ResultLen, 0);

if (ElemQT->isIntegerType()) {
unsigned BitsInAByte = 8;
unsigned LaneWidth = S.getASTContext().getTypeSize(ElemQT);
unsigned ResultIdx = 0;
INT_TYPE_SWITCH_NO_BOOL(*ElemPT, {
for (unsigned I = 0; I != SourceLen; ++I) {
APInt Lane = Source.elem<T>(I).toAPSInt();
for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) {
Result.setBitVal(ResultIdx++, Lane[J + 7]);
}
}
});
pushInteger(S, Result, Call->getType());
return true;
}
if (ElemQT->isRealFloatingType()) {
using T = PrimConv<PT_Float>::T;
for (unsigned I = 0; I != SourceLen; ++I) {
APInt Lane = Source.elem<T>(I).getAPFloat().bitcastToAPInt();
Result.setBitVal(I, Lane.isNegative());
}
pushInteger(S, Result, Call->getType());
return true;
}
return false;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -3452,6 +3492,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
});

case clang::X86::BI__builtin_ia32_movmskps:
case clang::X86::BI__builtin_ia32_movmskpd:
case clang::X86::BI__builtin_ia32_pmovmskb128:
case clang::X86::BI__builtin_ia32_pmovmskb256:
case clang::X86::BI__builtin_ia32_movmskps256:
case clang::X86::BI__builtin_ia32_movmskpd256: {
return interp__builtin_ia32_movmsk_op(S, OpPC, Call);
}

case clang::X86::BI__builtin_ia32_pavgb128:
case clang::X86::BI__builtin_ia32_pavgw128:
case clang::X86::BI__builtin_ia32_pavgb256:
Expand Down
39 changes: 38 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13905,7 +13905,6 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,

bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {

auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
-> bool {
Expand Down Expand Up @@ -14934,6 +14933,44 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
return Success(CarryOut, E);
}

case clang::X86::BI__builtin_ia32_movmskps:
case clang::X86::BI__builtin_ia32_movmskpd:
case clang::X86::BI__builtin_ia32_pmovmskb128:
case clang::X86::BI__builtin_ia32_pmovmskb256:
case clang::X86::BI__builtin_ia32_movmskps256:
case clang::X86::BI__builtin_ia32_movmskpd256: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
return false;
unsigned SourceLen = Source.getVectorLength();
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
QualType ElemQT = VT->getElementType();
unsigned ResultLen = Info.Ctx.getTypeSize(
E->getCallReturnType(Info.Ctx)); // Always 32-bit integer.
APInt Result(ResultLen, 0);

if (ElemQT->isIntegerType()) { // Get MSB of each byte of every lane.
unsigned BitsInAByte = 8;
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
unsigned ResultIdx = 0;
for (unsigned I = 0; I != SourceLen; ++I) {
APInt Lane = Source.getVectorElt(I).getInt();
for (unsigned J = 0; J != LaneWidth; J += BitsInAByte) {
Result.setBitVal(ResultIdx++, Lane[J + 7]);
}
}
return Success(Result, E);
}
if (ElemQT->isRealFloatingType()) { // Get sign bit of every lane.
for (unsigned I = 0; I != SourceLen; ++I) {
APInt Lane = Source.getVectorElt(I).getFloat().bitcastToAPInt();
Result.setBitVal(I, Lane.isNegative());
}
return Success(Result, E);
}
return false;
}

case clang::X86::BI__builtin_ia32_bextr_u32:
case clang::X86::BI__builtin_ia32_bextr_u64:
case clang::X86::BI__builtin_ia32_bextri_u32:
Expand Down
5 changes: 2 additions & 3 deletions clang/lib/Headers/avx2intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1304,9 +1304,8 @@ _mm256_min_epu32(__m256i __a, __m256i __b) {
/// \param __a
/// A 256-bit integer vector containing the source bytes.
/// \returns The 32-bit integer mask.
static __inline__ int __DEFAULT_FN_ATTRS256
_mm256_movemask_epi8(__m256i __a)
{
static __inline__ int __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_movemask_epi8(__m256i __a) {
return __builtin_ia32_pmovmskb256((__v32qi)__a);
}

Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2960,9 +2960,8 @@ _mm256_testnzc_si256(__m256i __a, __m256i __b)
/// A 256-bit vector of [4 x double] containing the double-precision
/// floating point values with sign bits to be extracted.
/// \returns The sign bits from the operand, written to bits [3:0].
static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_pd(__m256d __a)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movemask_pd(__m256d __a) {
return __builtin_ia32_movmskpd256((__v4df)__a);
}

Expand All @@ -2978,9 +2977,8 @@ _mm256_movemask_pd(__m256d __a)
/// A 256-bit vector of [8 x float] containing the single-precision floating
/// point values with sign bits to be extracted.
/// \returns The sign bits from the operand, written to bits [7:0].
static __inline int __DEFAULT_FN_ATTRS
_mm256_movemask_ps(__m256 __a)
{
static __inline int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm256_movemask_ps(__m256 __a) {
return __builtin_ia32_movmskps256((__v8sf)__a);
}

Expand Down
6 changes: 4 additions & 2 deletions clang/lib/Headers/emmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -4280,7 +4280,8 @@ _mm_packus_epi16(__m128i __a, __m128i __b) {
/// A 128-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bits from each 8-bit element in \a __a,
/// written to bits [15:0]. The other bits are assigned zeros.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a) {
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movemask_epi8(__m128i __a) {
return __builtin_ia32_pmovmskb128((__v16qi)__a);
}

Expand Down Expand Up @@ -4699,7 +4700,8 @@ _mm_unpacklo_pd(__m128d __a, __m128d __b) {
/// be extracted.
/// \returns The sign bits from each of the double-precision elements in \a __a,
/// written to bits [1:0]. The remaining bits are assigned values of zero.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_pd(__m128d __a) {
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_movemask_pd(__m128d __a) {
return __builtin_ia32_movmskpd((__v2df)__a);
}

Expand Down
9 changes: 3 additions & 6 deletions clang/lib/Headers/xmmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2416,9 +2416,8 @@ _mm_min_pu8(__m64 __a, __m64 __b) {
/// A 64-bit integer vector containing the values with bits to be extracted.
/// \returns The most significant bit from each 8-bit element in \a __a,
/// written to bits [7:0].
static __inline__ int __DEFAULT_FN_ATTRS_SSE2
_mm_movemask_pi8(__m64 __a)
{
static __inline__ int __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
_mm_movemask_pi8(__m64 __a) {
return __builtin_ia32_pmovmskb128((__v16qi)__zext128(__a));
}

Expand Down Expand Up @@ -3015,9 +3014,7 @@ _mm_cvtps_pi8(__m128 __a)
/// \returns A 32-bit integer value. Bits [3:0] contain the sign bits from each
/// single-precision floating-point element of the parameter. Bits [31:4] are
/// set to zero.
static __inline__ int __DEFAULT_FN_ATTRS
_mm_movemask_ps(__m128 __a)
{
static __inline__ int __DEFAULT_FN_ATTRS_CONSTEXPR _mm_movemask_ps(__m128 __a) {
return __builtin_ia32_movmskps((__v4sf)__a);
}

Expand Down
4 changes: 4 additions & 0 deletions clang/test/CodeGen/X86/avx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -1337,12 +1337,16 @@ int test_mm256_movemask_pd(__m256d A) {
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %{{.*}})
return _mm256_movemask_pd(A);
}
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-1234.5678901234, 98765.4321098765, 0.000123456789, -3.14159265358979}) == 0x9);
TEST_CONSTEXPR(_mm256_movemask_pd((__m256d)(__v4df){-0.000000987654321, -99999.999999999, 42.424242424242, 314159.2653589793}) == 0x3);

int test_mm256_movemask_ps(__m256 A) {
// CHECK-LABEL: test_mm256_movemask_ps
// CHECK: call {{.*}}i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %{{.*}})
return _mm256_movemask_ps(A);
}
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){-12.3456f, 34.7890f, -0.0001234f, 123456.78f, -987.654f, 0.001234f, 3.14159f, -256.001f}) == 0x95);
TEST_CONSTEXPR(_mm256_movemask_ps((__m256)(__v8sf){0.333333f, -45.6789f, 999.999f, -0.9999f, 17.234f, -128.512f, 2048.0f, -3.14f}) == 0xAA);

__m256d test_mm256_mul_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_mul_pd
Expand Down
3 changes: 3 additions & 0 deletions clang/test/CodeGen/X86/avx2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -968,6 +968,9 @@ int test_mm256_movemask_epi8(__m256i a) {
// CHECK: call {{.*}}i32 @llvm.x86.avx2.pmovmskb(<32 x i8> %{{.*}})
return _mm256_movemask_epi8(a);
}
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v32qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01,0xB6,0x00,0x39,0x40,0xD0,0x05,0x80,0x2A,0x7B,0x00,0x90,0xFF,0x01,0x34,0xC0,0x6D}) == 0x4C516AAA);
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v8si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001,(int)0x00000000,(int)0xFFFFFFFF,(int)0x12345678,(int)0x90ABCDEF}) == 0xF0F08F3D);
TEST_CONSTEXPR(_mm256_movemask_epi8((__m256i)(__v4du){0xFF00000000000080ULL,0x7F010203040506C3ULL,0x8000000000000000ULL,0x0123456789ABCDEFULL}) == 0x0F800181);

__m256i test_mm256_mpsadbw_epu8(__m256i x, __m256i y) {
// CHECK-LABEL: test_mm256_mpsadbw_epu8
Expand Down
4 changes: 4 additions & 0 deletions clang/test/CodeGen/X86/mmx-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,10 @@ int test_mm_movemask_pi8(__m64 a) {
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(
return _mm_movemask_pi8(a);
}
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v8qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3})) == 0xAA);
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v2si){(int)0x80FF00AA,(int)0x7F0183E1})) == 0x3D);
TEST_CONSTEXPR(_mm_movemask_pi8((__m64)((__v1di){(long long)0xE110837A00924DB0ULL})) == 0xA5);


__m64 test_mm_mul_su32(__m64 a, __m64 b) {
// CHECK-LABEL: test_mm_mul_su32
Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGen/X86/sse-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,8 @@ int test_mm_movemask_ps(__m128 A) {
// CHECK: call {{.*}}i32 @llvm.x86.sse.movmsk.ps(<4 x float> %{{.*}})
return _mm_movemask_ps(A);
}
TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-2.0f, 3.0f, -5.5f, -0.0f}) == 0xD);
TEST_CONSTEXPR(_mm_movemask_ps((__m128)(__v4sf){-7.348215e5, 0.00314159, -12.789, 2.7182818}) == 0x5);

__m128 test_mm_mul_ps(__m128 A, __m128 B) {
// CHECK-LABEL: test_mm_mul_ps
Expand Down
5 changes: 5 additions & 0 deletions clang/test/CodeGen/X86/sse2-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -956,12 +956,17 @@ int test_mm_movemask_epi8(__m128i A) {
// CHECK: call {{.*}}i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %{{.*}})
return _mm_movemask_epi8(A);
}
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v16qu){0x7F,0x80,0x01,0xFF,0x00,0xAA,0x55,0xC3,0x12,0x8E,0x00,0xFE,0x7E,0x81,0xFF,0x01}) == 0x6AAA);
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v4si){(int)0x80FF00AA,(int)0x7F0183E1,(int)0xDEADBEEF,(int)0xC0000001}) == 0x8F3D);
TEST_CONSTEXPR(_mm_movemask_epi8((__m128i)(__v2du){0xFF00000000000080ULL,0x7F010203040506C3ULL}) == 0x181);

int test_mm_movemask_pd(__m128d A) {
// CHECK-LABEL: test_mm_movemask_pd
// CHECK: call {{.*}}i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %{{.*}})
return _mm_movemask_pd(A);
}
TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){-12345.67890123, 4567.89012345}) == 0x1);
TEST_CONSTEXPR(_mm_movemask_pd((__m128d)(__v2df){0.0000987654321, 09876.5432109876}) == 0x0);

__m128i test_mm_mul_epu32(__m128i A, __m128i B) {
// CHECK-LABEL: test_mm_mul_epu32
Expand Down