Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 22 additions & 11 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -2515,24 +2515,28 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def rsqrt14ps256_mask : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, float>, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtb2mask512 : X86Builtin<"unsigned long long int(_Vector<64, char>)">;
def cvtmask2b512 : X86Builtin<"_Vector<64, char>(unsigned long long int)">;
def cvtmask2w512 : X86Builtin<"_Vector<32, short>(unsigned int)">;
}

let Features = "avx512dq", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512dq",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtd2mask512 : X86Builtin<"unsigned short(_Vector<16, int>)">;
def cvtmask2d512 : X86Builtin<"_Vector<16, int>(unsigned short)">;
def cvtmask2q512 : X86Builtin<"_Vector<8, long long int>(unsigned char)">;
def cvtq2mask512 : X86Builtin<"unsigned char(_Vector<8, long long int>)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512bw,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtb2mask128 : X86Builtin<"unsigned short(_Vector<16, char>)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512bw,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtb2mask256 : X86Builtin<"unsigned int(_Vector<32, char>)">;
}

Expand All @@ -2552,11 +2556,13 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2w256 : X86Builtin<"_Vector<16, short>(unsigned short)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512dq,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtd2mask128 : X86Builtin<"unsigned char(_Vector<4, int>)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512dq,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtd2mask256 : X86Builtin<"unsigned char(_Vector<8, int>)">;
}

Expand All @@ -2576,11 +2582,13 @@ let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVector
def cvtmask2q256 : X86Builtin<"_Vector<4, long long int>(unsigned char)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512dq,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtq2mask128 : X86Builtin<"unsigned char(_Vector<2, long long int>)">;
}

let Features = "avx512dq,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512dq,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtq2mask256 : X86Builtin<"unsigned char(_Vector<4, long long int>)">;
}

Expand Down Expand Up @@ -3374,15 +3382,18 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
def vcvtps2ph256_mask : X86Builtin<"_Vector<8, short>(_Vector<8, float>, _Constant int, _Vector<8, short>, unsigned char)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def cvtw2mask512 : X86Builtin<"unsigned int(_Vector<32, short>)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512bw,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def cvtw2mask128 : X86Builtin<"unsigned char(_Vector<8, short>)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512bw,avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def cvtw2mask256 : X86Builtin<"unsigned short(_Vector<16, short>)">;
}

Expand Down
33 changes: 33 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3320,6 +3320,25 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_ia32_cvt_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call, unsigned ID) {
assert(Call->getNumArgs() == 1);

const Pointer &Vec = S.Stk.pop<Pointer>();
APInt RetMask(Vec.getNumElems(), 0);
unsigned VectorLen = Vec.getNumElems();
PrimType ElemT = Vec.getFieldDesc()->getPrimType();

for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
APSInt A;
INT_TYPE_SWITCH_NO_BOOL(ElemT, { A = Vec.elem<T>(ElemNum).toAPSInt(); });
unsigned MSB = A[A.getBitWidth() - 1];
RetMask.setBitVal(ElemNum, MSB);
}
pushInteger(S, RetMask, Call->getType());
return true;
}

static bool interp__builtin_x86_byteshift(
InterpState &S, CodePtr OpPC, const CallExpr *Call, unsigned ID,
llvm::function_ref<APInt(const Pointer &, unsigned Lane, unsigned I,
Expand Down Expand Up @@ -4485,6 +4504,20 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_vec_set_v4di:
return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_cvtb2mask128:
case X86::BI__builtin_ia32_cvtb2mask256:
case X86::BI__builtin_ia32_cvtb2mask512:
case X86::BI__builtin_ia32_cvtw2mask128:
case X86::BI__builtin_ia32_cvtw2mask256:
case X86::BI__builtin_ia32_cvtw2mask512:
case X86::BI__builtin_ia32_cvtd2mask128:
case X86::BI__builtin_ia32_cvtd2mask256:
case X86::BI__builtin_ia32_cvtd2mask512:
case X86::BI__builtin_ia32_cvtq2mask128:
case X86::BI__builtin_ia32_cvtq2mask256:
case X86::BI__builtin_ia32_cvtq2mask512:
return interp__builtin_ia32_cvt_mask(S, OpPC, Call, BuiltinID);

case X86::BI__builtin_ia32_pslldqi128_byteshift:
case X86::BI__builtin_ia32_pslldqi256_byteshift:
case X86::BI__builtin_ia32_pslldqi512_byteshift:
Expand Down
27 changes: 27 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15766,6 +15766,33 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned Idx = static_cast<unsigned>(IdxAPS.getZExtValue() & (N - 1));
return Success(Vec.getVectorElt(Idx).getInt(), E);
}

case clang::X86::BI__builtin_ia32_cvtb2mask128:
case clang::X86::BI__builtin_ia32_cvtb2mask256:
case clang::X86::BI__builtin_ia32_cvtb2mask512:
case clang::X86::BI__builtin_ia32_cvtw2mask128:
case clang::X86::BI__builtin_ia32_cvtw2mask256:
case clang::X86::BI__builtin_ia32_cvtw2mask512:
case clang::X86::BI__builtin_ia32_cvtd2mask128:
case clang::X86::BI__builtin_ia32_cvtd2mask256:
case clang::X86::BI__builtin_ia32_cvtd2mask512:
case clang::X86::BI__builtin_ia32_cvtq2mask128:
case clang::X86::BI__builtin_ia32_cvtq2mask256:
case clang::X86::BI__builtin_ia32_cvtq2mask512: {
assert(E->getNumArgs() == 1);
APValue Vec;
if (!EvaluateVector(E->getArg(0), Vec, Info))
return false;

unsigned VectorLen = Vec.getVectorLength();
APSInt RetMask(llvm::APInt(VectorLen, 0), /*isUnsigned=*/true);
for (unsigned ElemNum = 0; ElemNum < VectorLen; ++ElemNum) {
const APSInt &A = Vec.getVectorElt(ElemNum).getInt();
unsigned MSB = A[A.getBitWidth() - 1];
RetMask.setBitVal(ElemNum, MSB);
}
return Success(APValue(RetMask), E);
}
}
}

Expand Down
10 changes: 4 additions & 6 deletions clang/lib/Headers/avx512vlbwintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -2492,15 +2492,13 @@ _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
_mm256_setzero_si256());
}

static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
_mm_movepi8_mask (__m128i __A)
{
static __inline__ __mmask16 __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_movepi8_mask(__m128i __A) {
return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
}

static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
_mm256_movepi8_mask (__m256i __A)
{
static __inline__ __mmask32 __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_movepi8_mask(__m256i __A) {
return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
}

Expand Down
12 changes: 12 additions & 0 deletions clang/test/CodeGen/X86/avx512vlbw-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -3013,12 +3013,24 @@ __mmask16 test_mm_movepi8_mask(__m128i __A) {
return _mm_movepi8_mask(__A);
}

TEST_CONSTEXPR(_mm_movepi8_mask(
((__m128i)(__v16qi){0, 1, char(129), 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15})
) == (__mmask16)0x0004);

__mmask32 test_mm256_movepi8_mask(__m256i __A) {
// CHECK-LABEL: test_mm256_movepi8_mask
// CHECK: [[CMP:%.*]] = icmp slt <32 x i8> %{{.*}}, zeroinitializer
return _mm256_movepi8_mask(__A);
}

TEST_CONSTEXPR(_mm256_movepi8_mask(
((__m256i)(__v32qi){0, 1, char(255), 3, 4, 5, 6, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24, 25, 26, 27, 28, 29, 30, char(128)})
) == (__mmask32)0x80000004);

__m128i test_mm_movm_epi8(__mmask16 __A) {
// CHECK-LABEL: test_mm_movm_epi8
// CHECK: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
Expand Down