Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -3360,15 +3360,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
def cvtusi2ss32 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, unsigned int, _Constant int)">;
}

let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpmultishiftqb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpmultishiftqb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpmultishiftqb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
}

Expand Down
63 changes: 63 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3531,6 +3531,65 @@ static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
return true;
}

static bool interp__builtin_ia32_multishiftqb(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {
assert(Call->getNumArgs() == 2);

QualType ATy = Call->getArg(0)->getType();
QualType BTy = Call->getArg(1)->getType();
if (!ATy->isVectorType() || !BTy->isVectorType()) {
return false;
}

const Pointer &BPtr = S.Stk.pop<Pointer>();
const Pointer &APtr = S.Stk.pop<Pointer>();
const auto *AVecT = ATy->castAs<VectorType>();
const auto *BVecT = BTy->castAs<VectorType>();
assert(AVecT->getNumElements() == BVecT->getNumElements());

PrimType ElemT = *S.getContext().classify(AVecT->getElementType());

unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = AVecT->getNumElements();
unsigned NumQWords = NumBytes / NumBytesInQWord;
const Pointer &Dst = S.Stk.peek<Pointer>();

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt AQWord(64, 0);
APInt BQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
uint64_t Byte = 0;
INT_TYPE_SWITCH(ElemT, {
Byte = static_cast<uint64_t>(APtr.elem<T>(Idx));
AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);

Byte = static_cast<uint64_t>(BPtr.elem<T>(Idx));
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
});
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
uint64_t Ctrl =
AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() & 0x3F;

APInt Byte(8, 0);
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F), BitIdx);
}
INT_TYPE_SWITCH(ElemT, {
Dst.elem<T>(QWordId * NumBytesInQWord + ByteIdx) =
T::from(Byte.getZExtValue());
});
}
}

Dst.initializeAllElements();

return true;
}

bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
uint32_t BuiltinID) {
if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
Expand Down Expand Up @@ -4756,6 +4815,10 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return std::make_pair(0, static_cast<int>(LaneOffset + Index));
});

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512:
return interp__builtin_ia32_multishiftqb(S, OpPC, Call);
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
45 changes: 45 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13096,6 +13096,51 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

case X86::BI__builtin_ia32_vpmultishiftqb128:
case X86::BI__builtin_ia32_vpmultishiftqb256:
case X86::BI__builtin_ia32_vpmultishiftqb512: {
assert(E->getNumArgs() == 2);

APValue A, B;
if (!Evaluate(A, Info, E->getArg(0)) || !Evaluate(B, Info, E->getArg(1)))
return false;

assert(A.getVectorLength() == B.getVectorLength());
unsigned NumBytesInQWord = 8;
unsigned NumBitsInByte = 8;
unsigned NumBytes = A.getVectorLength();
unsigned NumQWords = NumBytes / NumBytesInQWord;
SmallVector<APValue, 64> Result;
Result.reserve(NumBytes);

for (unsigned QWordId = 0; QWordId != NumQWords; ++QWordId) {
APInt AQWord(64, 0);
APInt BQWord(64, 0);
for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
unsigned Idx = QWordId * NumBytesInQWord + ByteIdx;
uint64_t Byte = A.getVectorElt(Idx).getInt().getZExtValue();
AQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);

Byte = B.getVectorElt(Idx).getInt().getZExtValue();
BQWord.insertBits(APInt(8, Byte & 0xFF), ByteIdx * NumBitsInByte);
}

for (unsigned ByteIdx = 0; ByteIdx != NumBytesInQWord; ++ByteIdx) {
uint64_t Ctrl =
AQWord.extractBits(8, ByteIdx * NumBitsInByte).getZExtValue() &
0x3F;

APInt Byte(8, 0);
for (unsigned BitIdx = 0; BitIdx != NumBitsInByte; ++BitIdx) {
Byte.insertBits(BQWord.extractBits(1, (Ctrl + BitIdx) & 0x3F),
BitIdx);
}
Result.push_back(APValue(APSInt(Byte, /*isUnsigned*/ true)));
}
}
return Success(APValue(Result.data(), Result.size()), E);
}

case X86::BI__builtin_ia32_phminposuw128: {
APValue Source;
if (!Evaluate(Source, Info, E->getArg(0)))
Expand Down
38 changes: 17 additions & 21 deletions clang/lib/Headers/avx512vbmiintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,61 +15,57 @@
#define __VBMIINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
__min_vector_width__(512)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS constexpr
__min_vector_width__(512))) constexpr
#else
#define __DEFAULT_FN_ATTRS_CONSTEXPR __DEFAULT_FN_ATTRS
#define __DEFAULT_FN_ATTRS \
__attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), \
__min_vector_width__(512)))
#endif

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutex2var_epi8(__m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_vpermi2varqi512((__v64qi)__A, (__v64qi)__I,
(__v64qi) __B);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_permutex2var_epi8(__m512i __A, __mmask64 __U, __m512i __I,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutex2var_epi8(
__m512i __A, __mmask64 __U, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask2_permutex2var_epi8(__m512i __A, __m512i __I, __mmask64 __U,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask2_permutex2var_epi8(
__m512i __A, __m512i __I, __mmask64 __U, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)__I);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_maskz_permutex2var_epi8(
__mmask64 __U, __m512i __A, __m512i __I, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512(__U,
(__v64qi)_mm512_permutex2var_epi8(__A, __I, __B),
(__v64qi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_permutexvar_epi8(__m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_permvarqi512((__v64qi) __B, (__v64qi) __A);
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
static __inline__ __m512i __DEFAULT_FN_ATTRS
_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)_mm512_setzero_si512());
}

static __inline__ __m512i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A,
__m512i __B) {
static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_permutexvar_epi8(
__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) {
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__M,
(__v64qi)_mm512_permutexvar_epi8(__A, __B),
(__v64qi)__W);
Expand Down Expand Up @@ -97,6 +93,6 @@ _mm512_maskz_multishift_epi64_epi8(__mmask64 __M, __m512i __X, __m512i __Y)
(__v64qi)_mm512_multishift_epi64_epi8(__X, __Y),
(__v64qi)_mm512_setzero_si512());
}
#undef __DEFAULT_FN_ATTRS_CONSTEXPR

#undef __DEFAULT_FN_ATTRS
#endif
72 changes: 32 additions & 40 deletions clang/lib/Headers/avx512vbmivlintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,16 @@
#define __VBMIVLINTRIN_H

/* Define the default attributes for the functions in this file. */
#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(128))) constexpr
#define __DEFAULT_FN_ATTRS256 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(256))) constexpr
#else
#define __DEFAULT_FN_ATTRS128 \
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
Expand All @@ -23,111 +33,96 @@
__attribute__((__always_inline__, __nodebug__, \
__target__("avx512vbmi,avx512vl"), \
__min_vector_width__(256)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128 constexpr
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256 constexpr
#else
#define __DEFAULT_FN_ATTRS128_CONSTEXPR __DEFAULT_FN_ATTRS128
#define __DEFAULT_FN_ATTRS256_CONSTEXPR __DEFAULT_FN_ATTRS256
#endif

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
(__v16qi)__I,
(__v16qi)__B);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(
__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(
__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)__I);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(
__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128(__U,
(__v16qi)_mm_permutex2var_epi8(__A, __I, __B),
(__v16qi)_mm_setzero_si128());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
(__v32qi)__B);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(
__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(
__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)__I);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(
__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256(__U,
(__v32qi)_mm256_permutex2var_epi8(__A, __I, __B),
(__v32qi)_mm256_setzero_si256());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_permutexvar_epi8(__m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
static __inline__ __m128i __DEFAULT_FN_ATTRS128
_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)_mm_setzero_si128());
}

static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A,
__m128i __B) {
static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8(
__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) {
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
(__v16qi)_mm_permutexvar_epi8(__A, __B),
(__v16qi)__W);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_permutexvar_epi8(__m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
static __inline__ __m256i __DEFAULT_FN_ATTRS256
_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)_mm256_setzero_si256());
}

static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A,
__m256i __B) {
static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8(
__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) {
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
(__v32qi)_mm256_permutexvar_epi8(__A, __B),
(__v32qi)__W);
Expand Down Expand Up @@ -179,9 +174,6 @@ _mm256_maskz_multishift_epi64_epi8(__mmask32 __M, __m256i __X, __m256i __Y)
(__v32qi)_mm256_setzero_si256());
}

#undef __DEFAULT_FN_ATTRS128_CONSTEXPR
#undef __DEFAULT_FN_ATTRS256_CONSTEXPR
#undef __DEFAULT_FN_ATTRS128
#undef __DEFAULT_FN_ATTRS256

#endif
Loading