Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 17 additions & 44 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -1746,75 +1746,48 @@ let Features = "avx512vl", Attributes = [NoThrow, RequiredVectorWidth<256>] in {
def scattersiv8si : X86Builtin<"void(void *, unsigned char, _Vector<8, int>, _Vector<8, int>, _Constant int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2vard128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Vector<4, int>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def vpermi2varq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Vector<2, long long int>)">;
def vpermi2varps128 : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, int>, _Vector<4, float>)">;
def vpermi2varpd128 : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, long long int>, _Vector<2, double>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2vard256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Vector<8, int>)">;
def vpermi2varq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Vector<4, long long int>)">;
def vpermi2varps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>, _Vector<8, float>)">;
def vpermi2varpd256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Vector<4, long long int>, _Vector<4, double>)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2vard512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Vector<16, int>)">;
def vpermi2varq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Vector<8, long long int>)">;
def vpermi2varps512 : X86Builtin<"_Vector<16, float>(_Vector<16, float>, _Vector<16, int>, _Vector<16, float>)">;
def vpermi2varpd512 : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, long long int>, _Vector<8, double>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varqi128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Vector<16, char>)">;
}

let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vbmi,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varqi256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
}

let Features = "avx512vbmi", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512vbmi", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varqi512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>, _Vector<64, char>)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpermi2varhi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Vector<8, short>)">;
}

let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx512vl,avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vpermi2varhi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Vector<16, short>)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
}

Expand Down
88 changes: 85 additions & 3 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3358,18 +3358,46 @@ static bool interp__builtin_ia32_shuffle_generic(
GetSourceIndex) {

assert(Call->getNumArgs() == 3);
unsigned ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();

unsigned ShuffleMask = 0;
Pointer A, MaskVector, B;

QualType Arg2Type = Call->getArg(2)->getType();
bool IsVectorMask = false;
if (Arg2Type->isVectorType()) {
IsVectorMask = true;
B = S.Stk.pop<Pointer>();
MaskVector = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
} else if (Arg2Type->isIntegerType()) {
ShuffleMask = popToAPSInt(S, Call->getArg(2)).getZExtValue();
B = S.Stk.pop<Pointer>();
A = S.Stk.pop<Pointer>();
} else {
return false;
}

QualType Arg0Type = Call->getArg(0)->getType();
const auto *VecT = Arg0Type->castAs<VectorType>();
PrimType ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();

const Pointer &B = S.Stk.pop<Pointer>();
const Pointer &A = S.Stk.pop<Pointer>();
const Pointer &Dst = S.Stk.peek<Pointer>();

PrimType MaskElemT = PT_Uint32;
if (IsVectorMask) {
QualType Arg1Type = Call->getArg(1)->getType();
const auto *MaskVecT = Arg1Type->castAs<VectorType>();
QualType MaskElemType = MaskVecT->getElementType();
MaskElemT = *S.getContext().classify(MaskElemType);
}

for (unsigned DstIdx = 0; DstIdx != NumElems; ++DstIdx) {
if (IsVectorMask) {
INT_TYPE_SWITCH(MaskElemT, {
ShuffleMask = static_cast<unsigned>(MaskVector.elem<T>(DstIdx));
});
}
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const Pointer &Src = (SrcVecIdx == 0) ? A : B;
TYPE_SWITCH(ElemT, { Dst.elem<T>(DstIdx) = Src.elem<T>(SrcIdx); });
Expand Down Expand Up @@ -4345,6 +4373,60 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
unsigned Index = (ShuffleMask >> BitIndex) & IndexMask;
return std::pair<unsigned, unsigned>{SrcIdx, LaneOffset + Index};
});
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x1;
unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_vpermi2vard128:
case X86::BI__builtin_ia32_vpermi2varps128:
case X86::BI__builtin_ia32_vpermi2varq256:
case X86::BI__builtin_ia32_vpermi2varpd256:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x3;
unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_vpermi2varhi128:
case X86::BI__builtin_ia32_vpermi2vard256:
case X86::BI__builtin_ia32_vpermi2varps256:
case X86::BI__builtin_ia32_vpermi2varq512:
case X86::BI__builtin_ia32_vpermi2varpd512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x7;
unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_vpermi2varqi128:
case X86::BI__builtin_ia32_vpermi2varhi256:
case X86::BI__builtin_ia32_vpermi2vard512:
case X86::BI__builtin_ia32_vpermi2varps512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0xF;
unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_vpermi2varqi256:
case X86::BI__builtin_ia32_vpermi2varhi512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x1F;
unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_vpermi2varqi512:
return interp__builtin_ia32_shuffle_generic(
S, OpPC, Call, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x3F;
unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
});
case X86::BI__builtin_ia32_pshufb128:
case X86::BI__builtin_ia32_pshufb256:
case X86::BI__builtin_ia32_pshufb512:
Expand Down
111 changes: 103 additions & 8 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11628,21 +11628,38 @@ static bool evalShuffleGeneric(
if (!VT)
return false;

APSInt MaskImm;
if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
return false;
unsigned ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
unsigned ShuffleMask = 0;
APValue A, MaskVector, B;
bool IsVectorMask = false;

APValue A, B;
if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
!EvaluateAsRValue(Info, Call->getArg(1), B))
QualType Arg2Type = Call->getArg(2)->getType();
if (Arg2Type->isVectorType()) {
IsVectorMask = true;
if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
!EvaluateAsRValue(Info, Call->getArg(1), MaskVector) ||
!EvaluateAsRValue(Info, Call->getArg(2), B))
return false;
} else if (Arg2Type->isIntegerType()) {
APSInt MaskImm;
if (!EvaluateInteger(Call->getArg(2), MaskImm, Info))
return false;
ShuffleMask = static_cast<unsigned>(MaskImm.getZExtValue());
if (!EvaluateAsRValue(Info, Call->getArg(0), A) ||
!EvaluateAsRValue(Info, Call->getArg(1), B))
return false;
} else {
return false;
}

unsigned NumElts = VT->getNumElements();
SmallVector<APValue, 16> ResultElements;
SmallVector<APValue, 64> ResultElements;
ResultElements.reserve(NumElts);

for (unsigned DstIdx = 0; DstIdx != NumElts; ++DstIdx) {
if (IsVectorMask) {
ShuffleMask = static_cast<unsigned>(
MaskVector.getVectorElt(DstIdx).getInt().getZExtValue());
}
auto [SrcVecIdx, SrcIdx] = GetSourceIndex(DstIdx, ShuffleMask);
const APValue &Src = (SrcVecIdx == 0) ? A : B;
ResultElements.push_back(Src.getVectorElt(SrcIdx));
Expand Down Expand Up @@ -13048,6 +13065,84 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {

return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
case X86::BI__builtin_ia32_vpermi2varq128:
case X86::BI__builtin_ia32_vpermi2varpd128: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x1;
unsigned SrcIdx = (ShuffleMask >> 1) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_vpermi2vard128:
case X86::BI__builtin_ia32_vpermi2varps128:
case X86::BI__builtin_ia32_vpermi2varq256:
case X86::BI__builtin_ia32_vpermi2varpd256: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x3;
unsigned SrcIdx = (ShuffleMask >> 2) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_vpermi2varhi128:
case X86::BI__builtin_ia32_vpermi2vard256:
case X86::BI__builtin_ia32_vpermi2varps256:
case X86::BI__builtin_ia32_vpermi2varq512:
case X86::BI__builtin_ia32_vpermi2varpd512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x7;
unsigned SrcIdx = (ShuffleMask >> 3) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_vpermi2varqi128:
case X86::BI__builtin_ia32_vpermi2varhi256:
case X86::BI__builtin_ia32_vpermi2vard512:
case X86::BI__builtin_ia32_vpermi2varps512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0xF;
unsigned SrcIdx = (ShuffleMask >> 4) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_vpermi2varqi256:
case X86::BI__builtin_ia32_vpermi2varhi512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x1F;
unsigned SrcIdx = (ShuffleMask >> 5) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
case X86::BI__builtin_ia32_vpermi2varqi512: {
APValue R;
if (!evalShuffleGeneric(
Info, E, R, [](unsigned DstIdx, unsigned ShuffleMask) {
unsigned offset = ShuffleMask & 0x3F;
unsigned SrcIdx = (ShuffleMask >> 6) & 0x1 ? 1 : 0;
return std::pair<unsigned, unsigned>{SrcIdx, offset};
}))
return false;
return Success(R, E);
}
}
}

Expand Down
9 changes: 8 additions & 1 deletion clang/lib/Headers/avx10_2_512bf16intrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,12 @@ typedef __bf16 __m512bh_u __attribute__((__vector_size__(64), __aligned__(1)));
__attribute__((__always_inline__, __nodebug__, __target__("avx10.2"), \
__min_vector_width__(512)))

#if defined(__cplusplus) && (__cplusplus >= 201103L)
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512 constexpr
#else
#define __DEFAULT_FN_ATTRS512_CONSTEXPR __DEFAULT_FN_ATTRS512
#endif

static __inline __m512bh __DEFAULT_FN_ATTRS512 _mm512_setzero_pbh(void) {
return __builtin_bit_cast(__m512bh, _mm512_setzero_ps());
}
Expand Down Expand Up @@ -167,7 +173,7 @@ _mm512_mask_blend_pbh(__mmask32 __U, __m512bh __A, __m512bh __W) {
(__v32bf)__A);
}

static __inline__ __m512bh __DEFAULT_FN_ATTRS512
static __inline__ __m512bh __DEFAULT_FN_ATTRS512_CONSTEXPR
_mm512_permutex2var_pbh(__m512bh __A, __m512i __I, __m512bh __B) {
return (__m512bh)__builtin_ia32_vpermi2varhi512((__v32hi)__A, (__v32hi)__I,
(__v32hi)__B);
Expand Down Expand Up @@ -555,6 +561,7 @@ static __inline__ __m512bh __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pbh(
(__v32bf)_mm512_setzero_pbh());
}

#undef __DEFAULT_FN_ATTRS512_CONSTEXPR
#undef __DEFAULT_FN_ATTRS512

#endif
Expand Down
Loading