Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 24 additions & 36 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -210,17 +210,6 @@ let Header = "emmintrin.h", Attributes = [NoThrow, RequireDeclaration] in {
def _mm_pause : X86LibBuiltin<"void()">;
}

let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}

let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def movmskpd : X86Builtin<"int(_Vector<2, double>)">;
def pmovmskb128 : X86Builtin<"int(_Vector<16, char>)">;
Expand Down Expand Up @@ -261,6 +250,15 @@ let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
def vec_ext_v4sf : X86Builtin<"float(_Vector<4, float>, _Constant int)">;
def vec_ext_v8hi : X86Builtin<"short(_Vector<8, short>, _Constant int)">;
def vec_set_v8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, short, _Constant int)">;

def psraw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrad128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def psrld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psrlq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}

let Features = "sse3", Attributes = [NoThrow] in {
Expand Down Expand Up @@ -579,14 +577,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
def psadbw256
: X86Builtin<
"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def permdf256 : X86Builtin<"_Vector<4, double>(_Vector<4, double>, _Constant int)">;
def permti256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def permdi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
Expand Down Expand Up @@ -663,6 +653,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi

def permvarsi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
def permvarsf256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Vector<8, int>)">;

def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}

let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
Expand Down Expand Up @@ -1926,16 +1925,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def prorq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}

// AVX512BW builtins with a required vector width of 512 that are declared
// NoThrow, Const and Constexpr. psllw512 takes a 32 x short source and an
// 8 x short count operand, matching the operand shapes of the narrower
// psllw128/psllw256 builtins.
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pmaddubsw512 : X86Builtin<"_Vector<32, short>(_Vector<64, char>, _Vector<64, char>)">;
def pmaddwd512 : X86Builtin<"_Vector<16, int>(_Vector<32, short>, _Vector<32, short>)">;
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
}

let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
Expand Down Expand Up @@ -1991,7 +1987,7 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect
def psravq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>)">;
}

let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def psraw512
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
def psrlw512
Expand Down Expand Up @@ -2308,25 +2304,17 @@ let Features = "avx512f",
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
}

let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
}

let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
}

let Features = "avx512vl",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
}

let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
def pslld512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
def psllq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<2, long long int>)">;
def psrad512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<4, int>)">;
Expand Down
105 changes: 105 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3468,6 +3468,69 @@ static bool interp__builtin_ia32_shuffle_generic(
return true;
}

/// Interpret an x86 vector shift builtin whose shift amount is supplied as a
/// vector operand and applied uniformly to every element of the source.
///
/// The shift amount is the low 64 bits of the count vector (argument 1),
/// assembled lane by lane into one unsigned integer. Every element of the
/// source vector (argument 0) is then transformed with:
///   - ShiftOp(Elt, Count)            when Count <  element bit width
///   - OverflowOp(Elt, ElementWidth)  when Count >= element bit width
/// and written to the destination vector that is left on top of the stack.
///
/// \param S          Interpreter state; both operands are popped from S.Stk.
/// \param OpPC       Current code pointer (unused here).
/// \param Call       The builtin call; must have exactly two vector arguments.
/// \param ShiftOp    In-range shift callback.
/// \param OverflowOp Callback producing the result for an out-of-range count.
/// \returns true (this helper has no failure path of its own).
static bool interp__builtin_ia32_shift_with_count(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {

  assert(Call->getNumArgs() == 2);

  // Operands come off the stack in reverse order: count first, then source.
  const Pointer &Count = S.Stk.pop<Pointer>();
  const Pointer &Source = S.Stk.pop<Pointer>();

  QualType SourceType = Call->getArg(0)->getType();
  QualType CountType = Call->getArg(1)->getType();
  assert(SourceType->isVectorType() && CountType->isVectorType());

  const auto *SourceVecT = SourceType->castAs<VectorType>();
  const auto *CountVecT = CountType->castAs<VectorType>();
  PrimType SourceElemT = *S.getContext().classify(SourceVecT->getElementType());
  PrimType CountElemT = *S.getContext().classify(CountVecT->getElementType());

  // The destination is only peeked so that it remains on the stack as the
  // result of the call.
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned DestEltWidth =
      S.getASTContext().getTypeSize(SourceVecT->getElementType());
  bool IsDestUnsigned = SourceVecT->getElementType()->isUnsignedIntegerType();
  unsigned DestLen = SourceVecT->getNumElements();
  unsigned CountEltWidth =
      S.getASTContext().getTypeSize(CountVecT->getElementType());
  unsigned NumBitsInQWord = 64;
  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;

  // Pack the low 64 bits of the count vector into a single integer. Each lane
  // is zero-extended (not sign-extended) so that a negative signed lane only
  // contributes its own CountEltWidth bits and cannot spill into the bits of
  // the lanes above it. This mirrors the getZExtValue()-based packing used by
  // the evalShiftWithCount helper in ExprConstant.cpp.
  uint64_t CountLQWord = 0;
  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
    uint64_t Elt = 0;
    INT_TYPE_SWITCH(CountElemT, {
      Elt = Count.elem<T>(EltIdx).toAPSInt().getZExtValue();
    });
    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
  }

  // The count is shared by every lane, so decide once which callback applies.
  const bool CountInRange = CountLQWord < DestEltWidth;

  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
    APSInt Elt;
    INT_TYPE_SWITCH(SourceElemT, { Elt = Source.elem<T>(EltIdx).toAPSInt(); });

    APInt Result = CountInRange ? ShiftOp(Elt, CountLQWord)
                                : OverflowOp(Elt, DestEltWidth);

    // Store with the extension that matches the destination element's
    // signedness.
    if (IsDestUnsigned) {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getZExtValue());
      });
    } else {
      INT_TYPE_SWITCH(SourceElemT, {
        Dst.elem<T>(EltIdx) = T::from(Result.getSExtValue());
      });
    }
  }

  Dst.initializeAllElements();
  return true;
}

static bool interp__builtin_ia32_shufbitqmb_mask(InterpState &S, CodePtr OpPC,
const CallExpr *Call) {

Expand Down Expand Up @@ -4971,6 +5034,48 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);

case X86::BI__builtin_ia32_psraq128:
case X86::BI__builtin_ia32_psraq256:
case X86::BI__builtin_ia32_psraq512:
case X86::BI__builtin_ia32_psrad128:
case X86::BI__builtin_ia32_psrad256:
case X86::BI__builtin_ia32_psrad512:
case X86::BI__builtin_ia32_psraw128:
case X86::BI__builtin_ia32_psraw256:
case X86::BI__builtin_ia32_psraw512:
return interp__builtin_ia32_shift_with_count(
S, OpPC, Call,
[](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
[](const APInt &Elt, unsigned Width) { return Elt.ashr(Width - 1); });

case X86::BI__builtin_ia32_psllq128:
case X86::BI__builtin_ia32_psllq256:
case X86::BI__builtin_ia32_psllq512:
case X86::BI__builtin_ia32_pslld128:
case X86::BI__builtin_ia32_pslld256:
case X86::BI__builtin_ia32_pslld512:
case X86::BI__builtin_ia32_psllw128:
case X86::BI__builtin_ia32_psllw256:
case X86::BI__builtin_ia32_psllw512:
return interp__builtin_ia32_shift_with_count(
S, OpPC, Call,
[](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
[](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });

case X86::BI__builtin_ia32_psrlq128:
case X86::BI__builtin_ia32_psrlq256:
case X86::BI__builtin_ia32_psrlq512:
case X86::BI__builtin_ia32_psrld128:
case X86::BI__builtin_ia32_psrld256:
case X86::BI__builtin_ia32_psrld512:
case X86::BI__builtin_ia32_psrlw128:
case X86::BI__builtin_ia32_psrlw256:
case X86::BI__builtin_ia32_psrlw512:
return interp__builtin_ia32_shift_with_count(
S, OpPC, Call,
[](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
[](const APInt &Elt, unsigned Width) { return APInt::getZero(Width); });

case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
Expand Down
106 changes: 106 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12166,6 +12166,52 @@ static bool evalShuffleGeneric(
return true;
}

/// Constant-evaluate an x86 vector shift builtin whose shift amount is
/// supplied as a vector operand and applied uniformly to every element.
///
/// The shift amount is the low 64 bits of the count vector (argument 1),
/// assembled from its zero-extended lanes. Every element of the source vector
/// (argument 0) is then transformed with:
///   - ShiftOp(Elt, Count)            when Count <  element bit width
///   - OverflowOp(Elt, ElementWidth)  when Count >= element bit width
///
/// \param Info       Evaluation state used to evaluate both call arguments.
/// \param Call       The builtin call; must have exactly two vector arguments.
/// \param Out        Receives the resulting vector value on success.
/// \param ShiftOp    In-range shift callback.
/// \param OverflowOp Callback producing the result for an out-of-range count.
/// \returns false if either argument cannot be evaluated, true otherwise.
static bool evalShiftWithCount(
    EvalInfo &Info, const CallExpr *Call, APValue &Out,
    llvm::function_ref<APInt(const APInt &, uint64_t)> ShiftOp,
    llvm::function_ref<APInt(const APInt &, unsigned)> OverflowOp) {

  // Check the arity before any argument is accessed, so the assertion
  // actually guards the getArg() calls below.
  assert(Call->getNumArgs() == 2);

  APValue Source, Count;
  if (!EvaluateAsRValue(Info, Call->getArg(0), Source) ||
      !EvaluateAsRValue(Info, Call->getArg(1), Count))
    return false;

  QualType SourceTy = Call->getArg(0)->getType();
  QualType CountTy = Call->getArg(1)->getType();
  assert(SourceTy->isVectorType() && CountTy->isVectorType());

  QualType DestEltTy = SourceTy->castAs<VectorType>()->getElementType();
  unsigned DestEltWidth = Source.getVectorElt(0).getInt().getBitWidth();
  unsigned DestLen = Source.getVectorLength();
  bool IsDestUnsigned = DestEltTy->isUnsignedIntegerType();
  unsigned CountEltWidth = Count.getVectorElt(0).getInt().getBitWidth();
  unsigned NumBitsInQWord = 64;
  unsigned NumCountElts = NumBitsInQWord / CountEltWidth;
  SmallVector<APValue, 64> Result;
  Result.reserve(DestLen);

  // Pack the low 64 bits of the count vector into a single unsigned integer,
  // lowest lane in the least significant bits.
  uint64_t CountLQWord = 0;
  for (unsigned EltIdx = 0; EltIdx != NumCountElts; ++EltIdx) {
    uint64_t Elt = Count.getVectorElt(EltIdx).getInt().getZExtValue();
    CountLQWord |= (Elt << (EltIdx * CountEltWidth));
  }

  // The count is shared by every lane, so decide once which callback applies.
  const bool CountInRange = CountLQWord < DestEltWidth;

  for (unsigned EltIdx = 0; EltIdx != DestLen; ++EltIdx) {
    APInt Elt = Source.getVectorElt(EltIdx).getInt();
    APInt Shifted = CountInRange ? ShiftOp(Elt, CountLQWord)
                                 : OverflowOp(Elt, DestEltWidth);
    Result.push_back(APValue(APSInt(std::move(Shifted), IsDestUnsigned)));
  }
  Out = APValue(Result.data(), Result.size());
  return true;
}

bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
if (!IsConstantEvaluatedBuiltinCall(E))
return ExprEvaluatorBaseTy::VisitCallExpr(E);
Expand Down Expand Up @@ -13169,6 +13215,66 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(Result.data(), Result.size()), E);
}

case X86::BI__builtin_ia32_psraq128:
case X86::BI__builtin_ia32_psraq256:
case X86::BI__builtin_ia32_psraq512:
case X86::BI__builtin_ia32_psrad128:
case X86::BI__builtin_ia32_psrad256:
case X86::BI__builtin_ia32_psrad512:
case X86::BI__builtin_ia32_psraw128:
case X86::BI__builtin_ia32_psraw256:
case X86::BI__builtin_ia32_psraw512: {
APValue R;
if (!evalShiftWithCount(
Info, E, R,
[](const APInt &Elt, uint64_t Count) { return Elt.ashr(Count); },
[](const APInt &Elt, unsigned Width) {
return Elt.ashr(Width - 1);
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_psllq128:
case X86::BI__builtin_ia32_psllq256:
case X86::BI__builtin_ia32_psllq512:
case X86::BI__builtin_ia32_pslld128:
case X86::BI__builtin_ia32_pslld256:
case X86::BI__builtin_ia32_pslld512:
case X86::BI__builtin_ia32_psllw128:
case X86::BI__builtin_ia32_psllw256:
case X86::BI__builtin_ia32_psllw512: {
APValue R;
if (!evalShiftWithCount(
Info, E, R,
[](const APInt &Elt, uint64_t Count) { return Elt.shl(Count); },
[](const APInt &Elt, unsigned Width) {
return APInt::getZero(Width);
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_psrlq128:
case X86::BI__builtin_ia32_psrlq256:
case X86::BI__builtin_ia32_psrlq512:
case X86::BI__builtin_ia32_psrld128:
case X86::BI__builtin_ia32_psrld256:
case X86::BI__builtin_ia32_psrld512:
case X86::BI__builtin_ia32_psrlw128:
case X86::BI__builtin_ia32_psrlw256:
case X86::BI__builtin_ia32_psrlw512: {
APValue R;
if (!evalShiftWithCount(
Info, E, R,
[](const APInt &Elt, uint64_t Count) { return Elt.lshr(Count); },
[](const APInt &Elt, unsigned Width) {
return APInt::getZero(Width);
}))
return false;
return Success(R, E);
}

case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
Expand Down
Loading
Loading