Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -319,14 +319,22 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def roundsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
def roundpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Constant int)">;
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant char)">;
def ptestz128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestnzc128 : X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
"_Vector<2,double>, _Constant char)">;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(style) Don't alter lines unrelated to this patch.

def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
}

// 128-bit PTEST builtins (ptestz128 / ptestc128 / ptestnzc128). Each takes two
// vectors of two `long long int` elements and returns `int`. This group lists
// Constexpr in its attributes in addition to NoThrow and Const.
let Features = "sse4.1",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def ptestz128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestc128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
def ptestnzc128
: X86Builtin<"int(_Vector<2, long long int>, _Vector<2, long long int>)">;
}

let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def pblendw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
def blendpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, _Vector<2, double>, _Constant int)">;
Expand Down Expand Up @@ -516,8 +524,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def roundps256 : X86Builtin<"_Vector<8, float>(_Vector<8, float>, _Constant int)">;
}


let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vtestzpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
def vtestnzcpd : X86Builtin<"int(_Vector<2, double>, _Vector<2, double>)">;
Expand All @@ -526,7 +534,8 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in
def vtestnzcps : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>)">;
}

let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
let Features = "avx",
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
def vtestzpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
def vtestnzcpd256 : X86Builtin<"int(_Vector<4, double>, _Vector<4, double>)">;
Expand All @@ -536,6 +545,10 @@ let Features = "avx", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in
def ptestz256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
def ptestnzc256 : X86Builtin<"int(_Vector<4, long long int>, _Vector<4, long long int>)">;
}

// 256-bit MOVMSK builtins. Each takes a single 256-bit floating-point vector
// and returns `int`. Unlike the PTEST/VTEST groups, this group does not list
// Constexpr in its attributes.
let Features = "avx",
Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
def movmskpd256 : X86Builtin<"int(_Vector<4, double>)">;
def movmskps256 : X86Builtin<"int(_Vector<8, float>)">;
}
Expand Down
69 changes: 68 additions & 1 deletion clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2817,6 +2817,46 @@ static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
return true;
}

/// Evaluate an x86 vector "test" builtin on two vector operands.
///
/// Both operands are popped from the interpreter stack and flattened into one
/// wide APInt each, with element I placed at bit offset I * element width.
/// Integer elements are copied whole; floating-point elements are reduced to
/// their sign bit. \p Fn is then applied to the two wide values and its
/// boolean result is pushed as an integer of the call's result type.
///
/// \returns false if the element type is neither integer nor floating point,
/// true otherwise.
static bool interp__builtin_ia32_test_op(
    InterpState &S, CodePtr OpPC, const CallExpr *Call,
    llvm::function_ref<bool(const APInt &A, const APInt &B)> Fn) {
  // RHS is popped first, then LHS.
  const Pointer &RHS = S.Stk.pop<Pointer>();
  const Pointer &LHS = S.Stk.pop<Pointer>();

  assert(LHS.getNumElems() == RHS.getNumElems());

  const unsigned NumLanes = LHS.getNumElems();
  const QualType LaneQT = getElemType(LHS);
  const OptPrimType LanePT = S.getContext().classify(LaneQT);
  const unsigned LaneBits = S.getASTContext().getTypeSize(LaneQT);
  const APInt SignBit = APInt::getSignMask(LaneBits);

  APInt PackedLHS(LaneBits * NumLanes, 0);
  APInt PackedRHS(LaneBits * NumLanes, 0);

  for (unsigned Lane = 0; Lane < NumLanes; ++Lane) {
    // Only integer and floating-point lanes are supported.
    const bool IsIntLane = LaneQT->isIntegerType();
    if (!IsIntLane && !LaneQT->isFloatingType())
      return false;

    APInt LHSBits;
    APInt RHSBits;
    if (IsIntLane) {
      // Integer lanes contribute their full value.
      INT_TYPE_SWITCH_NO_BOOL(*LanePT, {
        LHSBits = LHS.elem<T>(Lane).toAPSInt();
        RHSBits = RHS.elem<T>(Lane).toAPSInt();
      });
    } else {
      // Floating-point lanes contribute only their sign bit.
      using FloatT = PrimConv<PT_Float>::T;
      LHSBits =
          LHS.elem<FloatT>(Lane).getAPFloat().bitcastToAPInt() & SignBit;
      RHSBits =
          RHS.elem<FloatT>(Lane).getAPFloat().bitcastToAPInt() & SignBit;
    }

    const unsigned BitOffset = Lane * LaneBits;
    PackedLHS.insertBits(LHSBits, BitOffset);
    PackedRHS.insertBits(RHSBits, BitOffset);
  }

  const bool Result = Fn(PackedLHS, PackedRHS);
  pushInteger(S, Result, Call->getType());
  return true;
}

static bool interp__builtin_elementwise_triop(
InterpState &S, CodePtr OpPC, const CallExpr *Call,
llvm::function_ref<APInt(const APSInt &, const APSInt &, const APSInt &)>
Expand Down Expand Up @@ -3678,7 +3718,34 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call, [](const APSInt &F, const APSInt &T, const APSInt &C) {
return ((APInt)C).isNegative() ? T : F;
});

// In the three groups below, A and B are the wide values that
// interp__builtin_ia32_test_op builds from the LHS and RHS vectors.

// "testz" forms: true when A and B have no set bit in common.
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call,
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
// "testc" forms: true when every bit set in B is also set in A.
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call,
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
// "testnzc" forms: true only when both of the predicates above are false.
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256:
return interp__builtin_ia32_test_op(
S, OpPC, Call, [](const APInt &A, const APInt &B) {
return ((A & B) != 0) && ((~A & B) != 0);
});
case X86::BI__builtin_ia32_selectb_128:
case X86::BI__builtin_ia32_selectb_256:
case X86::BI__builtin_ia32_selectb_512:
Expand Down
65 changes: 64 additions & 1 deletion clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13766,6 +13766,42 @@ static bool getBuiltinAlignArguments(const CallExpr *E, EvalInfo &Info,

bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
unsigned BuiltinOp) {
auto EvalTestOp =
[&](llvm::function_ref<bool(const APInt &, const APInt &)> Fn) {
APValue SourceLHS, SourceRHS;
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
return false;

unsigned SourceLen = SourceLHS.getVectorLength();
const VectorType *VT = E->getArg(0)->getType()->castAs<VectorType>();
const QualType ElemQT = VT->getElementType();
unsigned LaneWidth = Info.Ctx.getTypeSize(ElemQT);
APInt SignMask = APInt::getSignMask(LaneWidth);

APInt AWide(LaneWidth * SourceLen, 0);
APInt BWide(LaneWidth * SourceLen, 0);

for (unsigned I = 0; I != SourceLen; ++I) {
APInt ALane;
APInt BLane;

if (ElemQT->isIntegerType()) { // Get value
ALane = SourceLHS.getVectorElt(I).getInt();
BLane = SourceRHS.getVectorElt(I).getInt();
} else if (ElemQT->isFloatingType()) { // Get only sign bit
ALane = SourceLHS.getVectorElt(I).getFloat().bitcastToAPInt() &
SignMask;
BLane = SourceRHS.getVectorElt(I).getFloat().bitcastToAPInt() &
SignMask;
} else { // Must be integer or floating type
return false;
}
AWide.insertBits(ALane, I * LaneWidth);
BWide.insertBits(BLane, I * LaneWidth);
}
return Success(Fn(AWide, BWide), E);
};

auto HandleMaskBinOp =
[&](llvm::function_ref<APSInt(const APSInt &, const APSInt &)> Fn)
Expand Down Expand Up @@ -14879,7 +14915,34 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
Result.setBitVal(P++, Val[I]);
return Success(Result, E);
}

// In the three groups below, A and B are the wide values that EvalTestOp
// builds from argument 0 and argument 1 of the call.

// "testz" forms: true when A and B have no set bit in common.
case X86::BI__builtin_ia32_ptestz128:
case X86::BI__builtin_ia32_ptestz256:
case X86::BI__builtin_ia32_vtestzps:
case X86::BI__builtin_ia32_vtestzps256:
case X86::BI__builtin_ia32_vtestzpd:
case X86::BI__builtin_ia32_vtestzpd256: {
return EvalTestOp(
[](const APInt &A, const APInt &B) { return (A & B) == 0; });
}
// "testc" forms: true when every bit set in B is also set in A.
case X86::BI__builtin_ia32_ptestc128:
case X86::BI__builtin_ia32_ptestc256:
case X86::BI__builtin_ia32_vtestcps:
case X86::BI__builtin_ia32_vtestcps256:
case X86::BI__builtin_ia32_vtestcpd:
case X86::BI__builtin_ia32_vtestcpd256: {
return EvalTestOp(
[](const APInt &A, const APInt &B) { return (~A & B) == 0; });
}
// "testnzc" forms: true only when both of the predicates above are false.
case X86::BI__builtin_ia32_ptestnzc128:
case X86::BI__builtin_ia32_ptestnzc256:
case X86::BI__builtin_ia32_vtestnzcps:
case X86::BI__builtin_ia32_vtestnzcps256:
case X86::BI__builtin_ia32_vtestnzcpd:
case X86::BI__builtin_ia32_vtestnzcpd256: {
return EvalTestOp([](const APInt &A, const APInt &B) {
return ((A & B) != 0) && ((~A & B) != 0);
});
}
case X86::BI__builtin_ia32_kandqi:
case X86::BI__builtin_ia32_kandhi:
case X86::BI__builtin_ia32_kandsi:
Expand Down
Loading