Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions clang/include/clang/Basic/BuiltinsX86.td
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>]
def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">;
def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, "
"_Vector<2,double>, _Constant char)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;
def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, "
"_Vector<16, char>, _Constant char)">;
}

let Features = "sse4.1",
Expand All @@ -358,6 +358,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector

def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">;

def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">;
def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">;
Expand Down
42 changes: 42 additions & 0 deletions clang/lib/AST/ByteCode/InterpBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3003,6 +3003,45 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC,
return true;
}

/// Constant-evaluates __builtin_ia32_phminposuw128 for the bytecode
/// interpreter.
///
/// Pops the source vector pointer off the stack and fills the destination
/// vector that remains on top of the stack:
///   - element 0 receives the smallest source element, compared as unsigned;
///   - element 1 receives the index of the first element holding that value;
///   - every remaining element is set to zero.
///
/// \returns true unconditionally.
static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC,
                                            const CallExpr *Call) {
  assert(Call->getNumArgs() == 1);

  const Pointer &Source = S.Stk.pop<Pointer>();
  const Pointer &Dest = S.Stk.peek<Pointer>();

  unsigned SourceLen = Source.getNumElems();
  QualType ElemQT = getElemType(Source);
  OptPrimType ElemT = S.getContext().classify(ElemQT);
  unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT);

  // Signedness of the values written to Dest follows the builtin's declared
  // return element type, not the source operand.
  bool DestUnsigned = Call->getCallReturnType(S.getASTContext())
                          ->castAs<VectorType>()
                          ->getElementType()
                          ->isUnsignedIntegerOrEnumerationType();

  INT_TYPE_SWITCH_NO_BOOL(*ElemT, {
    // The running index needs no arbitrary-precision features, so it is kept
    // as a plain unsigned and only widened to an APSInt when stored.
    unsigned MinIndex = 0;
    APSInt MinVal = Source.elem<T>(0).toAPSInt();

    for (unsigned I = 1; I != SourceLen; ++I) {
      APSInt Val = Source.elem<T>(I).toAPSInt();
      // Strictly-greater comparison: on ties the earliest index is kept.
      if (MinVal.ugt(Val)) {
        MinVal = Val;
        MinIndex = I;
      }
    }

    APSInt MinIndexVal(ElemBitWidth, DestUnsigned);
    MinIndexVal = MinIndex;
    const APSInt Zero(ElemBitWidth, DestUnsigned);

    Dest.elem<T>(0) = static_cast<T>(MinVal);
    Dest.elem<T>(1) = static_cast<T>(MinIndexVal);
    for (unsigned I = 2; I != SourceLen; ++I)
      Dest.elem<T>(I) = static_cast<T>(Zero);
  });
  Dest.initializeAllElements();
  return true;
}

static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC,
const CallExpr *Call, bool MaskZ) {
assert(Call->getNumArgs() == 5);
Expand Down Expand Up @@ -4087,6 +4126,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
S, OpPC, Call,
[](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; });

case X86::BI__builtin_ia32_phminposuw128:
return interp__builtin_ia32_phminposuw(S, OpPC, Call);

case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
Expand Down
34 changes: 34 additions & 0 deletions clang/lib/AST/ExprConstant.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12353,6 +12353,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(R, E);
}

case X86::BI__builtin_ia32_phminposuw128: {
  // Fold the horizontal unsigned minimum: result element 0 is the smallest
  // source element, element 1 is the index of its first occurrence, and all
  // remaining elements are zero.
  APValue Vec;
  if (!Evaluate(Vec, Info, E->getArg(0)))
    return false;

  const unsigned NumElts = Vec.getVectorLength();
  const unsigned EltBits = Info.Ctx.getTypeSize(
      E->getArg(0)->getType()->castAs<VectorType>()->getElementType());

  // Signedness of the produced values follows the builtin's declared return
  // element type.
  const bool IsResultUnsigned = E->getCallReturnType(Info.Ctx)
                                    ->castAs<VectorType>()
                                    ->getElementType()
                                    ->isUnsignedIntegerOrEnumerationType();

  // Scan for the minimum; the strict comparison keeps the earliest index
  // when several elements share the minimum value.
  unsigned BestIdx = 0;
  APInt Best = Vec.getVectorElt(0).getInt();
  for (unsigned Idx = 1; Idx != NumElts; ++Idx) {
    APInt Cur = Vec.getVectorElt(Idx).getInt();
    if (Cur.ult(Best)) {
      Best = Cur;
      BestIdx = Idx;
    }
  }

  // Widen the index to the element width only now that it is final.
  APInt BestIdxBits(EltBits, 0);
  BestIdxBits = BestIdx;
  const APSInt Zero(APInt(EltBits, 0), IsResultUnsigned);

  SmallVector<APValue, 8> Lanes;
  Lanes.reserve(NumElts);
  Lanes.emplace_back(APSInt(Best, IsResultUnsigned));
  Lanes.emplace_back(APSInt(BestIdxBits, IsResultUnsigned));
  for (unsigned Idx = 2; Idx != NumElts; ++Idx)
    Lanes.emplace_back(Zero);
  return Success(APValue(Lanes.data(), Lanes.size()), E);
}

case X86::BI__builtin_ia32_pternlogd128_mask:
case X86::BI__builtin_ia32_pternlogd256_mask:
case X86::BI__builtin_ia32_pternlogd512_mask:
Expand Down
3 changes: 2 additions & 1 deletion clang/lib/Headers/smmintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) {
/// \returns A 128-bit value where bits [15:0] contain the minimum value found
/// in parameter \a __V, bits [18:16] contain the index of the minimum value
/// and the remaining bits are set to 0.
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
_mm_minpos_epu16(__m128i __V) {
  // The builtin is declared on a vector of eight shorts, so the generic
  // 128-bit integer vector is reinterpreted as __v8hi before the call and the
  // result is cast back to __m128i.
  return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V);
}

Expand Down
10 changes: 10 additions & 0 deletions clang/test/CodeGen/X86/sse41-builtins.c
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) {
// CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}})
return _mm_minpos_epu16(x);
}
// Constant-evaluation checks for _mm_minpos_epu16. In each expected vector,
// element 0 is the minimum of the input, element 1 is the index of that
// minimum, and elements 2-7 are zero.
// All-equal inputs (including all-zero and all-65535) report index 0.
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0));
// Minimum in the last element.
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0));
// Duplicate minimum (4 at indices 3 and 5): the lower index is expected.
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0));
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0));
// Duplicate minimum (12 at indices 2 and 4): the lower index is expected.
TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0));

__m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) {
// CHECK-LABEL: test_mm_mpsadbw_epu8
Expand Down
Loading