diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d03c778740ad3..b9a76cf210e7f 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -334,8 +334,8 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] def dpps : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Constant char)">; def dppd : X86Builtin<"_Vector<2, double>(_Vector<2, double>, " "_Vector<2,double>, _Constant char)">; - def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>, _Constant char)">; - def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; + def mpsadbw128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, " + "_Vector<16, char>, _Constant char)">; } let Features = "sse4.1", @@ -358,6 +358,7 @@ let Features = "sse4.1", Attributes = [NoThrow, Const, Constexpr, RequiredVector def pmuldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">; def packusdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">; + def phminposuw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>)">; def vec_ext_v16qi : X86Builtin<"char(_Vector<16, char>, _Constant int)">; def vec_set_v16qi : X86Builtin<"_Vector<16, char>(_Vector<16, char>, char, _Constant int)">; diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 0cb491063057c..0de80d4549aa8 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -3003,6 +3003,45 @@ static bool interp__builtin_x86_insert_subvector(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_phminposuw(InterpState &S, CodePtr OpPC, + const CallExpr *Call) { + assert(Call->getNumArgs() == 1); + + const Pointer &Source = S.Stk.pop(); + const Pointer &Dest = S.Stk.peek(); + + unsigned SourceLen = Source.getNumElems(); + QualType ElemQT = getElemType(Source); + OptPrimType ElemT = S.getContext().classify(ElemQT); + unsigned ElemBitWidth = S.getASTContext().getTypeSize(ElemQT); + + bool DestUnsigned = Call->getCallReturnType(S.getASTContext()) + ->castAs() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + INT_TYPE_SWITCH_NO_BOOL(*ElemT, { + APSInt MinIndex(ElemBitWidth, DestUnsigned); + APSInt MinVal = Source.elem(0).toAPSInt(); + + for (unsigned I = 1; I != SourceLen; ++I) { + APSInt Val = Source.elem(I).toAPSInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + Dest.elem(0) = static_cast(MinVal); + Dest.elem(1) = static_cast(MinIndex); + for (unsigned I = 2; I != SourceLen; ++I) { + Dest.elem(I) = static_cast(APSInt(ElemBitWidth, DestUnsigned)); + } + }); + Dest.initializeAllElements(); + return true; +} + static bool interp__builtin_ia32_pternlog(InterpState &S, CodePtr OpPC, const CallExpr *Call, bool MaskZ) { assert(Call->getNumArgs() == 5); @@ -4087,6 +4126,9 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_phminposuw128: + return interp__builtin_ia32_phminposuw(S, OpPC, Call); + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index e308c171ed551..b82d5411b998c 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -12353,6 +12353,40 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(R, E); } + case X86::BI__builtin_ia32_phminposuw128: { + APValue Source; + if (!Evaluate(Source, Info, E->getArg(0))) + return false; + unsigned SourceLen = Source.getVectorLength(); + const VectorType *VT = E->getArg(0)->getType()->castAs(); + QualType ElemQT = VT->getElementType(); + unsigned ElemBitWidth = Info.Ctx.getTypeSize(ElemQT); + + APInt MinIndex(ElemBitWidth, 0); + APInt MinVal = Source.getVectorElt(0).getInt(); + for (unsigned I = 1; I != SourceLen; ++I) { + APInt Val = Source.getVectorElt(I).getInt(); + if (MinVal.ugt(Val)) { + MinVal = Val; + MinIndex = I; + } + } + + bool ResultUnsigned = E->getCallReturnType(Info.Ctx) + ->castAs() + ->getElementType() + ->isUnsignedIntegerOrEnumerationType(); + + SmallVector Result; + Result.reserve(SourceLen); + Result.emplace_back(APSInt(MinVal, ResultUnsigned)); + Result.emplace_back(APSInt(MinIndex, ResultUnsigned)); + for (unsigned I = 0; I != SourceLen - 2; ++I) { + Result.emplace_back(APSInt(APInt(ElemBitWidth, 0), ResultUnsigned)); + } + return Success(APValue(Result.data(), Result.size()), E); + } + case X86::BI__builtin_ia32_pternlogd128_mask: case X86::BI__builtin_ia32_pternlogd256_mask: case X86::BI__builtin_ia32_pternlogd512_mask: diff --git a/clang/lib/Headers/smmintrin.h b/clang/lib/Headers/smmintrin.h index 4f197d5ecaff9..511a135375295 100644 --- a/clang/lib/Headers/smmintrin.h +++ b/clang/lib/Headers/smmintrin.h @@ -1524,7 +1524,8 @@ _mm_packus_epi32(__m128i __V1, __m128i __V2) { /// \returns A 128-bit value where bits [15:0] contain the minimum value found /// in parameter \a __V, bits [18:16] contain the index of the minimum value /// and the remaining bits are set to 0. -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_minpos_epu16(__m128i __V) { +static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR +_mm_minpos_epu16(__m128i __V) { return (__m128i)__builtin_ia32_phminposuw128((__v8hi)__V); } diff --git a/clang/test/CodeGen/X86/sse41-builtins.c b/clang/test/CodeGen/X86/sse41-builtins.c index 89a7ac29e7db7..62cd392824bb2 100644 --- a/clang/test/CodeGen/X86/sse41-builtins.c +++ b/clang/test/CodeGen/X86/sse41-builtins.c @@ -376,6 +376,16 @@ __m128i test_mm_minpos_epu16(__m128i x) { // CHECK: call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %{{.*}}) return _mm_minpos_epu16(x); } +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){0,0,0,0, 0,0,0,0}), 0,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1,0,0,0, 0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){65535,65535,65535,65535,65535,65535,65535,65535}), 65535,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){9,8,7,6,5,4,3,2}), 2,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,5,5,5,5,5,5,5}), 5,0,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){5,7,9,4,10,4,11,12}), 4,3,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){6,0,0,0,0,0,0,0}), 0,1,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1000,2000,3000,4000,5000,6000,7000,1}), 1,7,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){1234,5678,42,9999,65535,0,4242,42}), 0,5,0,0, 0,0,0,0)); +TEST_CONSTEXPR(match_v8hu(_mm_minpos_epu16((__m128i)(__v8hu){400,500,12,600,12,700,800,900}), 12,2,0,0, 0,0,0,0)); __m128i test_mm_mpsadbw_epu8(__m128i x, __m128i y) { // CHECK-LABEL: test_mm_mpsadbw_epu8