diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index d03c778740ad3..7f19f025d6af3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -282,8 +282,6 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
   def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
   def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
   def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
-  def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
-  def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
 }
 
 let Features = "sse2",
@@ -302,6 +300,9 @@ let Features = "sse2",
 
   def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
   def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
+
+  def pslldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
+  def psrldqi128_byteshift : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Constant int)">;
 }
 
 let Features = "sse3", Attributes = [NoThrow] in {
@@ -613,12 +614,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
   def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
   def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
   def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
-  def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
   def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
   def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
   def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
   def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
-  def psrldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
   def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
   def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
   def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
@@ -652,10 +651,12 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
   def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
   def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
   def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
+  def pslldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
 
   def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
   def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
   def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
+  def psrldqi256_byteshift : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Constant int)">;
 
   def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
   def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index 0cb491063057c..85e18f2436885 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -3128,6 +3128,82 @@ static bool interp__builtin_ia32_vpconflict(InterpState &S, CodePtr OpPC,
   return true;
 }
 
+/// Constant-evaluates the PSLLDQ/VPSLLDQ byte shift. Every 128-bit lane of
+/// the source byte vector is shifted left by the immediate byte count and the
+/// vacated low bytes of that lane are zero-filled. Lanes are independent, so
+/// no byte ever crosses a 16-byte boundary.
+static bool interp__builtin_x86_pslldq_byteshift(InterpState &S, CodePtr OpPC,
+                                                 const CallExpr *Call,
+                                                 unsigned ID) {
+  assert(Call->getNumArgs() == 2);
+
+  // The instruction encodes an 8-bit immediate; mirror that truncation.
+  APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+  uint64_t Shift = ImmAPS.getZExtValue() & 0xff;
+
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  if (!Src.getFieldDesc()->isPrimitiveArray())
+    return false;
+
+  unsigned NumElems = Src.getNumElems();
+  assert(NumElems % 16 == 0 && "byte shift operates on whole 128-bit lanes");
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+  PrimType ElemT = Src.getFieldDesc()->getPrimType();
+
+  TYPE_SWITCH(ElemT, {
+    for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
+      for (unsigned I = 0; I != 16; ++I) {
+        if (I < Shift)
+          Dst.elem<T>(Lane + I) = T();
+        else
+          Dst.elem<T>(Lane + I) = Src.elem<T>(Lane + I - Shift);
+      }
+    }
+  });
+
+  Dst.initializeAllElements();
+
+  return true;
+}
+
+/// Constant-evaluates the PSRLDQ/VPSRLDQ byte shift. Every 128-bit lane of
+/// the source byte vector is shifted right by the immediate byte count and
+/// the vacated high bytes of that lane are zero-filled. Lanes are
+/// independent, so no byte ever crosses a 16-byte boundary.
+static bool interp__builtin_x86_psrldq_byteshift(InterpState &S, CodePtr OpPC,
+                                                 const CallExpr *Call,
+                                                 unsigned ID) {
+  assert(Call->getNumArgs() == 2);
+
+  // The instruction encodes an 8-bit immediate; mirror that truncation.
+  APSInt ImmAPS = popToAPSInt(S, Call->getArg(1));
+  uint64_t Shift = ImmAPS.getZExtValue() & 0xff;
+
+  const Pointer &Src = S.Stk.pop<Pointer>();
+  if (!Src.getFieldDesc()->isPrimitiveArray())
+    return false;
+
+  unsigned NumElems = Src.getNumElems();
+  assert(NumElems % 16 == 0 && "byte shift operates on whole 128-bit lanes");
+  const Pointer &Dst = S.Stk.peek<Pointer>();
+  PrimType ElemT = Src.getFieldDesc()->getPrimType();
+
+  TYPE_SWITCH(ElemT, {
+    for (unsigned Lane = 0; Lane != NumElems; Lane += 16) {
+      for (unsigned I = 0; I != 16; ++I) {
+        if (I + Shift < 16)
+          Dst.elem<T>(Lane + I) = Src.elem<T>(Lane + I + Shift);
+        else
+          Dst.elem<T>(Lane + I) = T();
+      }
+    }
+  });
+
+  Dst.initializeAllElements();
+
+  return true;
+}
+
 bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
                       uint32_t BuiltinID) {
   if (!S.getASTContext().BuiltinInfo.isConstantEvaluated(BuiltinID))
@@ -4149,6 +4225,14 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
   case X86::BI__builtin_ia32_vec_set_v4di:
     return interp__builtin_vec_set(S, OpPC, Call, BuiltinID);
 
+  case X86::BI__builtin_ia32_pslldqi128_byteshift:
+  case X86::BI__builtin_ia32_pslldqi256_byteshift:
+    return interp__builtin_x86_pslldq_byteshift(S, OpPC, Call, BuiltinID);
+
+  case X86::BI__builtin_ia32_psrldqi128_byteshift:
+  case X86::BI__builtin_ia32_psrldqi256_byteshift:
+    return interp__builtin_x86_psrldq_byteshift(S, OpPC, Call, BuiltinID);
+
   default:
     S.FFDiag(S.Current->getLocation(OpPC),
              diag::note_invalid_subexpr_in_const_expr)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index e308c171ed551..db9ab7f0b9e33 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -12790,6 +12790,74 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
 
     return Success(APValue(Elems.data(), NumElems), E);
   }
+
+  case X86::BI__builtin_ia32_pslldqi128_byteshift:
+  case X86::BI__builtin_ia32_pslldqi256_byteshift: {
+    // PSLLDQ/VPSLLDQ: shift each 128-bit lane left by the immediate byte
+    // count, zero-filling the vacated low bytes. Lanes never exchange bytes.
+    assert(E->getNumArgs() == 2);
+
+    APValue Src;
+    APSInt Imm;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
+        !EvaluateInteger(E->getArg(1), Imm, Info))
+      return false;
+
+    unsigned VecLen = Src.getVectorLength();
+    assert(VecLen % 16 == 0 && "byte shift operates on whole 128-bit lanes");
+    // The instruction encodes an 8-bit immediate; mirror that truncation.
+    unsigned Shift = Imm.getZExtValue() & 0xff;
+
+    SmallVector<APValue> ResultElements;
+    ResultElements.reserve(VecLen);
+    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
+      for (unsigned I = 0; I != 16; ++I) {
+        if (I < Shift) {
+          APSInt Zero(8, /*isUnsigned=*/true);
+          Zero = 0;
+          ResultElements.push_back(APValue(Zero));
+        } else {
+          ResultElements.push_back(Src.getVectorElt(Lane + I - Shift));
+        }
+      }
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
+
+  case X86::BI__builtin_ia32_psrldqi128_byteshift:
+  case X86::BI__builtin_ia32_psrldqi256_byteshift: {
+    // PSRLDQ/VPSRLDQ: shift each 128-bit lane right by the immediate byte
+    // count, zero-filling the vacated high bytes. Lanes never exchange bytes.
+    assert(E->getNumArgs() == 2);
+
+    APValue Src;
+    APSInt Imm;
+    if (!EvaluateAsRValue(Info, E->getArg(0), Src) ||
+        !EvaluateInteger(E->getArg(1), Imm, Info))
+      return false;
+
+    unsigned VecLen = Src.getVectorLength();
+    assert(VecLen % 16 == 0 && "byte shift operates on whole 128-bit lanes");
+    // The instruction encodes an 8-bit immediate; mirror that truncation.
+    unsigned Shift = Imm.getZExtValue() & 0xff;
+
+    SmallVector<APValue> ResultElements;
+    ResultElements.reserve(VecLen);
+    for (unsigned Lane = 0; Lane != VecLen; Lane += 16) {
+      for (unsigned I = 0; I != 16; ++I) {
+        if (I + Shift < 16) {
+          ResultElements.push_back(Src.getVectorElt(Lane + I + Shift));
+        } else {
+          APSInt Zero(8, /*isUnsigned=*/true);
+          Zero = 0;
+          ResultElements.push_back(APValue(Zero));
+        }
+      }
+    }
+
+    return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+  }
   }
 }
 
diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c
index a505d70a98203..79ba172ad0938 100644
--- a/clang/test/CodeGen/X86/avx2-builtins.c
+++ b/clang/test/CodeGen/X86/avx2-builtins.c
@@ -1225,6 +1225,8 @@ __m256i test_mm256_slli_si256(__m256i a) {
   // CHECK: shufflevector <32 x i8> zeroinitializer, <32 x i8> %{{.*}}, <32 x i32> <i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60>
   return _mm256_slli_si256(a, 3);
 }
+TEST_CONSTEXPR(match_v32qi(_mm256_slli_si256(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 3), 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 0, 0, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29));
+TEST_CONSTEXPR(match_v32qi(_mm256_slli_si256(((__m256i)(__v32qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m128i test_mm_sllv_epi32(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_sllv_epi32
@@ -1368,6 +1370,8 @@ __m256i test_mm256_srli_si256(__m256i a) {
   // CHECK: shufflevector <32 x i8> %{{.*}}, <32 x i8> zeroinitializer, <32 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 32, i32 33, i32 34, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 48, i32 49, i32 50>
   return _mm256_srli_si256(a, 3);
 }
+TEST_CONSTEXPR(match_v32qi(_mm256_srli_si256(((__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 3), 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 0, 0));
+TEST_CONSTEXPR(match_v32qi(_mm256_srli_si256(((__m256i)(__v32qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m128i test_mm_srlv_epi32(__m128i a, __m128i b) {
   // CHECK-LABEL: test_mm_srlv_epi32
diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c
index ade7ef39a008a..4faae3f1f35d1 100644
--- a/clang/test/CodeGen/X86/sse2-builtins.c
+++ b/clang/test/CodeGen/X86/sse2-builtins.c
@@ -1413,6 +1413,8 @@ __m128i test_mm_slli_si128(__m128i A) {
   // CHECK: shufflevector <16 x i8> zeroinitializer, <16 x i8> %{{.*}}, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
   return _mm_slli_si128(A, 5);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_slli_si128(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 5), 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11));
+TEST_CONSTEXPR(match_v16qi(_mm_slli_si128(((__m128i)(__v16qi){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m128i test_mm_slli_si128_2(__m128i A) {
   // CHECK-LABEL: test_mm_slli_si128_2
@@ -1565,6 +1567,8 @@ __m128i test_mm_srli_si128(__m128i A) {
   // CHECK: shufflevector <16 x i8> %{{.*}}, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
   return _mm_srli_si128(A, 5);
 }
+TEST_CONSTEXPR(match_v16qi(_mm_srli_si128(((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 5), 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0));
+TEST_CONSTEXPR(match_v16qi(_mm_srli_si128(((__m128i)(__v16qi){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), 16), 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0));
 
 __m128i test_mm_srli_si128_2(__m128i A) {
   // CHECK-LABEL: test_mm_srli_si128_2