diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index d8eb0bed3e3e5..a0181b7ae8f9d 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -216,10 +216,13 @@ let Features = "sse2", Attributes = [NoThrow] in { def movnti : X86Builtin<"void(int *, int)">; } -let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; +let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def pshuflw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; + def pshufd : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Constant int)">; def pshufhw : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Constant int)">; +} + +let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { def psadbw128 : X86Builtin<"_Vector<2, long long int>(_Vector<16, char>, _Vector<16, char>)">; def sqrtpd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; def sqrtsd : X86Builtin<"_Vector<2, double>(_Vector<2, double>)">; @@ -584,9 +587,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">; def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; - def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; - def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; - def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">; def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">; def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">; @@ -647,6 +647,10 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi def packsswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; def packssdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">; def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">; + + def pshuflw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; + def pshufhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Constant int)">; + def pshufd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Constant int)">; } let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { @@ -1017,6 +1021,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512> let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def pmuldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; def pmuludq512 : X86Builtin<"_Vector<8, long long int>(_Vector<16, int>, _Vector<16, int>)">; + def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; } let Features = "avx512f", Attributes = [NoThrow, RequiredVectorWidth<512>] in { @@ -1990,13 +1995,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVect } let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; - def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">; } let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">; + def pshufhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; + def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">; } let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { @@ -2026,8 +2031,7 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, Constexpr, Req def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">; } -let Features = "avx512f", - Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">; def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">; def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">; @@ -3266,7 +3270,6 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> } let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def pshufd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Constant int)">; def expanddf512_mask : X86Builtin<"_Vector<8, double>(_Vector<8, double>, _Vector<8, double>, unsigned char)">; def expanddi512_mask : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, unsigned char)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 5b3aa26e84248..6053237b1a261 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2773,6 +2773,50 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC, return true; } +static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC, + const CallExpr *Call, bool IsShufHW) { + assert(Call->getNumArgs() == 2 && "masked forms handled via select*"); + APSInt ControlImm = popToAPSInt(S, Call->getArg(1)); + const Pointer &Src = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + + unsigned NumElems = Dst.getNumElems(); + PrimType ElemT = Dst.getFieldDesc()->getPrimType(); + + unsigned ElemBits = static_cast(primSize(ElemT) * 8); + if (ElemBits != 16 && ElemBits != 32) + return false; + + unsigned LaneElts = 128u / ElemBits; + assert(LaneElts && (NumElems % LaneElts == 0)); + + uint8_t Ctl = static_cast(ControlImm.getZExtValue()); + + for (unsigned Idx = 0; Idx != NumElems; Idx++) { + unsigned LaneBase = (Idx / LaneElts) * LaneElts; + unsigned LaneIdx = Idx % LaneElts; + unsigned SrcIdx = Idx; + unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3; + if (ElemBits == 32) { + SrcIdx = LaneBase + Sel; + } else { + constexpr unsigned HalfSize = 4; + bool InHigh = LaneIdx >= HalfSize; + if (!IsShufHW && !InHigh) { + SrcIdx = LaneBase + Sel; + } else if (IsShufHW && InHigh) { + unsigned Rel = LaneIdx - HalfSize; + Sel = (Ctl >> (2 * Rel)) & 0x3; + SrcIdx = LaneBase + HalfSize + Sel; + } + } + + INT_TYPE_SWITCH_NO_BOOL(ElemT, { Dst.elem(Idx) = Src.elem(SrcIdx); }); + } + Dst.initializeAllElements(); + return true; +} + static bool interp__builtin_elementwise_triop( InterpState &S, CodePtr OpPC, const CallExpr *Call, llvm::function_ref @@ -3661,6 +3705,21 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, case X86::BI__builtin_ia32_selectpd_512: return interp__builtin_select(S, OpPC, Call); + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: + return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: + return interp__builtin_ia32_pshuf(S, OpPC, Call, true); + + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: + return interp__builtin_ia32_pshuf(S, OpPC, Call, false); + case X86::BI__builtin_ia32_kandqi: case X86::BI__builtin_ia32_kandhi: case X86::BI__builtin_ia32_kandsi: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 81a5c2de151a0..7bf28d988f405 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11615,6 +11615,60 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result, return true; } +static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call, + bool IsShufHW, APValue &Out) { + APValue Vec; + APSInt Imm; + if (!EvaluateAsRValue(Info, Call->getArg(0), Vec)) + return false; + if (!EvaluateInteger(Call->getArg(1), Imm, Info)) + return false; + + const auto *VT = Call->getType()->getAs(); + if (!VT) + return false; + + QualType ElemT = VT->getElementType(); + unsigned ElemBits = Info.Ctx.getTypeSize(ElemT); + unsigned NumElts = VT->getNumElements(); + + unsigned LaneBits = 128u; + unsigned LaneElts = LaneBits / ElemBits; + if (!LaneElts || (NumElts % LaneElts) != 0) + return false; + + uint8_t Ctl = static_cast(Imm.getZExtValue()); + + SmallVector ResultElements; + ResultElements.reserve(NumElts); + + for (unsigned Idx = 0; Idx != NumElts; Idx++) { + unsigned LaneBase = (Idx / LaneElts) * LaneElts; + unsigned LaneIdx = Idx % LaneElts; + unsigned SrcIdx = Idx; + unsigned Sel = (Ctl >> (2 * LaneIdx)) & 0x3; + + if (ElemBits == 32) { + SrcIdx = LaneBase + Sel; + } else { + constexpr unsigned HalfSize = 4; + bool InHigh = LaneIdx >= HalfSize; + if (!IsShufHW && !InHigh) { + SrcIdx = LaneBase + Sel; + } else if (IsShufHW && InHigh) { + unsigned Rel = LaneIdx - HalfSize; + Sel = (Ctl >> (2 * Rel)) & 0x3; + SrcIdx = LaneBase + HalfSize + Sel; + } + } + + ResultElements.push_back(Vec.getVectorElt(SrcIdx)); + } + + Out = APValue(ResultElements.data(), ResultElements.size()); + return true; +} + bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { if (!IsConstantEvaluatedBuiltinCall(E)) return ExprEvaluatorBaseTy::VisitCallExpr(E); @@ -11868,7 +11922,6 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } - case clang::X86::BI__builtin_ia32_vprotbi: case clang::X86::BI__builtin_ia32_vprotdi: case clang::X86::BI__builtin_ia32_vprotqi: @@ -12087,6 +12140,34 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + + case X86::BI__builtin_ia32_pshuflw: + case X86::BI__builtin_ia32_pshuflw256: + case X86::BI__builtin_ia32_pshuflw512: { + APValue R; + if (!evalPshufBuiltin(Info, E, false, R)) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_pshufhw: + case X86::BI__builtin_ia32_pshufhw256: + case X86::BI__builtin_ia32_pshufhw512: { + APValue R; + if (!evalPshufBuiltin(Info, E, true, R)) + return false; + return Success(R, E); + } + + case X86::BI__builtin_ia32_pshufd: + case X86::BI__builtin_ia32_pshufd256: + case X86::BI__builtin_ia32_pshufd512: { + APValue R; + if (!evalPshufBuiltin(Info, E, false, R)) + return false; + return Success(R, E); + } + case Builtin::BI__builtin_elementwise_clzg: case Builtin::BI__builtin_elementwise_ctzg: { APValue SourceLHS; diff --git a/clang/test/CodeGen/X86/avx2-builtins.c b/clang/test/CodeGen/X86/avx2-builtins.c index eff2797e87c75..4299b18243f21 100644 --- a/clang/test/CodeGen/X86/avx2-builtins.c +++ b/clang/test/CodeGen/X86/avx2-builtins.c @@ -1109,19 +1109,19 @@ __m256i test_mm256_shuffle_epi32(__m256i a) { // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> return _mm256_shuffle_epi32(a, 15); } - +TEST_CONSTEXPR(match_v8si(_mm256_shuffle_epi32((((__m256i)(__v8si){0,1,2,3,4,5,6,7})), 15), 3,3,0,0, 7,7,4,4)); __m256i test_mm256_shufflehi_epi16(__m256i a) { // CHECK-LABEL: test_mm256_shufflehi_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> return _mm256_shufflehi_epi16(a, 107); } - +TEST_CONSTEXPR(match_v16hi(_mm256_shufflehi_epi16((((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 107), 0,1,2,3, 7,6,6,5, 8,9,10,11, 15,14,14,13)); __m256i test_mm256_shufflelo_epi16(__m256i a) { // CHECK-LABEL: test_mm256_shufflelo_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> return _mm256_shufflelo_epi16(a, 83); } - +TEST_CONSTEXPR(match_v16hi(_mm256_shufflelo_epi16(((__m256i)(__v16hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15}), 83), 3,0,1,1, 4,5,6,7, 11,8,9,9, 12,13,14,15) ); __m256i test_mm256_sign_epi8(__m256i a, __m256i b) { // CHECK-LABEL: test_mm256_sign_epi8 // CHECK: call <32 x i8> @llvm.x86.avx2.psign.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}}) diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 3f42ac0268978..bd19363c8d948 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -1876,13 +1876,15 @@ __m512i test_mm512_shufflehi_epi16(__m512i __A) { // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> return _mm512_shufflehi_epi16(__A, 5); } - +TEST_CONSTEXPR(match_v32hi(_mm512_shufflehi_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,2,3, 5,5,4,4, 8,9,10,11, 13,13,12,12, 16,17,18,19, 21,21,20,20, 24,25,26,27, 29,29,28,28)); __m512i test_mm512_mask_shufflehi_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_shufflehi_epi16 // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shufflehi_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16((((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131})), 0xFFFF0000u, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115, 16,17,18,19,21,21,20,20, 24,25,26,27,29,29,28,28)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflehi_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131)); __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_shufflehi_epi16 @@ -1890,12 +1892,15 @@ __m512i test_mm512_maskz_shufflehi_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shufflehi_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0xAAAAAAAAu, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 0,1,0,3,0,5,0,4, 0,9,0,11,0,13,0,12, 0,17,0,19,0,21,0,20, 0,25,0,27,0,29,0,28)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflehi_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 0,1,2,3,5,5,4,4, 8,9,10,11,13,13,12,12, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); __m512i test_mm512_shufflelo_epi16(__m512i __A) { // CHECK-LABEL: test_mm512_shufflelo_epi16 // CHECK: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> return _mm512_shufflelo_epi16(__A, 5); } +TEST_CONSTEXPR( match_v32hi(_mm512_shufflelo_epi16(((__m512i)(__v32hi){ 0,1,2,3, 4,5,6,7, 8,9,10,11, 12,13,14,15, 16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31}), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); __m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_shufflelo_epi16 @@ -1903,6 +1908,8 @@ __m512i test_mm512_mask_shufflelo_epi16(__m512i __W, __mmask32 __U, __m512i __A) // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shufflelo_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16((((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shufflelo_epi16(((__m512i)(__v32hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131}), 0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131)); __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_shufflelo_epi16 @@ -1910,6 +1917,8 @@ __m512i test_mm512_maskz_shufflelo_epi16(__mmask32 __U, __m512i __A) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shufflelo_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0xFFFFFFFF, (((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31})), 5), 1,1,0,0, 4,5,6,7, 9,9,8,8, 12,13,14,15, 17,17,16,16, 20,21,22,23, 25,25,24,24, 28,29,30,31)); +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shufflelo_epi16(0x0000FFFFu, ((__m512i)(__v32hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}), 5), 1,1,0,0,4,5,6,7, 9,9,8,8,12,13,14,15, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); __m512i test_mm512_sllv_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_sllv_epi16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 84eaad8d99e61..47cb485a84210 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9073,20 +9073,25 @@ __m512i test_mm512_shuffle_epi32(__m512i __A) { // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> return _mm512_shuffle_epi32(__A, 1); } - +TEST_CONSTEXPR(match_v16si(_mm512_shuffle_epi32((((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15})), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); __m512i test_mm512_mask_shuffle_epi32(__m512i __W, __mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_mask_shuffle_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shuffle_epi32(__W, __U, __A, 1); } - +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x0000u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16si(_mm512_mask_shuffle_epi32(((__m512i)(__v16si){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}), 0x00FFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 200,201,202,203,204,205,206,207)); __m512i test_mm512_maskz_shuffle_epi32(__mmask16 __U, __m512i __A) { // CHECK-LABEL: test_mm512_maskz_shuffle_epi32 // CHECK: shufflevector <16 x i32> %{{.*}}, <16 x i32> poison, <16 x i32> // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shuffle_epi32(__U, __A, 1); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0xFFFFu, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,4,4,4, 9,8,8,8, 13,12,12,12)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x5555u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 5,0,4,0, 9,0,8,0, 13,0,12,0)); +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shuffle_epi32(0x8001u, ((__m512i)(__v16si){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}), 1), 1,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,12)); __m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { // CHECK-LABEL: test_mm512_mask_expand_pd diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index 5282c7ab06dea..88006232c5c99 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -10025,6 +10025,11 @@ __m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return _mm_mask_shuffle_epi32(__W, __U, __A, 1); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 1), 1,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x0Au, ((__m128i)(__v4si){0,1,2,3}), 1), 100,0,102,0)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x05u, ((__m128i)(__v4si){0,1,2,3}), 1), 1,101,0,103)); +TEST_CONSTEXPR(match_v4si(_mm_mask_shuffle_epi32(((__m128i)(__v4si){100,101,102,103}), 0x00u, ((__m128i)(__v4si){0,1,2,3}), 1), 100,101,102,103)); + __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_shuffle_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> @@ -10032,6 +10037,10 @@ __m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) { return _mm_maskz_shuffle_epi32(__U, __A, 2); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x01u, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Au, ((__m128i)(__v4si){0,1,2,3}), 2), 0,0,0,0)); +TEST_CONSTEXPR(match_v4si(_mm_maskz_shuffle_epi32(0x0Fu, ((__m128i)(__v4si){0,1,2,3}), 2), 2,0,0,0)); + __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_shuffle_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> @@ -10039,6 +10048,10 @@ __m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return _mm256_mask_shuffle_epi32(__W, __U, __A, 2); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0xF0u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103, 6,4,4,4)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,102,103, 6,4,106,107)); +TEST_CONSTEXPR(match_v8si(_mm256_mask_shuffle_epi32(((__m256i)(__v8si){100,101,102,103,104,105,106,107}), 0x00u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 100,101,102,103,104,105,106,107)); + __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_shuffle_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> @@ -10046,6 +10059,10 @@ __m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) { return _mm256_maskz_shuffle_epi32(__U, __A, 2); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0x33u, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,0,0)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xAAu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 0,0,0,0, 0,4,0,4)); +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shuffle_epi32(0xFFu, ((__m256i)(__v8si){0,1,2,3,4,5,6,7}), 2), 2,0,0,0, 6,4,4,4)); + __m128d test_mm_mask_mov_pd(__m128d __W, __mmask8 __U, __m128d __A) { // CHECK-LABEL: test_mm_mask_mov_pd // CHECK: select <2 x i1> %{{.*}}, <2 x double> %{{.*}}, <2 x double> %{{.*}} diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c b/clang/test/CodeGen/X86/avx512vlbw-builtins.c index 6c9c80efcef9d..1fe1ec08ede88 100644 --- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c @@ -3393,6 +3393,13 @@ __m128i test_mm_mask_shufflehi_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return _mm_mask_shufflehi_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,101,2,103,5,105,4,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflehi_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,3,104,5,106,4)); + __m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_shufflehi_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> @@ -3400,6 +3407,13 @@ __m128i test_mm_maskz_shufflehi_epi16(__mmask8 __U, __m128i __A) { return _mm_maskz_shufflehi_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xFFu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,5,5,4,4)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,2,3,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,2,0,5,0,4,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflehi_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,3,0,5,0,4)); + __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_mask_shufflelo_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> @@ -3407,6 +3421,13 @@ __m128i test_mm_mask_shufflelo_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return _mm_mask_shufflelo_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x00u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,104,105,106,107)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,101,102,103,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),100,1,102,0,104,5,106,7)); +TEST_CONSTEXPR(match_v8hi(_mm_mask_shufflelo_epi16(((__m128i)(__v8hi){100,101,102,103,104,105,106,107}),0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,101,0,103,4,105,6,107)); + __m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) { // CHECK-LABEL: test_mm_maskz_shufflelo_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> @@ -3414,6 +3435,12 @@ __m128i test_mm_maskz_shufflelo_epi16(__mmask8 __U, __m128i __A) { return _mm_maskz_shufflelo_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xFF,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x0Fu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,1,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xF0u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,0,0,0,4,5,6,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0xAAu,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),0,1,0,0,0,5,0,7)); +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shufflelo_epi16(0x55u,((__m128i)(__v8hi){0,1,2,3,4,5,6,7}),5),1,0,0,0,4,0,6,0)); + __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_shufflehi_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> @@ -3421,6 +3448,12 @@ __m256i test_mm256_mask_shufflehi_epi16(__m256i __W, __mmask16 __U, __m256i __A) return _mm256_mask_shufflehi_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,108,109,110,111,112,113,114,115)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflehi_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115}),0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,101,2,103,5,105,4,107,8,109,10,111,13,113,12,115)); + __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_shufflehi_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> @@ -3428,6 +3461,13 @@ __m256i test_mm256_maskz_shufflehi_epi16(__mmask16 __U, __m256i __A) { return _mm256_maskz_shufflehi_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x0000u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFFFFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,2,3,5,5,4,4,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xFF00u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,0,0,0,0,8,9,10,11,13,13,12,12)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0x5555u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,2,0,5,0,4,0,8,0,10,0,13,0,12,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflehi_epi16(0xAAAAu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,1,0,3,0,5,0,4,0,9,0,11,0,13,0,12)); + __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_mask_shufflelo_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> @@ -3435,6 +3475,11 @@ __m256i test_mm256_mask_shufflelo_epi16(__m256i __W, __mmask16 __U, __m256i __A) return _mm256_mask_shufflelo_epi16(__W, __U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,200,201,202,203,204,205,206,207)); +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shufflelo_epi16(((__m256i)(__v16hi){100,101,102,103,104,105,106,107,200,201,202,203,204,205,206,207}),0xF00Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,104,105,106,107,200,201,202,203,12,13,14,15)); + __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) { // CHECK-LABEL: test_mm256_maskz_shufflelo_epi16 // CHECK: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> @@ -3442,6 +3487,11 @@ __m256i test_mm256_maskz_shufflelo_epi16(__mmask16 __U, __m256i __A) { return _mm256_maskz_shufflelo_epi16(__U, __A, 5); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xFFFF,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,9,9,8,8,12,13,14,15)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x000Fu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0x00FFu,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),1,1,0,0,4,5,6,7,0,0,0,0,0,0,0,0)); +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shufflelo_epi16(0xF0F0u,((__m256i)(__v16hi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}),5),0,0,0,0,4,5,6,7,0,0,0,0,12,13,14,15)); + void test_mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { // CHECK-LABEL: test_mm_mask_cvtepi16_storeu_epi8 diff --git a/clang/test/CodeGen/X86/mmx-builtins.c b/clang/test/CodeGen/X86/mmx-builtins.c index f9ee32e440795..a4494b69219da 100644 --- a/clang/test/CodeGen/X86/mmx-builtins.c +++ b/clang/test/CodeGen/X86/mmx-builtins.c @@ -586,7 +586,7 @@ __m64 test_mm_shuffle_pi16(__m64 a) { // CHECK: shufflevector <4 x i16> {{%.*}}, <4 x i16> {{%.*}}, <4 x i32> return _mm_shuffle_pi16(a, 3); } - +TEST_CONSTEXPR(match_v4hi(_mm_shuffle_pi16(((__m64)(__v4hi){0,1,2,3}), 3), 3,0,0,0)); __m64 test_mm_sign_pi8(__m64 a, __m64 b) { // CHECK-LABEL: test_mm_sign_pi8 // CHECK: call <16 x i8> @llvm.x86.ssse3.psign.b.128( diff --git a/clang/test/CodeGen/X86/sse2-builtins.c b/clang/test/CodeGen/X86/sse2-builtins.c index 65bfec39c8f5a..8428fd6540ac9 100644 --- a/clang/test/CodeGen/X86/sse2-builtins.c +++ b/clang/test/CodeGen/X86/sse2-builtins.c @@ -1301,7 +1301,7 @@ __m128i test_mm_shuffle_epi32(__m128i A) { // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> zeroinitializer return _mm_shuffle_epi32(A, 0); } - +TEST_CONSTEXPR(match_v4si(_mm_shuffle_epi32(((__m128i)(__v4si){0,1,2,3}), 0), 0,0,0,0)); __m128d test_mm_shuffle_pd(__m128d A, __m128d B) { // CHECK-LABEL: test_mm_shuffle_pd // CHECK: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> @@ -1313,13 +1313,13 @@ __m128i test_mm_shufflehi_epi16(__m128i A) { // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> return _mm_shufflehi_epi16(A, 0); } - +TEST_CONSTEXPR(match_v8hi(_mm_shufflehi_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,1,2,3, 4,4,4,4)); __m128i test_mm_shufflelo_epi16(__m128i A) { // CHECK-LABEL: test_mm_shufflelo_epi16 // CHECK: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> return _mm_shufflelo_epi16(A, 0); } - +TEST_CONSTEXPR(match_v8hi(_mm_shufflelo_epi16(((__m128i)(__v8hi){0,1,2,3,4,5,6,7}), 0), 0,0,0,0, 4,5,6,7)); __m128i test_mm_sll_epi16(__m128i A, __m128i B) { // CHECK-LABEL: test_mm_sll_epi16 // CHECK: call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %{{.*}}, <8 x i16> %{{.*}})