Skip to content

Commit 140d465

Browse files
authored
[X86][ByteCode] Allow PSHUFB intrinsics to be used in constexpr #156612 (#163148)
The PSHUFB instruction shuffles bytes within each 128-bit lane: for each control byte, if bit 7 is set, the output byte is zeroed; otherwise, the low 4 bits select a source byte (0–15) from the same lane. Note: the _mm_shuffle_pi8 function had to change because __anyext128 uses negative shuffle indices, which are invalid in a constant-expression context; it now uses __zext128 instead. Fixes #156612
1 parent 9734aa8 commit 140d465

File tree

12 files changed

+138
-38
lines changed

12 files changed

+138
-38
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -125,14 +125,14 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
125125

126126
let Features = "ssse3" in {
127127
def pmulhrsw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
128-
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
129128
def psignb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
130129
def psignw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
131130
def psignd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
132131
}
133132

134133
let Features = "ssse3", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
135134
def pmaddubsw128 : X86Builtin<"_Vector<8, short>(_Vector<16, char>, _Vector<16, char>)">;
135+
def pshufb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
136136
}
137137
}
138138

@@ -610,7 +610,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
610610
def pmovmskb256 : X86Builtin<"int(_Vector<32, char>)">;
611611
def pmulhrsw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
612612
def psadbw256 : X86Builtin<"_Vector<4, long long int>(_Vector<32, char>, _Vector<32, char>)">;
613-
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
614613
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
615614
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
616615
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -649,6 +648,8 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
649648
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
650649
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
651650

651+
def pshufb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
652+
652653
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
653654
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
654655
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
@@ -1347,14 +1348,15 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<512>
13471348

13481349
let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
13491350
def ucmpw512_mask : X86Builtin<"unsigned int(_Vector<32, short>, _Vector<32, short>, _Constant int, unsigned int)">;
1350-
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13511351
}
13521352

13531353
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
13541354
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
13551355
def packssdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13561356
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
13571357
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
1358+
1359+
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13581360
}
13591361

13601362
let Features = "avx512cd,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2790,6 +2790,34 @@ static bool interp__builtin_blend(InterpState &S, CodePtr OpPC,
27902790
return true;
27912791
}
27922792

2793+
/// Bytecode-interpreter evaluation of the PSHUFB builtins (dispatched for
/// pshufb128, pshufb256 and pshufb512).
///
/// For every byte position, the matching control byte decides the result:
///   - bit 7 set   -> the destination byte is zeroed;
///   - bit 7 clear -> the low 4 bits select a byte from the same 16-byte
///                    lane of the source vector.
///
/// Pops the control and source vectors from the interpreter stack, writes the
/// shuffled bytes into the destination vector that remains on the stack, and
/// marks all of its elements as initialized. Always returns true.
static bool interp__builtin_ia32_pshufb(InterpState &S, CodePtr OpPC,
                                        const CallExpr *Call) {
  assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
  // Control is popped first, then the source; the destination is only peeked
  // so it stays on the stack as the call's result.
  const Pointer &Control = S.Stk.pop<Pointer>();
  const Pointer &Src = S.Stk.pop<Pointer>();
  const Pointer &Dst = S.Stk.peek<Pointer>();

  unsigned NumElems = Dst.getNumElems();
  // All three vectors must have the same element count: each destination
  // byte consumes one control byte and reads from the same lane of Src.
  assert(NumElems == Control.getNumElems());
  assert(NumElems == Src.getNumElems());

  for (unsigned Idx = 0; Idx != NumElems; ++Idx) {
    // Reinterpret the signed element as a raw byte so bit tests are unsigned.
    uint8_t Ctlb = static_cast<uint8_t>(Control.elem<int8_t>(Idx));

    if (Ctlb & 0x80) {
      // Bit 7 set: zero the output byte.
      Dst.elem<int8_t>(Idx) = 0;
    } else {
      // Selection never crosses a 16-byte lane: take the first index of the
      // lane containing Idx and add the 4-bit offset from the control byte.
      unsigned LaneBase = (Idx / 16) * 16;
      unsigned SrcOffset = Ctlb & 0x0F;
      unsigned SrcIdx = LaneBase + SrcOffset;

      Dst.elem<int8_t>(Idx) = Src.elem<int8_t>(SrcIdx);
    }
  }
  Dst.initializeAllElements();
  return true;
}
2820+
27932821
static bool interp__builtin_ia32_pshuf(InterpState &S, CodePtr OpPC,
27942822
const CallExpr *Call, bool IsShufHW) {
27952823
assert(Call->getNumArgs() == 2 && "masked forms handled via select*");
@@ -3943,6 +3971,11 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
39433971
case X86::BI__builtin_ia32_selectpd_512:
39443972
return interp__builtin_select(S, OpPC, Call);
39453973

3974+
case X86::BI__builtin_ia32_pshufb128:
3975+
case X86::BI__builtin_ia32_pshufb256:
3976+
case X86::BI__builtin_ia32_pshufb512:
3977+
return interp__builtin_ia32_pshufb(S, OpPC, Call);
3978+
39463979
case X86::BI__builtin_ia32_pshuflw:
39473980
case X86::BI__builtin_ia32_pshuflw256:
39483981
case X86::BI__builtin_ia32_pshuflw512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11619,6 +11619,44 @@ static bool evalPackBuiltin(const CallExpr *E, EvalInfo &Info, APValue &Result,
1161911619
return true;
1162011620
}
1162111621

11622+
/// Constant-expression evaluation of a PSHUFB builtin call.
///
/// Evaluates argument 0 (the source bytes) and argument 1 (the control bytes)
/// and builds the result vector element by element: a control byte with bit 7
/// set yields zero, otherwise its low 4 bits index into the same 16-byte lane
/// of the source. The result is stored in \p Out.
///
/// \returns false if either argument fails to evaluate or the call's type is
/// not a vector type; true otherwise.
static bool evalPshufbBuiltin(EvalInfo &Info, const CallExpr *Call,
                              APValue &Out) {
  APValue Source, Selector;
  // Short-circuit keeps the original order: the source operand is evaluated
  // first and the control operand only if that succeeded.
  if (!EvaluateAsRValue(Info, Call->getArg(0), Source) ||
      !EvaluateAsRValue(Info, Call->getArg(1), Selector))
    return false;

  const auto *VecTy = Call->getType()->getAs<VectorType>();
  if (!VecTy)
    return false;

  QualType ByteTy = VecTy->getElementType();
  unsigned Count = VecTy->getNumElements();

  SmallVector<APValue, 64> Shuffled;
  Shuffled.reserve(Count);

  for (unsigned Pos = 0; Pos < Count; ++Pos) {
    APValue SelVal = Selector.getVectorElt(Pos);
    APSInt SelInt = SelVal.getInt();
    // Work on the raw byte so the sign of the element type does not matter.
    uint8_t Sel = static_cast<uint8_t>(SelInt.getZExtValue());

    if ((Sel & 0x80) != 0) {
      // Bit 7 set: this output byte is zero.
      APValue ZeroByte(Info.Ctx.MakeIntValue(0, ByteTy));
      Shuffled.push_back(ZeroByte);
      continue;
    }

    // First index of the 16-byte lane containing Pos, plus the 4-bit offset.
    unsigned LaneStart = Pos - (Pos % 16);
    unsigned Pick = LaneStart + (Sel & 0x0F);
    Shuffled.push_back(Source.getVectorElt(Pick));
  }

  Out = APValue(Shuffled.data(), Shuffled.size());
  return true;
}
11659+
1162211660
static bool evalPshufBuiltin(EvalInfo &Info, const CallExpr *Call,
1162311661
bool IsShufHW, APValue &Out) {
1162411662
APValue Vec;
@@ -12241,6 +12279,15 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1224112279
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
1224212280
}
1224312281

12282+
case X86::BI__builtin_ia32_pshufb128:
12283+
case X86::BI__builtin_ia32_pshufb256:
12284+
case X86::BI__builtin_ia32_pshufb512: {
12285+
APValue R;
12286+
if (!evalPshufbBuiltin(Info, E, R))
12287+
return false;
12288+
return Success(R, E);
12289+
}
12290+
1224412291
case X86::BI__builtin_ia32_pshuflw:
1224512292
case X86::BI__builtin_ia32_pshuflw256:
1224612293
case X86::BI__builtin_ia32_pshuflw512: {

clang/lib/Headers/avx2intrin.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1852,9 +1852,8 @@ _mm256_sad_epu8(__m256i __a, __m256i __b)
18521852
/// control byte specify the index (within the same 128-bit half) of \a __a
18531853
/// to copy to the result byte.
18541854
/// \returns A 256-bit integer vector containing the result.
1855-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1856-
_mm256_shuffle_epi8(__m256i __a, __m256i __b)
1857-
{
1855+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1856+
_mm256_shuffle_epi8(__m256i __a, __m256i __b) {
18581857
return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
18591858
}
18601859

clang/lib/Headers/avx512bwintrin.h

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -866,23 +866,20 @@ _mm512_mask_min_epu16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) {
866866
(__v32hi)__W);
867867
}
868868

869-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
870-
_mm512_shuffle_epi8(__m512i __A, __m512i __B)
871-
{
869+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
870+
_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
872871
return (__m512i)__builtin_ia32_pshufb512((__v64qi)__A,(__v64qi)__B);
873872
}
874873

875-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
876-
_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B)
877-
{
874+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
875+
_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
878876
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
879877
(__v64qi)_mm512_shuffle_epi8(__A, __B),
880878
(__v64qi)__W);
881879
}
882880

883-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
884-
_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B)
885-
{
881+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
882+
_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
886883
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
887884
(__v64qi)_mm512_shuffle_epi8(__A, __B),
888885
(__v64qi)_mm512_setzero_si512());

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1067,33 +1067,29 @@ _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
10671067
(__v16hi)__W);
10681068
}
10691069

1070-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1071-
_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
1072-
{
1070+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1071+
_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
10731072
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
10741073
(__v16qi)_mm_shuffle_epi8(__A, __B),
10751074
(__v16qi)__W);
10761075
}
10771076

1078-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
1079-
_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
1080-
{
1077+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
1078+
_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
10811079
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
10821080
(__v16qi)_mm_shuffle_epi8(__A, __B),
10831081
(__v16qi)_mm_setzero_si128());
10841082
}
10851083

1086-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1087-
_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
1088-
{
1084+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1085+
_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
10891086
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
10901087
(__v32qi)_mm256_shuffle_epi8(__A, __B),
10911088
(__v32qi)__W);
10921089
}
10931090

1094-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
1095-
_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
1096-
{
1091+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
1092+
_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
10971093
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
10981094
(__v32qi)_mm256_shuffle_epi8(__A, __B),
10991095
(__v32qi)_mm256_setzero_si256());

clang/lib/Headers/tmmintrin.h

Lines changed: 9 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -590,10 +590,9 @@ _mm_mulhrs_pi16(__m64 __a, __m64 __b)
590590
/// Bits [6:4] Reserved. \n
591591
/// Bits [3:0] select the source byte to be copied.
592592
/// \returns A 128-bit integer vector containing the copied or cleared values.
593-
static __inline__ __m128i __DEFAULT_FN_ATTRS
594-
_mm_shuffle_epi8(__m128i __a, __m128i __b)
595-
{
596-
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
593+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
594+
_mm_shuffle_epi8(__m128i __a, __m128i __b) {
595+
return (__m128i)__builtin_ia32_pshufb128((__v16qi)__a, (__v16qi)__b);
597596
}
598597

599598
/// Copies the 8-bit integers from a 64-bit integer vector to the
@@ -615,13 +614,12 @@ _mm_shuffle_epi8(__m128i __a, __m128i __b)
615614
/// destination. \n
616615
/// Bits [2:0] select the source byte to be copied.
617616
/// \returns A 64-bit integer vector containing the copied or cleared values.
618-
static __inline__ __m64 __DEFAULT_FN_ATTRS
619-
_mm_shuffle_pi8(__m64 __a, __m64 __b)
620-
{
621-
return __trunc64(__builtin_ia32_pshufb128(
622-
(__v16qi)__builtin_shufflevector(
623-
(__v2si)(__a), __extension__ (__v2si){}, 0, 1, 0, 1),
624-
(__v16qi)__anyext128(__b)));
617+
static __inline__ __m64 __DEFAULT_FN_ATTRS_CONSTEXPR
618+
_mm_shuffle_pi8(__m64 __a, __m64 __b) {
619+
return __trunc64(__builtin_ia32_pshufb128(
620+
(__v16qi)__builtin_shufflevector((__v2si)(__a), __extension__(__v2si){},
621+
0, 1, 0, 1),
622+
(__v16qi)__zext128(__b)));
625623
}
626624

627625
/// For each 8-bit integer in the first source operand, perform one of

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1130,6 +1130,8 @@ __m256i test_mm256_shuffle_epi8(__m256i a, __m256i b) {
11301130
return _mm256_shuffle_epi8(a, b);
11311131
}
11321132

1133+
TEST_CONSTEXPR(match_v32qi(_mm256_shuffle_epi8((__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qs){0,33,2,35,4,37,6,-39,8,41,10,43,12,45,14,-47,16,49,18,51,20,53,22,-55,24,57,26,59,28,61,30,-63}), 0,1,2,3,4,5,6,0,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,0,24,25,26,27,28,29,30,0));
1134+
11331135
__m256i test_mm256_shuffle_epi32(__m256i a) {
11341136
// CHECK-LABEL: test_mm256_shuffle_epi32
11351137
// CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> poison, <8 x i32> <i32 3, i32 3, i32 0, i32 0, i32 7, i32 7, i32 4, i32 4>

clang/test/CodeGen/X86/avx512bw-builtins.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1466,18 +1466,27 @@ __m512i test_mm512_shuffle_epi8(__m512i __A, __m512i __B) {
14661466
// CHECK: @llvm.x86.avx512.pshuf.b.512
14671467
return _mm512_shuffle_epi8(__A,__B);
14681468
}
1469+
1470+
TEST_CONSTEXPR(match_v64qi(_mm512_shuffle_epi8((__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qs){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,-15,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,-79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,-95}), 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,0,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,0,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,0));
1471+
14691472
__m512i test_mm512_mask_shuffle_epi8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
14701473
// CHECK-LABEL: test_mm512_mask_shuffle_epi8
14711474
// CHECK: @llvm.x86.avx512.pshuf.b.512
14721475
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
14731476
return _mm512_mask_shuffle_epi8(__W,__U,__A,__B);
14741477
}
1478+
1479+
TEST_CONSTEXPR(match_v64qi(_mm512_mask_shuffle_epi8((__m512i)(__v64qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,5,5,5,5,6,6,6,6,6,6,6,6,7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8}, 0xFFFFFFFF00000000, (__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,47,46,45,44,43,42,41,40,39,38,37,36,35,34,33,32,63,62,61,60,59,58,57,56,55,54,53,52,51,50,49,48));
1480+
14751481
__m512i test_mm512_maskz_shuffle_epi8(__mmask64 __U, __m512i __A, __m512i __B) {
14761482
// CHECK-LABEL: test_mm512_maskz_shuffle_epi8
14771483
// CHECK: @llvm.x86.avx512.pshuf.b.512
14781484
// CHECK: select <64 x i1> %{{.*}}, <64 x i8> %{{.*}}, <64 x i8> %{{.*}}
14791485
return _mm512_maskz_shuffle_epi8(__U,__A,__B);
14801486
}
1487+
1488+
TEST_CONSTEXPR(match_v64qi(_mm512_maskz_shuffle_epi8(0x8888888888888888,(__m512i)(__v64qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63}, (__m512i)(__v64qi){127,126,125,124,123,122,121,120,119,118,117,116,115,114,113,112,111,110,109,108,107,106,105,104,103,102,101,100,99,98,97,96,95,94,93,92,91,90,89,88,87,86,85,84,83,82,81,80,79,78,77,76,75,74,73,72,71,70,69,68,67,66,65,64}), 0,0,0,12,0,0,0,8,0,0,0,4,0,0,0,0,0,0,0,28,0,0,0,24,0,0,0,20,0,0,0,16,0,0,0,44,0,0,0,40,0,0,0,36,0,0,0,32,0,0,0,60,0,0,0,56,0,0,0,52,0,0,0,48));
1489+
14811490
__m512i test_mm512_subs_epi8(__m512i __A, __m512i __B) {
14821491
// CHECK-LABEL: test_mm512_subs_epi8
14831492
// CHECK: @llvm.ssub.sat.v64i8

clang/test/CodeGen/X86/avx512vlbw-builtins.c

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1688,24 +1688,37 @@ __m128i test_mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m12
16881688
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
16891689
return _mm_mask_shuffle_epi8(__W,__U,__A,__B);
16901690
}
1691+
1692+
TEST_CONSTEXPR(match_v16qi(_mm_mask_shuffle_epi8((__m128i)(__v16qi){1,1,1,1,1,1,1,1,2,2,4,4,6,6,8,8}, 0x00FF, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,2,2,4,4,6,6,8,8));
1693+
16911694
__m128i test_mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
16921695
// CHECK-LABEL: test_mm_maskz_shuffle_epi8
16931696
// CHECK: @llvm.x86.ssse3.pshuf.b
16941697
// CHECK: select <16 x i1> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}}
16951698
return _mm_maskz_shuffle_epi8(__U,__A,__B);
16961699
}
1700+
1701+
TEST_CONSTEXPR(match_v16qi(_mm_maskz_shuffle_epi8(0xAAAA, (__m128i)(__v16qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15}, (__m128i)(__v16qi){15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 0,14,0,12,0,10,0,8,0,6,0,4,0,2,0,0));
1702+
16971703
__m256i test_mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
16981704
// CHECK-LABEL: test_mm256_mask_shuffle_epi8
16991705
// CHECK: @llvm.x86.avx2.pshuf.b
17001706
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
17011707
return _mm256_mask_shuffle_epi8(__W,__U,__A,__B);
17021708
}
1709+
1710+
TEST_CONSTEXPR(match_v32qi(_mm256_mask_shuffle_epi8((__m256i)(__v32qi){1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4}, 0x80808080, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 1,1,1,1,1,1,1,8,2,2,2,2,2,2,2,0,3,3,3,3,3,3,3,24,4,4,4,4,4,4,4,16));
1711+
1712+
17031713
__m256i test_mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
17041714
// CHECK-LABEL: test_mm256_maskz_shuffle_epi8
17051715
// CHECK: @llvm.x86.avx2.pshuf.b
17061716
// CHECK: select <32 x i1> %{{.*}}, <32 x i8> %{{.*}}, <32 x i8> %{{.*}}
17071717
return _mm256_maskz_shuffle_epi8(__U,__A,__B);
17081718
}
1719+
1720+
TEST_CONSTEXPR(match_v32qi(_mm256_maskz_shuffle_epi8(0x0000FFFF, (__m256i)(__v32qi){0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31}, (__m256i)(__v32qi){31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0}), 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0));
1721+
17091722
__m128i test_mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
17101723
// CHECK-LABEL: test_mm_mask_subs_epi8
17111724
// CHECK: @llvm.ssub.sat.v16i8

0 commit comments

Comments
 (0)