Skip to content

Commit 3ce1656

Browse files
[Headers][X86] Enable constexpr handling for MMX/SSE/AVX/AVX512 avg intrinsics (#157464)
This PR updates the avg builtins to support constant-expression evaluation by extending the case in VectorExprEvaluator::VisitCallExpr that handles elementwise integer binop builtins. Closes #155390. Co-authored-by: Simon Pilgrim <llvm-dev@redking.me.uk>
1 parent 20c08f3 commit 3ce1656

File tree

13 files changed

+129
-103
lines changed

13 files changed

+129
-103
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -93,8 +93,6 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
9393
}
9494

9595
let Features = "sse2" in {
96-
def pavgb128 : X86Builtin<"_Vector<16, char>(_Vector<16, char>, _Vector<16, char>)">;
97-
def pavgw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
9896
def packsswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
9997
def packssdw128 : X86Builtin<"_Vector<8, short>(_Vector<4, int>, _Vector<4, int>)">;
10098
def packuswb128 : X86Builtin<"_Vector<16, char>(_Vector<8, short>, _Vector<8, short>)">;
@@ -106,6 +104,8 @@ let Attributes = [Const, NoThrow, RequiredVectorWidth<128>] in {
106104
}
107105

108106
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
107+
def pavgb128 : X86Builtin<"_Vector<16, unsigned char>(_Vector<16, unsigned char>, _Vector<16, unsigned char>)">;
108+
def pavgw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
109109
def pmulhw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
110110
def pmulhuw128 : X86Builtin<"_Vector<8, unsigned short>(_Vector<8, unsigned short>, _Vector<8, unsigned short>)">;
111111
}
@@ -575,8 +575,6 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
575575
def packuswb256 : X86Builtin<"_Vector<32, char>(_Vector<16, short>, _Vector<16, short>)">;
576576
def packusdw256 : X86Builtin<"_Vector<16, short>(_Vector<8, int>, _Vector<8, int>)">;
577577
def palignr256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Constant int)">;
578-
def pavgb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
579-
def pavgw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
580578
def pblendw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
581579
def phaddw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
582580
def phaddd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -618,6 +616,9 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
618616
}
619617

620618
let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
619+
def pavgb256 : X86Builtin<"_Vector<32, unsigned char>(_Vector<32, unsigned char>, _Vector<32, unsigned char>)">;
620+
def pavgw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
621+
621622
def pblendvb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>, _Vector<32, char>)">;
622623

623624
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
@@ -1307,8 +1308,6 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
13071308
def packsswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
13081309
def packusdw512 : X86Builtin<"_Vector<32, short>(_Vector<16, int>, _Vector<16, int>)">;
13091310
def packuswb512 : X86Builtin<"_Vector<64, char>(_Vector<32, short>, _Vector<32, short>)">;
1310-
def pavgb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
1311-
def pavgw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
13121311
def pshufb512 : X86Builtin<"_Vector<64, char>(_Vector<64, char>, _Vector<64, char>)">;
13131312
}
13141313

@@ -1350,6 +1349,8 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
13501349
}
13511350

13521351
let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
1352+
def pavgb512 : X86Builtin<"_Vector<64, unsigned char>(_Vector<64, unsigned char>, _Vector<64, unsigned char>)">;
1353+
def pavgw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
13531354
def pmulhuw512 : X86Builtin<"_Vector<32, unsigned short>(_Vector<32, unsigned short>, _Vector<32, unsigned short>)">;
13541355
def pmulhw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
13551356
}

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3320,6 +3320,15 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
33203320
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
33213321
});
33223322

3323+
case clang::X86::BI__builtin_ia32_pavgb128:
3324+
case clang::X86::BI__builtin_ia32_pavgw128:
3325+
case clang::X86::BI__builtin_ia32_pavgb256:
3326+
case clang::X86::BI__builtin_ia32_pavgw256:
3327+
case clang::X86::BI__builtin_ia32_pavgb512:
3328+
case clang::X86::BI__builtin_ia32_pavgw512:
3329+
return interp__builtin_elementwise_int_binop(S, OpPC, Call,
3330+
llvm::APIntOps::avgCeilU);
3331+
33233332
case clang::X86::BI__builtin_ia32_pmulhuw128:
33243333
case clang::X86::BI__builtin_ia32_pmulhuw256:
33253334
case clang::X86::BI__builtin_ia32_pmulhuw512:

clang/lib/AST/ExprConstant.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11694,6 +11694,14 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1169411694
return LHS.isSigned() ? LHS.ssub_sat(RHS) : LHS.usub_sat(RHS);
1169511695
});
1169611696

11697+
case clang::X86::BI__builtin_ia32_pavgb128:
11698+
case clang::X86::BI__builtin_ia32_pavgw128:
11699+
case clang::X86::BI__builtin_ia32_pavgb256:
11700+
case clang::X86::BI__builtin_ia32_pavgw256:
11701+
case clang::X86::BI__builtin_ia32_pavgb512:
11702+
case clang::X86::BI__builtin_ia32_pavgw512:
11703+
return EvaluateBinOpExpr(llvm::APIntOps::avgCeilU);
11704+
1169711705
case clang::X86::BI__builtin_ia32_pmulhuw128:
1169811706
case clang::X86::BI__builtin_ia32_pmulhuw256:
1169911707
case clang::X86::BI__builtin_ia32_pmulhuw512:

clang/lib/Headers/avx2intrin.h

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -496,10 +496,9 @@ _mm256_andnot_si256(__m256i __a, __m256i __b)
496496
/// \param __b
497497
/// A 256-bit integer vector.
498498
/// \returns A 256-bit integer vector containing the result.
499-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
500-
_mm256_avg_epu8(__m256i __a, __m256i __b)
501-
{
502-
return (__m256i)__builtin_ia32_pavgb256((__v32qi)__a, (__v32qi)__b);
499+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
500+
_mm256_avg_epu8(__m256i __a, __m256i __b) {
501+
return (__m256i)__builtin_ia32_pavgb256((__v32qu)__a, (__v32qu)__b);
503502
}
504503

505504
/// Computes the averages of the corresponding unsigned 16-bit integers in
@@ -522,10 +521,9 @@ _mm256_avg_epu8(__m256i __a, __m256i __b)
522521
/// \param __b
523522
/// A 256-bit vector of [16 x i16].
524523
/// \returns A 256-bit vector of [16 x i16] containing the result.
525-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
526-
_mm256_avg_epu16(__m256i __a, __m256i __b)
527-
{
528-
return (__m256i)__builtin_ia32_pavgw256((__v16hi)__a, (__v16hi)__b);
524+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
525+
_mm256_avg_epu16(__m256i __a, __m256i __b) {
526+
return (__m256i)__builtin_ia32_pavgw256((__v16hu)__a, (__v16hu)__b);
529527
}
530528

531529
/// Merges 8-bit integer values from either of the two 256-bit vectors

clang/lib/Headers/avx512bwintrin.h

Lines changed: 23 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -690,50 +690,40 @@ _mm512_maskz_adds_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
690690
(__v32hi)_mm512_setzero_si512());
691691
}
692692

693-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
694-
_mm512_avg_epu8 (__m512i __A, __m512i __B)
695-
{
696-
return (__m512i)__builtin_ia32_pavgb512((__v64qi)__A, (__v64qi)__B);
693+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
694+
_mm512_avg_epu8(__m512i __A, __m512i __B) {
695+
return (__m512i)__builtin_ia32_pavgb512((__v64qu)__A, (__v64qu)__B);
697696
}
698697

699-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
700-
_mm512_mask_avg_epu8 (__m512i __W, __mmask64 __U, __m512i __A,
701-
__m512i __B)
702-
{
703-
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
704-
(__v64qi)_mm512_avg_epu8(__A, __B),
705-
(__v64qi)__W);
698+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
699+
_mm512_mask_avg_epu8(__m512i __W, __mmask64 __U, __m512i __A, __m512i __B) {
700+
return (__m512i)__builtin_ia32_selectb_512(
701+
(__mmask64)__U, (__v64qi)_mm512_avg_epu8(__A, __B), (__v64qi)__W);
706702
}
707703

708-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
709-
_mm512_maskz_avg_epu8 (__mmask64 __U, __m512i __A, __m512i __B)
710-
{
704+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
705+
_mm512_maskz_avg_epu8(__mmask64 __U, __m512i __A, __m512i __B) {
711706
return (__m512i)__builtin_ia32_selectb_512((__mmask64)__U,
712-
(__v64qi)_mm512_avg_epu8(__A, __B),
713-
(__v64qi)_mm512_setzero_si512());
707+
(__v64qi)_mm512_avg_epu8(__A, __B),
708+
(__v64qi)_mm512_setzero_si512());
714709
}
715710

716-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
717-
_mm512_avg_epu16 (__m512i __A, __m512i __B)
718-
{
719-
return (__m512i)__builtin_ia32_pavgw512((__v32hi)__A, (__v32hi)__B);
711+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
712+
_mm512_avg_epu16(__m512i __A, __m512i __B) {
713+
return (__m512i)__builtin_ia32_pavgw512((__v32hu)__A, (__v32hu)__B);
720714
}
721715

722-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
723-
_mm512_mask_avg_epu16 (__m512i __W, __mmask32 __U, __m512i __A,
724-
__m512i __B)
725-
{
726-
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
727-
(__v32hi)_mm512_avg_epu16(__A, __B),
728-
(__v32hi)__W);
716+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
717+
_mm512_mask_avg_epu16(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) {
718+
return (__m512i)__builtin_ia32_selectw_512(
719+
(__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B), (__v32hi)__W);
729720
}
730721

731-
static __inline__ __m512i __DEFAULT_FN_ATTRS512
732-
_mm512_maskz_avg_epu16 (__mmask32 __U, __m512i __A, __m512i __B)
733-
{
734-
return (__m512i)__builtin_ia32_selectw_512((__mmask32)__U,
735-
(__v32hi)_mm512_avg_epu16(__A, __B),
736-
(__v32hi) _mm512_setzero_si512());
722+
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR
723+
_mm512_maskz_avg_epu16(__mmask32 __U, __m512i __A, __m512i __B) {
724+
return (__m512i)__builtin_ia32_selectw_512(
725+
(__mmask32)__U, (__v32hi)_mm512_avg_epu16(__A, __B),
726+
(__v32hi)_mm512_setzero_si512());
737727
}
738728

739729
static __inline__ __m512i __DEFAULT_FN_ATTRS512_CONSTEXPR

clang/lib/Headers/avx512vlbwintrin.h

Lines changed: 27 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -795,68 +795,56 @@ _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
795795
(__v16hi)_mm256_setzero_si256());
796796
}
797797

798-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
799-
_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
800-
{
801-
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
802-
(__v16qi)_mm_avg_epu8(__A, __B),
803-
(__v16qi)__W);
798+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
799+
_mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
800+
return (__m128i)__builtin_ia32_selectb_128(
801+
(__mmask16)__U, (__v16qi)_mm_avg_epu8(__A, __B), (__v16qi)__W);
804802
}
805803

806-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
807-
_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
808-
{
804+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
805+
_mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) {
809806
return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
810807
(__v16qi)_mm_avg_epu8(__A, __B),
811808
(__v16qi)_mm_setzero_si128());
812809
}
813810

814-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
815-
_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
816-
{
817-
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
818-
(__v32qi)_mm256_avg_epu8(__A, __B),
819-
(__v32qi)__W);
811+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
812+
_mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
813+
return (__m256i)__builtin_ia32_selectb_256(
814+
(__mmask32)__U, (__v32qi)_mm256_avg_epu8(__A, __B), (__v32qi)__W);
820815
}
821816

822-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
823-
_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
824-
{
817+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
818+
_mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) {
825819
return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
826820
(__v32qi)_mm256_avg_epu8(__A, __B),
827821
(__v32qi)_mm256_setzero_si256());
828822
}
829823

830-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
831-
_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
832-
{
833-
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
834-
(__v8hi)_mm_avg_epu16(__A, __B),
835-
(__v8hi)__W);
824+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
825+
_mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
826+
return (__m128i)__builtin_ia32_selectw_128(
827+
(__mmask8)__U, (__v8hi)_mm_avg_epu16(__A, __B), (__v8hi)__W);
836828
}
837829

838-
static __inline__ __m128i __DEFAULT_FN_ATTRS128
839-
_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
840-
{
830+
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR
831+
_mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
841832
return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
842833
(__v8hi)_mm_avg_epu16(__A, __B),
843834
(__v8hi)_mm_setzero_si128());
844835
}
845836

846-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
847-
_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
848-
{
849-
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
850-
(__v16hi)_mm256_avg_epu16(__A, __B),
851-
(__v16hi)__W);
837+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
838+
_mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
839+
return (__m256i)__builtin_ia32_selectw_256(
840+
(__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B), (__v16hi)__W);
852841
}
853842

854-
static __inline__ __m256i __DEFAULT_FN_ATTRS256
855-
_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
856-
{
857-
return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
858-
(__v16hi)_mm256_avg_epu16(__A, __B),
859-
(__v16hi)_mm256_setzero_si256());
843+
static __inline__ __m256i __DEFAULT_FN_ATTRS256_CONSTEXPR
844+
_mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
845+
return (__m256i)__builtin_ia32_selectw_256(
846+
(__mmask16)__U, (__v16hi)_mm256_avg_epu16(__A, __B),
847+
(__v16hi)_mm256_setzero_si256());
860848
}
861849

862850
static __inline__ __m128i __DEFAULT_FN_ATTRS128_CONSTEXPR

clang/lib/Headers/emmintrin.h

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ typedef double __v2df __attribute__((__vector_size__(16)));
2727

2828
/* Unsigned types */
2929
typedef unsigned long long __v2du __attribute__((__vector_size__(16)));
30-
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
3130

3231
/* We need an explicitly signed variant for char. Note that this shouldn't
3332
* appear in the interface though. */
@@ -2247,9 +2246,9 @@ _mm_adds_epu16(__m128i __a, __m128i __b) {
22472246
/// A 128-bit unsigned [16 x i8] vector.
22482247
/// \returns A 128-bit unsigned [16 x i8] vector containing the rounded
22492248
/// averages of both parameters.
2250-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
2251-
__m128i __b) {
2252-
return (__m128i)__builtin_ia32_pavgb128((__v16qi)__a, (__v16qi)__b);
2249+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2250+
_mm_avg_epu8(__m128i __a, __m128i __b) {
2251+
return (__m128i)__builtin_ia32_pavgb128((__v16qu)__a, (__v16qu)__b);
22532252
}
22542253

22552254
/// Computes the rounded averages of corresponding elements of two
@@ -2266,9 +2265,9 @@ static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu8(__m128i __a,
22662265
/// A 128-bit unsigned [8 x i16] vector.
22672266
/// \returns A 128-bit unsigned [8 x i16] vector containing the rounded
22682267
/// averages of both parameters.
2269-
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_avg_epu16(__m128i __a,
2270-
__m128i __b) {
2271-
return (__m128i)__builtin_ia32_pavgw128((__v8hi)__a, (__v8hi)__b);
2268+
static __inline__ __m128i __DEFAULT_FN_ATTRS_CONSTEXPR
2269+
_mm_avg_epu16(__m128i __a, __m128i __b) {
2270+
return (__m128i)__builtin_ia32_pavgw128((__v8hu)__a, (__v8hu)__b);
22722271
}
22732272

22742273
/// Multiplies the corresponding elements of two 128-bit signed [8 x i16]

clang/lib/Headers/xmmintrin.h

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ typedef float __m128_u __attribute__((__vector_size__(16), __aligned__(1)));
2424
/* Unsigned types */
2525
typedef unsigned int __v4su __attribute__((__vector_size__(16)));
2626
typedef unsigned short __v8hu __attribute__((__vector_size__(16)));
27+
typedef unsigned char __v16qu __attribute__((__vector_size__(16)));
2728

2829
/* This header should only be included in a hosted environment as it depends on
2930
* a standard library to provide allocation routines. */
@@ -2539,11 +2540,10 @@ _mm_maskmove_si64(__m64 __d, __m64 __n, char *__p)
25392540
/// \param __b
25402541
/// A 64-bit integer vector containing one of the source operands.
25412542
/// \returns A 64-bit integer vector containing the averages of both operands.
2542-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
2543-
_mm_avg_pu8(__m64 __a, __m64 __b)
2544-
{
2545-
return __trunc64(__builtin_ia32_pavgb128((__v16qi)__anyext128(__a),
2546-
(__v16qi)__anyext128(__b)));
2543+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
2544+
_mm_avg_pu8(__m64 __a, __m64 __b) {
2545+
return __trunc64(__builtin_ia32_pavgb128((__v16qu)__zext128(__a),
2546+
(__v16qu)__zext128(__b)));
25472547
}
25482548

25492549
/// Computes the rounded averages of the packed unsigned 16-bit integer
@@ -2559,11 +2559,10 @@ _mm_avg_pu8(__m64 __a, __m64 __b)
25592559
/// \param __b
25602560
/// A 64-bit integer vector containing one of the source operands.
25612561
/// \returns A 64-bit integer vector containing the averages of both operands.
2562-
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2
2563-
_mm_avg_pu16(__m64 __a, __m64 __b)
2564-
{
2565-
return __trunc64(__builtin_ia32_pavgw128((__v8hi)__anyext128(__a),
2566-
(__v8hi)__anyext128(__b)));
2562+
static __inline__ __m64 __DEFAULT_FN_ATTRS_SSE2_CONSTEXPR
2563+
_mm_avg_pu16(__m64 __a, __m64 __b) {
2564+
return __trunc64(
2565+
__builtin_ia32_pavgw128((__v8hu)__zext128(__a), (__v8hu)__zext128(__b)));
25672566
}
25682567

25692568
/// Subtracts the corresponding 8-bit unsigned integer values of the two

clang/test/CodeGen/X86/avx2-builtins.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,12 +128,14 @@ __m256i test_mm256_avg_epu8(__m256i a, __m256i b) {
128128
// CHECK: call <32 x i8> @llvm.x86.avx2.pavg.b(<32 x i8> %{{.*}}, <32 x i8> %{{.*}})
129129
return _mm256_avg_epu8(a, b);
130130
}
131+
TEST_CONSTEXPR(match_v32qu(_mm256_avg_epu8((__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}, (__m256i)(__v32qu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32));
131132

132133
__m256i test_mm256_avg_epu16(__m256i a, __m256i b) {
133134
// CHECK-LABEL: test_mm256_avg_epu16
134135
// CHECK: call <16 x i16> @llvm.x86.avx2.pavg.w(<16 x i16> %{{.*}}, <16 x i16> %{{.*}})
135136
return _mm256_avg_epu16(a, b);
136137
}
138+
TEST_CONSTEXPR(match_v16hu(_mm256_avg_epu16((__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}, (__m256i)(__v16hu){1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}), 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16));
137139

138140
// FIXME: We should also lower the __builtin_ia32_pblendw128 (and similar)
139141
// functions to this IR. In the future we could delete the corresponding

0 commit comments

Comments
 (0)