diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 69d18679fd6ec..bbe0aa3657c06 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -2142,7 +2142,7 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128> def vcomiss : X86Builtin<"int(_Vector<4, float>, _Vector<4, float>, _Constant int, _Constant int)">; } -let Features = "avx512bw", Attributes = [NoThrow, Const] in { +let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def kunpckdi : X86Builtin<"unsigned long long int(unsigned long long int, unsigned long long int)">; def kunpcksi : X86Builtin<"unsigned int(unsigned int, unsigned int)">; } @@ -3185,7 +3185,7 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, Constexpr] in { def ktestzdi : X86Builtin<"int(unsigned long long int, unsigned long long int)">; } -let Features = "avx512f", Attributes = [NoThrow, Const] in { +let Features = "avx512f", Attributes = [NoThrow, Const, Constexpr] in { def kunpckhi : X86Builtin<"unsigned short(unsigned short, unsigned short)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index afcbe9d4f5b81..a2f99c7c234fe 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -4619,6 +4619,18 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, S, OpPC, Call, [](const APSInt &LHS, const APSInt &RHS) { return LHS + RHS; }); + case X86::BI__builtin_ia32_kunpckhi: + case X86::BI__builtin_ia32_kunpckdi: + case X86::BI__builtin_ia32_kunpcksi: + return interp__builtin_elementwise_int_binop( + S, OpPC, Call, [](const APSInt &A, const APSInt &B) { + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + return APSInt(A.trunc(BW / 2).concat(B.trunc(BW / 2)), + A.isUnsigned()); + }); + case X86::BI__builtin_ia32_phminposuw128: return interp__builtin_ia32_phminposuw(S, OpPC, Call); diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 888dca1e3a613..d9b3ee20e919f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -16348,6 +16348,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E, return Success((A | B) == 0, E); } + case clang::X86::BI__builtin_ia32_kunpckhi: + case clang::X86::BI__builtin_ia32_kunpckdi: + case clang::X86::BI__builtin_ia32_kunpcksi: { + APSInt A, B; + if (!EvaluateInteger(E->getArg(0), A, Info) || + !EvaluateInteger(E->getArg(1), B, Info)) + return false; + + // Generic kunpack: extract lower half of each operand and concatenate + // Result = A[HalfWidth-1:0] concat B[HalfWidth-1:0] + unsigned BW = A.getBitWidth(); + APSInt Result(A.trunc(BW / 2).concat(B.trunc(BW / 2)), A.isUnsigned()); + return Success(Result, E); + } + case clang::X86::BI__builtin_ia32_lzcnt_u16: case clang::X86::BI__builtin_ia32_lzcnt_u32: case clang::X86::BI__builtin_ia32_lzcnt_u64: { diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h index 3cfa32eb9e727..c37b42d965167 100644 --- a/clang/lib/Headers/avx512bwintrin.h +++ b/clang/lib/Headers/avx512bwintrin.h @@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) { (__v64qi) _mm512_setzero_si512()); } -static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A, - __mmask64 __B) { +static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackd(__mmask64 __A, __mmask64 __B) { return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A, (__mmask64) __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS -_mm512_kunpackw (__mmask32 __A, __mmask32 __B) -{ +static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackw(__mmask32 __A, __mmask32 __B) { return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A, (__mmask32) __B); } diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h index 976eba816b8bf..53b18df764370 100644 --- a/clang/lib/Headers/avx512fintrin.h +++ b/clang/lib/Headers/avx512fintrin.h @@ -8069,9 +8069,8 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) { return (unsigned char)__builtin_ia32_kortestzhi(__A, __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS -_mm512_kunpackb (__mmask16 __A, __mmask16 __B) -{ +static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR +_mm512_kunpackb(__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } diff --git a/clang/test/CodeGen/X86/avx512bw-builtins.c b/clang/test/CodeGen/X86/avx512bw-builtins.c index 45f1a600d380a..140a2c0dcbb56 100644 --- a/clang/test/CodeGen/X86/avx512bw-builtins.c +++ b/clang/test/CodeGen/X86/avx512bw-builtins.c @@ -2731,6 +2731,12 @@ __mmask64 test_mm512_kunpackd(__m512i __A, __m512i __B, __m512i __C, __m512i __D return _mm512_mask_cmpneq_epu8_mask(_mm512_kunpackd(_mm512_cmpneq_epu8_mask(__B, __A),_mm512_cmpneq_epu8_mask(__C, __D)), __E, __F); } +TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull); +TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x2345678989ABCDEFull); +TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0xFFFFFFFF00000000ull); +TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull); +TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull); + __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kunpackw // CHECK: [[LHS:%.*]] = bitcast i32 %{{.*}} to <32 x i1> @@ -2741,6 +2747,12 @@ __mmask32 test_mm512_kunpackw(__m512i __A, __m512i __B, __m512i __C, __m512i __D return _mm512_mask_cmpneq_epu16_mask(_mm512_kunpackw(_mm512_cmpneq_epu16_mask(__B, __A),_mm512_cmpneq_epu16_mask(__C, __D)), __E, __F); } +TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu); +TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu); +TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0xFFFF0000u); +TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu); +TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u); + __m512i test_mm512_loadu_epi16 (void *__P) { // CHECK-LABEL: test_mm512_loadu_epi16 diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c index 71e700af0069e..ec813e5acd7cf 100644 --- a/clang/test/CodeGen/X86/avx512f-builtins.c +++ b/clang/test/CodeGen/X86/avx512f-builtins.c @@ -9162,6 +9162,12 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D __E, __F); } +TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0x00FF); +TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34); +TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0xFF00); +TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55); +TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD); + __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) { // CHECK-LABEL: test_mm512_kxnor // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>