Skip to content

Conversation

@ahmednoursphinx
Copy link
Contributor

Resolves #166976

Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw
to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic.

These intrinsics extract and concatenate the lower halves of mask
registers, using the existing backend support for __builtin_ia32_kunpckdi
and __builtin_ia32_kunpcksi builtins.

Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h
for completeness.

Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c.
…expr support

Add AVX-512 mask unpack intrinsics _mm512_kunpackd and _mm512_kunpackw
to avx512fintrin.h alongside the existing _mm512_kunpackb intrinsic.

These intrinsics extract and concatenate the lower halves of mask
registers, using the existing backend support for __builtin_ia32_kunpckdi
and __builtin_ia32_kunpcksi builtins.

Also adds __mmask32 and __mmask64 type definitions to avx512fintrin.h
for completeness.

This patch adds constexpr support for all three kunpack intrinsics by:
1. Using __DEFAULT_FN_ATTRS_CONSTEXPR attribute
2. Adding builtin interpretation in ExprConstant.cpp for compile-time
   evaluation in constexpr contexts
3. Adding constexpr tests to verify correct behavior

Tests already exist in clang/test/CodeGen/X86/avx512bw-builtins.c for
runtime code generation validation.
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics labels Nov 12, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 12, 2025

@llvm/pr-subscribers-backend-x86

@llvm/pr-subscribers-clang

Author: Ahmed Nour (ahmednoursphinx)

Changes

Resolves #166976


Full diff: https://github.com/llvm/llvm-project/pull/167683.diff

4 Files Affected:

  • (modified) clang/lib/AST/ExprConstant.cpp (+51)
  • (modified) clang/lib/Headers/avx512bwintrin.h (+4-5)
  • (modified) clang/lib/Headers/avx512fintrin.h (+14-3)
  • (modified) clang/test/CodeGen/X86/avx512f-builtins.c (+18)
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index 1bfea24b228e8..a0a1d0ce2a94b 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -16287,6 +16287,42 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success((A | B) == 0, E);
   }
 
+  case clang::X86::BI__builtin_ia32_kunpckhi: {
+    APSInt A, B;
+    if (!EvaluateInteger(E->getArg(0), A, Info) ||
+        !EvaluateInteger(E->getArg(1), B, Info))
+      return false;
+
+    // Extract lower 8 bits of each operand and concatenate
+    // Result = (A[7:0] << 8) | B[7:0]
+    APSInt Result = ((A & 0xFF) << 8) | (B & 0xFF);
+    return Success(Result, E);
+  }
+
+  case clang::X86::BI__builtin_ia32_kunpckdi: {
+    APSInt A, B;
+    if (!EvaluateInteger(E->getArg(0), A, Info) ||
+        !EvaluateInteger(E->getArg(1), B, Info))
+      return false;
+
+    // Extract lower 32 bits of each operand and concatenate
+    // Result = (A[31:0] << 32) | B[31:0]
+    APSInt Result = ((A & 0xFFFFFFFFULL) << 32) | (B & 0xFFFFFFFFULL);
+    return Success(Result, E);
+  }
+
+  case clang::X86::BI__builtin_ia32_kunpcksi: {
+    APSInt A, B;
+    if (!EvaluateInteger(E->getArg(0), A, Info) ||
+        !EvaluateInteger(E->getArg(1), B, Info))
+      return false;
+
+    // Extract lower 16 bits of each operand and concatenate
+    // Result = (A[15:0] << 16) | B[15:0]
+    APSInt Result = ((A & 0xFFFF) << 16) | (B & 0xFFFF);
+    return Success(Result, E);
+  }
+
   case clang::X86::BI__builtin_ia32_lzcnt_u16:
   case clang::X86::BI__builtin_ia32_lzcnt_u32:
   case clang::X86::BI__builtin_ia32_lzcnt_u64: {
@@ -16413,6 +16449,21 @@ bool IntExprEvaluator::VisitBuiltinCallExpr(const CallExpr *E,
     return Success(APValue(Result), E);
   }
 
+  case X86::BI__builtin_ia32_kunpckhi:
+  case X86::BI__builtin_ia32_kunpcksi:
+  case X86::BI__builtin_ia32_kunpckdi: {
+    return HandleMaskBinOp([](const APSInt &LHS, const APSInt &RHS) {
+      // Unpack: concatenate lower half of RHS with lower half of LHS
+      unsigned HalfBits = LHS.getBitWidth() / 2;
+      APSInt Mask = APSInt::getMaxValue(LHS.getBitWidth(), LHS.isUnsigned());
+      Mask = Mask.trunc(HalfBits).zext(LHS.getBitWidth());
+
+      APSInt LowerLHS = LHS & Mask;
+      APSInt LowerRHS = RHS & Mask;
+      return LowerRHS | (LowerLHS << HalfBits);
+    });
+  }
+
   case X86::BI__builtin_ia32_kaddqi:
   case X86::BI__builtin_ia32_kaddhi:
   case X86::BI__builtin_ia32_kaddsi:
diff --git a/clang/lib/Headers/avx512bwintrin.h b/clang/lib/Headers/avx512bwintrin.h
index 4a02c96620335..d247f648b9eb5 100644
--- a/clang/lib/Headers/avx512bwintrin.h
+++ b/clang/lib/Headers/avx512bwintrin.h
@@ -1606,15 +1606,14 @@ _mm512_maskz_set1_epi8(__mmask64 __M, char __A) {
                                               (__v64qi) _mm512_setzero_si512());
 }
 
-static __inline__ __mmask64 __DEFAULT_FN_ATTRS _mm512_kunpackd(__mmask64 __A,
-                                                               __mmask64 __B) {
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
   return (__mmask64) __builtin_ia32_kunpckdi ((__mmask64) __A,
                 (__mmask64) __B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
-_mm512_kunpackw (__mmask32 __A, __mmask32 __B)
-{
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
   return (__mmask32) __builtin_ia32_kunpcksi ((__mmask32) __A,
                 (__mmask32) __B);
 }
diff --git a/clang/lib/Headers/avx512fintrin.h b/clang/lib/Headers/avx512fintrin.h
index 997e9608e112f..badc30a7eb26c 100644
--- a/clang/lib/Headers/avx512fintrin.h
+++ b/clang/lib/Headers/avx512fintrin.h
@@ -40,6 +40,8 @@ typedef long long __m512i_u __attribute__((__vector_size__(64), __aligned__(1)))
 
 typedef unsigned char __mmask8;
 typedef unsigned short __mmask16;
+typedef unsigned int __mmask32;
+typedef unsigned long long __mmask64;
 
 /* Rounding mode macros.  */
 #define _MM_FROUND_TO_NEAREST_INT   0x00
@@ -8094,12 +8096,21 @@ _kortest_mask16_u8(__mmask16 __A, __mmask16 __B, unsigned char *__C) {
   return (unsigned char)__builtin_ia32_kortestzhi(__A, __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
-_mm512_kunpackb (__mmask16 __A, __mmask16 __B)
-{
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackb(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
 }
 
+static __inline__ __mmask64 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackd(__mmask64 __A, __mmask64 __B) {
+  return (__mmask64)__builtin_ia32_kunpckdi((__mmask64)__A, (__mmask64)__B);
+}
+
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS_CONSTEXPR
+_mm512_kunpackw(__mmask32 __A, __mmask32 __B) {
+  return (__mmask32)__builtin_ia32_kunpcksi((__mmask32)__A, (__mmask32)__B);
+}
+
 static __inline__ __mmask16 __DEFAULT_FN_ATTRS_CONSTEXPR
 _mm512_kxnor(__mmask16 __A, __mmask16 __B) {
   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
diff --git a/clang/test/CodeGen/X86/avx512f-builtins.c b/clang/test/CodeGen/X86/avx512f-builtins.c
index 17778b52d3671..9dcc749910175 100644
--- a/clang/test/CodeGen/X86/avx512f-builtins.c
+++ b/clang/test/CodeGen/X86/avx512f-builtins.c
@@ -9126,6 +9126,24 @@ __mmask16 test_mm512_kunpackb(__m512i __A, __m512i __B, __m512i __C, __m512i __D
                                                        __E, __F);
 }
 
+TEST_CONSTEXPR(_mm512_kunpackb(0xFF00, 0x00FF) == 0xFF00);
+TEST_CONSTEXPR(_mm512_kunpackb(0xABCD, 0x1234) == 0xCD34);
+TEST_CONSTEXPR(_mm512_kunpackb(0x00FF, 0xFF00) == 0x0000);
+TEST_CONSTEXPR(_mm512_kunpackb(0xAAAA, 0x5555) == 0xAA55);
+TEST_CONSTEXPR(_mm512_kunpackb(0x1234, 0xABCD) == 0x34CD);
+
+TEST_CONSTEXPR(_mm512_kunpackw(0xFFFF0000u, 0x0000FFFFu) == 0x0000FFFFu);
+TEST_CONSTEXPR(_mm512_kunpackw(0xABCD1234u, 0x56789ABCu) == 0x12349ABCu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x0000FFFFu, 0xFFFF0000u) == 0x00000000u);
+TEST_CONSTEXPR(_mm512_kunpackw(0xAAAA5555u, 0x5555AAAAu) == 0x5555AAAAu);
+TEST_CONSTEXPR(_mm512_kunpackw(0x12345678u, 0xABCDEF12u) == 0x5678EF12u);
+
+TEST_CONSTEXPR(_mm512_kunpackd(0xFFFFFFFF00000000ull, 0x00000000FFFFFFFFull) == 0x00000000FFFFFFFFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xABCDEF0123456789ull, 0x0123456789ABCDEFull) == 0x234567899ABCDEFull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x00000000FFFFFFFFull, 0xFFFFFFFF00000000ull) == 0x0000000000000000ull);
+TEST_CONSTEXPR(_mm512_kunpackd(0xAAAA5555AAAA5555ull, 0x5555AAAA5555AAAAull) == 0xAAAA55555555AAAAull);
+TEST_CONSTEXPR(_mm512_kunpackd(0x123456789ABCDEFull, 0xFEDCBA9876543210ull) == 0x89ABCDEF76543210ull);
+
 __mmask16 test_mm512_kxnor(__m512i __A, __m512i __B, __m512i __C, __m512i __D, __m512i __E, __m512i __F) {
   // CHECK-LABEL: test_mm512_kxnor
   // CHECK: [[LHS:%.*]] = bitcast i16 %{{.*}} to <16 x i1>

@RKSimon RKSimon self-requested a review November 12, 2025 12:26
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Still missing InterpBuiltin and BuiltinsX86.td fixes

@llvmbot llvmbot added the clang:bytecode Issues for the clang bytecode constexpr interpreter label Nov 16, 2025
@github-actions
Copy link

github-actions bot commented Nov 16, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@RKSimon RKSimon self-requested a review November 16, 2025 15:08
Copy link
Collaborator

@RKSimon RKSimon left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM - cheers

@RKSimon RKSimon enabled auto-merge (squash) November 17, 2025 09:41
@RKSimon RKSimon merged commit 44f72fb into llvm:main Nov 17, 2025
9 of 10 checks passed
@llvm-ci
Copy link
Collaborator

llvm-ci commented Nov 17, 2025

LLVM Buildbot has detected a new failure on builder llvm-clang-x86_64-gcc-ubuntu running on sie-linux-worker3 while building clang at step 6 "test-build-unified-tree-check-all".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/174/builds/27581

Here is the relevant piece of the build log for the reference
Step 6 (test-build-unified-tree-check-all) failure: test (failure)
******************** TEST 'SanitizerCommon-asan-x86_64-Linux :: Linux/soft_rss_limit_mb_test.cpp' FAILED ********************
Exit Code: 1

Command Output (stdout):
--
# RUN: at line 2
/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang  --driver-mode=g++ -gline-tables-only -fsanitize=address  -m64 -funwind-tables  -I/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test -ldl -O2 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# executed command: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/./bin/clang --driver-mode=g++ -gline-tables-only -fsanitize=address -m64 -funwind-tables -I/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test -ldl -O2 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -o /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# RUN: at line 5
env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1      /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
# executed command: env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=1 /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_1
# note: command had no output on stdout or stderr
# RUN: at line 6
env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0 not  /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 --implicit-check-not="returned null"
# executed command: env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0 not /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 '--implicit-check-not=returned null'
# note: command had no output on stdout or stderr
# RUN: at line 10
env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0:can_use_proc_maps_statm=0 not  /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp 2>&1 | FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 --implicit-check-not="returned null"
# executed command: env ASAN_OPTIONS=soft_rss_limit_mb=220:quarantine_size=1:allocator_may_return_null=0:can_use_proc_maps_statm=0 not /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/build/runtimes/runtimes-bins/compiler-rt/test/sanitizer_common/asan-x86_64-Linux/Linux/Output/soft_rss_limit_mb_test.cpp.tmp
# note: command had no output on stdout or stderr
# executed command: FileCheck /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp -check-prefix=CHECK_MAY_RETURN_0 '--implicit-check-not=returned null'
# .---command stderr------------
# | �[1m/home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp:72:24: �[0m�[0;1;31merror: �[0m�[1mCHECK_MAY_RETURN_0: expected string not found in input
�[0m# | �[1m�[0m// CHECK_MAY_RETURN_0: Some of the malloc calls returned non-null:
# | �[0;1;32m                       ^
�[0m# | �[0;1;32m�[0m�[1m<stdin>:1:24: �[0m�[0;1;30mnote: �[0m�[1mscanning from here
�[0m# | �[1m�[0m[0] allocating 32 times
# | �[0;1;32m                       ^
�[0m# | �[0;1;32m�[0m�[1m<stdin>:10:55: �[0m�[0;1;30mnote: �[0m�[1mpossible intended match here
�[0m# | �[1m�[0m==3105301==HINT: if you don't care about these errors you may set allocator_may_return_null=1
# | �[0;1;32m                                                      ^
�[0m# | �[0;1;32m�[0m
# | Input file: <stdin>
# | Check file: /home/buildbot/buildbot-root/llvm-clang-x86_64-gcc-ubuntu/llvm-project/compiler-rt/test/sanitizer_common/TestCases/Linux/soft_rss_limit_mb_test.cpp
# | 
# | -dump-input=help explains the following input dump.
# | 
# | Input was:
# | <<<<<<
# | �[1m�[0m�[0;1;30m            1: �[0m�[1m�[0;1;46m[0] �[0mallocating 32 times�[0;1;46m �[0m
# | �[0;1;32mcheck:71           ^~~~~~~~~~~~~~~~~~~
�[0m# | �[0;1;32m�[0m�[0;1;32mnot:imp1       X~~~
�[0m# | �[0;1;32m�[0m�[0;1;31mcheck:72'0                            X error: no match found
�[0m# | �[0;1;31m�[0m�[0;1;30m            2: �[0m�[1m�[0;1;46m [0] �[0m
# | �[0;1;31mcheck:72'0     ~~~~~
...

@ahmednoursphinx ahmednoursphinx deleted the issue_166976 branch November 17, 2025 11:34
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

backend:X86 clang:bytecode Issues for the clang bytecode constexpr interpreter clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[X86][Clang] VectorExprEvaluator::VisitCallExpr / InterpretBuiltin - add AVX512 kunpck intrinsics to be used in constexp

4 participants