Skip to content

Conversation

FreddyLeaf
Copy link
Contributor

@FreddyLeaf FreddyLeaf commented Aug 2, 2024

Copy link

github-actions bot commented Aug 2, 2024

⚠️ C/C++ code formatter, clang-format found issues in your code. ⚠️

You can test this locally with the following command:
git-clang-format --diff 3d5cc7e1e632b74119af13824dabc346bd248c93 59df2576f81d5741271bc7329fade564b83d492d --extensions cpp,inc,c,h -- clang/lib/Headers/avx10_2_512satcvtintrin.h clang/lib/Headers/avx10_2satcvtintrin.h clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c clang/test/CodeGen/X86/avx10_2satcvt-builtins.c clang/lib/Headers/immintrin.h clang/lib/Sema/SemaX86.cpp llvm/lib/Target/X86/X86ISelLowering.cpp llvm/lib/Target/X86/X86ISelLowering.h llvm/lib/Target/X86/X86IntrinsicsInfo.h llvm/test/TableGen/x86-fold-tables.inc
View the diff from clang-format here.
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
index 0dadadb6a0..67b2d5a421 100644
--- a/clang/lib/Headers/avx10_2_512satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -68,7 +68,7 @@
 
 #define _mm512_ipcvtph_epi8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
@@ -84,7 +84,7 @@
 #define _mm512_ipcvt_roundph_epi8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
                                               (__v32hu)_mm512_setzero_si512(), \
-                                              (__mmask32)-1, (const int)R))
+                                              (__mmask32) - 1, (const int)R))
 
 #define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
   ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
@@ -97,7 +97,7 @@
 
 #define _mm512_ipcvtph_epu8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtph_epu8(W, U, A)                                      \
@@ -112,7 +112,7 @@
 
 #define _mm512_ipcvt_roundph_epu8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       (const int)R))
 
 #define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
@@ -126,7 +126,7 @@
 
 #define _mm512_ipcvtps_epi8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtps_epi8(W, U, A)                                      \
@@ -142,7 +142,7 @@
 #define _mm512_ipcvt_roundps_epi8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
                                               (__v16su)_mm512_setzero_si512(), \
-                                              (__mmask16)-1, (const int)R))
+                                              (__mmask16) - 1, (const int)R))
 
 #define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
   ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
@@ -155,7 +155,7 @@
 
 #define _mm512_ipcvtps_epu8(A)                                                 \
   ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvtps_epu8(W, U, A)                                      \
@@ -170,7 +170,7 @@
 
 #define _mm512_ipcvt_roundps_epu8(A, R)                                        \
   ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
-      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1,  \
       (const int)R))
 
 #define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
@@ -184,7 +184,7 @@
 
 #define _mm512_ipcvttph_epi8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttph_epi8(W, U, A)                                     \
@@ -199,7 +199,7 @@
 
 #define _mm512_ipcvtt_roundph_epi8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S)                            \
@@ -213,7 +213,7 @@
 
 #define _mm512_ipcvttph_epu8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttph_epu8(W, U, A)                                     \
@@ -228,7 +228,7 @@
 
 #define _mm512_ipcvtt_roundph_epu8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
-      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S)                            \
@@ -242,7 +242,7 @@
 
 #define _mm512_ipcvttps_epi8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttps_epi8(W, U, A)                                     \
@@ -257,7 +257,7 @@
 
 #define _mm512_ipcvtt_roundps_epi8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundps_epi8(W, U, A, S)                            \
@@ -271,7 +271,7 @@
 
 #define _mm512_ipcvttps_epu8(A)                                                \
   ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm512_mask_ipcvttps_epu8(W, U, A)                                     \
@@ -286,7 +286,7 @@
 
 #define _mm512_ipcvtt_roundps_epu8(A, S)                                       \
   ((__m512i)__builtin_ia32_vcvttps2iubs512_mask(                               \
-      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16) - 1, \
       S))
 
 #define _mm512_mask_ipcvtt_roundps_epu8(W, U, A, S)                            \
diff --git a/clang/lib/Headers/avx10_2satcvtintrin.h b/clang/lib/Headers/avx10_2satcvtintrin.h
index dd5c44fdaf..878f1adf87 100644
--- a/clang/lib/Headers/avx10_2satcvtintrin.h
+++ b/clang/lib/Headers/avx10_2satcvtintrin.h
@@ -66,7 +66,7 @@
 
 #define _mm_ipcvtph_epi8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask(                                 \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtph_epi8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtph2ibs128_mask((__v8hf)(__m128h)(A),            \
@@ -78,7 +78,7 @@
 
 #define _mm256_ipcvtph_epi8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtph_epi8(W, U, A)                                      \
@@ -94,7 +94,7 @@
 #define _mm256_ipcvt_roundph_epi8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask((__v16hf)(__m256h)(A),           \
                                               (__v16hu)_mm256_setzero_si256(), \
-                                              (__mmask16)-1, (const int)R))
+                                              (__mmask16) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtph2ibs256_mask(                                 \
@@ -107,7 +107,7 @@
 
 #define _mm_ipcvtph_epu8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtph_epu8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtph2iubs128_mask((__v8hf)(__m128h)(A),           \
@@ -119,7 +119,7 @@
 
 #define _mm256_ipcvtph_epu8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtph_epu8(W, U, A)                                      \
@@ -134,7 +134,7 @@
 
 #define _mm256_ipcvt_roundph_epu8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtph2iubs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
@@ -148,7 +148,7 @@
 
 #define _mm_ipcvtps_epi8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask(                                 \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtps_epi8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtps2ibs128_mask((__v4sf)(__m128)(A),             \
@@ -160,7 +160,7 @@
 
 #define _mm256_ipcvtps_epi8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtps_epi8(W, U, A)                                      \
@@ -176,7 +176,7 @@
 #define _mm256_ipcvt_roundps_epi8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask((__v8sf)(__m256)(A),             \
                                               (__v8su)_mm256_setzero_si256(),  \
-                                              (__mmask8)-1, (const int)R))
+                                              (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtps2ibs256_mask(                                 \
@@ -189,7 +189,7 @@
 
 #define _mm_ipcvtps_epu8(A)                                                    \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvtps_epu8(W, U, A)                                         \
   ((__m128i)__builtin_ia32_vcvtps2iubs128_mask((__v4sf)(__m128)(A),            \
@@ -201,7 +201,7 @@
 
 #define _mm256_ipcvtps_epu8(A)                                                 \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvtps_epu8(W, U, A)                                      \
@@ -217,7 +217,7 @@
 #define _mm256_ipcvt_roundps_epu8(A, R)                                        \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask((__v8sf)(__m256)(A),            \
                                                (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8)-1, (const int)R))
+                                               (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
   ((__m256i)__builtin_ia32_vcvtps2iubs256_mask(                                \
@@ -280,7 +280,7 @@
 
 #define _mm_ipcvttph_epi8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask(                                \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttph_epi8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttph2ibs128_mask((__v8hf)(__m128h)(A),           \
@@ -292,7 +292,7 @@
 
 #define _mm256_ipcvttph_epi8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttph_epi8(W, U, A)                                     \
@@ -307,7 +307,7 @@
 
 #define _mm256_ipcvtt_roundph_epi8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttph2ibs256_mask(                                \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundph_epi8(W, U, A, R)                            \
@@ -321,7 +321,7 @@
 
 #define _mm_ipcvttph_epu8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask(                               \
-      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8)-1))
+      (__v8hf)(__m128h)(A), (__v8hu)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttph_epu8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttph2iubs128_mask((__v8hf)(__m128h)(A),          \
@@ -333,7 +333,7 @@
 
 #define _mm256_ipcvttph_epu8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttph_epu8(W, U, A)                                     \
@@ -348,7 +348,7 @@
 
 #define _mm256_ipcvtt_roundph_epu8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttph2iubs256_mask(                               \
-      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16)-1,   \
+      (__v16hf)(__m256h)(A), (__v16hu)_mm256_setzero_si256(), (__mmask16) - 1, \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundph_epu8(W, U, A, R)                            \
@@ -362,7 +362,7 @@
 
 #define _mm_ipcvttps_epi8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask(                                \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttps_epi8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttps2ibs128_mask((__v4sf)(__m128)(A),            \
@@ -374,7 +374,7 @@
 
 #define _mm256_ipcvttps_epi8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttps_epi8(W, U, A)                                     \
@@ -390,7 +390,7 @@
 #define _mm256_ipcvtt_roundps_epi8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask((__v8sf)(__m256)(A),            \
                                                (__v8su)_mm256_setzero_si256(), \
-                                               (__mmask8)-1, (const int)R))
+                                               (__mmask8) - 1, (const int)R))
 
 #define _mm256_mask_ipcvtt_roundps_epi8(W, U, A, R)                            \
   ((__m256i)__builtin_ia32_vcvttps2ibs256_mask(                                \
@@ -403,7 +403,7 @@
 
 #define _mm_ipcvttps_epu8(A)                                                   \
   ((__m128i)__builtin_ia32_vcvttps2iubs128_mask(                               \
-      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8)-1))
+      (__v4sf)(__m128)(A), (__v4su)_mm_setzero_si128(), (__mmask8) - 1))
 
 #define _mm_mask_ipcvttps_epu8(W, U, A)                                        \
   ((__m128i)__builtin_ia32_vcvttps2iubs128_mask((__v4sf)(__m128)(A),           \
@@ -415,7 +415,7 @@
 
 #define _mm256_ipcvttps_epu8(A)                                                \
   ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       _MM_FROUND_CUR_DIRECTION))
 
 #define _mm256_mask_ipcvttps_epu8(W, U, A)                                     \
@@ -430,7 +430,7 @@
 
 #define _mm256_ipcvtt_roundps_epu8(A, R)                                       \
   ((__m256i)__builtin_ia32_vcvttps2iubs256_mask(                               \
-      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8)-1,       \
+      (__v8sf)(__m256)(A), (__v8su)_mm256_setzero_si256(), (__mmask8) - 1,     \
       (const int)R))
 
 #define _mm256_mask_ipcvtt_roundps_epu8(W, U, A, R)                            \

@FreddyLeaf FreddyLeaf marked this pull request as ready for review August 5, 2024 03:29
@FreddyLeaf FreddyLeaf requested a review from RKSimon August 5, 2024 03:29
@llvmbot llvmbot added clang Clang issues not falling into any other category backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics llvm:mc Machine (object) code llvm:ir labels Aug 5, 2024
@FreddyLeaf FreddyLeaf requested a review from e-kud August 5, 2024 03:30
@llvmbot
Copy link
Member

llvmbot commented Aug 5, 2024

@llvm/pr-subscribers-mc
@llvm/pr-subscribers-clang

@llvm/pr-subscribers-llvm-ir

Author: Freddy Ye (FreddyLeaf)

Changes

Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965


Patch is 611.58 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101599.diff

25 Files Affected:

  • (modified) clang/include/clang/Basic/BuiltinsX86.def (+38)
  • (modified) clang/lib/Headers/CMakeLists.txt (+2)
  • (added) clang/lib/Headers/avx10_2_512satcvtintrin.h (+327)
  • (added) clang/lib/Headers/avx10_2satcvtintrin.h (+448)
  • (modified) clang/lib/Headers/immintrin.h (+2)
  • (modified) clang/lib/Sema/SemaX86.cpp (+16)
  • (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins-error.c (+198)
  • (added) clang/test/CodeGen/X86/avx10_2_512satcvt-builtins.c (+379)
  • (added) clang/test/CodeGen/X86/avx10_2satcvt-builtins.c (+603)
  • (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+112)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+20)
  • (modified) llvm/lib/Target/X86/X86ISelLowering.h (+21)
  • (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+170)
  • (modified) llvm/lib/Target/X86/X86InstrFragmentsSIMD.td (+82)
  • (modified) llvm/lib/Target/X86/X86InstrUtils.td (+3-3)
  • (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+72)
  • (added) llvm/test/CodeGen/X86/avx10_2_512satcvt-intrinsics.ll (+1003)
  • (added) llvm/test/CodeGen/X86/avx10_2satcvt-intrinsics.ll (+1618)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-32.txt (+1363)
  • (added) llvm/test/MC/Disassembler/X86/avx10.2-satcvt-64.txt (+1363)
  • (added) llvm/test/MC/X86/avx10.2satcvt-32-att.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-32-intel.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-64-att.s (+1362)
  • (added) llvm/test/MC/X86/avx10.2satcvt-64-intel.s (+1362)
  • (modified) llvm/test/TableGen/x86-fold-tables.inc (+216)
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index c49b5c36da4fc..fb55057b8cbc3 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2158,6 +2158,44 @@ TARGET_BUILTIN(__builtin_ia32_vminmaxps512_round_mask, "V16fV16fV16fIiV16fUsIi",
 TARGET_BUILTIN(__builtin_ia32_vminmaxsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxsh_round_mask, "V8xV8xV8xIiV8xUcIi", "nV:128:", "avx10.2-256")
 TARGET_BUILTIN(__builtin_ia32_vminmaxss_round_mask, "V4fV4fV4fIiV4fUcIi", "nV:128:", "avx10.2-256")
+
+// AVX10.2 SATCVT
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162ibs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs128, "V8UsV8y", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs256, "V16UsV16y", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttnebf162iubs512, "V32UsV32y", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2ibs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs128_mask, "V8UsV8xV8UsUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs256_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2iubs512_mask, "V32UsV32xV32UsUiIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2ibs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs128_mask, "V4UiV4fV4UiUc", "nV:128:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs256_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2iubs512_mask, "V16UiV16fV16UiUsIi", "nV:512:", "avx10.2-512")
 #undef BUILTIN
 #undef TARGET_BUILTIN
 #undef TARGET_HEADER_BUILTIN
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f3d19e38f8f2b..91e106427ba1d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -149,7 +149,9 @@ set(x86_files
   amxintrin.h
   avx10_2_512minmaxintrin.h
   avx10_2_512niintrin.h
+  avx10_2_512satcvtintrin.h
   avx10_2minmaxintrin.h
+  avx10_2satcvtintrin.h
   avx10_2niintrin.h
   avx2intrin.h
   avx512bf16intrin.h
diff --git a/clang/lib/Headers/avx10_2_512satcvtintrin.h b/clang/lib/Headers/avx10_2_512satcvtintrin.h
new file mode 100644
index 0000000000000..0ea645bee22f9
--- /dev/null
+++ b/clang/lib/Headers/avx10_2_512satcvtintrin.h
@@ -0,0 +1,327 @@
+/*===------ avx10_2_512satcvtintrin.h - AVX10_2_512SATCVT intrinsics -------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+#ifndef __IMMINTRIN_H
+#error                                                                         \
+    "Never use <avx10_2_512satcvtintrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __AVX10_2_512SATCVTINTRIN_H
+#define __AVX10_2_512SATCVTINTRIN_H
+
+#define _mm512_ipcvtnebf16_epi8(A)                                             \
+  ((__m512i)__builtin_ia32_vcvtnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epi8(W, U, A)                                  \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A),                     \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epi8(U, A)                                    \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epi8(A),                     \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvtnebf16_epu8(A)                                             \
+  ((__m512i)__builtin_ia32_vcvtnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvtnebf16_epu8(W, U, A)                                  \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A),                     \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvtnebf16_epu8(U, A)                                    \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvtnebf16_epu8(A),                     \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epi8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epu8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvtph_epi8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epi8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)(W), (__mmask32)(U),    \
+                                              _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epi8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epi8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)_mm512_setzero_si512(), \
+                                              (__mmask32)-1, (const int)R))
+
+#define _mm512_mask_ipcvt_roundph_epi8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask(                                 \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundph_epi8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtph2ibs512_mask((__v32hf)(__m512h)(A),           \
+                                              (__v32hu)_mm512_setzero_si512(), \
+                                              (__mmask32)(U), (const int)R))
+
+#define _mm512_ipcvtph_epu8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtph_epu8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask((__v32hf)(__m512h)(A),          \
+                                               (__v32hu)(W), (__mmask32)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtph_epu8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundph_epu8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      (const int)R))
+
+#define _mm512_mask_ipcvt_roundph_epu8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundph_epu8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtph2iubs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      (const int)R))
+
+#define _mm512_ipcvtps_epi8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epi8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)(W), (__mmask16)(U),    \
+                                              _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epi8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epi8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)_mm512_setzero_si512(), \
+                                              (__mmask16)-1, (const int)R))
+
+#define _mm512_mask_ipcvt_roundps_epi8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask(                                 \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundps_epi8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtps2ibs512_mask((__v16sf)(__m512)(A),            \
+                                              (__v16su)_mm512_setzero_si512(), \
+                                              (__mmask16)(U), (const int)R))
+
+#define _mm512_ipcvtps_epu8(A)                                                 \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvtps_epu8(W, U, A)                                      \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask((__v16sf)(__m512)(A),           \
+                                               (__v16su)(W), (__mmask16)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvtps_epu8(U, A)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvt_roundps_epu8(A, R)                                        \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,    \
+      (const int)R))
+
+#define _mm512_mask_ipcvt_roundps_epu8(W, U, A, R)                             \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)(W), (__mmask16)(U), (const int)R))
+
+#define _mm512_maskz_ipcvt_roundps_epu8(U, A, R)                               \
+  ((__m512i)__builtin_ia32_vcvtps2iubs512_mask(                                \
+      (__v16sf)(__m512)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)(U),   \
+      (const int)R))
+
+#define _mm512_ipcvttnebf16_epi8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162ibs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epi8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epi8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epi8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttnebf16_epu8(A)                                            \
+  ((__m512i)__builtin_ia32_vcvttnebf162iubs512((__v32bf)(__m512bh)(A)))
+
+#define _mm512_mask_ipcvttnebf16_epu8(W, U, A)                                 \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)(__m512i)(W)))
+
+#define _mm512_maskz_ipcvttnebf16_epu8(U, A)                                   \
+  ((__m512i)__builtin_ia32_selectw_512(                                        \
+      (__mmask32)(U), (__v32hi)_mm512_ipcvttnebf16_epu8(A),                    \
+      (__v32hi)_mm512_setzero_si512()))
+
+#define _mm512_ipcvttph_epi8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epi8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask((__v32hf)(__m512h)(A),          \
+                                               (__v32hu)(W), (__mmask32)(U),   \
+                                               _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epi8(U, A)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epi8(A, S)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      S))
+
+#define _mm512_mask_ipcvtt_roundph_epi8(W, U, A, S)                            \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
+
+#define _mm512_maskz_ipcvtt_roundph_epi8(U, A, S)                              \
+  ((__m512i)__builtin_ia32_vcvttph2ibs512_mask(                                \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      S))
+
+#define _mm512_ipcvttph_epu8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttph_epu8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask((__v32hf)(__m512h)(A),         \
+                                                (__v32hu)(W), (__mmask32)(U),  \
+                                                _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_maskz_ipcvttph_epu8(U, A)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_ipcvtt_roundph_epu8(A, S)                                       \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)-1,   \
+      S))
+
+#define _mm512_mask_ipcvtt_roundph_epu8(W, U, A, S)                            \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)(W), (__mmask32)(U), S))
+
+#define _mm512_maskz_ipcvtt_roundph_epu8(U, A, S)                              \
+  ((__m512i)__builtin_ia32_vcvttph2iubs512_mask(                               \
+      (__v32hf)(__m512h)(A), (__v32hu)_mm512_setzero_si512(), (__mmask32)(U),  \
+      S))
+
+#define _mm512_ipcvttps_epi8(A)                                                \
+  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask(                                \
+      (__v16sf)(__m512h)(A), (__v16su)_mm512_setzero_si512(), (__mmask16)-1,   \
+      _MM_FROUND_CUR_DIRECTION))
+
+#define _mm512_mask_ipcvttps_epi8(W, U, A)                                     \
+  ((__m512i)__builtin_ia32_vcvttps2ibs512_mask((__v16sf)(__m512h)(A),          \
+                 ...
[truncated]

Copy link
Contributor

@KanRobert KanRobert left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM for the fold table change

Copy link
Contributor

@phoebewang phoebewang left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM with one nit.

@FreddyLeaf FreddyLeaf merged commit 80721e0 into llvm:main Aug 6, 2024
6 of 7 checks passed
@FreddyLeaf FreddyLeaf deleted the avx10-satcvt branch August 6, 2024 11:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
backend:X86 clang:frontend Language frontend issues, e.g. anything involving "Sema" clang:headers Headers provided by Clang, e.g. for intrinsics clang Clang issues not falling into any other category llvm:ir llvm:mc Machine (object) code
Projects
None yet
Development

Successfully merging this pull request may close these issues.

5 participants