From f5f89201bf303730fa1ba8d88fdcacee2e6dd1cb Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Mon, 8 Sep 2025 15:10:30 -0500 Subject: [PATCH 1/5] [libc] Implement vector 'split' and 'concat' routines Summary: This provides some helpers for the split and concatenation routines for changing the size of an existing vector. This includes a simple tuple type to do the splitting. The tuple doesn't support structured bindings yet. The concat function is more limited than what would be ideal, but the shufflevector builtin requires things of equivalent sizes and I didn't think it was worth wrangling with that just yet. --- libc/src/__support/CPP/CMakeLists.txt | 3 + libc/src/__support/CPP/simd.h | 74 +++++++++++++++++++++++ libc/test/src/__support/CPP/simd_test.cpp | 26 ++++++++ 3 files changed, 103 insertions(+) diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index d9b86b4fd2973..a9cb67df0b427 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -224,4 +224,7 @@ add_header_library( simd HDRS simd.h + DEPENDS + .utility + .tuple ) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 54fe70a6e9830..70524dff19df2 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -16,7 +16,9 @@ #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/limits.h" +#include "src/__support/CPP/tuple.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/CPP/utility/integer_sequence.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" @@ -51,6 +53,7 @@ template LIBC_INLINE constexpr size_t native_vector_size = 1; template LIBC_INLINE constexpr T poison() { return __builtin_nondeterministic_value(T()); } + } // namespace internal // Type aliases. @@ -273,6 +276,77 @@ LIBC_INLINE constexpr static simd select(simd m, simd x, return m ? x : y; } +namespace internal { +template +LIBC_INLINE constexpr static cpp::simd +extend(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector(x, x, (I < O ? static_cast(I) : -1)...); +} +template +LIBC_INLINE constexpr static auto extend(cpp::simd x) { + if constexpr (N == M) + return x; + else if constexpr (M <= 2 * N) + return extend(x, cpp::make_index_sequence{}); + else + return extend( + extend(x, cpp::make_index_sequence<2 * N>{})); +} +template +LIBC_INLINE constexpr static cpp::simd +concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { + constexpr size_t L = (N > M ? N : M); + + auto x_ext = extend(x); + auto y_ext = extend(y); + + auto remap = [](size_t idx) -> int { + if (idx < N) + return static_cast(idx); + if (idx < N + M) + return static_cast((idx - N) + L); + return -1; + }; + + return __builtin_shufflevector(x_ext, y_ext, remap(I)...); +} + +template +LIBC_INLINE constexpr static cpp::simd +slice(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector(x, x, (Offset + I)...); +} +template +LIBC_INLINE constexpr static auto split(cpp::simd x) { + auto first = cpp::make_tuple( + slice(x, cpp::make_index_sequence{})); + if constexpr (sizeof...(Tail) > 0) + return cpp::tuple_cat(first, split(x)); + else + return first; +} + +} // namespace internal + +// Shuffling helpers. +template +LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y) { + return internal::concat(x, y, make_index_sequence{}); +} +template +LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y, + Rest... rest) { + auto xy = concat(x, y); + if constexpr (sizeof...(Rest)) + return concat(xy, rest...); + else + return xy; +} +template auto split(cpp::simd x) { + static_assert((... + Sizes) == N, "split sizes must sum to vector size"); + return internal::split(x); +} + // TODO: where expressions, scalar overloads, ABI types. } // namespace cpp diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp index 600bf65057b21..72ec10d12cb5b 100644 --- a/libc/test/src/__support/CPP/simd_test.cpp +++ b/libc/test/src/__support/CPP/simd_test.cpp @@ -68,3 +68,29 @@ TEST(LlvmLibcSIMDTest, MaskOperations) { EXPECT_EQ(cpp::find_first_set(mask), 0); EXPECT_EQ(cpp::find_last_set(mask), 2); } + +TEST(LlvmLibcSIMDTest, SplitConcat) { + cpp::simd v(1); + auto [v1, v2, v3, v4] = cpp::split<2, 2, 2, 2>(v); + static_assert(cpp::simd_size_v == 2 && + cpp::simd_size_v == 2 && + cpp::simd_size_v == 2 && + cpp::simd_size_v == 2, + "invalid size"); + + v1 = cpp::simd(1); + v2 = cpp::simd(2); + v3 = cpp::simd(3); + v4 = cpp::simd(4); + cpp::simd m = cpp::concat(v1, v2, v3, v4); + static_assert(cpp::simd_size_v == 8, "invalid size"); + + cpp::simd c = {1, 1, 2, 2, 3, 3, 4, 4}; + for (int i = 0; i < 8; ++i) + EXPECT_EQ(c[i], m[i]); + + cpp::simd c1('\0'); + cpp::simd c2('\0'); + cpp::simd c3 = cpp::concat(c1, c2); + static_assert(cpp::simd_size_v == 9, "invalid size"); +} From 0b9c4e7c1001e192242cb43455a093f03467e8e2 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Sep 2025 12:07:04 -0500 Subject: [PATCH 2/5] cleanup and comments --- libc/src/__support/CPP/simd.h | 127 ++++++++++++++++++---------------- 1 file changed, 68 insertions(+), 59 deletions(-) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 70524dff19df2..d69df2bb92fe5 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -34,9 +34,6 @@ namespace cpp { namespace internal { -template -using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); - #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); @@ -50,10 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); template LIBC_INLINE constexpr size_t native_vector_size = 1; #endif -template LIBC_INLINE constexpr T poison() { - return __builtin_nondeterministic_value(T()); -} - } // namespace internal // Type aliases. @@ -64,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]]; template using simd_mask = simd>; +namespace internal { + +template +using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); + +template LIBC_INLINE constexpr T poison() { + return __builtin_nondeterministic_value(T()); +} + +template +LIBC_INLINE constexpr static cpp::simd +extend(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector( + x, x, (Indices < OriginalSize ? static_cast(Indices) : -1)...); +} + +template +LIBC_INLINE constexpr static auto extend(cpp::simd x) { + // Recursively resize an input vector to the target size, increasing its size + // by at most double the input size each step. + if constexpr (N == TargetSize) + return x; + else if constexpr (TargetSize <= 2 * N) + return extend(x, cpp::make_index_sequence{}); + else + return extend( + extend(x, cpp::make_index_sequence<2 * N>{})); +} + +template +LIBC_INLINE constexpr static cpp::simd +concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { + constexpr size_t Length = (N > M ? N : M); + auto remap = [](size_t idx) -> int { + if (idx < N) + return static_cast(idx); + if (idx < N + M) + return static_cast((idx - N) + Length); + return -1; + }; + + // Extend the input vectors until they are the same size, then use the indices + // to shuffle in only the indices that correspond to the original values. + auto x_ext = extend(x); + auto y_ext = extend(y); + return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...); +} + +template +LIBC_INLINE constexpr static cpp::simd +slice(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector(x, x, (Offset + Indices)...); +} + +template +LIBC_INLINE constexpr static auto split(cpp::simd x) { + // Recursively splits the input vector by walking the variadic template list, + // increasing our current head each call. + auto first = cpp::make_tuple( + slice(x, cpp::make_index_sequence{})); + if constexpr (sizeof...(Tail) > 0) + return cpp::tuple_cat(first, split(x)); + else + return first; +} + +} // namespace internal + // Type trait helpers. template struct simd_size : cpp::integral_constant { @@ -276,58 +337,6 @@ LIBC_INLINE constexpr static simd select(simd m, simd x, return m ? x : y; } -namespace internal { -template -LIBC_INLINE constexpr static cpp::simd -extend(cpp::simd x, cpp::index_sequence) { - return __builtin_shufflevector(x, x, (I < O ? static_cast(I) : -1)...); -} -template -LIBC_INLINE constexpr static auto extend(cpp::simd x) { - if constexpr (N == M) - return x; - else if constexpr (M <= 2 * N) - return extend(x, cpp::make_index_sequence{}); - else - return extend( - extend(x, cpp::make_index_sequence<2 * N>{})); -} -template -LIBC_INLINE constexpr static cpp::simd -concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { - constexpr size_t L = (N > M ? N : M); - - auto x_ext = extend(x); - auto y_ext = extend(y); - - auto remap = [](size_t idx) -> int { - if (idx < N) - return static_cast(idx); - if (idx < N + M) - return static_cast((idx - N) + L); - return -1; - }; - - return __builtin_shufflevector(x_ext, y_ext, remap(I)...); -} - -template -LIBC_INLINE constexpr static cpp::simd -slice(cpp::simd x, cpp::index_sequence) { - return __builtin_shufflevector(x, x, (Offset + I)...); -} -template -LIBC_INLINE constexpr static auto split(cpp::simd x) { - auto first = cpp::make_tuple( - slice(x, cpp::make_index_sequence{})); - if constexpr (sizeof...(Tail) > 0) - return cpp::tuple_cat(first, split(x)); - else - return first; -} - -} // namespace internal - // Shuffling helpers. template LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y) { From 1d3c8fd6e4ddf8d7b1e3044d8fc41a6fd878cc5b Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Sep 2025 12:18:38 -0500 Subject: [PATCH 3/5] Use max --- libc/src/__support/CPP/simd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index d69df2bb92fe5..1c3a3f0ee8d6c 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -89,19 +89,19 @@ LIBC_INLINE constexpr static auto extend(cpp::simd x) { template LIBC_INLINE constexpr static cpp::simd concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { - constexpr size_t Length = (N > M ? N : M); + constexpr size_t Size = cpp::max(N, M); auto remap = [](size_t idx) -> int { if (idx < N) return static_cast(idx); if (idx < N + M) - return static_cast((idx - N) + Length); + return static_cast((idx - N) + Size); return -1; }; // Extend the input vectors until they are the same size, then use the indices // to shuffle in only the indices that correspond to the original values. - auto x_ext = extend(x); - auto y_ext = extend(y); + auto x_ext = extend(x); + auto y_ext = extend(y); return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...); } From 944418d3283ad5f55fb34a4c97544bdfda793233 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Sep 2025 15:36:58 -0500 Subject: [PATCH 4/5] comments --- libc/src/__support/CPP/simd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 1c3a3f0ee8d6c..3c7e65acc3c0a 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -76,7 +76,7 @@ extend(cpp::simd x, cpp::index_sequence) { template LIBC_INLINE constexpr static auto extend(cpp::simd x) { // Recursively resize an input vector to the target size, increasing its size - // by at most double the input size each step. + // by at most double the input size each step due to shufflevector limitation. if constexpr (N == TargetSize) return x; else if constexpr (TargetSize <= 2 * N) @@ -115,12 +115,12 @@ template LIBC_INLINE constexpr static auto split(cpp::simd x) { // Recursively splits the input vector by walking the variadic template list, // increasing our current head each call. - auto first = cpp::make_tuple( + auto result = cpp::make_tuple( slice(x, cpp::make_index_sequence{})); if constexpr (sizeof...(Tail) > 0) - return cpp::tuple_cat(first, split(x)); + return cpp::tuple_cat(result, split(x)); else - return first; + return result; } } // namespace internal From b0d3b7193810950003d6774c1068965cd2acff16 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Wed, 10 Sep 2025 16:15:15 -0500 Subject: [PATCH 5/5] Better test --- libc/test/src/__support/CPP/simd_test.cpp | 30 ++++++++--------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp index 72ec10d12cb5b..b4f5685e3b1d1 100644 --- a/libc/test/src/__support/CPP/simd_test.cpp +++ b/libc/test/src/__support/CPP/simd_test.cpp @@ -70,27 +70,17 @@ TEST(LlvmLibcSIMDTest, MaskOperations) { } TEST(LlvmLibcSIMDTest, SplitConcat) { - cpp::simd v(1); + cpp::simd v{1, 1, 2, 2, 3, 3, 4, 4}; auto [v1, v2, v3, v4] = cpp::split<2, 2, 2, 2>(v); - static_assert(cpp::simd_size_v == 2 && - cpp::simd_size_v == 2 && - cpp::simd_size_v == 2 && - cpp::simd_size_v == 2, - "invalid size"); - - v1 = cpp::simd(1); - v2 = cpp::simd(2); - v3 = cpp::simd(3); - v4 = cpp::simd(4); - cpp::simd m = cpp::concat(v1, v2, v3, v4); - static_assert(cpp::simd_size_v == 8, "invalid size"); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v1 == 1))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v2 == 2))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v3 == 3))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v4 == 4))); - cpp::simd c = {1, 1, 2, 2, 3, 3, 4, 4}; - for (int i = 0; i < 8; ++i) - EXPECT_EQ(c[i], m[i]); + cpp::simd m = cpp::concat(v1, v2, v3, v4); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(m == v))); - cpp::simd c1('\0'); - cpp::simd c2('\0'); - cpp::simd c3 = cpp::concat(c1, c2); - static_assert(cpp::simd_size_v == 9, "invalid size"); + cpp::simd c(~0); + cpp::simd n = cpp::concat(c, c, c, c, c, c, c, c); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(n == ~0))); }