diff --git a/libc/src/__support/CPP/CMakeLists.txt b/libc/src/__support/CPP/CMakeLists.txt index d9b86b4fd2973..a9cb67df0b427 100644 --- a/libc/src/__support/CPP/CMakeLists.txt +++ b/libc/src/__support/CPP/CMakeLists.txt @@ -224,4 +224,7 @@ add_header_library( simd HDRS simd.h + DEPENDS + .utility + .tuple ) diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h index 54fe70a6e9830..3c7e65acc3c0a 100644 --- a/libc/src/__support/CPP/simd.h +++ b/libc/src/__support/CPP/simd.h @@ -16,7 +16,9 @@ #include "hdr/stdint_proxy.h" #include "src/__support/CPP/algorithm.h" #include "src/__support/CPP/limits.h" +#include "src/__support/CPP/tuple.h" #include "src/__support/CPP/type_traits.h" +#include "src/__support/CPP/utility/integer_sequence.h" #include "src/__support/macros/attributes.h" #include "src/__support/macros/config.h" @@ -32,9 +34,6 @@ namespace cpp { namespace internal { -template -using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); - #if defined(LIBC_TARGET_CPU_HAS_AVX512F) template LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T); @@ -48,9 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T); template LIBC_INLINE constexpr size_t native_vector_size = 1; #endif -template LIBC_INLINE constexpr T poison() { - return __builtin_nondeterministic_value(T()); -} } // namespace internal // Type aliases. @@ -61,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]]; template using simd_mask = simd>; +namespace internal { + +template +using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT); + +template LIBC_INLINE constexpr T poison() { + return __builtin_nondeterministic_value(T()); +} + +template +LIBC_INLINE constexpr static cpp::simd +extend(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector( + x, x, (Indices < OriginalSize ? 
static_cast(Indices) : -1)...); +} + +template +LIBC_INLINE constexpr static auto extend(cpp::simd x) { + // Recursively resize an input vector to the target size, increasing its size + // by at most double the input size each step due to shufflevector limitation. + if constexpr (N == TargetSize) + return x; + else if constexpr (TargetSize <= 2 * N) + return extend(x, cpp::make_index_sequence{}); + else + return extend( + extend(x, cpp::make_index_sequence<2 * N>{})); +} + +template +LIBC_INLINE constexpr static cpp::simd +concat(cpp::simd x, cpp::simd y, cpp::index_sequence) { + constexpr size_t Size = cpp::max(N, M); + auto remap = [](size_t idx) -> int { + if (idx < N) + return static_cast(idx); + if (idx < N + M) + return static_cast((idx - N) + Size); + return -1; + }; + + // Widen both inputs to one common size, then shuffle using the remapped + // indices so that only lanes holding the original values are picked. + auto x_ext = extend(x); + auto y_ext = extend(y); + return __builtin_shufflevector(x_ext, y_ext, remap(Indices)...); +} + +template +LIBC_INLINE constexpr static cpp::simd +slice(cpp::simd x, cpp::index_sequence) { + return __builtin_shufflevector(x, x, (Offset + Indices)...); +} + +template +LIBC_INLINE constexpr static auto split(cpp::simd x) { + // Recursively splits the input vector by walking the variadic template list, + // increasing our current head each call. + auto result = cpp::make_tuple( + slice(x, cpp::make_index_sequence{})); + if constexpr (sizeof...(Tail) > 0) + return cpp::tuple_cat(result, split(x)); + else + return result; +} + +} // namespace internal + // Type trait helpers. template struct simd_size : cpp::integral_constant { @@ -273,6 +337,30 @@ LIBC_INLINE constexpr static simd select(simd m, simd x, return m ? x : y; } +// Shuffling helpers. 
+// Joins two vectors into one holding the lanes of x followed by those of y. +template +LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y) { + return internal::concat(x, y, cpp::make_index_sequence{}); +} +// Variadic form: joins x, y and every vector in rest, in argument order. +template +LIBC_INLINE constexpr static auto concat(cpp::simd x, cpp::simd y, + Rest... rest) { + auto xy = concat(x, y); + if constexpr (sizeof...(Rest) > 0) + return concat(xy, rest...); + else + return xy; +} +// Splits x into consecutive pieces of Sizes... lanes each and returns them +// together as a tuple. The sizes must add up to the lane count of x. +template +LIBC_INLINE constexpr static auto split(cpp::simd x) { + static_assert((... + Sizes) == N, "split sizes must sum to vector size"); + return internal::split(x); +} + // TODO: where expressions, scalar overloads, ABI types. } // namespace cpp diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp index 600bf65057b21..b4f5685e3b1d1 100644 --- a/libc/test/src/__support/CPP/simd_test.cpp +++ b/libc/test/src/__support/CPP/simd_test.cpp @@ -68,3 +68,19 @@ TEST(LlvmLibcSIMDTest, MaskOperations) { EXPECT_EQ(cpp::find_first_set(mask), 0); EXPECT_EQ(cpp::find_last_set(mask), 2); } + +TEST(LlvmLibcSIMDTest, SplitConcat) { + cpp::simd v{1, 1, 2, 2, 3, 3, 4, 4}; + auto [v1, v2, v3, v4] = cpp::split<2, 2, 2, 2>(v); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v1 == 1))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v2 == 2))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v3 == 3))); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(v4 == 4))); + + cpp::simd m = cpp::concat(v1, v2, v3, v4); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(m == v))); + + cpp::simd c(~0); + cpp::simd n = cpp::concat(c, c, c, c, c, c, c, c); + EXPECT_TRUE(cpp::all_of(cpp::simd_cast(n == ~0))); +}