Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions libc/src/__support/CPP/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -224,4 +224,7 @@ add_header_library(
simd
HDRS
simd.h
DEPENDS
.utility
.tuple
)
95 changes: 89 additions & 6 deletions libc/src/__support/CPP/simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@
#include "hdr/stdint_proxy.h"
#include "src/__support/CPP/algorithm.h"
#include "src/__support/CPP/limits.h"
#include "src/__support/CPP/tuple.h"
#include "src/__support/CPP/type_traits.h"
#include "src/__support/CPP/utility/integer_sequence.h"
#include "src/__support/macros/attributes.h"
#include "src/__support/macros/config.h"

Expand All @@ -32,9 +34,6 @@ namespace cpp {

namespace internal {

// Unsigned _BitInt type whose bit width is sizeof(T) * CHAR_BIT, i.e. an
// unsigned integer with exactly as many bits as the object representation of T.
template <typename T>
using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);

#if defined(LIBC_TARGET_CPU_HAS_AVX512F)
template <typename T>
LIBC_INLINE_VAR constexpr size_t native_vector_size = 64 / sizeof(T);
Expand All @@ -48,9 +47,6 @@ LIBC_INLINE_VAR constexpr size_t native_vector_size = 16 / sizeof(T);
template <typename T> LIBC_INLINE constexpr size_t native_vector_size = 1;
#endif

// Returns a value of type T produced by __builtin_nondeterministic_value.
// Per Clang's documentation the builtin yields a valid but unspecified value
// of the argument's type, so callers must not depend on its contents.
template <typename T> LIBC_INLINE constexpr T poison() {
return __builtin_nondeterministic_value(T());
}
} // namespace internal

// Type aliases.
Expand All @@ -61,6 +57,74 @@ using simd = T [[clang::ext_vector_type(N)]];
// Boolean vector with internal::native_vector_size<T> lanes, i.e. one bool
// lane for each lane of a native-width vector of element type T.
template <typename T>
using simd_mask = simd<bool, internal::native_vector_size<T>>;

namespace internal {

// Unsigned _BitInt type whose bit width is sizeof(T) * CHAR_BIT, i.e. an
// unsigned integer with exactly as many bits as the object representation of T.
template <typename T>
using get_as_integer_type_t = unsigned _BitInt(sizeof(T) * CHAR_BIT);

template <typename T> LIBC_INLINE constexpr T poison() {
  // Per Clang's documentation, __builtin_nondeterministic_value yields a valid
  // but unspecified value of the argument's type; callers must not depend on
  // what it contains.
  T unspecified = __builtin_nondeterministic_value(T());
  return unspecified;
}

template <typename T, size_t N, size_t OriginalSize, size_t... Indices>
LIBC_INLINE constexpr static cpp::simd<T, sizeof...(Indices)>
extend(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
  // Single-step resize: builds a vector with one lane per entry of Indices by
  // shuffling x with itself. An index below OriginalSize is used as-is (and,
  // since both shuffle operands are x, an index in [N, 2 * N) re-reads a lane
  // of x); any other index becomes -1, which __builtin_shufflevector treats as
  // a "don't care" lane. The callers visible in this file always pass an
  // OriginalSize equal to the number of indices, so they never select -1.
  using widened_t = cpp::simd<T, sizeof...(Indices)>;
  widened_t widened = __builtin_shufflevector(
      x, x, (Indices >= OriginalSize ? -1 : static_cast<int>(Indices))...);
  return widened;
}

template <typename T, size_t N, size_t TargetSize, size_t OriginalSize>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the value of OriginalSize is never used

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is just below,

    return extend<T, 2 * N, TargetSize, OriginalSize>(
        extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's used to recurse there, but I don't see how the value is used since that's just another call to the same function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function right above this is called. I guess I could call it 'do_extend' if you think that'd make it clearer, but it's just basic recursion until you reach the version of the function that doesn't expand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Never mind, renaming the function breaks everything; I remembered that I wrote it this way on purpose. It'll just need to be confusing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All good, in that case I'd say add a comment explaining that it's confusing but that's how it has to be and call it good.

LIBC_INLINE constexpr static auto extend(cpp::simd<T, N> x) {
// Widens x from N lanes to TargetSize lanes.
//
// NOTE: this one-argument overload deliberately shares its name with the
// two-argument extend overload that takes a cpp::index_sequence. The calls
// below that pass an index_sequence resolve to that two-argument overload and
// perform a single shuffle; only the one-argument call recurses into this
// function. OriginalSize is not read in this body; it is only forwarded
// unchanged to the recursive call.
//
// Recursively resize an input vector to the target size, increasing its size
// by at most double the input size each step due to shufflevector limitation.
if constexpr (N == TargetSize)
// Already the requested width; return the input untouched.
return x;
else if constexpr (TargetSize <= 2 * N)
// The target is within one doubling, so a single shuffle reaches it.
return extend<T, N, TargetSize>(x, cpp::make_index_sequence<TargetSize>{});
else
// Double the width with one shuffle, then recurse on the 2 * N lane result.
return extend<T, 2 * N, TargetSize, OriginalSize>(
extend<T, N, 2 * N>(x, cpp::make_index_sequence<2 * N>{}));
}

template <typename T, size_t N, size_t M, size_t... Indices>
LIBC_INLINE constexpr static cpp::simd<T, N + M>
concat(cpp::simd<T, N> x, cpp::simd<T, M> y, cpp::index_sequence<Indices...>) {
  // The final shuffle needs both operands to have the same lane count, so
  // each input is first widened to the larger of the two sizes.
  constexpr size_t Width = cpp::max(N, M);
  auto lhs = extend<T, N, Width, N>(x);
  auto rhs = extend<T, M, Width, M>(y);

  // Translates a lane of the result into a shuffle index over (lhs, rhs):
  // the first N lanes come straight from lhs, the next M lanes come from rhs
  // (whose lanes start at index Width in the shuffle's combined numbering),
  // and anything past N + M is a "don't care" lane.
  auto source_lane = [](size_t lane) -> int {
    return lane < N       ? static_cast<int>(lane)
           : lane < N + M ? static_cast<int>((lane - N) + Width)
                          : -1;
  };

  // Only lanes holding original values of x and y are selected; the padding
  // lanes introduced by extend are never read.
  return __builtin_shufflevector(lhs, rhs, source_lane(Indices)...);
}

template <typename T, size_t N, size_t Count, size_t Offset, size_t... Indices>
LIBC_INLINE constexpr static cpp::simd<T, Count>
slice(cpp::simd<T, N> x, cpp::index_sequence<Indices...>) {
  // For each i in Indices, the corresponding result lane is lane Offset + i of
  // x. The visible caller passes make_index_sequence<Count>, which selects the
  // contiguous window of Count lanes starting at Offset.
  cpp::simd<T, Count> window =
      __builtin_shufflevector(x, x, (Offset + Indices)...);
  return window;
}

template <typename T, size_t N, size_t Offset, size_t Head, size_t... Tail>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a comment explaining that first is a tuple of one simd vector instead of a tuple containing the values in that simd vector? I misunderstood this the first time I read it.

LIBC_INLINE constexpr static auto split(cpp::simd<T, N> x) {
// Recursively splits the input vector by walking the variadic template list,
// increasing our current head each call.
//
// NOTE: 'result' is a one-element tuple whose single element is a whole simd
// vector of Head lanes (lanes [Offset, Offset + Head) of x). It is NOT a tuple
// holding the individual lane values of that vector.
auto result = cpp::make_tuple(
slice<T, N, Head, Offset>(x, cpp::make_index_sequence<Head>{}));
if constexpr (sizeof...(Tail) > 0)
// More sizes remain: append the pieces that start right after this one.
return cpp::tuple_cat(result, split<T, N, Offset + Head, Tail...>(x));
else
// Last requested size: the one-element tuple is all that is left to return.
return result;
}

} // namespace internal

// Type trait helpers.
template <typename T>
struct simd_size : cpp::integral_constant<size_t, __builtin_vectorelements(T)> {
Expand Down Expand Up @@ -273,6 +337,25 @@ LIBC_INLINE constexpr static simd<T, N> select(simd<bool, N> m, simd<T, N> x,
return m ? x : y;
}

// Shuffling helpers.
template <typename T, size_t N, size_t M>
LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y) {
  // Joins two vectors into one of N + M lanes: the lanes of x followed by the
  // lanes of y. One shuffle index is generated per result lane.
  using result_lanes = make_index_sequence<N + M>;
  return internal::concat(x, y, result_lanes{});
}
template <typename T, size_t N, size_t M, typename... Rest>
LIBC_INLINE constexpr static auto concat(cpp::simd<T, N> x, cpp::simd<T, M> y,
                                         Rest... rest) {
  // Left fold over the argument list: join the first two vectors, then keep
  // joining the running result with whatever arguments remain.
  if constexpr (sizeof...(Rest) > 0)
    return concat(concat(x, y), rest...);
  else
    return concat(x, y);
}
template <size_t... Sizes, typename T, size_t N> auto split(cpp::simd<T, N> x) {
static_assert((... + Sizes) == N, "split sizes must sum to vector size");
return internal::split<T, N, 0, Sizes...>(x);
}

// TODO: where expressions, scalar overloads, ABI types.

} // namespace cpp
Expand Down
16 changes: 16 additions & 0 deletions libc/test/src/__support/CPP/simd_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,19 @@ TEST(LlvmLibcSIMDTest, MaskOperations) {
EXPECT_EQ(cpp::find_first_set(mask), 0);
EXPECT_EQ(cpp::find_last_set(mask), 2);
}

// Checks that split and concat are inverses of each other on an 8-lane vector,
// and that concat can build a wide vector out of many 1-lane vectors.
TEST(LlvmLibcSIMDTest, SplitConcat) {
// Each pair of adjacent lanes holds the same value, so every 2-lane piece
// produced by the split below should be uniform.
cpp::simd<char, 8> v{1, 1, 2, 2, 3, 3, 4, 4};
auto [v1, v2, v3, v4] = cpp::split<2, 2, 2, 2>(v);
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v1 == 1)));
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v2 == 2)));
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v3 == 3)));
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(v4 == 4)));

// Concatenating the pieces in order must reproduce the original vector.
cpp::simd<char, 8> m = cpp::concat(v1, v2, v3, v4);
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(m == v)));

// Eight 1-lane inputs exercise the variadic concat overload and the
// widening of small vectors; every lane of the result should equal ~0.
cpp::simd<char, 1> c(~0);
cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
EXPECT_TRUE(cpp::all_of(cpp::simd_cast<bool>(n == ~0)));
}
Loading