diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index d2a5b17fa4b9f..422d2f4c8433d 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -287,34 +287,88 @@ LIBC_INLINE constexpr static T hmax(simd v) {
 }
 
 // Accessor helpers.
-template = 0>
-LIBC_INLINE T load_unaligned(const void *ptr) {
+// Loads a T from `ptr` with an inlined memcpy of sizeof(T) bytes. When
+// `aligned` is true, `ptr` is assumed to be aligned to alignof(T).
+template
+LIBC_INLINE constexpr static T load(const void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
   T tmp;
-  __builtin_memcpy(&tmp, ptr, sizeof(T));
+  __builtin_memcpy_inline(
+      &tmp, reinterpret_cast *>(ptr), sizeof(T));
   return tmp;
 }
+// Stores `v` to `ptr` with an inlined memcpy of sizeof(T) bytes. When
+// `aligned` is true, `ptr` is assumed to be aligned to alignof(T).
 template = 0>
-LIBC_INLINE T load_aligned(const void *ptr) {
-  return load_unaligned(__builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store(T v, void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_memcpy_inline(ptr, &v, sizeof(T));
 }
+// Forwards `mask`, `ptr` and `passthru` to __builtin_masked_load. When
+// `aligned` is true, `ptr` is assumed to be aligned to alignof(T).
 template = 0>
-LIBC_INLINE T store_unaligned(T v, void *ptr) {
-  __builtin_memcpy(ptr, &v, sizeof(T));
+LIBC_INLINE constexpr static T
+load_masked(simd> mask, const void *ptr,
+            T passthru = internal::poison(), bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  return __builtin_masked_load(
+      mask, reinterpret_cast *>(ptr), passthru);
 }
+// Forwards `mask`, `v` and `ptr` to __builtin_masked_store. When `aligned` is
+// true, `ptr` is assumed to be aligned to alignof(T).
 template = 0>
-LIBC_INLINE T store_aligned(T v, void *ptr) {
-  store_unaligned(v, __builtin_assume_aligned(ptr, alignof(T)));
+LIBC_INLINE constexpr static void store_masked(simd> mask,
+                                               T v, void *ptr,
+                                               bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_masked_store(mask, v,
+                         reinterpret_cast *>(ptr));
+}
+// Forwards `mask`, `idx` and `base` to __builtin_masked_gather. When `aligned`
+// is true, `base` is assumed to be aligned to alignof(T).
+template = 0>
+LIBC_INLINE constexpr static T gather(simd> mask, Idx idx,
+                                      const void *base, bool aligned = false) {
+  if (aligned)
+    base = __builtin_assume_aligned(base, alignof(T));
+  return __builtin_masked_gather(
+      mask, idx, reinterpret_cast *>(base));
+}
+// Forwards `mask`, `idx`, `v` and `base` to __builtin_masked_scatter. When
+// `aligned` is true, `base` is assumed to be aligned to alignof(T).
+template = 0>
+LIBC_INLINE constexpr static void scatter(simd> mask,
+                                          Idx idx, T v, void *base,
+                                          bool aligned = false) {
+  if (aligned)
+    base = __builtin_assume_aligned(base, alignof(T));
+  __builtin_masked_scatter(mask, idx, v,
+                           reinterpret_cast *>(base));
 }
+// Forwards `mask`, `ptr` and `passthru` to __builtin_masked_expand_load. When
+// `aligned` is true, `ptr` is assumed to be aligned to alignof(T).
 template = 0>
-LIBC_INLINE T
-masked_load(simd> m, void *ptr,
-            T passthru = internal::poison>()) {
-  return __builtin_masked_load(m, ptr, passthru);
+LIBC_INLINE constexpr static T
+expand(simd> mask, const void *ptr,
+       T passthru = internal::poison(), bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  return __builtin_masked_expand_load(
+      mask, reinterpret_cast *>(ptr), passthru);
 }
+// Forwards `mask`, `v` and `ptr` to __builtin_masked_compress_store. When
+// `aligned` is true, `ptr` is assumed to be aligned to alignof(T).
 template = 0>
-LIBC_INLINE T masked_store(simd> m, T v, void *ptr) {
-  __builtin_masked_store(
-      m, v, static_cast(__builtin_assume_aligned(ptr, alignof(T))));
+LIBC_INLINE constexpr static void compress(simd> mask, T v,
+                                           void *ptr, bool aligned = false) {
+  if (aligned)
+    ptr = __builtin_assume_aligned(ptr, alignof(T));
+  __builtin_masked_compress_store(
+      mask, v, reinterpret_cast *>(ptr));
 }
 
 // Construction helpers.
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 5e553e301d4da..d7435afb03719 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -32,14 +32,15 @@ string_length(const char *src) {
   const cpp::simd *aligned = reinterpret_cast *>(
       __builtin_align_down(src, alignment));
 
-  cpp::simd chars = cpp::load_aligned>(aligned);
+  cpp::simd chars = cpp::load>(aligned, /*aligned=*/true);
   cpp::simd_mask mask = chars == null_byte;
   size_t offset = src - reinterpret_cast(aligned);
   if (cpp::any_of(shift_mask(mask, offset)))
     return cpp::find_first_set(shift_mask(mask, offset));
 
   for (;;) {
-    cpp::simd chars = cpp::load_aligned>(++aligned);
+    cpp::simd chars = cpp::load>(++aligned,
+                                 /*aligned=*/true);
     cpp::simd_mask mask = chars == null_byte;
     if (cpp::any_of(mask))
       return (reinterpret_cast(aligned) - src) +
diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp
index c8f34df8ab028..8bead8461d649 100644
--- a/libc/test/src/__support/CPP/simd_test.cpp
+++ b/libc/test/src/__support/CPP/simd_test.cpp
@@ -86,3 +86,69 @@ TEST(LlvmLibcSIMDTest, SplitConcat) {
   cpp::simd n = cpp::concat(c, c, c, c, c, c, c, c);
   EXPECT_TRUE(cpp::all_of(n == ~0));
 }
+
+// Stores a splat and loads it back, with the default and the aligned path.
+TEST(LlvmLibcSIMDTest, LoadStore) {
+  constexpr size_t SIZE = cpp::simd_size_v>;
+  alignas(alignof(cpp::simd)) int buf[SIZE];
+
+  cpp::simd v1 = cpp::splat(1);
+  cpp::store(v1, buf);
+  cpp::simd v2 = cpp::load>(buf);
+
+  EXPECT_TRUE(cpp::all_of(v1 == 1));
+  EXPECT_TRUE(cpp::all_of(v2 == 1));
+
+  cpp::simd v3 = cpp::splat(2);
+  cpp::store(v3, buf, /*aligned=*/true);
+  cpp::simd v4 = cpp::load>(buf, /*aligned=*/true);
+
+  EXPECT_TRUE(cpp::all_of(v3 == 2));
+  EXPECT_TRUE(cpp::all_of(v4 == 2));
+}
+
+// Masked store into a zeroed buffer, then a masked load with the same mask.
+TEST(LlvmLibcSIMDTest, MaskedLoadStore) {
+  constexpr size_t SIZE = cpp::simd_size_v>;
+  alignas(alignof(cpp::simd)) int buf[SIZE] = {0};
+
+  cpp::simd mask = cpp::iota(0) % 2 == 0;
+  cpp::simd v1 = cpp::splat(1);
+
+  cpp::store_masked>(mask, v1, buf);
+  cpp::simd v2 = cpp::load_masked>(mask, buf);
+
+  EXPECT_TRUE(cpp::all_of((v2 == 1) == mask));
+}
+
+// Scatters a splat through idx, then gathers it back with the same idx/mask.
+TEST(LlvmLibcSIMDTest, GatherScatter) {
+  constexpr size_t SIZE = cpp::simd_size_v>;
+  alignas(alignof(cpp::simd)) int buf[SIZE];
+
+  cpp::simd mask = cpp::iota(1);
+  cpp::simd idx = cpp::iota(0);
+  cpp::simd v1 = cpp::splat(1);
+
+  cpp::scatter>(mask, idx, v1, buf);
+  cpp::simd v2 = cpp::gather>(mask, idx, buf);
+
+  EXPECT_TRUE(cpp::all_of(v1 == 1));
+  EXPECT_TRUE(cpp::all_of(v2 == 1));
+}
+
+// Compress-stores iota(0) under mask_compress, then expands under mask_expand.
+TEST(LlvmLibcSIMDTest, MaskedCompressExpand) {
+  constexpr size_t SIZE = cpp::simd_size_v>;
+  alignas(alignof(cpp::simd)) int buf[SIZE] = {0};
+
+  cpp::simd mask_expand = cpp::iota(0) % 2 == 0;
+  cpp::simd mask_compress = 1;
+
+  cpp::simd v1 = cpp::iota(0);
+
+  cpp::compress>(mask_compress, v1, buf);
+  cpp::simd v2 = cpp::expand>(mask_expand, buf);
+
+  EXPECT_TRUE(cpp::all_of(!mask_expand || v2 <= SIZE / 2));
+}