-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[libc] Update the memory helper functions for simd types #160174
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
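@llvm/pr-subscribers-libc Author: Joseph Huber (jhuber6) Changes: Summary: This unifies the interface to just be a bunch of `load` and `store` functions that optionally accept a mask / indices for gathers and scatters with masks. I had to rename this from `load` and `store` because it conflicts with the other version in `op_generic`. I might just work around that with a trait instead. Full diff: https://github.com/llvm/llvm-project/pull/160174.diff 3 Files Affected: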
diff --git a/libc/src/__support/CPP/simd.h b/libc/src/__support/CPP/simd.h
index d2a5b17fa4b9f..5e25084cc35f1 100644
--- a/libc/src/__support/CPP/simd.h
+++ b/libc/src/__support/CPP/simd.h
@@ -287,34 +287,70 @@ LIBC_INLINE constexpr static T hmax(simd<T, N> v) {
}
// Accessor helpers.
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_unaligned(const void *ptr) {
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE T constexpr static simd_load(const void *ptr) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
T tmp;
- __builtin_memcpy(&tmp, ptr, sizeof(T));
+ __builtin_memcpy(&tmp, reinterpret_cast<const T *>(ptr), sizeof(T));
return tmp;
}
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T load_aligned(const void *ptr) {
- return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T)));
-}
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_unaligned(T v, void *ptr) {
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void simd_store(T v, void *ptr) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
__builtin_memcpy(ptr, &v, sizeof(T));
}
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T store_aligned(T v, void *ptr) {
- store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T)));
-}
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T
-masked_load(simd<bool, simd_size_v<T>> m, void *ptr,
- T passthru = internal::poison<simd_element_type<T>>()) {
- return __builtin_masked_load(m, ptr, passthru);
-}
-template <typename T, internal::enable_if_simd_t<T> = 0>
-LIBC_INLINE T masked_store(simd<bool, simd_size_v<T>> m, T v, void *ptr) {
- __builtin_masked_store(
- m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static T
+simd_load(simd<bool, simd_size_v<T>> mask, const void *ptr,
+ const T passthru = internal::poison<T>()) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ return __builtin_masked_load(mask, reinterpret_cast<const T *>(ptr),
+ passthru);
+}
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void simd_store(simd<bool, simd_size_v<T>> mask,
+ T v, void *ptr) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ __builtin_masked_store(mask, v, reinterpret_cast<T *>(ptr));
+}
+template <typename T, bool Aligned = false, typename Idx,
+ internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static T simd_load(simd<bool, simd_size_v<T>> mask,
+ Idx idx, void *base) {
+ if constexpr (Aligned)
+ base = __builtin_assume_aligned(base, alignof(T));
+ return __builtin_masked_gather(
+ mask, idx, reinterpret_cast<simd_element_type_t<T> *>(base));
+}
+template <typename T, bool Aligned = false, typename Idx,
+ internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void simd_store(simd<bool, simd_size_v<T>> mask,
+ Idx idx, T v, void *base) {
+ if constexpr (Aligned)
+ base = __builtin_assume_aligned(base, alignof(T));
+ __builtin_masked_scatter(mask, idx, v,
+ reinterpret_cast<simd_element_type_t<T> *>(base));
+}
+
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static T
+simd_load_expand(simd<bool, simd_size_v<T>> mask, const void *ptr,
+ const T passthru = internal::poison<T>()) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ return __builtin_masked_expand_load(mask, reinterpret_cast<const T *>(ptr),
+ passthru);
+}
+template <typename T, bool Aligned = false, internal::enable_if_simd_t<T> = 0>
+LIBC_INLINE constexpr static void
+simd_store_compress(simd<bool, simd_size_v<T>> mask, T v, void *ptr) {
+ if constexpr (Aligned)
+ ptr = __builtin_assume_aligned(ptr, alignof(T));
+ __builtin_masked_compress_store(mask, v, reinterpret_cast<T *>(ptr));
}
// Construction helpers.
diff --git a/libc/src/string/memory_utils/generic/inline_strlen.h b/libc/src/string/memory_utils/generic/inline_strlen.h
index 5e553e301d4da..688462d744085 100644
--- a/libc/src/string/memory_utils/generic/inline_strlen.h
+++ b/libc/src/string/memory_utils/generic/inline_strlen.h
@@ -32,14 +32,14 @@ string_length(const char *src) {
const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>(
__builtin_align_down(src, alignment));
- cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned);
+ cpp::simd<char> chars = cpp::simd_load<cpp::simd<char>, true>(aligned);
cpp::simd_mask<char> mask = chars == null_byte;
size_t offset = src - reinterpret_cast<const char *>(aligned);
if (cpp::any_of(shift_mask(mask, offset)))
return cpp::find_first_set(shift_mask(mask, offset));
for (;;) {
- cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned);
+ cpp::simd<char> chars = cpp::simd_load<cpp::simd<char>, true>(++aligned);
cpp::simd_mask<char> mask = chars == null_byte;
if (cpp::any_of(mask))
return (reinterpret_cast<const char *>(aligned) - src) +
diff --git a/libc/test/src/__support/CPP/simd_test.cpp b/libc/test/src/__support/CPP/simd_test.cpp
index c8f34df8ab028..ba9940d4d55df 100644
--- a/libc/test/src/__support/CPP/simd_test.cpp
+++ b/libc/test/src/__support/CPP/simd_test.cpp
@@ -86,3 +86,63 @@ TEST(LlvmLibcSIMDTest, SplitConcat) {
cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
EXPECT_TRUE(cpp::all_of(n == ~0));
}
+
+TEST(LlvmLibcSIMDTest, LoadStore) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+ cpp::simd<int> v1 = cpp::splat(1);
+ cpp::simd_store<cpp::simd<int>>(v1, buf);
+ cpp::simd<int> v2 = cpp::simd_load<cpp::simd<int>>(buf);
+
+ EXPECT_TRUE(cpp::all_of(v1 == 1));
+ EXPECT_TRUE(cpp::all_of(v2 == 1));
+
+ cpp::simd<int> v3 = cpp::splat(2);
+ cpp::simd_store<cpp::simd<int>, true>(v3, buf);
+ cpp::simd<int> v4 = cpp::simd_load<cpp::simd<int>, true>(buf);
+
+ EXPECT_TRUE(cpp::all_of(v3 == 2));
+ EXPECT_TRUE(cpp::all_of(v4 == 2));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedLoadStore) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+ cpp::simd<int> m = cpp::iota(0) % 2 == 0;
+
+ cpp::simd<int> v1 = cpp::splat(1);
+ cpp::simd_store<cpp::simd<int>>(m, v1, buf);
+ cpp::simd<int> v2 = cpp::simd_load<cpp::simd<int>>(m, buf);
+
+ EXPECT_TRUE(cpp::all_of((v2 == 1) == m));
+}
+
+TEST(LlvmLibcSIMDTest, MaskedCompressExpand) {
+ constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
+
+ cpp::simd<int> m1 = cpp::iota(0) % 2 == 0;
+ cpp::simd<int> m2 = 1;
+
+ cpp::simd<int> v1 = cpp::iota(1);
+ cpp::simd_store_compress<cpp::simd<int>>(m2, v1, buf);
+ cpp::simd<int> v2 = cpp::simd_load_expand<cpp::simd<int>>(m1, buf);
+
+ EXPECT_TRUE(cpp::all_of(!m1 || v2 <= SIZE / 2));
+}
+
+TEST(LlvmLibcSIMDTest, GatherScatter) {
+ constexpr int SIZE = cpp::simd_size_v<cpp::simd<int>>;
+ alignas(alignof(cpp::simd<int>)) int buf[SIZE];
+
+ cpp::simd<int> m = cpp::iota(1);
+ cpp::simd<int> idx = cpp::iota(0);
+ cpp::simd<int> v1 = cpp::splat(1);
+ cpp::simd_store<cpp::simd<int>>(m, idx, v1, buf);
+ cpp::simd<int> v2 = cpp::simd_load<cpp::simd<int>>(m, idx, buf);
+
+ EXPECT_TRUE(cpp::all_of(v1 == 1));
+ EXPECT_TRUE(cpp::all_of(v2 == 1));
+}
|
b7bfb2e
to
f728642
Compare
Bots haven't picked up the new builtins yet, will re-run later. |
f728642
to
ef62d91
Compare
libc/src/__support/CPP/simd.h
Outdated
T tmp; | ||
__builtin_memcpy(&tmp, ptr, sizeof(T)); | ||
__builtin_memcpy(&tmp, reinterpret_cast<const T *>(ptr), sizeof(T)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since this is clang-only, use `__builtin_memcpy_inline`.
Semantically, `__builtin_memcpy` is a libcall wrapper while `__builtin_memcpy_inline` is the intrinsic.
Although in fixed-size cases, they do behave similarly.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
True, that's best since it's a constant size and we can avoid any potential memory calls.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with a minor comment.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
a80910b
to
0475808
Compare
Summary: This unifies the interface to just be a bunch of `load` and `store` functions that optionally accept a mask / indices for gathers and scatters with masks. I had to rename this from `load` and `store` because it conflicts with the other version in `op_generic`. I might just work around that with a trait instead. inline
0475808
to
442156b
Compare
Summary: This unifies the interface to just be a bunch of `load` and `store` functions that optionally accept a mask / indices for gathers and scatters with masks. I had to rename this from `load` and `store` because it conflicts with the other version in `op_generic`. I might just work around that with a trait instead.
Summary:
This unifies the interface to just be a bunch of `load` and `store`
functions that optionally accept a mask / indices for gathers and
scatters with masks.
I had to rename this from `load` and `store` because it conflicts with
the other version in `op_generic`. I might just work around that with a
trait instead.