Skip to content

Commit ef62d91

Browse files
committed
[libc] Update the memory helper functions for simd types
Summary: This unifies the interface into a set of `load` and `store` functions, with variants that optionally accept a mask and/or indices for masked accesses, gathers, and scatters. I had to rename these from `load` and `store` because they conflict with the other version in `op_generic`. I might just work around that with a trait instead.
1 parent 89d79b6 commit ef62d91

File tree

3 files changed

+115
-18
lines changed

3 files changed

+115
-18
lines changed

libc/src/__support/CPP/simd.h

Lines changed: 50 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -287,34 +287,68 @@ LIBC_INLINE constexpr static T hmax(simd<T, N> v) {
287287
}
288288

289289
// Accessor helpers.
290-
template <typename T, internal::enable_if_simd_t<T> = 0>
291-
LIBC_INLINE T load_unaligned(const void *ptr) {
290+
// Loads a value of type T from `ptr` by copying sizeof(T) bytes into a local
// temporary and returning it. When `aligned` is true, `ptr` is first passed
// through __builtin_assume_aligned with alignof(T); the default (false) makes
// no alignment assumption.
// In this diff view the line marked `-` is the previous memcpy call and the
// line marked `+` is its replacement, which casts `ptr` to `const T *`.
// NOTE(review): unlike the other accessor helpers in this hunk, this template
// is not constrained with internal::enable_if_simd_t<T> -- confirm that this
// is intentional.
template <typename T>
291+
LIBC_INLINE T constexpr static load(const void *ptr, bool aligned = false) {
292+
if (aligned)
293+
ptr = __builtin_assume_aligned(ptr, alignof(T));
292294
T tmp;
293-
__builtin_memcpy(&tmp, ptr, sizeof(T));
295+
__builtin_memcpy(&tmp, reinterpret_cast<const T *>(ptr), sizeof(T));
294296
return tmp;
295297
}
296298
// Diff hunk: the lines marked `-` are the removed `load_aligned` wrapper; the
// lines marked `+` introduce `store` under the same template header.
//
// store: copies sizeof(T) bytes from the vector `v` to `ptr`. When `aligned`
// is true, `ptr` is first passed through __builtin_assume_aligned with
// alignof(T); the default (false) makes no alignment assumption.
template <typename T, internal::enable_if_simd_t<T> = 0>
297-
LIBC_INLINE T load_aligned(const void *ptr) {
298-
return load_unaligned<T>(__builtin_assume_aligned(ptr, alignof(T)));
299+
LIBC_INLINE constexpr static void store(T v, void *ptr, bool aligned = false) {
300+
if (aligned)
301+
ptr = __builtin_assume_aligned(ptr, alignof(T));
302+
__builtin_memcpy(ptr, &v, sizeof(T));
299303
}
300304
// Diff hunk: the lines marked `-` are the removed `store_unaligned` helper
// (declared to return T but with no return statement visible); the lines
// marked `+` introduce `load_masked` under the same template header.
//
// load_masked: forwards `mask`, `ptr` reinterpreted as `T *`, and `passthru`
// to __builtin_masked_load and returns its result. `passthru` defaults to
// internal::poison<T>(). When `aligned` is true, `ptr` is first passed
// through __builtin_assume_aligned with alignof(T).
// Presumably lanes whose mask bit is false take their value from `passthru`
// -- TODO confirm against the builtin's documentation.
// NOTE(review): `ptr` is a non-const `void *` even though this helper only
// passes it to a load builtin -- consider whether `const void *` is intended.
template <typename T, internal::enable_if_simd_t<T> = 0>
301-
LIBC_INLINE T store_unaligned(T v, void *ptr) {
302-
__builtin_memcpy(ptr, &v, sizeof(T));
305+
LIBC_INLINE constexpr static T
306+
load_masked(simd<bool, simd_size_v<T>> mask, void *ptr,
307+
T passthru = internal::poison<T>(), bool aligned = false) {
308+
if (aligned)
309+
ptr = __builtin_assume_aligned(ptr, alignof(T));
310+
return __builtin_masked_load(mask, reinterpret_cast<T *>(ptr), passthru);
303311
}
304312
// Diff hunk: the lines marked `-` are the removed `store_aligned` wrapper; the
// lines marked `+` introduce `store_masked` under the same template header.
//
// store_masked: forwards `mask`, the vector `v`, and `ptr` reinterpreted as
// `T *` to __builtin_masked_store. When `aligned` is true, `ptr` is first
// passed through __builtin_assume_aligned with alignof(T).
// Presumably only lanes whose mask bit is true are written -- TODO confirm
// against the builtin's documentation.
template <typename T, internal::enable_if_simd_t<T> = 0>
305-
LIBC_INLINE T store_aligned(T v, void *ptr) {
306-
store_unaligned<T>(v, __builtin_assume_aligned(ptr, alignof(T)));
313+
LIBC_INLINE constexpr static void store_masked(simd<bool, simd_size_v<T>> mask,
314+
T v, void *ptr,
315+
bool aligned = false) {
316+
if (aligned)
317+
ptr = __builtin_assume_aligned(ptr, alignof(T));
318+
__builtin_masked_store(mask, v, reinterpret_cast<T *>(ptr));
319+
}
320+
// gather: forwards `mask`, the index vector `idx`, and `base` reinterpreted as
// a pointer to the element type of T to __builtin_masked_gather and returns
// its result. When `aligned` is true, `base` is first passed through
// __builtin_assume_aligned with alignof(T).
// Presumably `idx` holds per-lane element offsets from `base` -- TODO confirm
// against the builtin's documentation.
// NOTE(review): the alignment hint uses alignof(T) (the whole vector) while
// the pointer is then treated as an element pointer -- confirm that this is
// the intended alignment contract for a gather.
template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
321+
LIBC_INLINE constexpr static T gather(simd<bool, simd_size_v<T>> mask, Idx idx,
322+
void *base, bool aligned = false) {
323+
if (aligned)
324+
base = __builtin_assume_aligned(base, alignof(T));
325+
return __builtin_masked_gather(
326+
mask, idx, reinterpret_cast<simd_element_type_t<T> *>(base));
327+
}
328+
// scatter: forwards `mask`, the index vector `idx`, the vector `v`, and `base`
// reinterpreted as a pointer to the element type of T to
// __builtin_masked_scatter. When `aligned` is true, `base` is first passed
// through __builtin_assume_aligned with alignof(T).
// Presumably `idx` holds per-lane element offsets from `base` -- TODO confirm
// against the builtin's documentation.
// NOTE(review): the alignment hint uses alignof(T) (the whole vector) while
// the pointer is then treated as an element pointer -- confirm that this is
// the intended alignment contract for a scatter.
template <typename T, typename Idx, internal::enable_if_simd_t<T> = 0>
329+
LIBC_INLINE constexpr static void scatter(simd<bool, simd_size_v<T>> mask,
330+
Idx idx, T v, void *base,
331+
bool aligned = false) {
332+
if (aligned)
333+
base = __builtin_assume_aligned(base, alignof(T));
334+
__builtin_masked_scatter(mask, idx, v,
335+
reinterpret_cast<simd_element_type_t<T> *>(base));
307336
}
308337
// Diff hunk: the lines marked `-` are the removed `masked_load` helper (which
// passed the untyped `ptr` straight to the builtin and defaulted `passthru`
// to internal::poison<simd_element_type<T>>()); the lines marked `+`
// introduce `expand` under the same template header.
//
// expand: forwards `mask`, `ptr` reinterpreted as `T *`, and `passthru` to
// __builtin_masked_expand_load and returns its result. `passthru` defaults to
// internal::poison<T>(). When `aligned` is true, `ptr` is first passed
// through __builtin_assume_aligned with alignof(T).
// Presumably consecutive elements in memory are placed into the lanes whose
// mask bit is true -- TODO confirm against the builtin's documentation.
template <typename T, internal::enable_if_simd_t<T> = 0>
309-
LIBC_INLINE T
310-
masked_load(simd<bool, simd_size_v<T>> m, void *ptr,
311-
T passthru = internal::poison<simd_element_type<T>>()) {
312-
return __builtin_masked_load(m, ptr, passthru);
338+
LIBC_INLINE constexpr static T
339+
expand(simd<bool, simd_size_v<T>> mask, void *ptr,
340+
T passthru = internal::poison<T>(), bool aligned = false) {
341+
if (aligned)
342+
ptr = __builtin_assume_aligned(ptr, alignof(T));
343+
return __builtin_masked_expand_load(mask, reinterpret_cast<T *>(ptr),
344+
passthru);
313345
}
314346
// Diff hunk: the lines marked `-` are the removed `masked_store` helper (which
// always applied __builtin_assume_aligned and was declared to return T with
// no return statement visible); the lines marked `+` introduce `compress`
// under the same template header.
//
// compress: forwards `mask`, the vector `v`, and `ptr` reinterpreted as `T *`
// to __builtin_masked_compress_store. When `aligned` is true, `ptr` is first
// passed through __builtin_assume_aligned with alignof(T).
// Presumably the lanes of `v` whose mask bit is true are written contiguously
// starting at `ptr` -- TODO confirm against the builtin's documentation.
template <typename T, internal::enable_if_simd_t<T> = 0>
315-
LIBC_INLINE T masked_store(simd<bool, simd_size_v<T>> m, T v, void *ptr) {
316-
__builtin_masked_store(
317-
m, v, static_cast<T *>(__builtin_assume_aligned(ptr, alignof(T))));
347+
LIBC_INLINE constexpr static void compress(simd<bool, simd_size_v<T>> mask, T v,
348+
void *ptr, bool aligned = false) {
349+
if (aligned)
350+
ptr = __builtin_assume_aligned(ptr, alignof(T));
351+
__builtin_masked_compress_store(mask, v, reinterpret_cast<T *>(ptr));
318352
}
319353

320354
// Construction helpers.

libc/src/string/memory_utils/generic/inline_strlen.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,15 @@ string_length(const char *src) {
3232
const cpp::simd<char> *aligned = reinterpret_cast<const cpp::simd<char> *>(
3333
__builtin_align_down(src, alignment));
3434

35-
cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(aligned);
35+
cpp::simd<char> chars = cpp::load<cpp::simd<char>>(aligned, /*aligned=*/true);
3636
cpp::simd_mask<char> mask = chars == null_byte;
3737
size_t offset = src - reinterpret_cast<const char *>(aligned);
3838
if (cpp::any_of(shift_mask(mask, offset)))
3939
return cpp::find_first_set(shift_mask(mask, offset));
4040

4141
for (;;) {
42-
cpp::simd<char> chars = cpp::load_aligned<cpp::simd<char>>(++aligned);
42+
cpp::simd<char> chars = cpp::load<cpp::simd<char>>(++aligned,
43+
/*aligned=*/true);
4344
cpp::simd_mask<char> mask = chars == null_byte;
4445
if (cpp::any_of(mask))
4546
return (reinterpret_cast<const char *>(aligned) - src) +

libc/test/src/__support/CPP/simd_test.cpp

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,3 +86,65 @@ TEST(LlvmLibcSIMDTest, SplitConcat) {
8686
cpp::simd<char, 8> n = cpp::concat(c, c, c, c, c, c, c, c);
8787
EXPECT_TRUE(cpp::all_of(n == ~0));
8888
}
89+
90+
// Round-trips a vector through memory with cpp::store / cpp::load, once with
// the default `aligned` argument and once with `aligned` set to true. The
// buffer holds simd_size_v<simd<int>> ints and is declared with the alignment
// of cpp::simd<int>, so the aligned variant is given suitably aligned storage.
TEST(LlvmLibcSIMDTest, LoadStore) {
91+
constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
92+
alignas(alignof(cpp::simd<int>)) int buf[SIZE];
93+
94+
// First pass: store and reload a splat of 1 using the default aligned=false.
cpp::simd<int> v1 = cpp::splat(1);
95+
cpp::store(v1, buf);
96+
cpp::simd<int> v2 = cpp::load<cpp::simd<int>>(buf);
97+
98+
// Both the source vector and the reloaded vector must be all ones.
EXPECT_TRUE(cpp::all_of(v1 == 1));
99+
EXPECT_TRUE(cpp::all_of(v2 == 1));
100+
101+
// Second pass: same round trip with a splat of 2 and aligned=true.
cpp::simd<int> v3 = cpp::splat(2);
102+
cpp::store(v3, buf, /*aligned=*/true);
103+
cpp::simd<int> v4 = cpp::load<cpp::simd<int>>(buf, /*aligned=*/true);
104+
105+
EXPECT_TRUE(cpp::all_of(v3 == 2));
106+
EXPECT_TRUE(cpp::all_of(v4 == 2));
107+
}
108+
109+
// Stores a splat of 1 into a zero-initialised buffer through
// cpp::store_masked, reloads it through cpp::load_masked with the same mask,
// and checks that the lanes equal to 1 match the mask.
TEST(LlvmLibcSIMDTest, MaskedLoadStore) {
110+
constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
111+
alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
112+
113+
// Mask built from the comparison iota(0) % 2 == 0 -- presumably this selects
// the even-indexed lanes; confirm against cpp::iota.
// NOTE(review): the mask is stored in a cpp::simd<int> and then passed where
// the helpers take simd<bool, N> -- confirm the conversion is as intended.
cpp::simd<int> mask = cpp::iota(0) % 2 == 0;
114+
cpp::simd<int> v1 = cpp::splat(1);
115+
116+
cpp::store_masked<cpp::simd<int>>(mask, v1, buf);
117+
// NOTE(review): no passthru is supplied here, so load_masked's default
// (internal::poison<T>()) is used -- confirm the comparison below is
// well-defined for the lanes the mask leaves out.
cpp::simd<int> v2 = cpp::load_masked<cpp::simd<int>>(mask, buf);
118+
119+
EXPECT_TRUE(cpp::all_of((v2 == 1) == mask));
120+
}
121+
122+
// Writes iota(0) to a zero-initialised buffer with cpp::compress, reads it
// back with cpp::expand under a different mask, and checks a bound on the
// lanes selected by the expand mask.
TEST(LlvmLibcSIMDTest, MaskedCompressExpand) {
123+
constexpr size_t SIZE = cpp::simd_size_v<cpp::simd<int>>;
124+
alignas(alignof(cpp::simd<int>)) int buf[SIZE] = {0};
125+
126+
// Expand mask built from iota(0) % 2 == 0 -- presumably the even-indexed
// lanes; confirm against cpp::iota.
cpp::simd<int> mask_expand = cpp::iota(0) % 2 == 0;
127+
// Compress mask initialised from the scalar 1 -- presumably this enables
// every lane; confirm how simd<int> converts to the simd<bool> parameter.
cpp::simd<int> mask_compress = 1;
128+
129+
cpp::simd<int> v1 = cpp::iota(0);
130+
131+
cpp::compress<cpp::simd<int>>(mask_compress, v1, buf);
132+
cpp::simd<int> v2 = cpp::expand<cpp::simd<int>>(mask_expand, buf);
133+
134+
// For every lane selected by mask_expand, the expanded value must not exceed
// SIZE / 2; lanes outside the mask are excused by the `!mask_expand` term.
EXPECT_TRUE(cpp::all_of(!mask_expand || v2 <= SIZE / 2));
135+
}
136+
137+
// Scatters a splat of 1 into a buffer with cpp::scatter using indices from
// iota(0), gathers it back with cpp::gather using the same mask and indices,
// and checks that both the source and the gathered vector are all ones.
TEST(LlvmLibcSIMDTest, GatherScatter) {
138+
constexpr int SIZE = cpp::simd_size_v<cpp::simd<int>>;
139+
// buf is left uninitialised; the scatter below is what writes to it.
alignas(alignof(cpp::simd<int>)) int buf[SIZE];
140+
141+
// Mask built from iota(1) -- presumably every lane is non-zero so that all
// lanes are active; confirm how simd<int> converts to the simd<bool> mask.
cpp::simd<int> mask = cpp::iota(1);
142+
cpp::simd<int> idx = cpp::iota(0);
143+
cpp::simd<int> v1 = cpp::splat(1);
144+
145+
cpp::scatter<cpp::simd<int>>(mask, idx, v1, buf);
146+
cpp::simd<int> v2 = cpp::gather<cpp::simd<int>>(mask, idx, buf);
147+
148+
EXPECT_TRUE(cpp::all_of(v1 == 1));
149+
EXPECT_TRUE(cpp::all_of(v2 == 1));
150+
}

0 commit comments

Comments
 (0)