Skip to content

Commit

Permalink
[libc][NFC] Use new approach based on types to code memmove
Browse files Browse the repository at this point in the history
  • Loading branch information
gchatelet committed Apr 11, 2023
1 parent d827865 commit 355a5d5
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 83 deletions.
83 changes: 32 additions & 51 deletions libc/src/string/memory_utils/memmove_implementations.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,73 +34,54 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) {
}
}

// Size-dispatched memmove built on generic::Memmove<Size, MaxSize>.
//
// `count` selects the strategy:
//   - 0            : nothing to do,
//   - 1            : a single block of size 1,
//   - 2 .. 128     : a head/tail pair of blocks whose size is picked from the
//                    range `count` falls in,
//   - above 128    : an alignment step (size 32, on Arg::Src) followed by a
//                    loop-and-tail with size 64, going forward when dst < src
//                    and backward otherwise.
// MaxSize is forwarded unchanged to every generic::Memmove instantiation.
template <size_t MaxSize>
[[maybe_unused]] LIBC_INLINE void inline_memmove_generic(Ptr dst, CPtr src,
                                                         size_t count) {
  if (count == 0) {
    // Empty move: leave both buffers untouched.
  } else if (count == 1) {
    generic::Memmove<1, MaxSize>::block(dst, src);
  } else if (count <= 4) {
    generic::Memmove<2, MaxSize>::head_tail(dst, src, count);
  } else if (count <= 8) {
    generic::Memmove<4, MaxSize>::head_tail(dst, src, count);
  } else if (count <= 16) {
    generic::Memmove<8, MaxSize>::head_tail(dst, src, count);
  } else if (count <= 32) {
    generic::Memmove<16, MaxSize>::head_tail(dst, src, count);
  } else if (count <= 64) {
    generic::Memmove<32, MaxSize>::head_tail(dst, src, count);
  } else if (count <= 128) {
    generic::Memmove<64, MaxSize>::head_tail(dst, src, count);
  } else if (dst < src) {
    // Destination precedes source: process from the front.
    generic::Memmove<32, MaxSize>::template align_forward<Arg::Src>(dst, src,
                                                                    count);
    generic::Memmove<64, MaxSize>::loop_and_tail_forward(dst, src, count);
  } else {
    // Destination does not precede source: process from the back.
    generic::Memmove<32, MaxSize>::template align_backward<Arg::Src>(dst, src,
                                                                     count);
    generic::Memmove<64, MaxSize>::loop_and_tail_backward(dst, src, count);
  }
}

LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) {
#if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64)
#if defined(LIBC_TARGET_ARCH_IS_X86)
static constexpr size_t kMaxSize = x86::kAvx512F ? 64
: x86::kAvx ? 32
: x86::kSse2 ? 16
: 8;
#if defined(__AVX512F__)
using uint128_t = uint8x16_t;
using uint256_t = uint8x32_t;
using uint512_t = uint8x64_t;
#elif defined(__AVX__)
using uint128_t = uint8x16_t;
using uint256_t = uint8x32_t;
using uint512_t = cpp::array<uint8x32_t, 2>;
#elif defined(__SSE2__)
using uint128_t = uint8x16_t;
using uint256_t = cpp::array<uint8x16_t, 2>;
using uint512_t = cpp::array<uint8x16_t, 4>;
#else
using uint128_t = cpp::array<uint64_t, 2>;
using uint256_t = cpp::array<uint64_t, 4>;
using uint512_t = cpp::array<uint64_t, 8>;
#endif
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
static constexpr size_t kMaxSize = aarch64::kNeon ? 16 : 8;
static_assert(aarch64::kNeon, "aarch64 supports vector types");
using uint128_t = uint8x16_t;
using uint256_t = uint8x32_t;
using uint512_t = uint8x64_t;
#endif
// return inline_memmove_generic<kMaxSize>(dst, src, count);
if (count == 0)
return;
if (count == 1)
return generic::Memmove<1, kMaxSize>::block(dst, src);
return generic::Memmove<uint8_t>::block(dst, src);
if (count <= 4)
return generic::Memmove<2, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint16_t>::head_tail(dst, src, count);
if (count <= 8)
return generic::Memmove<4, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint32_t>::head_tail(dst, src, count);
if (count <= 16)
return generic::Memmove<8, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint64_t>::head_tail(dst, src, count);
if (count <= 32)
return generic::Memmove<16, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint128_t>::head_tail(dst, src, count);
if (count <= 64)
return generic::Memmove<32, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint256_t>::head_tail(dst, src, count);
if (count <= 128)
return generic::Memmove<64, kMaxSize>::head_tail(dst, src, count);
return generic::Memmove<uint512_t>::head_tail(dst, src, count);
if (dst < src) {
generic::Memmove<32, kMaxSize>::align_forward<Arg::Src>(dst, src, count);
return generic::Memmove<64, kMaxSize>::loop_and_tail_forward(dst, src,
count);
generic::Memmove<uint256_t>::align_forward<Arg::Src>(dst, src, count);
return generic::Memmove<uint512_t>::loop_and_tail_forward(dst, src, count);
} else {
generic::Memmove<32, kMaxSize>::align_backward<Arg::Src>(dst, src, count);
return generic::Memmove<64, kMaxSize>::loop_and_tail_backward(dst, src,
count);
generic::Memmove<uint256_t>::align_backward<Arg::Src>(dst, src, count);
return generic::Memmove<uint512_t>::loop_and_tail_backward(dst, src, count);
}
#else
return inline_memmove_embedded_tiny(dst, src, count);
Expand Down
54 changes: 22 additions & 32 deletions libc/src/string/memory_utils/op_generic.h
Original file line number Diff line number Diff line change
Expand Up @@ -254,32 +254,22 @@ template <typename T, typename... TS> struct Memset {
// Memmove
///////////////////////////////////////////////////////////////////////////////

template <size_t Size, size_t MaxSize> struct Memmove {
static_assert(is_power2(MaxSize));
using T = details::getTypeFor<Size, MaxSize>;
static constexpr size_t SIZE = Size;
template <typename T> struct Memmove {
static constexpr size_t SIZE = sum_sizeof<T>();

LIBC_INLINE static void block(Ptr dst, CPtr src) {
if constexpr (details::is_void_v<T>) {
deferred_static_assert("Unimplemented Size");
} else {
store<T>(dst, load<T>(src));
}
store<T>(dst, load<T>(src));
}

LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) {
const size_t offset = count - Size;
if constexpr (details::is_void_v<T>) {
deferred_static_assert("Unimplemented Size");
} else {
// The load and store operations can be performed in any order as long as
// they are not interleaved. More investigations are needed to determine
// the best order.
const auto head = load<T>(src);
const auto tail = load<T>(src + offset);
store<T>(dst, head);
store<T>(dst + offset, tail);
}
const size_t offset = count - SIZE;
// The load and store operations can be performed in any order as long as
// they are not interleaved. More investigations are needed to determine
// the best order.
const auto head = load<T>(src);
const auto tail = load<T>(src + offset);
store<T>(dst, head);
store<T>(dst + offset, tail);
}

// Align forward suitable when dst < src. The alignment is performed with
Expand All @@ -305,8 +295,8 @@ template <size_t Size, size_t MaxSize> struct Memmove {
Ptr prev_dst = dst;
CPtr prev_src = src;
size_t prev_count = count;
align_to_next_boundary<Size, AlignOn>(dst, src, count);
adjust(Size, dst, src, count);
align_to_next_boundary<SIZE, AlignOn>(dst, src, count);
adjust(SIZE, dst, src, count);
head_tail(prev_dst, prev_src, prev_count - count);
}

Expand All @@ -333,9 +323,9 @@ template <size_t Size, size_t MaxSize> struct Memmove {
Ptr headtail_dst = dst + count;
CPtr headtail_src = src + count;
size_t headtail_size = 0;
align_to_next_boundary<Size, AlignOn>(headtail_dst, headtail_src,
align_to_next_boundary<SIZE, AlignOn>(headtail_dst, headtail_src,
headtail_size);
adjust(-2 * Size, headtail_dst, headtail_src, headtail_size);
adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size);
head_tail(headtail_dst, headtail_src, headtail_size);
count -= headtail_size;
}
Expand All @@ -356,15 +346,15 @@ template <size_t Size, size_t MaxSize> struct Memmove {
// [_______________________SSSSSSSS_____]
LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src,
size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
const size_t tail_offset = count - Size;
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
const size_t tail_offset = count - SIZE;
const auto tail_value = load<T>(src + tail_offset);
size_t offset = 0;
LIBC_LOOP_NOUNROLL
do {
block(dst + offset, src + offset);
offset += Size;
} while (offset < count - Size);
offset += SIZE;
} while (offset < count - SIZE);
store<T>(dst + tail_offset, tail_value);
}

Expand All @@ -384,13 +374,13 @@ template <size_t Size, size_t MaxSize> struct Memmove {
// [_____SSSSSSSS_______________________]
LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src,
size_t count) {
static_assert(Size > 1, "a loop of size 1 does not need tail");
static_assert(SIZE > 1, "a loop of size 1 does not need tail");
const auto head_value = load<T>(src);
ptrdiff_t offset = count - Size;
ptrdiff_t offset = count - SIZE;
LIBC_LOOP_NOUNROLL
do {
block(dst + offset, src + offset);
offset -= Size;
offset -= SIZE;
} while (offset >= 0);
store<T>(dst, head_value);
}
Expand Down

0 comments on commit 355a5d5

Please sign in to comment.