diff --git a/libc/src/string/memory_utils/memmove_implementations.h b/libc/src/string/memory_utils/memmove_implementations.h index 1eb6d4e7e235a..8a203174caefa 100644 --- a/libc/src/string/memory_utils/memmove_implementations.h +++ b/libc/src/string/memory_utils/memmove_implementations.h @@ -34,73 +34,54 @@ inline_memmove_embedded_tiny(Ptr dst, CPtr src, size_t count) { } } -template -[[maybe_unused]] LIBC_INLINE void inline_memmove_generic(Ptr dst, CPtr src, - size_t count) { - if (count == 0) - return; - if (count == 1) - return generic::Memmove<1, MaxSize>::block(dst, src); - if (count <= 4) - return generic::Memmove<2, MaxSize>::head_tail(dst, src, count); - if (count <= 8) - return generic::Memmove<4, MaxSize>::head_tail(dst, src, count); - if (count <= 16) - return generic::Memmove<8, MaxSize>::head_tail(dst, src, count); - if (count <= 32) - return generic::Memmove<16, MaxSize>::head_tail(dst, src, count); - if (count <= 64) - return generic::Memmove<32, MaxSize>::head_tail(dst, src, count); - if (count <= 128) - return generic::Memmove<64, MaxSize>::head_tail(dst, src, count); - if (dst < src) { - generic::Memmove<32, MaxSize>::template align_forward(dst, src, - count); - return generic::Memmove<64, MaxSize>::loop_and_tail_forward(dst, src, - count); - } else { - generic::Memmove<32, MaxSize>::template align_backward(dst, src, - count); - return generic::Memmove<64, MaxSize>::loop_and_tail_backward(dst, src, - count); - } -} - LIBC_INLINE void inline_memmove(Ptr dst, CPtr src, size_t count) { #if defined(LIBC_TARGET_ARCH_IS_X86) || defined(LIBC_TARGET_ARCH_IS_AARCH64) #if defined(LIBC_TARGET_ARCH_IS_X86) - static constexpr size_t kMaxSize = x86::kAvx512F ? 64 - : x86::kAvx ? 32 - : x86::kSse2 ? 16 - : 8; +#if defined(__AVX512F__) + using uint128_t = uint8x16_t; + using uint256_t = uint8x32_t; + using uint512_t = uint8x64_t; +#elif defined(__AVX__) + using uint128_t = uint8x16_t; + using uint256_t = uint8x32_t; + using uint512_t = cpp::array; +#elif defined(__SSE2__) + using uint128_t = uint8x16_t; + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#else + using uint128_t = cpp::array; + using uint256_t = cpp::array; + using uint512_t = cpp::array; +#endif #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) - static constexpr size_t kMaxSize = aarch64::kNeon ? 16 : 8; + static_assert(aarch64::kNeon, "aarch64 supports vector types"); + using uint128_t = uint8x16_t; + using uint256_t = uint8x32_t; + using uint512_t = uint8x64_t; #endif - // return inline_memmove_generic(dst, src, count); if (count == 0) return; if (count == 1) - return generic::Memmove<1, kMaxSize>::block(dst, src); + return generic::Memmove::block(dst, src); if (count <= 4) - return generic::Memmove<2, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (count <= 8) - return generic::Memmove<4, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (count <= 16) - return generic::Memmove<8, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (count <= 32) - return generic::Memmove<16, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (count <= 64) - return generic::Memmove<32, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (count <= 128) - return generic::Memmove<64, kMaxSize>::head_tail(dst, src, count); + return generic::Memmove::head_tail(dst, src, count); if (dst < src) { - generic::Memmove<32, kMaxSize>::align_forward(dst, src, count); - return generic::Memmove<64, kMaxSize>::loop_and_tail_forward(dst, src, - count); + generic::Memmove::align_forward(dst, src, count); + return generic::Memmove::loop_and_tail_forward(dst, src, count); } else { - generic::Memmove<32, kMaxSize>::align_backward(dst, src, count); - return generic::Memmove<64, kMaxSize>::loop_and_tail_backward(dst, src, - count); + generic::Memmove::align_backward(dst, src, count); + return generic::Memmove::loop_and_tail_backward(dst, src, count); } #else return inline_memmove_embedded_tiny(dst, src, count); diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h index a7c5636c2d1ca..1d203d6261403 100644 --- a/libc/src/string/memory_utils/op_generic.h +++ b/libc/src/string/memory_utils/op_generic.h @@ -254,32 +254,22 @@ template struct Memset { // Memmove /////////////////////////////////////////////////////////////////////////////// -template struct Memmove { - static_assert(is_power2(MaxSize)); - using T = details::getTypeFor; - static constexpr size_t SIZE = Size; +template struct Memmove { + static constexpr size_t SIZE = sum_sizeof(); LIBC_INLINE static void block(Ptr dst, CPtr src) { - if constexpr (details::is_void_v) { - deferred_static_assert("Unimplemented Size"); - } else { - store(dst, load(src)); - } + store(dst, load(src)); } LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) { - const size_t offset = count - Size; - if constexpr (details::is_void_v) { - deferred_static_assert("Unimplemented Size"); - } else { - // The load and store operations can be performed in any order as long as - // they are not interleaved. More investigations are needed to determine - // the best order. - const auto head = load(src); - const auto tail = load(src + offset); - store(dst, head); - store(dst + offset, tail); - } + const size_t offset = count - SIZE; + // The load and store operations can be performed in any order as long as + // they are not interleaved. More investigations are needed to determine + // the best order. + const auto head = load(src); + const auto tail = load(src + offset); + store(dst, head); + store(dst + offset, tail); } // Align forward suitable when dst < src. The alignment is performed with @@ -305,8 +295,8 @@ template struct Memmove { Ptr prev_dst = dst; CPtr prev_src = src; size_t prev_count = count; - align_to_next_boundary(dst, src, count); - adjust(Size, dst, src, count); + align_to_next_boundary(dst, src, count); + adjust(SIZE, dst, src, count); head_tail(prev_dst, prev_src, prev_count - count); } @@ -333,9 +323,9 @@ template struct Memmove { Ptr headtail_dst = dst + count; CPtr headtail_src = src + count; size_t headtail_size = 0; - align_to_next_boundary(headtail_dst, headtail_src, + align_to_next_boundary(headtail_dst, headtail_src, headtail_size); - adjust(-2 * Size, headtail_dst, headtail_src, headtail_size); + adjust(-2 * SIZE, headtail_dst, headtail_src, headtail_size); head_tail(headtail_dst, headtail_src, headtail_size); count -= headtail_size; } @@ -356,15 +346,15 @@ template struct Memmove { // [_______________________SSSSSSSS_____] LIBC_INLINE static void loop_and_tail_forward(Ptr dst, CPtr src, size_t count) { - static_assert(Size > 1, "a loop of size 1 does not need tail"); - const size_t tail_offset = count - Size; + static_assert(SIZE > 1, "a loop of size 1 does not need tail"); + const size_t tail_offset = count - SIZE; const auto tail_value = load(src + tail_offset); size_t offset = 0; LIBC_LOOP_NOUNROLL do { block(dst + offset, src + offset); - offset += Size; - } while (offset < count - Size); + offset += SIZE; + } while (offset < count - SIZE); store(dst + tail_offset, tail_value); } @@ -384,13 +374,13 @@ template struct Memmove { // [_____SSSSSSSS_______________________] LIBC_INLINE static void loop_and_tail_backward(Ptr dst, CPtr src, size_t count) { - static_assert(Size > 1, "a loop of size 1 does not need tail"); + static_assert(SIZE > 1, "a loop of size 1 does not need tail"); const auto head_value = load(src); - ptrdiff_t offset = count - Size; + ptrdiff_t offset = count - SIZE; LIBC_LOOP_NOUNROLL do { block(dst + offset, src + offset); - offset -= Size; + offset -= SIZE; } while (offset >= 0); store(dst, head_value); }