diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h index 03c6cab46e870..02d90a1b29296 100644 --- a/libc/src/string/memory_utils/op_generic.h +++ b/libc/src/string/memory_utils/op_generic.h @@ -35,35 +35,11 @@ namespace __llvm_libc::generic { -// CTPair and CTMap below implement a compile time map. -// This is useful to map from a Size to a type handling this size. -// -// Example usage: -// using MyMap = CTMap, -// CTPair<2, uint16_t>, -// >; -// ... -// using UInt8T = MyMap::find_type<1>; -template struct CTPair { - using type = T; - LIBC_INLINE static CTPair get_pair(cpp::integral_constant) { - return {}; - } -}; -template struct CTMap : public Pairs... { - using Pairs::get_pair...; - template - using find_type = - typename decltype(get_pair(cpp::integral_constant{}))::type; -}; - -// Helper to test if a type is void. -template inline constexpr bool is_void_v = cpp::is_same_v; - -// Implements load, store and splat for unsigned integral types. +// Implements generic load, store, splat and set for unsigned integral types. template struct ScalarType { + static_assert(cpp::is_integral_v && !cpp::is_signed_v); using Type = T; - static_assert(cpp::is_integral_v && !cpp::is_signed_v); + static constexpr size_t SIZE = sizeof(Type); LIBC_INLINE static Type load(CPtr src) { return ::__llvm_libc::load(src); @@ -74,6 +50,9 @@ template struct ScalarType { LIBC_INLINE static Type splat(uint8_t value) { return Type(~0) / Type(0xFF) * Type(value); } + LIBC_INLINE static void set(Ptr dst, uint8_t value) { + store(dst, splat(value)); + } }; // GCC can only take literals as __vector_size__ argument so we have to use @@ -101,9 +80,10 @@ template <> struct VectorValueType<64> { using type = uint8_t __attribute__((__vector_size__(64))); }; -// Implements load, store and splat for vector types. +// Implements generic load, store, splat and set for vector types. template struct VectorType { using Type = typename VectorValueType::type; + static constexpr size_t SIZE = Size; LIBC_INLINE static Type load(CPtr src) { return ::__llvm_libc::load(src); } @@ -117,35 +97,17 @@ template struct VectorType { Out[i] = static_cast(value); return Out; } + LIBC_INLINE static void set(Ptr dst, uint8_t value) { + store(dst, splat(value)); + } }; -static_assert((UINTPTR_MAX == 4294967295U) || - (UINTPTR_MAX == 18446744073709551615UL), - "We currently only support 32- or 64-bit platforms"); - -#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64) -#define LLVM_LIBC_HAS_UINT64 -#endif - -// Map from sizes to structures offering static load, store and splat methods. -// Note: On platforms lacking vector support, we use the ArrayType below and -// decompose the operation in smaller pieces. -using NativeTypeMap = - CTMap>, // - CTPair<2, ScalarType>, // - CTPair<4, ScalarType>, // -#if defined(LLVM_LIBC_HAS_UINT64) - CTPair<8, ScalarType>, // Not available on 32bit -#endif // - CTPair<16, VectorType<16>>, // - CTPair<32, VectorType<32>>, // - CTPair<64, VectorType<64>>>; - -// Implements load, store and splat for sizes not natively supported by the +// Implements load, store and set for sizes not natively supported by the // platform. SubType is either ScalarType or VectorType. template struct ArrayType { using Type = cpp::array; - static constexpr size_t SizeOfElement = sizeof(typename SubType::Type); + static constexpr size_t SizeOfElement = SubType::SIZE; + static constexpr size_t SIZE = SizeOfElement * ArraySize; LIBC_INLINE static Type load(CPtr src) { Type Value; for (size_t I = 0; I < ArraySize; ++I) @@ -156,27 +118,106 @@ template struct ArrayType { for (size_t I = 0; I < ArraySize; ++I) SubType::store(dst + (I * SizeOfElement), Value[I]); } - LIBC_INLINE static Type splat(uint8_t value) { - Type Out; + LIBC_INLINE static void set(Ptr dst, uint8_t value) { + const auto Splat = SubType::splat(value); for (size_t I = 0; I < ArraySize; ++I) - Out[I] = SubType::splat(value); - return Out; + SubType::store(dst + (I * SizeOfElement), Splat); } }; -// Checks whether we should use an ArrayType. +static_assert((UINTPTR_MAX == 4294967295U) || + (UINTPTR_MAX == 18446744073709551615UL), + "We currently only support 32- or 64-bit platforms"); + +#if defined(LIBC_TARGET_ARCH_IS_X86_64) || defined(LIBC_TARGET_ARCH_IS_AARCH64) +#define LLVM_LIBC_HAS_UINT64 +#endif + +namespace details { +// Checks that each type's SIZE is sorted in strictly decreasing order. +// i.e. First::SIZE > Second::SIZE > ... > Last::SIZE +template +constexpr bool is_decreasing_size() { + if constexpr (sizeof...(Next) > 0) { + return (First::SIZE > Second::SIZE) && + is_decreasing_size(); + } else { + return First::SIZE > Second::SIZE; + } +} + +// Helper to test if a type is void. +template inline constexpr bool is_void_v = cpp::is_same_v; + +} // namespace details + +// 'SupportedTypes' holds a list of natively supported types. +// The types are instanciations of ScalarType or VectorType. +// They should be ordered in strictly decreasing order. +// The 'TypeFor' type retrieves is the largest supported type that can +// handle 'Size' bytes. e.g. +// +// using ST = SupportedTypes, ScalarType>; +// using Type = ST::TypeFor<10>; +// static_assert(cpp:is_same_v>); +template +struct SupportedTypes { + static_assert(details::is_decreasing_size()); + using MaxType = First; + + template + using TypeFor = cpp::conditional_t< + (Size >= First::SIZE), First, + typename SupportedTypes::template TypeFor>; +}; + +template +struct SupportedTypes { + static_assert(details::is_decreasing_size()); + using MaxType = First; + + template + using TypeFor = cpp::conditional_t< + (Size >= First::SIZE), First, + cpp::conditional_t<(Size >= Second::SIZE), Second, void>>; +}; + +// Map from sizes to structures offering static load, store and splat methods. +// Note: On platforms lacking vector support, we use the ArrayType below and +// decompose the operation in smaller pieces. + +// Lists a generic native types to use for Memset and Memmove operations. +// TODO: Inject the native types within Memset and Memmove depending on the +// target architectures and derive MaxSize from it. +using NativeTypeMap = + SupportedTypes, // + VectorType<32>, // + VectorType<16>, +#if defined(LLVM_LIBC_HAS_UINT64) + ScalarType, // Not available on 32bit +#endif + ScalarType, // + ScalarType, // + ScalarType>; + +namespace details { + +// In case the 'Size' is not supported we can fall back to a sequence of smaller +// operations using the largest natively supported type. template static constexpr bool useArrayType() { return (Size > MaxSize) && ((Size % MaxSize) == 0) && - !is_void_v>; + !details::is_void_v>; } -// Compute the type to handle an operation of Size bytes knowing that the +// Compute the type to handle an operation of 'Size' bytes knowing that the // underlying platform only support native types up to MaxSize bytes. template using getTypeFor = cpp::conditional_t< useArrayType(), - ArrayType, Size / MaxSize>, - NativeTypeMap::find_type>; + ArrayType, Size / MaxSize>, + NativeTypeMap::TypeFor>; + +} // namespace details /////////////////////////////////////////////////////////////////////////////// // Memcpy @@ -201,11 +242,11 @@ template struct Memset { Memset<1, MaxSize>::block(dst + 2, value); Memset<2, MaxSize>::block(dst, value); } else { - using T = getTypeFor; - if constexpr (is_void_v) { + using T = details::getTypeFor; + if constexpr (details::is_void_v) { deferred_static_assert("Unimplemented Size"); } else { - T::store(dst, T::splat(value)); + T::set(dst, value); } } } @@ -230,147 +271,17 @@ template struct Memset { } }; -/////////////////////////////////////////////////////////////////////////////// -// Bcmp -/////////////////////////////////////////////////////////////////////////////// -template struct Bcmp { - static constexpr size_t SIZE = Size; - static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) - ? sizeof(uint64_t) - : sizeof(uint32_t); - - template LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) { - return load(p1) ^ load(p2); - } - - template - LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) { - return load(p1) != load(p2); - } - - LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) { - if constexpr (Size == 1) { - return load_xor(p1, p2); - } else if constexpr (Size == 2) { - return load_xor(p1, p2); - } else if constexpr (Size == 4) { - return load_xor(p1, p2); - } else if constexpr (Size == 8) { - return load_not_equal(p1, p2); - } else if constexpr (useArrayType()) { - for (size_t offset = 0; offset < Size; offset += MaxSize) - if (auto value = Bcmp::block(p1 + offset, p2 + offset)) - return value; - } else { - deferred_static_assert("Unimplemented Size"); - } - return BcmpReturnType::ZERO(); - } - - LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { - return block(p1 + count - SIZE, p2 + count - SIZE); - } - - LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) { - return block(p1, p2) | tail(p1, p2, count); - } - - LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2, - size_t count) { - static_assert(Size > 1, "a loop of size 1 does not need tail"); - size_t offset = 0; - do { - if (auto value = block(p1 + offset, p2 + offset)) - return value; - offset += SIZE; - } while (offset < count - SIZE); - return tail(p1, p2, count); - } -}; - -/////////////////////////////////////////////////////////////////////////////// -// Memcmp -/////////////////////////////////////////////////////////////////////////////// -template struct Memcmp { - static constexpr size_t SIZE = Size; - static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) - ? sizeof(uint64_t) - : sizeof(uint32_t); - - template LIBC_INLINE static T load_be(CPtr ptr) { - return Endian::to_big_endian(load(ptr)); - } - - template - LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) { - return load_be(p1) - load_be(p2); - } - - template - LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) { - const auto la = load_be(p1); - const auto lb = load_be(p2); - return la > lb ? 1 : la < lb ? -1 : 0; - } - - LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) { - if constexpr (Size == 1) { - return load_be_diff(p1, p2); - } else if constexpr (Size == 2) { - return load_be_diff(p1, p2); - } else if constexpr (Size == 4) { - return load_be_cmp(p1, p2); - } else if constexpr (Size == 8) { - return load_be_cmp(p1, p2); - } else if constexpr (useArrayType()) { - for (size_t offset = 0; offset < Size; offset += MaxSize) - if (Bcmp::block(p1 + offset, p2 + offset)) - return Memcmp::block(p1 + offset, p2 + offset); - return MemcmpReturnType::ZERO(); - } else if constexpr (Size == 3) { - if (auto value = Memcmp<2>::block(p1, p2)) - return value; - return Memcmp<1>::block(p1 + 2, p2 + 2); - } else { - deferred_static_assert("Unimplemented Size"); - } - } - - LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { - return block(p1 + count - SIZE, p2 + count - SIZE); - } - - LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2, - size_t count) { - if (auto value = block(p1, p2)) - return value; - return tail(p1, p2, count); - } - - LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2, - size_t count) { - static_assert(Size > 1, "a loop of size 1 does not need tail"); - size_t offset = 0; - do { - if (auto value = block(p1 + offset, p2 + offset)) - return value; - offset += SIZE; - } while (offset < count - SIZE); - return tail(p1, p2, count); - } -}; - /////////////////////////////////////////////////////////////////////////////// // Memmove /////////////////////////////////////////////////////////////////////////////// template struct Memmove { static_assert(is_power2(MaxSize)); - using T = getTypeFor; + using T = details::getTypeFor; static constexpr size_t SIZE = Size; LIBC_INLINE static void block(Ptr dst, CPtr src) { - if constexpr (is_void_v) { + if constexpr (details::is_void_v) { deferred_static_assert("Unimplemented Size"); } else { T::store(dst, T::load(src)); @@ -379,7 +290,7 @@ template struct Memmove { LIBC_INLINE static void head_tail(Ptr dst, CPtr src, size_t count) { const size_t offset = count - Size; - if constexpr (is_void_v) { + if constexpr (details::is_void_v) { deferred_static_assert("Unimplemented Size"); } else { // The load and store operations can be performed in any order as long as @@ -506,6 +417,137 @@ template struct Memmove { } }; +/////////////////////////////////////////////////////////////////////////////// +// Bcmp +/////////////////////////////////////////////////////////////////////////////// +template struct Bcmp { + static constexpr size_t SIZE = Size; + static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) + ? sizeof(uint64_t) + : sizeof(uint32_t); + + template LIBC_INLINE static uint32_t load_xor(CPtr p1, CPtr p2) { + static_assert(sizeof(T) <= sizeof(uint32_t)); + return load(p1) ^ load(p2); + } + + template + LIBC_INLINE static uint32_t load_not_equal(CPtr p1, CPtr p2) { + return load(p1) != load(p2); + } + + LIBC_INLINE static BcmpReturnType block(CPtr p1, CPtr p2) { + if constexpr (Size == 1) { + return load_xor(p1, p2); + } else if constexpr (Size == 2) { + return load_xor(p1, p2); + } else if constexpr (Size == 4) { + return load_xor(p1, p2); + } else if constexpr (Size == 8) { + return load_not_equal(p1, p2); + } else if constexpr (details::useArrayType()) { + for (size_t offset = 0; offset < Size; offset += MaxSize) + if (auto value = Bcmp::block(p1 + offset, p2 + offset)) + return value; + } else { + deferred_static_assert("Unimplemented Size"); + } + return BcmpReturnType::ZERO(); + } + + LIBC_INLINE static BcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { + return block(p1 + count - SIZE, p2 + count - SIZE); + } + + LIBC_INLINE static BcmpReturnType head_tail(CPtr p1, CPtr p2, size_t count) { + return block(p1, p2) | tail(p1, p2, count); + } + + LIBC_INLINE static BcmpReturnType loop_and_tail(CPtr p1, CPtr p2, + size_t count) { + static_assert(Size > 1, "a loop of size 1 does not need tail"); + size_t offset = 0; + do { + if (auto value = block(p1 + offset, p2 + offset)) + return value; + offset += SIZE; + } while (offset < count - SIZE); + return tail(p1, p2, count); + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// Memcmp +/////////////////////////////////////////////////////////////////////////////// +template struct Memcmp { + static constexpr size_t SIZE = Size; + static constexpr size_t MaxSize = LLVM_LIBC_IS_DEFINED(LLVM_LIBC_HAS_UINT64) + ? sizeof(uint64_t) + : sizeof(uint32_t); + + template LIBC_INLINE static T load_be(CPtr ptr) { + return Endian::to_big_endian(load(ptr)); + } + + template + LIBC_INLINE static MemcmpReturnType load_be_diff(CPtr p1, CPtr p2) { + return load_be(p1) - load_be(p2); + } + + template + LIBC_INLINE static MemcmpReturnType load_be_cmp(CPtr p1, CPtr p2) { + const auto la = load_be(p1); + const auto lb = load_be(p2); + return la > lb ? 1 : la < lb ? -1 : 0; + } + + LIBC_INLINE static MemcmpReturnType block(CPtr p1, CPtr p2) { + if constexpr (Size == 1) { + return load_be_diff(p1, p2); + } else if constexpr (Size == 2) { + return load_be_diff(p1, p2); + } else if constexpr (Size == 4) { + return load_be_cmp(p1, p2); + } else if constexpr (Size == 8) { + return load_be_cmp(p1, p2); + } else if constexpr (details::useArrayType()) { + for (size_t offset = 0; offset < Size; offset += MaxSize) + if (Bcmp::block(p1 + offset, p2 + offset)) + return Memcmp::block(p1 + offset, p2 + offset); + return MemcmpReturnType::ZERO(); + } else if constexpr (Size == 3) { + if (auto value = Memcmp<2>::block(p1, p2)) + return value; + return Memcmp<1>::block(p1 + 2, p2 + 2); + } else { + deferred_static_assert("Unimplemented Size"); + } + } + + LIBC_INLINE static MemcmpReturnType tail(CPtr p1, CPtr p2, size_t count) { + return block(p1 + count - SIZE, p2 + count - SIZE); + } + + LIBC_INLINE static MemcmpReturnType head_tail(CPtr p1, CPtr p2, + size_t count) { + if (auto value = block(p1, p2)) + return value; + return tail(p1, p2, count); + } + + LIBC_INLINE static MemcmpReturnType loop_and_tail(CPtr p1, CPtr p2, + size_t count) { + static_assert(Size > 1, "a loop of size 1 does not need tail"); + size_t offset = 0; + do { + if (auto value = block(p1 + offset, p2 + offset)) + return value; + offset += SIZE; + } while (offset < count - SIZE); + return tail(p1, p2, count); + } +}; + } // namespace __llvm_libc::generic #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_OP_GENERIC_H