| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,60 @@ | ||
| //===-- Elementary operations to compose memory primitives ----------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file defines the concept of a Backend. | ||
| // It constitutes the lowest level of the framework and is akin to instruction | ||
| // selection. It defines how to implement aligned/unaligned, | ||
| // temporal/non-temporal native loads and stores for a particular architecture | ||
| // as well as efficient ways to fill and compare types. | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H | ||
| #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H | ||
|
|
||
| #include "src/string/memory_utils/address.h" // Temporality, Aligned | ||
| #include "src/string/memory_utils/sized_op.h" // SizedOp | ||
| #include <stddef.h> // size_t | ||
| #include <stdint.h> // uint##_t | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| // Backends must implement the following interface. | ||
| struct NoBackend { | ||
| static constexpr bool IS_BACKEND_TYPE = true; | ||
|
|
||
| // Loads a T from `src` honoring Temporality and Alignment. | ||
| template <typename T, Temporality, Aligned> static T load(const T *src); | ||
|
|
||
| // Stores a T to `dst` honoring Temporality and Alignment. | ||
| template <typename T, Temporality, Aligned> | ||
| static void store(T *dst, T value); | ||
|
|
||
| // Returns a T filled with `value` bytes. | ||
| template <typename T> static T splat(ubyte value); | ||
|
|
||
| // Returns zero iff v1 == v2. | ||
| template <typename T> static uint64_t notEquals(T v1, T v2); | ||
|
|
||
| // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive | ||
| // number otherwise. | ||
| template <typename T> static int32_t threeWayCmp(T v1, T v2); | ||
|
|
||
| // Returns the type to use to consume Size bytes. | ||
| // If no type handles Size bytes at once | ||
| template <size_t Size> using getNextType = void; | ||
| }; | ||
|
|
||
| } // namespace __llvm_libc | ||
|
|
||
| // We inline all backend implementations here to simplify the build system. | ||
| // Each file need to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef. | ||
| #include "src/string/memory_utils/backend_aarch64.h" | ||
| #include "src/string/memory_utils/backend_scalar.h" | ||
| #include "src/string/memory_utils/backend_x86.h" | ||
|
|
||
| #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,177 @@ | ||
| //===-- Sized Operations --------------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
| // | ||
| // This file defines the SizedOp struct that serves as the middle end of the | ||
| // framework. It implements sized memory operations by breaking them down into | ||
| // simpler types whose availability is described in the Backend. It also | ||
| // provides a way to load and store sized chunks of memory (necessary for the | ||
| // move operation). SizedOp are the building blocks of higher order algorithms | ||
| // like HeadTail, Align or Loop. | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H | ||
| #define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H | ||
|
|
||
| #include <stddef.h> // size_t | ||
|
|
||
// Feature detection: whether the compiler provides __builtin_memcpy_inline.
// Overridable from the command line for testing purposes.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE                                    \
  __has_builtin(__builtin_memcpy_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE

// Feature detection: whether the compiler provides __builtin_memset_inline.
// Overridable from the command line for testing purposes.
#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE                                    \
  __has_builtin(__builtin_memset_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| template <typename Backend, size_t Size> struct SizedOp { | ||
| static constexpr size_t SIZE = Size; | ||
|
|
||
| private: | ||
| static_assert(Backend::IS_BACKEND_TYPE); | ||
| static_assert(SIZE > 0); | ||
| using type = typename Backend::template getNextType<Size>; | ||
| static constexpr size_t TYPE_SIZE = sizeof(type); | ||
| static_assert(SIZE >= TYPE_SIZE); | ||
| static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE; | ||
| using NextBlock = SizedOp<Backend, NEXT_SIZE>; | ||
|
|
||
| // Returns whether we can use an aligned operations. | ||
| // This is possible because the address type carries known compile-time | ||
| // alignment informations. | ||
| template <typename T, typename AddrT> static constexpr Aligned isAligned() { | ||
| static_assert(IsAddressType<AddrT>::Value); | ||
| return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES | ||
| : Aligned::NO; | ||
| } | ||
|
|
||
| // Loads a value of the current `type` from `src`. | ||
| // This function is responsible for extracting Temporality and Alignment from | ||
| // the Address type. | ||
| template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) { | ||
| static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ); | ||
| constexpr auto AS = isAligned<type, SrcAddrT>(); | ||
| constexpr auto TS = SrcAddrT::TEMPORALITY; | ||
| return Backend::template load<type, TS, AS>(as<const type>(src)); | ||
| } | ||
|
|
||
| // Stores a value of the current `type` to `dst`. | ||
| // This function is responsible for extracting Temporality and Alignment from | ||
| // the Address type. | ||
| template <typename DstAddrT> | ||
| static inline void nativeStore(type value, DstAddrT dst) { | ||
| static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE); | ||
| constexpr auto AS = isAligned<type, DstAddrT>(); | ||
| constexpr auto TS = DstAddrT::TEMPORALITY; | ||
| return Backend::template store<type, TS, AS>(as<type>(dst), value); | ||
| } | ||
|
|
||
| // A well aligned POD structure to store Size bytes. | ||
| // This is used to implement the move operations. | ||
| struct Value { | ||
| alignas(alignof(type)) ubyte payload[Size]; | ||
| }; | ||
|
|
||
| public: | ||
| template <typename DstAddrT, typename SrcAddrT> | ||
| static inline void copy(DstAddrT dst, SrcAddrT src) { | ||
| static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE); | ||
| static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ); | ||
| if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE && | ||
| DstAddrT::TEMPORALITY == Temporality::TEMPORAL && | ||
| SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) { | ||
| // delegate optimized copy to compiler. | ||
| __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size); | ||
| return; | ||
| } | ||
| nativeStore(nativeLoad(src), dst); | ||
| if constexpr (NEXT_SIZE > 0) | ||
| NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src)); | ||
| } | ||
|
|
||
| template <typename DstAddrT, typename SrcAddrT> | ||
| static inline void move(DstAddrT dst, SrcAddrT src) { | ||
| const auto payload = nativeLoad(src); | ||
| if constexpr (NEXT_SIZE > 0) | ||
| NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src)); | ||
| nativeStore(payload, dst); | ||
| } | ||
|
|
||
| template <typename DstAddrT> | ||
| static inline void set(DstAddrT dst, ubyte value) { | ||
| if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE && | ||
| DstAddrT::TEMPORALITY == Temporality::TEMPORAL) { | ||
| // delegate optimized set to compiler. | ||
| __builtin_memset_inline(dst.ptr(), value, Size); | ||
| return; | ||
| } | ||
| nativeStore(Backend::template splat<type>(value), dst); | ||
| if constexpr (NEXT_SIZE > 0) | ||
| NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value); | ||
| } | ||
|
|
||
| template <typename SrcAddrT1, typename SrcAddrT2> | ||
| static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) { | ||
| const uint64_t current = | ||
| Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2)); | ||
| if constexpr (NEXT_SIZE > 0) { | ||
| // In the case where we cannot handle Size with single operation (e.g. | ||
| // Size == 3) we can either return early if current is non zero or | ||
| // aggregate all the operations through the bitwise or operator. | ||
| // We chose the later to reduce branching. | ||
| return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1), | ||
| offsetAddr<TYPE_SIZE>(src2))); | ||
| } else { | ||
| return current; | ||
| } | ||
| } | ||
|
|
||
| template <typename SrcAddrT1, typename SrcAddrT2> | ||
| static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) { | ||
| const auto a = nativeLoad(src1); | ||
| const auto b = nativeLoad(src2); | ||
| // If we cannot handle Size as a single operation we have two choices: | ||
| // - Either use Backend's threeWayCmp directly and return it is non | ||
| // zero. | ||
| // | ||
| // if (int32_t res = Backend::template threeWayCmp<type>(a, b)) | ||
| // return res; | ||
| // | ||
| // - Or use Backend's notEquals first and use threeWayCmp only if | ||
| // different, the assumption here is that notEquals is faster than | ||
| // threeWayCmp and that we can save cycles when the Size needs to be | ||
| // decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1) | ||
| // | ||
| // if (Backend::template notEquals<type>(a, b)) | ||
| // return Backend::template threeWayCmp<type>(a, b); | ||
| // | ||
| // We chose the former to reduce code bloat and branching. | ||
| if (int32_t res = Backend::template threeWayCmp<type>(a, b)) | ||
| return res; | ||
| if constexpr (NEXT_SIZE > 0) | ||
| return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1), | ||
| offsetAddr<TYPE_SIZE>(src2)); | ||
| return 0; | ||
| } | ||
|
|
||
| template <typename SrcAddrT> static Value load(SrcAddrT src) { | ||
| Value output; | ||
| copy(DstAddr<alignof(type)>(output.payload), src); | ||
| return output; | ||
| } | ||
|
|
||
| template <typename DstAddrT> static void store(DstAddrT dst, Value value) { | ||
| copy(dst, SrcAddr<alignof(type)>(value.payload)); | ||
| } | ||
| }; | ||
|
|
||
| } // namespace __llvm_libc | ||
|
|
||
| #endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,197 @@ | ||
| //===-- Unittests for backends --------------------------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/__support/CPP/Array.h" | ||
| #include "src/__support/CPP/ArrayRef.h" | ||
| #include "src/__support/CPP/Bit.h" | ||
| #include "src/__support/architectures.h" | ||
| #include "src/string/memory_utils/backends.h" | ||
| #include "utils/UnitTest/Test.h" | ||
| #include <string.h> | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| template <size_t Size> using Buffer = cpp::Array<char, Size>; | ||
|
|
||
// Returns the next character of a deterministic pseudo random sequence (the
// generator state lives in a function-local static).
// Implementation of C++ minstd_rand seeded with 123456789.
// https://en.cppreference.com/w/cpp/numeric/random
// "Minimum standard", recommended by Park, Miller, and Stockmeyer in 1993.
static char GetRandomChar() {
  static constexpr uint64_t a = 48271;
  static constexpr uint64_t c = 0;
  static constexpr uint64_t m = 2147483647;
  static uint64_t seed = 123456789;
  seed = (a * seed + c) % m;
  // Keep only the low byte; the cast is explicit to avoid an implicit
  // narrowing conversion from uint64_t to char.
  return static_cast<char>(seed);
}
|
|
||
| static void Randomize(cpp::MutableArrayRef<char> buffer) { | ||
| for (auto ¤t : buffer) | ||
| current = GetRandomChar(); | ||
| } | ||
|
|
||
| template <size_t Size> static Buffer<Size> GetRandomBuffer() { | ||
| Buffer<Size> buffer; | ||
| Randomize(buffer); | ||
| return buffer; | ||
| } | ||
|
|
||
| template <typename Backend, size_t Size> struct Conf { | ||
| static_assert(Backend::IS_BACKEND_TYPE); | ||
| using BufferT = Buffer<Size>; | ||
| using T = typename Backend::template getNextType<Size>; | ||
| static_assert(sizeof(T) == Size); | ||
| static constexpr size_t SIZE = Size; | ||
|
|
||
| static BufferT splat(ubyte value) { | ||
| return bit_cast<BufferT>(Backend::template splat<T>(value)); | ||
| } | ||
|
|
||
| static uint64_t notEquals(const BufferT &v1, const BufferT &v2) { | ||
| return Backend::template notEquals<T>(bit_cast<T>(v1), bit_cast<T>(v2)); | ||
| } | ||
|
|
||
| static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) { | ||
| return Backend::template threeWayCmp<T>(bit_cast<T>(v1), bit_cast<T>(v2)); | ||
| } | ||
| }; | ||
|
|
||
| using FunctionTypes = testing::TypeList< // | ||
| #if defined(LLVM_LIBC_ARCH_X86) // | ||
| Conf<X86Backend, 1>, // | ||
| Conf<X86Backend, 2>, // | ||
| Conf<X86Backend, 4>, // | ||
| Conf<X86Backend, 8>, // | ||
| #if HAS_M128 | ||
| Conf<X86Backend, 16>, // | ||
| #endif | ||
| #if HAS_M256 | ||
| Conf<X86Backend, 32>, // | ||
| #endif | ||
| #if HAS_M512 | ||
| Conf<X86Backend, 64>, // | ||
| #endif | ||
| #endif // defined(LLVM_LIBC_ARCH_X86) | ||
| Conf<Scalar64BitBackend, 1>, // | ||
| Conf<Scalar64BitBackend, 2>, // | ||
| Conf<Scalar64BitBackend, 4>, // | ||
| Conf<Scalar64BitBackend, 8> // | ||
| >; | ||
|
|
||
| TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) { | ||
| for (auto value : cpp::Array<uint8_t, 3>{0u, 1u, 255u}) { | ||
| alignas(64) const auto stored = ParamType::splat(bit_cast<ubyte>(value)); | ||
| for (size_t i = 0; i < ParamType::SIZE; ++i) | ||
| EXPECT_EQ(bit_cast<uint8_t>(stored[i]), value); | ||
| } | ||
| } | ||
|
|
||
| TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) { | ||
| alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>(); | ||
| EXPECT_EQ(ParamType::notEquals(a, a), 0UL); | ||
| for (size_t i = 0; i < a.size(); ++i) { | ||
| alignas(64) auto b = a; | ||
| ++b[i]; | ||
| EXPECT_NE(ParamType::notEquals(a, b), 0UL); | ||
| EXPECT_NE(ParamType::notEquals(b, a), 0UL); | ||
| } | ||
| } | ||
|
|
||
| TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) { | ||
| alignas(64) const auto a = GetRandomBuffer<ParamType::SIZE>(); | ||
| EXPECT_EQ(ParamType::threeWayCmp(a, a), 0); | ||
| for (size_t i = 0; i < a.size(); ++i) { | ||
| alignas(64) auto b = a; | ||
| ++b[i]; | ||
| const auto cmp = memcmp(&a, &b, sizeof(a)); | ||
| ASSERT_NE(cmp, 0); | ||
| if (cmp > 0) { | ||
| EXPECT_GT(ParamType::threeWayCmp(a, b), 0); | ||
| EXPECT_LT(ParamType::threeWayCmp(b, a), 0); | ||
| } else { | ||
| EXPECT_LT(ParamType::threeWayCmp(a, b), 0); | ||
| EXPECT_GT(ParamType::threeWayCmp(b, a), 0); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| template <typename Backend, size_t Size, Temporality TS, Aligned AS> | ||
| struct LoadStoreConf { | ||
| static_assert(Backend::IS_BACKEND_TYPE); | ||
| using BufferT = Buffer<Size>; | ||
| using T = typename Backend::template getNextType<Size>; | ||
| static_assert(sizeof(T) == Size); | ||
| static constexpr size_t SIZE = Size; | ||
|
|
||
| static BufferT load(const BufferT &ref) { | ||
| const auto *ptr = bit_cast<const T *>(ref.data()); | ||
| const T value = Backend::template load<T, TS, AS>(ptr); | ||
| return bit_cast<BufferT>(value); | ||
| } | ||
|
|
||
| static void store(BufferT &ref, const BufferT value) { | ||
| auto *ptr = bit_cast<T *>(ref.data()); | ||
| Backend::template store<T, TS, AS>(ptr, bit_cast<T>(value)); | ||
| } | ||
| }; | ||
|
|
||
| using LoadStoreTypes = testing::TypeList< // | ||
| #if defined(LLVM_LIBC_ARCH_X86) // | ||
| LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>, // | ||
| #if HAS_M128 | ||
| LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, // | ||
| #endif | ||
| #if HAS_M256 | ||
| LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, // | ||
| #endif | ||
| #if HAS_M512 | ||
| LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>, // | ||
| LoadStoreConf<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, // | ||
| #endif | ||
| #endif // defined(LLVM_LIBC_ARCH_X86) | ||
| LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, | ||
| Aligned::YES>, // | ||
| LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, | ||
| Aligned::YES>, // | ||
| LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, | ||
| Aligned::YES>, // | ||
| LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>, // | ||
| LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES> // | ||
| >; | ||
|
|
||
| TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) { | ||
| alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>(); | ||
| const auto loaded = ParamType::load(expected); | ||
| for (size_t i = 0; i < ParamType::SIZE; ++i) | ||
| EXPECT_EQ(loaded[i], expected[i]); | ||
| } | ||
|
|
||
| TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) { | ||
| alignas(64) const auto expected = GetRandomBuffer<ParamType::SIZE>(); | ||
| alignas(64) typename ParamType::BufferT stored; | ||
| ParamType::store(stored, expected); | ||
| for (size_t i = 0; i < ParamType::SIZE; ++i) | ||
| EXPECT_EQ(stored[i], expected[i]); | ||
| } | ||
|
|
||
| } // namespace __llvm_libc |