60 changes: 60 additions & 0 deletions libc/src/string/memory_utils/backends.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
//===-- Elementary operations to compose memory primitives ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the concept of a Backend.
// It constitutes the lowest level of the framework and is akin to instruction
// selection. It defines how to implement aligned/unaligned,
// temporal/non-temporal native loads and stores for a particular architecture
// as well as efficient ways to fill and compare types.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H

#include "src/string/memory_utils/address.h" // Temporality, Aligned
#include "src/string/memory_utils/sized_op.h" // SizedOp
#include <stddef.h> // size_t
#include <stdint.h> // uint##_t

namespace __llvm_libc {

// Backends must implement the following interface.
// This struct is declaration-only: it documents the contract that concrete
// backends (scalar, x86, aarch64) fulfill and is never instantiated.
struct NoBackend {
  // Tag checked (via static_assert) by the rest of the framework to make sure
  // a type is indeed a backend.
  static constexpr bool IS_BACKEND_TYPE = true;

  // Loads a T from `src` honoring Temporality and Alignment.
  template <typename T, Temporality, Aligned> static T load(const T *src);

  // Stores a T to `dst` honoring Temporality and Alignment.
  template <typename T, Temporality, Aligned>
  static void store(T *dst, T value);

  // Returns a T filled with `value` bytes.
  template <typename T> static T splat(ubyte value);

  // Returns zero iff v1 == v2.
  template <typename T> static uint64_t notEquals(T v1, T v2);

  // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
  // number otherwise.
  template <typename T> static int32_t threeWayCmp(T v1, T v2);

  // Returns the type to use to consume Size bytes.
  // If no type handles Size bytes at once, backends alias the widest native
  // type whose size does not exceed Size (SizedOp then decomposes the
  // remainder recursively). Here it aliases `void` since NoBackend provides
  // no types.
  template <size_t Size> using getNextType = void;
};

} // namespace __llvm_libc

// We inline all backend implementations here to simplify the build system.
// Each file need to be guarded with the appropriate LLVM_LIBC_ARCH_XXX ifdef.
#include "src/string/memory_utils/backend_aarch64.h"
#include "src/string/memory_utils/backend_scalar.h"
#include "src/string/memory_utils/backend_x86.h"

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKENDS_H
177 changes: 177 additions & 0 deletions libc/src/string/memory_utils/sized_op.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
//===-- Sized Operations --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the SizedOp struct that serves as the middle end of the
// framework. It implements sized memory operations by breaking them down into
// simpler types whose availability is described in the Backend. It also
// provides a way to load and store sized chunks of memory (necessary for the
// move operation). SizedOp are the building blocks of higher order algorithms
// like HeadTail, Align or Loop.
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H

#include <stddef.h> // size_t

#ifndef LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE \
__has_builtin(__builtin_memcpy_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE

#ifndef LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE
#define LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE \
__has_builtin(__builtin_memset_inline)
#endif // LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE

namespace __llvm_libc {

// Implements a fixed-size memory operation of exactly `Size` bytes on top of
// `Backend`. The operation is decomposed greedily: the widest native type the
// backend offers for at most `Size` bytes handles the head, and the remaining
// NEXT_SIZE bytes are processed recursively through NextBlock.
template <typename Backend, size_t Size> struct SizedOp {
  static constexpr size_t SIZE = Size;

private:
  static_assert(Backend::IS_BACKEND_TYPE);
  static_assert(SIZE > 0);
  // The widest native type the backend provides for up to `Size` bytes.
  using type = typename Backend::template getNextType<Size>;
  static constexpr size_t TYPE_SIZE = sizeof(type);
  static_assert(SIZE >= TYPE_SIZE);
  // Bytes left over once `type` has consumed the head of the block.
  static constexpr size_t NEXT_SIZE = Size - TYPE_SIZE;
  using NextBlock = SizedOp<Backend, NEXT_SIZE>;

  // Returns whether we can use an aligned operation.
  // This is possible because the address type carries known compile-time
  // alignment information.
  template <typename T, typename AddrT> static constexpr Aligned isAligned() {
    static_assert(IsAddressType<AddrT>::Value);
    return AddrT::ALIGNMENT > 1 && AddrT::ALIGNMENT >= sizeof(T) ? Aligned::YES
                                                                 : Aligned::NO;
  }

  // Loads a value of the current `type` from `src`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename SrcAddrT> static inline auto nativeLoad(SrcAddrT src) {
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    constexpr auto AS = isAligned<type, SrcAddrT>();
    constexpr auto TS = SrcAddrT::TEMPORALITY;
    return Backend::template load<type, TS, AS>(as<const type>(src));
  }

  // Stores a value of the current `type` to `dst`.
  // This function is responsible for extracting Temporality and Alignment from
  // the Address type.
  template <typename DstAddrT>
  static inline void nativeStore(type value, DstAddrT dst) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    constexpr auto AS = isAligned<type, DstAddrT>();
    constexpr auto TS = DstAddrT::TEMPORALITY;
    return Backend::template store<type, TS, AS>(as<type>(dst), value);
  }

  // A well aligned POD structure to store Size bytes.
  // This is used to implement the move operations.
  struct Value {
    alignas(alignof(type)) ubyte payload[Size];
  };

public:
  // Copies Size bytes from `src` to `dst`. Both addresses must carry
  // read/write permission in their type; regions must not overlap (use `move`
  // for that).
  template <typename DstAddrT, typename SrcAddrT>
  static inline void copy(DstAddrT dst, SrcAddrT src) {
    static_assert(IsAddressType<DstAddrT>::Value && DstAddrT::IS_WRITE);
    static_assert(IsAddressType<SrcAddrT>::Value && SrcAddrT::IS_READ);
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMCPY_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL &&
                  SrcAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized copy to compiler.
      __builtin_memcpy_inline(dst.ptr(), src.ptr(), Size);
      return;
    }
    nativeStore(nativeLoad(src), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::copy(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
  }

  // Copies Size bytes from `src` to `dst`, correct even when the two regions
  // overlap: each level loads its chunk on the way down the recursion and
  // stores it only on the way back up, so every load happens before any store.
  template <typename DstAddrT, typename SrcAddrT>
  static inline void move(DstAddrT dst, SrcAddrT src) {
    const auto payload = nativeLoad(src);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::move(offsetAddr<TYPE_SIZE>(dst), offsetAddr<TYPE_SIZE>(src));
    nativeStore(payload, dst);
  }

  // Fills Size bytes at `dst` with the byte `value`.
  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value) {
    if constexpr (LLVM_LIBC_USE_BUILTIN_MEMSET_INLINE &&
                  DstAddrT::TEMPORALITY == Temporality::TEMPORAL) {
      // delegate optimized set to compiler.
      __builtin_memset_inline(dst.ptr(), value, Size);
      return;
    }
    nativeStore(Backend::template splat<type>(value), dst);
    if constexpr (NEXT_SIZE > 0)
      NextBlock::set(offsetAddr<TYPE_SIZE>(dst), value);
  }

  // Returns zero iff the Size bytes at `src1` and `src2` compare equal.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline uint64_t isDifferent(SrcAddrT1 src1, SrcAddrT2 src2) {
    const uint64_t current =
        Backend::template notEquals<type>(nativeLoad(src1), nativeLoad(src2));
    if constexpr (NEXT_SIZE > 0) {
      // In the case where we cannot handle Size with a single operation (e.g.
      // Size == 3) we can either return early if current is non zero or
      // aggregate all the operations through the bitwise or operator.
      // We chose the latter to reduce branching.
      return current | (NextBlock::isDifferent(offsetAddr<TYPE_SIZE>(src1),
                                               offsetAddr<TYPE_SIZE>(src2)));
    } else {
      return current;
    }
  }

  // memcmp-style comparison of the Size bytes at `src1` and `src2`:
  // zero iff equal, negative if src1 < src2, positive otherwise.
  template <typename SrcAddrT1, typename SrcAddrT2>
  static inline int32_t threeWayCmp(SrcAddrT1 src1, SrcAddrT2 src2) {
    const auto a = nativeLoad(src1);
    const auto b = nativeLoad(src2);
    // If we cannot handle Size as a single operation we have two choices:
    // - Either use Backend's threeWayCmp directly and return it if non
    //   zero.
    //
    //   if (int32_t res = Backend::template threeWayCmp<type>(a, b))
    //     return res;
    //
    // - Or use Backend's notEquals first and use threeWayCmp only if
    //   different, the assumption here is that notEquals is faster than
    //   threeWayCmp and that we can save cycles when the Size needs to be
    //   decomposed in many sizes (e.g. Size == 7 => 4 + 2 + 1)
    //
    //   if (Backend::template notEquals<type>(a, b))
    //     return Backend::template threeWayCmp<type>(a, b);
    //
    // We chose the former to reduce code bloat and branching.
    if (int32_t res = Backend::template threeWayCmp<type>(a, b))
      return res;
    if constexpr (NEXT_SIZE > 0)
      return NextBlock::threeWayCmp(offsetAddr<TYPE_SIZE>(src1),
                                    offsetAddr<TYPE_SIZE>(src2));
    return 0;
  }

  // Loads Size bytes from `src` into a well-aligned Value (used by `move`
  // implementations at higher levels of the framework).
  template <typename SrcAddrT> static Value load(SrcAddrT src) {
    Value output;
    copy(DstAddr<alignof(type)>(output.payload), src);
    return output;
  }

  // Stores a previously loaded Value back to `dst`.
  template <typename DstAddrT> static void store(DstAddrT dst, Value value) {
    copy(dst, SrcAddr<alignof(type)>(value.payload));
  }
};

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_SIZED_OP_H
1 change: 1 addition & 0 deletions libc/test/src/string/memory_utils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ add_libc_unittest(
libc_string_unittests
SRCS
address_test.cpp
backend_test.cpp
elements_test.cpp
memory_access_test.cpp
utils_test.cpp
Expand Down
197 changes: 197 additions & 0 deletions libc/test/src/string/memory_utils/backend_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
//===-- Unittests for backends --------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/CPP/Array.h"
#include "src/__support/CPP/ArrayRef.h"
#include "src/__support/CPP/Bit.h"
#include "src/__support/architectures.h"
#include "src/string/memory_utils/backends.h"
#include "utils/UnitTest/Test.h"
#include <string.h>

namespace __llvm_libc {

// Fixed-size byte buffer used as raw storage throughout these tests.
template <size_t Size> using Buffer = cpp::Array<char, Size>;

// Returns the next pseudo random character from a process-wide sequence.
// Implements the C++ minstd_rand linear congruential generator ("minimum
// standard", recommended by Park, Miller, and Stockmeyer in 1993) seeded with
// 123456789; see https://en.cppreference.com/w/cpp/numeric/random
static char GetRandomChar() {
  static constexpr uint64_t MULTIPLIER = 48271;
  static constexpr uint64_t INCREMENT = 0;
  static constexpr uint64_t MODULUS = 2147483647;
  static uint64_t state = 123456789;
  state = (MULTIPLIER * state + INCREMENT) % MODULUS;
  return state;
}

// Overwrites every byte of `buffer` with a fresh pseudo random character.
static void Randomize(cpp::MutableArrayRef<char> buffer) {
  for (char &byte : buffer)
    byte = GetRandomChar();
}

// Builds and returns a Size-byte buffer filled with pseudo random characters.
template <size_t Size> static Buffer<Size> GetRandomBuffer() {
  Buffer<Size> result;
  Randomize(result);
  return result;
}

// Test adapter mapping a (Backend, Size) pair onto plain byte buffers.
// It wraps the backend's splat/notEquals/threeWayCmp so the tests below can
// operate on Buffer<Size> values instead of the backend's native type T.
template <typename Backend, size_t Size> struct Conf {
  static_assert(Backend::IS_BACKEND_TYPE);
  using BufferT = Buffer<Size>;
  // The native type the backend selects to handle exactly Size bytes.
  using T = typename Backend::template getNextType<Size>;
  static_assert(sizeof(T) == Size);
  static constexpr size_t SIZE = Size;

  // Returns a buffer whose every byte is `value`.
  static BufferT splat(ubyte value) {
    return bit_cast<BufferT>(Backend::template splat<T>(value));
  }

  // Returns zero iff v1 == v2.
  static uint64_t notEquals(const BufferT &v1, const BufferT &v2) {
    return Backend::template notEquals<T>(bit_cast<T>(v1), bit_cast<T>(v2));
  }

  // Returns zero iff v1 == v2, a negative number if v1 < v2 and a positive
  // number otherwise.
  static int32_t threeWayCmp(const BufferT &v1, const BufferT &v2) {
    return Backend::template threeWayCmp<T>(bit_cast<T>(v1), bit_cast<T>(v2));
  }
};

// The (backend, size) configurations exercised by the value tests below.
// X86 entries are compiled in only on x86, and the wide 16/32/64 byte sizes
// only when the corresponding vector width is available (HAS_M128/256/512).
// The scalar backend is always tested.
using FunctionTypes = testing::TypeList< //
#if defined(LLVM_LIBC_ARCH_X86)          //
    Conf<X86Backend, 1>,                 //
    Conf<X86Backend, 2>,                 //
    Conf<X86Backend, 4>,                 //
    Conf<X86Backend, 8>,                 //
#if HAS_M128
    Conf<X86Backend, 16>, //
#endif
#if HAS_M256
    Conf<X86Backend, 32>, //
#endif
#if HAS_M512
    Conf<X86Backend, 64>, //
#endif
#endif // defined(LLVM_LIBC_ARCH_X86)
    Conf<Scalar64BitBackend, 1>, //
    Conf<Scalar64BitBackend, 2>, //
    Conf<Scalar64BitBackend, 4>, //
    Conf<Scalar64BitBackend, 8>  //
    >;

// splat must replicate the requested byte into every byte of the output.
TYPED_TEST(LlvmLibcMemoryBackend, splat, FunctionTypes) {
  const cpp::Array<uint8_t, 3> patterns{0u, 1u, 255u};
  for (const uint8_t pattern : patterns) {
    alignas(64) const auto filled = ParamType::splat(bit_cast<ubyte>(pattern));
    size_t index = 0;
    while (index < ParamType::SIZE) {
      EXPECT_EQ(bit_cast<uint8_t>(filled[index]), pattern);
      ++index;
    }
  }
}

// notEquals must return zero for identical buffers and non-zero whenever any
// single byte differs, regardless of argument order.
TYPED_TEST(LlvmLibcMemoryBackend, notEquals, FunctionTypes) {
  alignas(64) const auto reference = GetRandomBuffer<ParamType::SIZE>();
  EXPECT_EQ(ParamType::notEquals(reference, reference), 0UL);
  for (size_t pos = 0; pos < reference.size(); ++pos) {
    // Corrupt exactly one byte and check both argument orders.
    alignas(64) auto altered = reference;
    ++altered[pos];
    EXPECT_NE(ParamType::notEquals(altered, reference), 0UL);
    EXPECT_NE(ParamType::notEquals(reference, altered), 0UL);
  }
}

// threeWayCmp must agree in sign with memcmp for every single-byte
// difference, and return zero for identical buffers.
TYPED_TEST(LlvmLibcMemoryBackend, threeWayCmp, FunctionTypes) {
  alignas(64) const auto reference = GetRandomBuffer<ParamType::SIZE>();
  EXPECT_EQ(ParamType::threeWayCmp(reference, reference), 0);
  for (size_t pos = 0; pos < reference.size(); ++pos) {
    alignas(64) auto altered = reference;
    ++altered[pos];
    // memcmp provides the expected ordering for the pair.
    const auto expected_sign = memcmp(&reference, &altered, sizeof(reference));
    ASSERT_NE(expected_sign, 0);
    if (expected_sign < 0) {
      EXPECT_LT(ParamType::threeWayCmp(reference, altered), 0);
      EXPECT_GT(ParamType::threeWayCmp(altered, reference), 0);
    } else {
      EXPECT_GT(ParamType::threeWayCmp(reference, altered), 0);
      EXPECT_LT(ParamType::threeWayCmp(altered, reference), 0);
    }
  }
}

// Test adapter exercising a backend's load/store for one combination of
// size, temporality and alignment, again through plain byte buffers.
template <typename Backend, size_t Size, Temporality TS, Aligned AS>
struct LoadStoreConf {
  static_assert(Backend::IS_BACKEND_TYPE);
  using BufferT = Buffer<Size>;
  // The native type the backend selects to handle exactly Size bytes.
  using T = typename Backend::template getNextType<Size>;
  static_assert(sizeof(T) == Size);
  static constexpr size_t SIZE = Size;

  // Reads Size bytes from `ref` through the backend's native load.
  static BufferT load(const BufferT &ref) {
    const auto *ptr = bit_cast<const T *>(ref.data());
    const T value = Backend::template load<T, TS, AS>(ptr);
    return bit_cast<BufferT>(value);
  }

  // Writes the bytes of `value` into `ref` through the backend's native
  // store.
  static void store(BufferT &ref, const BufferT value) {
    auto *ptr = bit_cast<T *>(ref.data());
    Backend::template store<T, TS, AS>(ptr, bit_cast<T>(value));
  }
};

// The (backend, size, temporality, alignment) configurations exercised by the
// load/store tests below. Non-temporal accesses require alignment, so the
// NON_TEMPORAL entries only appear with Aligned::YES and only for the vector
// sizes that support them. The scalar backend is always tested.
using LoadStoreTypes = testing::TypeList<                                  //
#if defined(LLVM_LIBC_ARCH_X86)                                            //
    LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 1, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 2, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 4, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 8, Temporality::TEMPORAL, Aligned::YES>,     //
#if HAS_M128
    LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 16, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 16, Temporality::NON_TEMPORAL, Aligned::YES>, //
#endif
#if HAS_M256
    LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 32, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 32, Temporality::NON_TEMPORAL, Aligned::YES>, //
#endif
#if HAS_M512
    LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::NO>,      //
    LoadStoreConf<X86Backend, 64, Temporality::TEMPORAL, Aligned::YES>,     //
    LoadStoreConf<X86Backend, 64, Temporality::NON_TEMPORAL, Aligned::YES>, //
#endif
#endif // defined(LLVM_LIBC_ARCH_X86)
    LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL, Aligned::NO>, //
    LoadStoreConf<Scalar64BitBackend, 1, Temporality::TEMPORAL,
                  Aligned::YES>, //
    LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL, Aligned::NO>, //
    LoadStoreConf<Scalar64BitBackend, 2, Temporality::TEMPORAL,
                  Aligned::YES>, //
    LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL, Aligned::NO>, //
    LoadStoreConf<Scalar64BitBackend, 4, Temporality::TEMPORAL,
                  Aligned::YES>, //
    LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::NO>, //
    LoadStoreConf<Scalar64BitBackend, 8, Temporality::TEMPORAL, Aligned::YES> //
    >;

// load must observe exactly the bytes present in memory.
TYPED_TEST(LlvmLibcMemoryBackend, load, LoadStoreTypes) {
  alignas(64) const auto source = GetRandomBuffer<ParamType::SIZE>();
  const auto observed = ParamType::load(source);
  size_t index = 0;
  while (index < ParamType::SIZE) {
    EXPECT_EQ(observed[index], source[index]);
    ++index;
  }
}

// store must write exactly the provided bytes to memory.
TYPED_TEST(LlvmLibcMemoryBackend, store, LoadStoreTypes) {
  alignas(64) const auto source = GetRandomBuffer<ParamType::SIZE>();
  alignas(64) typename ParamType::BufferT destination;
  ParamType::store(destination, source);
  size_t index = 0;
  while (index < ParamType::SIZE) {
    EXPECT_EQ(destination[index], source[index]);
    ++index;
  }
}

} // namespace __llvm_libc