[libc][mem*] Introduce Sized/Backends for new mem framework
This patch is a subpart of D125768 intended to make the review easier.

The `SizedOp` struct represents operations to be performed on a certain number of bytes.
It is responsible for breaking them down into platform types and forwarding them to the `Backend`.

The `Backend` struct represents a lower level abstraction that works only on types (`uint8_t`, `__m128i`, ...).
It is similar to instruction selection.
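
For context, a rough sketch of how the two layers are meant to compose (this is not code from the patch; the `copy` name, the address handling and the recursion are assumptions, while `getNextType`, `load`/`store`, `Temporality` and `Aligned` are the framework names that appear below):

    // Sketch only: consume Size bytes with the widest type the Backend offers,
    // then recurse on whatever is left.
    template <typename Backend, size_t Size> struct SizedOp {
      static void copy(char *dst, const char *src) {
        using T = typename Backend::template getNextType<Size>;
        const T value = Backend::template load<T, Temporality::TEMPORAL, Aligned::NO>(
            reinterpret_cast<const T *>(src));
        Backend::template store<T, Temporality::TEMPORAL, Aligned::NO>(
            reinterpret_cast<T *>(dst), value);
        if constexpr (Size > sizeof(T))
          SizedOp<Backend, Size - sizeof(T)>::copy(dst + sizeof(T), src + sizeof(T));
      }
    };

With this split, supporting a new architecture mostly amounts to providing a `Backend`; the size decomposition logic is shared.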

Differential Revision: https://reviews.llvm.org/D126768
gchatelet committed Jun 22, 2022
1 parent fc655a9 commit 67fe3bd
Showing 7 changed files with 831 additions and 0 deletions.
71 changes: 71 additions & 0 deletions libc/src/string/memory_utils/backend_aarch64.h
@@ -0,0 +1,71 @@
//===-- Elementary operations for aarch64 ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H

#if defined(LLVM_LIBC_ARCH_AARCH64)
#include "src/string/memory_utils/backend_scalar.h"

#ifdef __ARM_NEON
#include <arm_neon.h>
#endif

namespace __llvm_libc {

struct Aarch64Backend : public Scalar64BitBackend {
static constexpr bool IS_BACKEND_TYPE = true;

template <typename T, Temporality TS, Aligned AS,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline T load(const T *src) {
return Scalar64BitBackend::template load<T, TS, AS>(src);
}
};

// Implementation of the SizedOp abstraction for the set operation.
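// Note: "dc zva" zeroes a whole 64-byte block, so set() below can only write
// zeros; presumably callers dispatch to this path only when value == 0.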
struct Zva64 {
static constexpr size_t SIZE = 64;

template <typename DstAddrT>
static inline void set(DstAddrT dst, ubyte value) {
#if __SIZEOF_POINTER__ == 4
asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
#else
asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
#endif
}
};

inline static bool hasZva() {
uint64_t zva_val;
asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
  // DC ZVA is permitted if DZP (bit [4]) is zero.
  // BS (bits [3:0]) is the log2 of the block size in words.
  // So the check below verifies that the instruction is permitted and that the
  // block size is 16 words (i.e. 64 bytes).
return (zva_val & 0b11111) == 0b00100;
}
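
// A hypothetical fast path (not part of this patch) could combine the two
// helpers above roughly as follows:
//
//   if (value == 0 && size % Zva64::SIZE == 0 && hasZva())
//     for (size_t offset = 0; offset < size; offset += Zva64::SIZE)
//       Zva64::set(dst + offset, value);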

namespace aarch64 {
using _1 = SizedOp<Aarch64Backend, 1>;
using _2 = SizedOp<Aarch64Backend, 2>;
using _3 = SizedOp<Aarch64Backend, 3>;
using _4 = SizedOp<Aarch64Backend, 4>;
using _8 = SizedOp<Aarch64Backend, 8>;
using _16 = SizedOp<Aarch64Backend, 16>;
using _32 = SizedOp<Aarch64Backend, 32>;
using _64 = SizedOp<Aarch64Backend, 64>;
using _128 = SizedOp<Aarch64Backend, 128>;
} // namespace aarch64

} // namespace __llvm_libc

#endif // LLVM_LIBC_ARCH_AARCH64

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
104 changes: 104 additions & 0 deletions libc/src/string/memory_utils/backend_scalar.h
@@ -0,0 +1,104 @@
//===-- Elementary operations for native scalar types ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H

#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
#include "src/__support/endian.h"

namespace __llvm_libc {

struct Scalar64BitBackend {
static constexpr bool IS_BACKEND_TYPE = true;

template <typename T>
static constexpr bool IsScalarType =
cpp::IsSameV<T, uint8_t> || cpp::IsSameV<T, uint16_t> ||
cpp::IsSameV<T, uint32_t> || cpp::IsSameV<T, uint64_t>;

template <typename T, Temporality TS, Aligned AS>
static inline T load(const T *src) {
static_assert(IsScalarType<T>);
static_assert(TS == Temporality::TEMPORAL,
"Scalar load does not support non-temporal access");
return *src;
}

template <typename T, Temporality TS, Aligned AS>
static inline void store(T *dst, T value) {
static_assert(IsScalarType<T>);
static_assert(TS == Temporality::TEMPORAL,
"Scalar store does not support non-temporal access");
*dst = value;
}

template <typename T> static inline T splat(ubyte value) {
static_assert(IsScalarType<T>);
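    // For uint32_t, for instance, T(~0ULL) / T(0xFF) == 0x01010101, so the
    // multiplication replicates `value` into every byte of the word.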
return (T(~0ULL) / T(0xFF)) * T(value);
}

template <typename T> static inline uint64_t notEquals(T v1, T v2) {
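    // Any nonzero return means v1 != v2; the XOR keeps the exact differing bits.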
static_assert(IsScalarType<T>);
return v1 ^ v2;
}

template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
DeferredStaticAssert("not implemented");
}

// Returns the type to use to consume Size bytes.
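  // e.g. getNextType<3> is uint16_t and getNextType<9> is uint64_t.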
template <size_t Size>
using getNextType = cpp::ConditionalType<
Size >= 8, uint64_t,
cpp::ConditionalType<Size >= 4, uint32_t,
cpp::ConditionalType<Size >= 2, uint16_t, uint8_t>>>;
};

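// The specializations below convert to big-endian before comparing so that the
// integer comparison matches memcmp's byte-wise (lexicographic) ordering.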
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint8_t>(uint8_t a, uint8_t b) {
const int16_t la = Endian::to_big_endian(a);
const int16_t lb = Endian::to_big_endian(b);
return la - lb;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint16_t>(uint16_t a,
uint16_t b) {
const int32_t la = Endian::to_big_endian(a);
const int32_t lb = Endian::to_big_endian(b);
return la - lb;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint32_t>(uint32_t a,
uint32_t b) {
const uint32_t la = Endian::to_big_endian(a);
const uint32_t lb = Endian::to_big_endian(b);
return la > lb ? 1 : la < lb ? -1 : 0;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint64_t>(uint64_t a,
uint64_t b) {
const uint64_t la = Endian::to_big_endian(a);
const uint64_t lb = Endian::to_big_endian(b);
return la > lb ? 1 : la < lb ? -1 : 0;
}

namespace scalar {
using _1 = SizedOp<Scalar64BitBackend, 1>;
using _2 = SizedOp<Scalar64BitBackend, 2>;
using _3 = SizedOp<Scalar64BitBackend, 3>;
using _4 = SizedOp<Scalar64BitBackend, 4>;
using _8 = SizedOp<Scalar64BitBackend, 8>;
using _16 = SizedOp<Scalar64BitBackend, 16>;
using _32 = SizedOp<Scalar64BitBackend, 32>;
using _64 = SizedOp<Scalar64BitBackend, 64>;
using _128 = SizedOp<Scalar64BitBackend, 128>;
} // namespace scalar

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
221 changes: 221 additions & 0 deletions libc/src/string/memory_utils/backend_x86.h
@@ -0,0 +1,221 @@
//===-- Elementary operations for x86 -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H

#if defined(LLVM_LIBC_ARCH_X86)
#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
#include "src/string/memory_utils/backend_scalar.h"

#ifdef __SSE2__
#include <immintrin.h>
#endif // __SSE2__

#if defined(__SSE2__)
#define HAS_M128 true
#else
#define HAS_M128 false
#endif

#if defined(__AVX2__)
#define HAS_M256 true
#else
#define HAS_M256 false
#endif

#if defined(__AVX512F__) and defined(__AVX512BW__)
#define HAS_M512 true
#else
#define HAS_M512 false
#endif

namespace __llvm_libc {
struct X86Backend : public Scalar64BitBackend {
static constexpr bool IS_BACKEND_TYPE = true;

// Scalar types use base class implementations.
template <typename T, Temporality TS, Aligned AS,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline T load(const T *src) {
return Scalar64BitBackend::template load<T, TS, AS>(src);
}

// Scalar types use base class implementations.
template <typename T, Temporality TS, Aligned AS,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline void store(T *dst, T value) {
Scalar64BitBackend::template store<T, TS, AS>(dst, value);
}

// Scalar types use base class implementations.
template <typename T,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline uint64_t notEquals(T v1, T v2) {
return Scalar64BitBackend::template notEquals<T>(v1, v2);
}

// Scalar types use base class implementations.
template <typename T,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline T splat(ubyte value) {
return Scalar64BitBackend::template splat<T>(value);
}

// Scalar types use base class implementations.
template <typename T,
cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline int32_t threeWayCmp(T v1, T v2) {
return Scalar64BitBackend::template threeWayCmp<T>(v1, v2);
}

// X86 types are specialized below.
template <
typename T, Temporality TS, Aligned AS,
cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline T load(const T *src);

// X86 types are specialized below.
template <
typename T, Temporality TS, Aligned AS,
cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
static inline void store(T *dst, T value);

// X86 types are specialized below.
template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
bool> = true>
static inline T splat(ubyte value);

// X86 types are specialized below.
template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
bool> = true>
static inline uint64_t notEquals(T v1, T v2);

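  // Vector types: build a byte-difference mask with notEquals, then compare
  // the first differing byte (see char_diff below).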
template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
bool> = true>
static inline int32_t threeWayCmp(T v1, T v2) {
return char_diff(reinterpret_cast<char *>(&v1),
reinterpret_cast<char *>(&v2), notEquals(v1, v2));
}

// Returns the type to use to consume Size bytes.
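  // e.g. with only SSE2 enabled getNextType<32> is __m128i; with AVX2 it is
  // __m256i.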
template <size_t Size>
using getNextType = cpp::ConditionalType<
(HAS_M512 && Size >= 64), __m512i,
cpp::ConditionalType<
(HAS_M256 && Size >= 32), __m256i,
cpp::ConditionalType<(HAS_M128 && Size >= 16), __m128i,
Scalar64BitBackend::getNextType<Size>>>>;

private:
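  // `mask` is the result of notEquals: one bit set per differing byte. Locate
  // the first difference and return the difference of the bytes at that index
  // (0 when the mask is empty, i.e. the values compared equal).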
static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) {
const size_t diff_index = mask == 0 ? 0 : __builtin_ctzll(mask);
const int16_t ca = (unsigned char)a[diff_index];
const int16_t cb = (unsigned char)b[diff_index];
return ca - cb;
}
};

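// Copies `runtime_size` bytes from `src` to `dst` using the x86 "rep movsb"
// string instruction.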
static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
asm volatile("rep movsb"
: "+D"(dst), "+S"(src), "+c"(runtime_size)
:
: "memory");
}

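// Helpers to specialize X86Backend::load / store for a given vector type,
// Temporality and Aligned value by forwarding to the matching intrinsic.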
#define SPECIALIZE_LOAD(T, OS, AS, INTRINSIC)                                  \
  template <> inline T X86Backend::load<T, OS, AS>(const T *src) {             \
    return INTRINSIC(const_cast<T *>(src));                                    \
  }
#define SPECIALIZE_STORE(T, OS, AS, INTRINSIC)                                 \
  template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) {     \
    INTRINSIC(dst, value);                                                     \
  }

#if HAS_M128
SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm_stream_load_si128)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm_stream_si128)
// X86 non-temporal store needs aligned access
template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
return _mm_set1_epi8(__builtin_bit_cast(char, value));
}
template <>
inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) {
using T = char __attribute__((__vector_size__(16)));
return _mm_movemask_epi8(T(a) != T(b));
}
#endif // HAS_M128

#if HAS_M256
SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm256_stream_load_si256)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
_mm256_store_si256)
SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
_mm256_storeu_si256)
SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm256_stream_si256)
// X86 non-temporal store needs aligned access
template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
return _mm256_set1_epi8(__builtin_bit_cast(char, value));
}
template <>
inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) {
using T = char __attribute__((__vector_size__(32)));
return _mm256_movemask_epi8(T(a) != T(b));
}
#endif // HAS_M256

#if HAS_M512
SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm512_stream_load_si512)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
_mm512_store_si512)
SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
_mm512_storeu_si512)
SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
_mm512_stream_si512)
// X86 non-temporal store needs aligned access
template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
}
template <>
inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) {
return _mm512_cmpneq_epi8_mask(a, b);
}
#endif // HAS_M512

namespace x86 {
using _1 = SizedOp<X86Backend, 1>;
using _2 = SizedOp<X86Backend, 2>;
using _3 = SizedOp<X86Backend, 3>;
using _4 = SizedOp<X86Backend, 4>;
using _8 = SizedOp<X86Backend, 8>;
using _16 = SizedOp<X86Backend, 16>;
using _32 = SizedOp<X86Backend, 32>;
using _64 = SizedOp<X86Backend, 64>;
using _128 = SizedOp<X86Backend, 128>;
} // namespace x86

} // namespace __llvm_libc

#endif // defined(LLVM_LIBC_ARCH_X86)

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H