-
Notifications
You must be signed in to change notification settings - Fork 11k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc][mem*] Introduce Sized/Backends for new mem framework
This patch is a subpart of D125768 intended to make the review easier. The `SizedOp` struct represents operations to be performed on a certain number of bytes. It is responsible for breaking them down into platform types, which are forwarded to the `Backend`. The `Backend` struct represents a lower-level abstraction that works only on types (`uint8_t`, `__m128i`, ...). It is similar to instruction selection. Differential Revision: https://reviews.llvm.org/D126768
- Loading branch information
Showing
7 changed files
with
831 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
//===-- Elementary operations for aarch64 ---------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H

// BUGFIX: was `#if !defined(LLVM_LIBC_ARCH_AARCH64)`, which compiled this
// backend only when NOT targeting aarch64. That contradicts both the closing
// `#endif // LLVM_LIBC_ARCH_AARCH64` below and the sibling x86 header, which
// uses `#if defined(LLVM_LIBC_ARCH_X86)`.
#if defined(LLVM_LIBC_ARCH_AARCH64)
#include "src/string/memory_utils/backend_scalar.h"

#ifdef __ARM_NEON
#include <arm_neon.h>
#endif

namespace __llvm_libc {

// Aarch64 backend. Inherits every elementary operation from
// Scalar64BitBackend; scalar loads are explicitly forwarded to the base.
struct Aarch64Backend : public Scalar64BitBackend {
  static constexpr bool IS_BACKEND_TYPE = true;

  // Scalar types use the base class implementation.
  template <typename T, Temporality TS, Aligned AS,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline T load(const T *src) {
    return Scalar64BitBackend::template load<T, TS, AS>(src);
  }
};

// Implementation of the SizedOp abstraction for the set operation.
// NOTE: `dc zva` zeroes an entire cache block; `value` is ignored, so this
// operation is only valid when the caller is setting memory to zero and
// hasZva() returned true.
struct Zva64 {
  static constexpr size_t SIZE = 64;

  template <typename DstAddrT>
  static inline void set(DstAddrT dst, ubyte value) {
#if __SIZEOF_POINTER__ == 4
    // 32-bit pointers must be passed through a W register.
    asm("dc zva, %w[dst]" : : [dst] "r"(dst) : "memory");
#else
    asm("dc zva, %[dst]" : : [dst] "r"(dst) : "memory");
#endif
  }
};

// Returns true iff DC ZVA is permitted and operates on 64-byte blocks,
// as reported by the DCZID_EL0 system register.
inline static bool hasZva() {
  uint64_t zva_val;
  asm("mrs %[zva_val], dczid_el0" : [zva_val] "=r"(zva_val));
  // DC ZVA is permitted if DZP, bit [4] is zero.
  // BS, bits [3:0] is log2 of the block size in words.
  // So the next line checks whether the instruction is permitted and block
  // size is 16 words (i.e. 64 bytes).
  return (zva_val & 0b11111) == 0b00100;
}

// Operations sized in bytes, backed by the aarch64 backend.
namespace aarch64 {
using _1 = SizedOp<Aarch64Backend, 1>;
using _2 = SizedOp<Aarch64Backend, 2>;
using _3 = SizedOp<Aarch64Backend, 3>;
using _4 = SizedOp<Aarch64Backend, 4>;
using _8 = SizedOp<Aarch64Backend, 8>;
using _16 = SizedOp<Aarch64Backend, 16>;
using _32 = SizedOp<Aarch64Backend, 32>;
using _64 = SizedOp<Aarch64Backend, 64>;
using _128 = SizedOp<Aarch64Backend, 128>;
} // namespace aarch64

} // namespace __llvm_libc

#endif // LLVM_LIBC_ARCH_AARCH64

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_AARCH64_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,104 @@ | ||
//===-- Elementary operations for native scalar types ---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H

#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
#include "src/__support/endian.h"

namespace __llvm_libc {

// Backend implementing the elementary operations (load, store, splat,
// notEquals, threeWayCmp) on the native fixed-width unsigned integer types
// up to 64 bits.
struct Scalar64BitBackend {
  static constexpr bool IS_BACKEND_TYPE = true;

  // True iff T is one of the unsigned integer types this backend handles.
  template <typename T>
  static constexpr bool IsScalarType =
      cpp::IsSameV<T, uint8_t> || cpp::IsSameV<T, uint16_t> ||
      cpp::IsSameV<T, uint32_t> || cpp::IsSameV<T, uint64_t>;

  // Loads a T from `src`. Plain scalar accesses cannot express non-temporal
  // semantics, hence the static_assert on Temporality.
  template <typename T, Temporality TS, Aligned AS>
  static inline T load(const T *src) {
    static_assert(IsScalarType<T>);
    static_assert(TS == Temporality::TEMPORAL,
                  "Scalar load does not support non-temporal access");
    return *src;
  }

  // Stores `value` to `dst`. Same temporal-only restriction as `load`.
  template <typename T, Temporality TS, Aligned AS>
  static inline void store(T *dst, T value) {
    static_assert(IsScalarType<T>);
    static_assert(TS == Temporality::TEMPORAL,
                  "Scalar store does not support non-temporal access");
    *dst = value;
  }

  // Broadcasts the byte `value` into every byte of a T: (~0 / 0xFF) yields
  // the 0x0101..01 pattern, so e.g. splat<uint32_t>(0xAB) == 0xABABABAB.
  template <typename T> static inline T splat(ubyte value) {
    static_assert(IsScalarType<T>);
    return (T(~0ULL) / T(0xFF)) * T(value);
  }

  // Returns a non-zero value iff v1 != v2 (the xor of the two words).
  template <typename T> static inline uint64_t notEquals(T v1, T v2) {
    static_assert(IsScalarType<T>);
    return v1 ^ v2;
  }

  // memcmp-style three-way comparison. Only the explicit specializations
  // below are usable; instantiating the primary template is a compile error.
  template <typename T> static inline int32_t threeWayCmp(T v1, T v2) {
    DeferredStaticAssert("not implemented");
  }

  // Returns the type to use to consume Size bytes.
  template <size_t Size>
  using getNextType = cpp::ConditionalType<
      Size >= 8, uint64_t,
      cpp::ConditionalType<Size >= 4, uint32_t,
                           cpp::ConditionalType<Size >= 2, uint16_t, uint8_t>>>;
};

// The specializations convert to big-endian first so that the native integer
// comparison agrees with lexicographic (byte-wise memcmp) order.
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint8_t>(uint8_t a, uint8_t b) {
  const int16_t la = Endian::to_big_endian(a);
  const int16_t lb = Endian::to_big_endian(b);
  // Operands fit in int16_t, so the subtraction cannot overflow.
  return la - lb;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint16_t>(uint16_t a,
                                                         uint16_t b) {
  const int32_t la = Endian::to_big_endian(a);
  const int32_t lb = Endian::to_big_endian(b);
  // Operands fit in int32_t, so the subtraction cannot overflow.
  return la - lb;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint32_t>(uint32_t a,
                                                         uint32_t b) {
  const uint32_t la = Endian::to_big_endian(a);
  const uint32_t lb = Endian::to_big_endian(b);
  // A 32-bit subtraction could overflow int32_t, so compare explicitly.
  return la > lb ? 1 : la < lb ? -1 : 0;
}
template <>
int32_t inline Scalar64BitBackend::threeWayCmp<uint64_t>(uint64_t a,
                                                         uint64_t b) {
  const uint64_t la = Endian::to_big_endian(a);
  const uint64_t lb = Endian::to_big_endian(b);
  // A 64-bit difference does not fit in int32_t, so compare explicitly.
  return la > lb ? 1 : la < lb ? -1 : 0;
}

// Operations sized in bytes, backed by the scalar backend.
namespace scalar {
using _1 = SizedOp<Scalar64BitBackend, 1>;
using _2 = SizedOp<Scalar64BitBackend, 2>;
using _3 = SizedOp<Scalar64BitBackend, 3>;
using _4 = SizedOp<Scalar64BitBackend, 4>;
using _8 = SizedOp<Scalar64BitBackend, 8>;
using _16 = SizedOp<Scalar64BitBackend, 16>;
using _32 = SizedOp<Scalar64BitBackend, 32>;
using _64 = SizedOp<Scalar64BitBackend, 64>;
using _128 = SizedOp<Scalar64BitBackend, 128>;
} // namespace scalar

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_SCALAR_H
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
//===-- Elementary operations for x86 -------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H

#if defined(LLVM_LIBC_ARCH_X86)
#include "src/__support/CPP/TypeTraits.h" // ConditionalType, EnableIfType
#include "src/string/memory_utils/backend_scalar.h"

#ifdef __SSE2__
#include <immintrin.h>
#endif // __SSE2__

// Compile-time booleans describing which vector widths the target supports.
#if defined(__SSE2__)
#define HAS_M128 true
#else
#define HAS_M128 false
#endif

#if defined(__AVX2__)
#define HAS_M256 true
#else
#define HAS_M256 false
#endif

#if defined(__AVX512F__) and defined(__AVX512BW__)
#define HAS_M512 true
#else
#define HAS_M512 false
#endif

namespace __llvm_libc {
// x86 backend: scalar types are delegated to Scalar64BitBackend; the vector
// types (__m128i / __m256i / __m512i) are specialized below the struct.
struct X86Backend : public Scalar64BitBackend {
  static constexpr bool IS_BACKEND_TYPE = true;

  // Scalar types use base class implementations.
  template <typename T, Temporality TS, Aligned AS,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline T load(const T *src) {
    return Scalar64BitBackend::template load<T, TS, AS>(src);
  }

  // Scalar types use base class implementations.
  template <typename T, Temporality TS, Aligned AS,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline void store(T *dst, T value) {
    Scalar64BitBackend::template store<T, TS, AS>(dst, value);
  }

  // Scalar types use base class implementations.
  template <typename T,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline uint64_t notEquals(T v1, T v2) {
    return Scalar64BitBackend::template notEquals<T>(v1, v2);
  }

  // Scalar types use base class implementations.
  template <typename T,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline T splat(ubyte value) {
    return Scalar64BitBackend::template splat<T>(value);
  }

  // Scalar types use base class implementations.
  template <typename T,
            cpp::EnableIfType<Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline int32_t threeWayCmp(T v1, T v2) {
    return Scalar64BitBackend::template threeWayCmp<T>(v1, v2);
  }

  // X86 types are specialized below.
  template <
      typename T, Temporality TS, Aligned AS,
      cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline T load(const T *src);

  // X86 types are specialized below.
  template <
      typename T, Temporality TS, Aligned AS,
      cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>, bool> = true>
  static inline void store(T *dst, T value);

  // X86 types are specialized below.
  template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
                                          bool> = true>
  static inline T splat(ubyte value);

  // X86 types are specialized below.
  template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
                                          bool> = true>
  static inline uint64_t notEquals(T v1, T v2);

  // Vector three-way compare: uses the notEquals byte mask to locate the
  // first differing byte, then compares that byte pair.
  template <typename T, cpp::EnableIfType<!Scalar64BitBackend::IsScalarType<T>,
                                          bool> = true>
  static inline int32_t threeWayCmp(T v1, T v2) {
    return char_diff(reinterpret_cast<char *>(&v1),
                     reinterpret_cast<char *>(&v2), notEquals(v1, v2));
  }

  // Returns the type to use to consume Size bytes.
  template <size_t Size>
  using getNextType = cpp::ConditionalType<
      (HAS_M512 && Size >= 64), __m512i,
      cpp::ConditionalType<
          (HAS_M256 && Size >= 32), __m256i,
          cpp::ConditionalType<(HAS_M128 && Size >= 16), __m128i,
                               Scalar64BitBackend::getNextType<Size>>>>;

private:
  // Returns the signed difference of the first differing byte of `a`/`b`.
  // `mask` has one bit set per differing byte (ctz gives its index); a zero
  // mask means equal, and index 0 then compares equal bytes, yielding 0.
  // NOTE(review): ctz on the mask assumes bit i maps to byte i, which holds
  // for movemask/cmp-mask on little-endian x86.
  static inline int32_t char_diff(const char *a, const char *b, uint64_t mask) {
    const size_t diff_index = mask == 0 ? 0 : __builtin_ctzll(mask);
    const int16_t ca = (unsigned char)a[diff_index];
    const int16_t cb = (unsigned char)b[diff_index];
    return ca - cb;
  }
};

// Copies `runtime_size` bytes from `src` to `dst` with a single `rep movsb`.
static inline void repmovsb(void *dst, const void *src, size_t runtime_size) {
  asm volatile("rep movsb"
               : "+D"(dst), "+S"(src), "+c"(runtime_size)
               :
               : "memory");
}

// Helpers generating explicit load/store specializations of X86Backend for a
// vector type T, a Temporality and an Aligned setting, from one intrinsic.
#define SPECIALIZE_LOAD(T, OS, AS, INTRISIC)                                   \
  template <> inline T X86Backend::load<T, OS, AS>(const T *src) {             \
    return INTRISIC(const_cast<T *>(src));                                     \
  }
#define SPECIALIZE_STORE(T, OS, AS, INTRISIC)                                  \
  template <> inline void X86Backend::store<T, OS, AS>(T * dst, T value) {     \
    INTRISIC(dst, value);                                                      \
  }

#if HAS_M128
SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_load_si128)
SPECIALIZE_LOAD(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_loadu_si128)
// NOTE(review): _mm_stream_load_si128 is an SSE4.1 intrinsic but is only
// guarded by __SSE2__ here — confirm the build always enables SSE4.1 when
// this path is taken.
SPECIALIZE_LOAD(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
                _mm_stream_load_si128)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::YES, _mm_store_si128)
SPECIALIZE_STORE(__m128i, Temporality::TEMPORAL, Aligned::NO, _mm_storeu_si128)
SPECIALIZE_STORE(__m128i, Temporality::NON_TEMPORAL, Aligned::YES,
                 _mm_stream_si128)
// X86 non-temporal store needs aligned access
template <> inline __m128i X86Backend::splat<__m128i>(ubyte value) {
  return _mm_set1_epi8(__builtin_bit_cast(char, value));
}
// One mask bit per differing byte (low 16 bits used).
template <>
inline uint64_t X86Backend::notEquals<__m128i>(__m128i a, __m128i b) {
  using T = char __attribute__((__vector_size__(16)));
  return _mm_movemask_epi8(T(a) != T(b));
}
#endif // HAS_M128

#if HAS_M256
SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::YES, _mm256_load_si256)
SPECIALIZE_LOAD(__m256i, Temporality::TEMPORAL, Aligned::NO, _mm256_loadu_si256)
SPECIALIZE_LOAD(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
                _mm256_stream_load_si256)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::YES,
                 _mm256_store_si256)
SPECIALIZE_STORE(__m256i, Temporality::TEMPORAL, Aligned::NO,
                 _mm256_storeu_si256)
SPECIALIZE_STORE(__m256i, Temporality::NON_TEMPORAL, Aligned::YES,
                 _mm256_stream_si256)
// X86 non-temporal store needs aligned access
template <> inline __m256i X86Backend::splat<__m256i>(ubyte value) {
  return _mm256_set1_epi8(__builtin_bit_cast(char, value));
}
// One mask bit per differing byte (low 32 bits used).
template <>
inline uint64_t X86Backend::notEquals<__m256i>(__m256i a, __m256i b) {
  using T = char __attribute__((__vector_size__(32)));
  return _mm256_movemask_epi8(T(a) != T(b));
}
#endif // HAS_M256

#if HAS_M512
SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::YES, _mm512_load_si512)
SPECIALIZE_LOAD(__m512i, Temporality::TEMPORAL, Aligned::NO, _mm512_loadu_si512)
SPECIALIZE_LOAD(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
                _mm512_stream_load_si512)
// X86 non-temporal load needs aligned access
SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::YES,
                 _mm512_store_si512)
SPECIALIZE_STORE(__m512i, Temporality::TEMPORAL, Aligned::NO,
                 _mm512_storeu_si512)
SPECIALIZE_STORE(__m512i, Temporality::NON_TEMPORAL, Aligned::YES,
                 _mm512_stream_si512)
// X86 non-temporal store needs aligned access
template <> inline __m512i X86Backend::splat<__m512i>(ubyte value) {
  return _mm512_broadcastb_epi8(_mm_set1_epi8(__builtin_bit_cast(char, value)));
}
// AVX512BW compare returns the 64-bit byte mask directly.
template <>
inline uint64_t X86Backend::notEquals<__m512i>(__m512i a, __m512i b) {
  return _mm512_cmpneq_epi8_mask(a, b);
}
#endif // HAS_M512

// Operations sized in bytes, backed by the x86 backend.
namespace x86 {
using _1 = SizedOp<X86Backend, 1>;
using _2 = SizedOp<X86Backend, 2>;
using _3 = SizedOp<X86Backend, 3>;
using _4 = SizedOp<X86Backend, 4>;
using _8 = SizedOp<X86Backend, 8>;
using _16 = SizedOp<X86Backend, 16>;
using _32 = SizedOp<X86Backend, 32>;
using _64 = SizedOp<X86Backend, 64>;
using _128 = SizedOp<X86Backend, 128>;
} // namespace x86

} // namespace __llvm_libc

#endif // defined(LLVM_LIBC_ARCH_X86)

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_BACKEND_X86_H
Oops, something went wrong.