-
Notifications
You must be signed in to change notification settings - Fork 10.8k
/
memcpy_implementations.h
143 lines (129 loc) · 5.2 KB
/
memcpy_implementations.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
//===-- Memcpy implementation -----------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H
#include "src/__support/architectures.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/op_aarch64.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/op_generic.h"
#include "src/string/memory_utils/op_x86.h"
#include "src/string/memory_utils/utils.h"
#include <stddef.h> // size_t
namespace __llvm_libc {
// Minimal memcpy used where code size matters more than throughput: walks the
// buffers one byte at a time, issuing a 1-byte builtin::Memcpy block copy per
// iteration. A `count` of zero performs no copy at all.
[[maybe_unused]] static inline void
inline_memcpy_embedded_tiny(Ptr __restrict dst, CPtr __restrict src,
                            size_t count) {
  size_t index = 0;
  // Keep the loop rolled so the emitted code stays small.
#pragma nounroll
  while (index < count) {
    builtin::Memcpy<1>::block(dst + index, src + index);
    ++index;
  }
}
#if defined(LLVM_LIBC_ARCH_X86)
// Copies `count` bytes from `src` to `dst` on x86, picking a strategy from the
// value of `count`:
//   - 0            : returns immediately;
//   - 1, 2, 3, 4   : a single fixed-size builtin::Memcpy<N>::block call;
//   - 5 .. 127     : builtin::Memcpy<S>::head_tail with S the largest power of
//                    two not exceeding `count` (4, 8, 16, 32, 64);
//   - 128 .. 255   : Memcpy<128>::head_tail, only when x86::kAvx is true;
//   - anything else: one 32-byte block, a realignment step, then a block loop.
// NOTE(review): the exact semantics of `block`, `head_tail`, `loop_and_tail`
// and `align_to_next_boundary` live in op_builtin.h / utils.h, not in this
// file. The comments below describe what they presumably do - confirm there.
[[maybe_unused]] static inline void
inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) {
// Tiny sizes: exact-size copies, checked one by one.
if (count == 0)
return;
if (count == 1)
return builtin::Memcpy<1>::block(dst, src);
if (count == 2)
return builtin::Memcpy<2>::block(dst, src);
if (count == 3)
return builtin::Memcpy<3>::block(dst, src);
if (count == 4)
return builtin::Memcpy<4>::block(dst, src);
// From here on each tier satisfies S <= count < 2*S for the block size S it
// uses. Presumably head_tail copies one S-byte block from the start and one
// from the end (overlapping in the middle) - confirm in op_builtin.h.
if (count < 8)
return builtin::Memcpy<4>::head_tail(dst, src, count);
if (count < 16)
return builtin::Memcpy<8>::head_tail(dst, src, count);
if (count < 32)
return builtin::Memcpy<16>::head_tail(dst, src, count);
if (count < 64)
return builtin::Memcpy<32>::head_tail(dst, src, count);
if (count < 128)
return builtin::Memcpy<64>::head_tail(dst, src, count);
// The 128-byte tier is taken only when x86::kAvx is true; otherwise sizes in
// [128, 256) fall through to the loop below.
if (x86::kAvx && count < 256)
return builtin::Memcpy<128>::head_tail(dst, src, count);
// Large sizes: copy the first 32 bytes, then realign on the destination.
// Presumably align_to_next_boundary advances dst/src to the next 32-byte
// boundary of dst and shrinks count accordingly (the three are passed as
// lvalues) - confirm in utils.h.
builtin::Memcpy<32>::block(dst, src);
align_to_next_boundary<32, Arg::Dst>(dst, src, count);
// Loop block size is chosen at compile time: 64 when x86::kAvx, else 32.
static constexpr size_t kBlockSize = x86::kAvx ? 64 : 32;
return builtin::Memcpy<kBlockSize>::loop_and_tail(dst, src, count);
}
// Copies `count` bytes from `src` to `dst` on x86, optionally routing the copy
// through x86::Memcpy::repmovsb according to a compile-time setting.
//
// LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE selects the policy:
//   - negative (the default, -1): never use rep;movsb, always call
//     inline_memcpy_x86;
//   - 0: always use rep;movsb;
//   - N > 0: use rep;movsb when `count >= N`, inline_memcpy_x86 otherwise.
[[maybe_unused]] static inline void
inline_memcpy_x86_maybe_interpose_repmovsb(Ptr __restrict dst,
                                           CPtr __restrict src, size_t count) {
  // Whether to use rep;movsb exclusively, not at all, or only above a certain
  // threshold.
#ifndef LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE
#define LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE -1
#endif

#ifdef LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB
#error LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB is deprecated use LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE=0 instead.
#endif // LLVM_LIBC_MEMCPY_X86_USE_ONLY_REPMOVSB

  // The setting is kept in a signed type on purpose. Stored in a size_t, the
  // default -1 wraps to SIZE_MAX, which makes "disabled" look like a huge
  // positive threshold: the "never" branch below becomes unreachable and every
  // call pays for a runtime `count >= SIZE_MAX` comparison.
  static constexpr long long kRepMovsbFromSize =
      LLVM_LIBC_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE;

  if constexpr (kRepMovsbFromSize == 0) {
    // rep;movsb for every size.
    return x86::Memcpy::repmovsb(dst, src, count);
  } else if constexpr (kRepMovsbFromSize > 0) {
    // rep;movsb only at or above the configured size.
    static constexpr size_t kRepMovsbThreshold =
        static_cast<size_t>(kRepMovsbFromSize);
    if (unlikely(count >= kRepMovsbThreshold))
      return x86::Memcpy::repmovsb(dst, src, count);
    return inline_memcpy_x86(dst, src, count);
  } else {
    // Negative setting: rep;movsb is disabled, no size check needed.
    return inline_memcpy_x86(dst, src, count);
  }
}
#endif // defined(LLVM_LIBC_ARCH_X86)
#if defined(LLVM_LIBC_ARCH_AARCH64)
// Copies `count` bytes from `src` to `dst` on AArch64, picking a strategy from
// the value of `count`:
//   - 0            : returns immediately;
//   - 1, 2, 3, 4   : a single fixed-size builtin::Memcpy<N>::block call;
//   - 5 .. 127     : builtin::Memcpy<S>::head_tail with S the largest power of
//                    two not exceeding `count` (4, 8, 16, 32, 64);
//   - 128 and above: one 16-byte block, a realignment step, then a 64-byte
//                    block loop.
// NOTE(review): the exact semantics of `block`, `head_tail`, `loop_and_tail`
// and `align_to_next_boundary` live in op_builtin.h / utils.h, not in this
// file. The comments below describe what they presumably do - confirm there.
[[maybe_unused]] static inline void
inline_memcpy_aarch64(Ptr __restrict dst, CPtr __restrict src, size_t count) {
// Tiny sizes: exact-size copies, checked one by one.
if (count == 0)
return;
if (count == 1)
return builtin::Memcpy<1>::block(dst, src);
if (count == 2)
return builtin::Memcpy<2>::block(dst, src);
if (count == 3)
return builtin::Memcpy<3>::block(dst, src);
if (count == 4)
return builtin::Memcpy<4>::block(dst, src);
// From here on each tier satisfies S <= count < 2*S for the block size S it
// uses. Presumably head_tail copies one S-byte block from the start and one
// from the end (overlapping in the middle) - confirm in op_builtin.h.
if (count < 8)
return builtin::Memcpy<4>::head_tail(dst, src, count);
if (count < 16)
return builtin::Memcpy<8>::head_tail(dst, src, count);
if (count < 32)
return builtin::Memcpy<16>::head_tail(dst, src, count);
if (count < 64)
return builtin::Memcpy<32>::head_tail(dst, src, count);
if (count < 128)
return builtin::Memcpy<64>::head_tail(dst, src, count);
// Large sizes: copy the first 16 bytes, then realign on the source (note
// Arg::Src here, whereas the x86 variant in this file uses Arg::Dst).
// Presumably align_to_next_boundary advances dst/src to the next 16-byte
// boundary of src and shrinks count accordingly - confirm in utils.h.
builtin::Memcpy<16>::block(dst, src);
align_to_next_boundary<16, Arg::Src>(dst, src, count);
return builtin::Memcpy<64>::loop_and_tail(dst, src, count);
}
#endif // defined(LLVM_LIBC_ARCH_AARCH64)
// Architecture dispatcher for memcpy: forwards (dst, src, count) to the
// implementation selected at preprocessing time by the LLVM_LIBC_ARCH_* macros.
//   - X86     -> inline_memcpy_x86_maybe_interpose_repmovsb
//   - AARCH64 -> inline_memcpy_aarch64
//   - ARM/GPU -> inline_memcpy_embedded_tiny
// Any other target is rejected with a compile-time error.
static inline void inline_memcpy(Ptr __restrict dst, CPtr __restrict src,
                                 size_t count) {
  using namespace __llvm_libc::builtin;
#if defined(LLVM_LIBC_ARCH_X86)
  inline_memcpy_x86_maybe_interpose_repmovsb(dst, src, count);
#elif defined(LLVM_LIBC_ARCH_AARCH64)
  inline_memcpy_aarch64(dst, src, count);
#elif defined(LLVM_LIBC_ARCH_ARM) || defined(LLVM_LIBC_ARCH_GPU)
  // Both targets share the byte-at-a-time implementation.
  inline_memcpy_embedded_tiny(dst, src, count);
#else
#error "Unsupported platform"
#endif
}
static inline void inline_memcpy(void *__restrict dst,
const void *__restrict src, size_t count) {
inline_memcpy(reinterpret_cast<Ptr>(dst), reinterpret_cast<CPtr>(src), count);
}
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCPY_IMPLEMENTATIONS_H