-
Notifications
You must be signed in to change notification settings - Fork 10.8k
/
memcmp_implementations.h
155 lines (142 loc) · 5.68 KB
/
memcmp_implementations.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
//===-- Implementation of memcmp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H
#define LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H
#include "src/__support/architectures.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/op_aarch64.h"
#include "src/string/memory_utils/op_builtin.h"
#include "src/string/memory_utils/op_generic.h"
#include "src/string/memory_utils/op_x86.h"
#include "src/string/memory_utils/utils.h"
#include <stddef.h> // size_t
namespace __llvm_libc {
// Minimal byte-at-a-time comparison.
// Walks both buffers from offset 0 up to (but excluding) `count` and returns
// the first non-zero result produced by generic::Memcmp<1>::block. If every
// position compares equal (or `count` is 0), returns MemcmpReturnType::ZERO().
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_embedded_tiny(CPtr p1, CPtr p2, size_t count) {
// Ask the compiler not to unroll the loop below (presumably to keep the
// generated code small, as the function name suggests).
#pragma nounroll
  for (size_t i = 0; i < count; ++i) {
    auto diff = generic::Memcmp<1>::block(p1 + i, p2 + i);
    if (diff) {
      return diff;
    }
  }
  return MemcmpReturnType::ZERO();
}
#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
// Generic (no ISA-specific ops) comparison built on 16-byte blocks.
// The dispatcher in this file only calls it once `count <= 16` has been
// handled, hence the `_gt16` suffix.
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_generic_gt16(CPtr p1, CPtr p2, size_t count) {
  if (unlikely(count >= 384)) {
    // Large input: settle the first 16 bytes, then let
    // align_to_next_boundary adjust p1/p2/count before the main loop
    // (presumably so that p1 becomes 16-byte aligned -- helper is defined in
    // utils.h, confirm there).
    auto head = generic::Memcmp<16>::block(p1, p2);
    if (head) {
      return head;
    }
    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
  }
  return generic::Memcmp<16>::loop_and_tail(p1, p2, count);
}
#endif // defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
#if defined(LLVM_LIBC_ARCH_X86)
// x86 SSE2 flavour of the >16-byte comparison; same shape as the generic
// variant but routed through x86::sse2::Memcmp<16>.
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_x86_sse2_gt16(CPtr p1, CPtr p2, size_t count) {
  if (unlikely(count >= 384)) {
    // Large input: compare the leading 16 bytes first, then let
    // align_to_next_boundary adjust p1/p2/count ahead of the loop
    // (presumably aligning p1 to 16 bytes -- confirm in utils.h).
    auto head = x86::sse2::Memcmp<16>::block(p1, p2);
    if (head) {
      return head;
    }
    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
  }
  return x86::sse2::Memcmp<16>::loop_and_tail(p1, p2, count);
}
// x86 AVX2 flavour of the >16-byte comparison.
// Inputs of at most 128 bytes are handled by a single head_tail call whose
// block size grows with `count`; anything larger goes through a 32-byte loop.
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_x86_avx2_gt16(CPtr p1, CPtr p2, size_t count) {
  if (count <= 32) {
    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
  } else if (count <= 64) {
    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
  } else if (count <= 128) {
    return x86::avx2::Memcmp<64>::head_tail(p1, p2, count);
  }
  // count > 128 from here on.
  if (unlikely(count >= 384)) {
    // Large input: compare the leading 32 bytes first, then let
    // align_to_next_boundary adjust p1/p2/count ahead of the loop
    // (presumably aligning p1 to 32 bytes -- confirm in utils.h).
    auto head = x86::avx2::Memcmp<32>::block(p1, p2);
    if (head) {
      return head;
    }
    align_to_next_boundary<32, Arg::P1>(p1, p2, count);
  }
  return x86::avx2::Memcmp<32>::loop_and_tail(p1, p2, count);
}
// x86 AVX512BW flavour of the >16-byte comparison.
// Inputs of at most 128 bytes are handled by a single head_tail call (SSE2,
// AVX2, then AVX512BW as `count` grows); anything larger goes through a
// 64-byte loop.
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_x86_avx512bw_gt16(CPtr p1, CPtr p2, size_t count) {
  if (count <= 32) {
    return x86::sse2::Memcmp<16>::head_tail(p1, p2, count);
  } else if (count <= 64) {
    return x86::avx2::Memcmp<32>::head_tail(p1, p2, count);
  } else if (count <= 128) {
    return x86::avx512bw::Memcmp<64>::head_tail(p1, p2, count);
  }
  // count > 128 from here on.
  if (unlikely(count >= 384)) {
    // Large input: compare the leading 64 bytes first, then let
    // align_to_next_boundary adjust p1/p2/count ahead of the loop
    // (presumably aligning p1 to 64 bytes -- confirm in utils.h).
    auto head = x86::avx512bw::Memcmp<64>::block(p1, p2);
    if (head) {
      return head;
    }
    align_to_next_boundary<64, Arg::P1>(p1, p2, count);
  }
  return x86::avx512bw::Memcmp<64>::loop_and_tail(p1, p2, count);
}
#endif // defined(LLVM_LIBC_ARCH_X86)
#if defined(LLVM_LIBC_ARCH_AARCH64)
// AArch64 NEON flavour of the >16-byte comparison.
// Large inputs (>= 128 bytes) take a 32-byte loop; smaller ones are resolved
// by a cascade of 16-byte checks. Each check first uses Bcmp (difference or
// not) and only calls Memcmp on the same 16 bytes when a difference is
// reported, to obtain the ordered result.
[[maybe_unused]] static inline MemcmpReturnType
inline_memcmp_aarch64_neon_gt16(CPtr p1, CPtr p2, size_t count) {
  if (unlikely(count >= 128)) { // [128, inf)
    auto head = generic::Memcmp<16>::block(p1, p2);
    if (head) {
      return head;
    }
    // Let align_to_next_boundary adjust p1/p2/count before looping
    // (presumably aligning p1 to 16 bytes -- confirm in utils.h).
    align_to_next_boundary<16, Arg::P1>(p1, p2, count);
    return generic::Memcmp<32>::loop_and_tail(p1, p2, count);
  }
  // Bytes [0, 16).
  if (generic::Bcmp<16>::block(p1, p2)) { // [16, 16]
    return generic::Memcmp<16>::block(p1, p2);
  }
  if (count < 32) { // [17, 31]
    return generic::Memcmp<16>::tail(p1, p2, count);
  }
  // Bytes [16, 32).
  CPtr p1_second = p1 + 16;
  CPtr p2_second = p2 + 16;
  if (generic::Bcmp<16>::block(p1_second, p2_second)) { // [32, 32]
    return generic::Memcmp<16>::block(p1_second, p2_second);
  }
  if (count < 64) { // [33, 63]
    return generic::Memcmp<32>::tail(p1, p2, count);
  }
  // [64, 127]: the first 32 bytes are known equal, continue past them.
  return generic::Memcmp<16>::loop_and_tail(p1 + 32, p2 + 32, count - 32);
}
#endif // defined(LLVM_LIBC_ARCH_AARCH64)
// Typed-pointer entry point: chooses a comparison strategy from `count` and
// from the architecture / CPU-feature information available at compile time.
// Targets not covered by any branch fail the build with an #error.
static inline MemcmpReturnType inline_memcmp(CPtr p1, CPtr p2, size_t count) {
#if defined(LLVM_LIBC_ARCH_X86) || defined(LLVM_LIBC_ARCH_AARCH64)
  // Sizes 0..3 each map to a dedicated fixed-size comparison.
  if (count == 0) {
    return MemcmpReturnType::ZERO();
  }
  if (count == 1) {
    return generic::Memcmp<1>::block(p1, p2);
  }
  if (count == 2) {
    return generic::Memcmp<2>::block(p1, p2);
  }
  if (count == 3) {
    return generic::Memcmp<3>::block(p1, p2);
  }
  // Sizes 4..8 and 9..16 use a head_tail comparison with 4- and 8-byte blocks.
  if (count <= 8) {
    return generic::Memcmp<4>::head_tail(p1, p2, count);
  }
  if (count <= 16) {
    return generic::Memcmp<8>::head_tail(p1, p2, count);
  }
  // count > 16: hand over to the widest *_gt16 variant the target supports.
#if defined(LLVM_LIBC_ARCH_X86)
  if constexpr (x86::kAvx512BW) {
    return inline_memcmp_x86_avx512bw_gt16(p1, p2, count);
  } else if constexpr (x86::kAvx2) {
    return inline_memcmp_x86_avx2_gt16(p1, p2, count);
  } else if constexpr (x86::kSse2) {
    return inline_memcmp_x86_sse2_gt16(p1, p2, count);
  } else {
    return inline_memcmp_generic_gt16(p1, p2, count);
  }
#elif defined(LLVM_LIBC_ARCH_AARCH64)
  if constexpr (aarch64::kNeon) {
    return inline_memcmp_aarch64_neon_gt16(p1, p2, count);
  } else {
    return inline_memcmp_generic_gt16(p1, p2, count);
  }
#endif
#elif defined(LLVM_LIBC_ARCH_ARM) || defined(LLVM_LIBC_ARCH_GPU)
  // ARM and GPU targets both use the byte-at-a-time implementation.
  return inline_memcmp_embedded_tiny(p1, p2, count);
#else
#error "Unsupported platform"
#endif
}
static inline int inline_memcmp(const void *p1, const void *p2, size_t count) {
return static_cast<int>(inline_memcmp(reinterpret_cast<CPtr>(p1),
reinterpret_cast<CPtr>(p2), count));
}
} // namespace __llvm_libc
#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_MEMCMP_IMPLEMENTATIONS_H