| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,22 @@ | ||
| //===--------------------- Implementation of memcpy -----------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/string/memcpy.h" | ||
| #include "src/__support/common.h" | ||
| #include "src/string/memcpy_arch_specific.h" | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst, | ||
| const void *__restrict src, size_t size) { | ||
| memcpy_no_return(reinterpret_cast<char *>(dst), | ||
| reinterpret_cast<const char *>(src), size); | ||
| return dst; | ||
| } | ||
|
|
||
| } // namespace __llvm_libc |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| //===----------------- Implementation header for memcpy -------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_STRING_MEMCPY_H | ||
| #define LLVM_LIBC_SRC_STRING_MEMCPY_H | ||
|
|
||
| #include "include/string.h" | ||
| #include <stddef.h> // size_t | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| void *memcpy(void *__restrict, const void *__restrict, size_t); | ||
|
|
||
| } // namespace __llvm_libc | ||
|
|
||
| #endif // LLVM_LIBC_SRC_STRING_MEMCPY_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,65 @@ | ||
| //===-------------- Implementation of arch specific memcpy ----------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H | ||
| #define LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H | ||
|
|
||
| %%include_file(${memcpy_arch_specific}) | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
| // Design rationale | ||
| // ================ | ||
| // | ||
| // Using a profiler to observe size distributions for calls into libc | ||
| // functions, it was found most operations act on a small number of bytes. | ||
| // This makes it important to favor small sizes. | ||
| // | ||
| // The tests for `count` are in ascending order so the cost of branching is | ||
| // proportional to the cost of copying. | ||
| // | ||
| // The function is written in C++ for several reasons: | ||
| // - The compiler can __see__ the code, this is useful when performing Profile | ||
| // Guided Optimization as the optimized code can take advantage of branching | ||
| // probabilities. | ||
| // - It also allows for easier customization and favors testing multiple | ||
| // implementation parameters. | ||
| // - As compilers and processors get better, the generated code is improved | ||
| // with little change on the code side. | ||
| static void memcpy_no_return(char *__restrict dst, const char *__restrict src, | ||
| size_t count) { | ||
| if (count == 0) | ||
| return; | ||
| if (count == 1) | ||
| return Copy<1>(dst, src); | ||
| if (count == 2) | ||
| return Copy<2>(dst, src); | ||
| if (count == 3) | ||
| return Copy<3>(dst, src); | ||
| if (count == 4) | ||
| return Copy<4>(dst, src); | ||
| if (count < 8) | ||
| return CopyOverlap<4>(dst, src, count); | ||
| if (count == 8) | ||
| return Copy<8>(dst, src); | ||
| if (count < 16) | ||
| return CopyOverlap<8>(dst, src, count); | ||
| if (count == 16) | ||
| return Copy<16>(dst, src); | ||
| if (count < 32) | ||
| return CopyOverlap<16>(dst, src, count); | ||
| if (count < 64) | ||
| return CopyOverlap<32>(dst, src, count); | ||
| if (count < 128) | ||
| return CopyOverlap<64>(dst, src, count); | ||
| CopyGE128(dst, src, count); | ||
| } | ||
|
|
||
| } // namespace __llvm_libc | ||
|
|
||
| #endif // LLVM_LIBC_SRC_STRING_MEMORY_ARCH_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -12,6 +12,9 @@ add_gen_header( | |
|
|
||
| add_header_library( | ||
| memory_utils | ||
| HDRS | ||
| utils.h | ||
| memcpy_utils.h | ||
| DEPENDS | ||
| cacheline_size | ||
| ) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| //===---------------------------- Memcpy utils ----------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H | ||
| #define LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H | ||
|
|
||
| #include "src/string/memory_utils/utils.h" | ||
| #include <stddef.h> // size_t | ||
|
|
||
// Select the primitive used to implement fixed-size block copies.
//
// __builtin_memcpy_inline guarantees to never call external functions.
// Unfortunately it is not widely available.
//
// `__has_builtin` is only evaluated inside the `__clang__` branch: a
// preprocessor that does not provide it cannot parse `__has_builtin(...)` in
// an `#if` expression, even when it sits behind `defined(__clang__) &&`.
#if defined(__clang__)
#if __has_builtin(__builtin_memcpy_inline)
#define USE_BUILTIN_MEMCPY_INLINE
#endif
#endif

// Otherwise fall back to __builtin_memcpy on GNU-compatible compilers.
#if !defined(USE_BUILTIN_MEMCPY_INLINE) && defined(__GNUC__)
#define USE_BUILTIN_MEMCPY
#endif
|
|
||
| // This is useful for testing. | ||
| #if defined(LLVM_LIBC_MEMCPY_MONITOR) | ||
| extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict, | ||
| const char *__restrict, size_t); | ||
| #endif | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
// Copies exactly `kBlockSize` bytes from `src` to `dst`.
// The primitive is picked at preprocessing time: the test monitor hook when
// LLVM_LIBC_MEMCPY_MONITOR is defined, then the compiler builtins, and
// finally a plain byte-by-byte loop.
template <size_t kBlockSize>
static void Copy(char *__restrict dst, const char *__restrict src) {
#if defined(LLVM_LIBC_MEMCPY_MONITOR)
  LLVM_LIBC_MEMCPY_MONITOR(dst, src, kBlockSize);
#elif defined(USE_BUILTIN_MEMCPY_INLINE)
  __builtin_memcpy_inline(dst, src, kBlockSize);
#elif defined(USE_BUILTIN_MEMCPY)
  __builtin_memcpy(dst, src, kBlockSize);
#else
  size_t index = 0;
  while (index < kBlockSize) {
    dst[index] = src[index];
    ++index;
  }
#endif
}
|
|
||
| // Copies `kBlockSize` bytes from `src + count - kBlockSize` to | ||
| // `dst + count - kBlockSize`. | ||
| // Precondition: `count >= kBlockSize`. | ||
| template <size_t kBlockSize> | ||
| static void CopyLastBlock(char *__restrict dst, const char *__restrict src, | ||
| size_t count) { | ||
| const size_t offset = count - kBlockSize; | ||
| Copy<kBlockSize>(dst + offset, src + offset); | ||
| } | ||
|
|
||
| // Copies `kBlockSize` bytes twice with an overlap between the two. | ||
| // | ||
| // [1234567812345678123] | ||
| // [__XXXXXXXXXXXXXX___] | ||
| // [__XXXXXXXX_________] | ||
| // [________XXXXXXXX___] | ||
| // | ||
| // Precondition: `count >= kBlockSize && count <= 2 * kBlockSize`. | ||
| template <size_t kBlockSize> | ||
| static void CopyOverlap(char *__restrict dst, const char *__restrict src, | ||
| size_t count) { | ||
| Copy<kBlockSize>(dst, src); | ||
| CopyLastBlock<kBlockSize>(dst, src, count); | ||
| } | ||
|
|
||
| // Copies `count` bytes by blocks of `kBlockSize` bytes. | ||
| // Copies at the start and end of the buffer are unaligned. | ||
| // Copies in the middle of the buffer are aligned to `kBlockSize` with | ||
| // respect to `dst`; `src` is read at the same offsets. | ||
| // | ||
| // e.g. with | ||
| // [12345678123456781234567812345678] | ||
| // [__XXXXXXXXXXXXXXXXXXXXXXXXXXX___] | ||
| // [__XXXXXXXX______________________] | ||
| // [________XXXXXXXX________________] | ||
| // [________________XXXXXXXX________] | ||
| // [_____________________XXXXXXXX___] | ||
| // | ||
| // Precondition: `count > 2 * kBlockSize` for efficiency. | ||
| // `count >= kBlockSize` for correctness. | ||
| template <size_t kBlockSize> | ||
| static void CopyAligned(char *__restrict dst, const char *__restrict src, | ||
| size_t count) { | ||
| Copy<kBlockSize>(dst, src); // Copy first block | ||
|
|
||
| // Copy aligned blocks | ||
| size_t offset = kBlockSize - offset_from_last_aligned<kBlockSize>(dst); | ||
| for (; offset + kBlockSize < count; offset += kBlockSize) | ||
| Copy<kBlockSize>(dst + offset, src + offset); | ||
|
|
||
| CopyLastBlock<kBlockSize>(dst, src, count); // Copy last block | ||
| } | ||
|
|
||
| } // namespace __llvm_libc | ||
|
|
||
| #endif // LLVM_LIBC_SRC_MEMORY_UTILS_MEMCPY_UTILS_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,4 @@ | ||
| add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_none" REJECT "${ALL_CPU_FEATURES}") | ||
| add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_sse" REQUIRE "SSE" REJECT "SSE2") | ||
| add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx" REQUIRE "AVX" REJECT "AVX2") | ||
| add_memcpy("memcpy_${LIBC_TARGET_MACHINE}_opt_avx512f" REQUIRE "AVX512F") |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| #include "src/string/memory_utils/memcpy_utils.h" | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
// Copies `count` bytes from `src` to `dst` with a single `rep movsb`.
// The three operands are bound through the "D", "S" and "c" constraints and
// are read-write ("+") because the instruction updates them; the "memory"
// clobber tells the compiler that memory is accessed by the statement.
static void CopyRepMovsb(char *__restrict dst, const char *__restrict src,
                         size_t count) {
  // FIXME: Add MSVC support with
  // #include <intrin.h>
  // __movsb(reinterpret_cast<unsigned char *>(dst),
  //         reinterpret_cast<const unsigned char *>(src), count);
  asm volatile("rep movsb"
               : "+D"(dst), "+S"(src), "+c"(count)
               :
               : "memory");
}
|
|
||
| #if defined(__AVX__) | ||
| #define BEST_SIZE 64 | ||
| #else | ||
| #define BEST_SIZE 32 | ||
| #endif | ||
|
|
||
| static void CopyGE128(char *__restrict dst, const char *__restrict src, | ||
| size_t count) { | ||
| #if defined(__AVX__) | ||
| if (count < 256) | ||
| return CopyOverlap<128>(dst, src, count); | ||
| #endif | ||
| // kRepMovsBSize == -1 : Only CopyAligned is used. | ||
| // kRepMovsBSize == 0 : Only RepMovsb is used. | ||
| // else CopyAligned is used to to kRepMovsBSize and then RepMovsb. | ||
| constexpr size_t kRepMovsBSize = -1; | ||
| if (count <= kRepMovsBSize) | ||
| return CopyAligned<BEST_SIZE>(dst, src, count); | ||
| CopyRepMovsb(dst, src, count); | ||
| } | ||
|
|
||
| } // namespace __llvm_libc |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,53 @@ | ||
| //===----------------------- Unittests for memcpy -------------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "utils/CPP/ArrayRef.h" | ||
| #include "utils/UnitTest/Test.h" | ||
| #include "src/string/memcpy.h" | ||
|
|
||
| using __llvm_libc::cpp::Array; | ||
| using __llvm_libc::cpp::ArrayRef; | ||
| using __llvm_libc::cpp::MutableArrayRef; | ||
| using Data = Array<char, 2048>; | ||
|
|
||
| static const ArrayRef<char> kNumbers("0123456789", 10); | ||
| static const ArrayRef<char> kDeadcode("DEADC0DE", 8); | ||
|
|
||
| // Returns a Data object filled with a repetition of `filler`. | ||
| Data getData(ArrayRef<char> filler) { | ||
| Data out; | ||
| for (size_t i = 0; i < out.size(); ++i) | ||
| out[i] = filler[i % filler.size()]; | ||
| return out; | ||
| } | ||
|
|
||
| TEST(MemcpyTest, Thorough) { | ||
| const Data groundtruth = getData(kNumbers); | ||
| const Data dirty = getData(kDeadcode); | ||
| for (size_t count = 0; count < 1024; ++count) { | ||
| for (size_t align = 0; align < 64; ++align) { | ||
| auto buffer = dirty; | ||
| const char *const src = groundtruth.data(); | ||
| char *const dst = &buffer[align]; | ||
| __llvm_libc::memcpy(dst, src, count); | ||
| // Everything before copy is untouched. | ||
| for (size_t i = 0; i < align; ++i) | ||
| ASSERT_EQ(buffer[i], dirty[i]); | ||
| // Everything in between is copied. | ||
| for (size_t i = 0; i < count; ++i) | ||
| ASSERT_EQ(buffer[align + i], groundtruth[i]); | ||
| // Everything after copy is untouched. | ||
| for (size_t i = align + count; i < dirty.size(); ++i) | ||
| ASSERT_EQ(buffer[i], dirty[i]); | ||
| } | ||
| } | ||
| } | ||
|
|
||
| // FIXME: Add tests with reads and writes on the boundary of a read/write | ||
| // protected page to check we're not reading nor writing prior/past the allowed | ||
| // regions. |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,208 @@ | ||
| //===-------------------- Unittests for memory_utils ----------------------===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #include "src/string/memory_utils/memcpy_utils.h" | ||
| #include "utils/CPP/Array.h" | ||
| #include "utils/UnitTest/Test.h" | ||
|
|
||
| #include <assert.h> | ||
| #include <stdint.h> // uintptr_t | ||
|
|
||
| #ifndef LLVM_LIBC_MEMCPY_MONITOR | ||
| #error LLVM_LIBC_MEMCPY_MONITOR must be defined for this test. | ||
| #endif | ||
|
|
||
| namespace __llvm_libc { | ||
|
|
||
// Fixed-size table of per-address access counters. Counters are stored as
// ASCII digits so the table can be compared directly with a string literal.
struct Buffer {
  static constexpr size_t kMaxBuffer = 1024;
  char buffer[kMaxBuffer + 1];
  size_t last = 0;

  // Resets every counter to '0' and forgets the highest address seen.
  void Clear() {
    for (char &slot : buffer)
      slot = '0';
    buffer[kMaxBuffer] = '\0';
    last = 0;
  }

  // Bumps the counter for the address encoded in `ptr` and tracks the highest
  // address touched so far. The pointer is never dereferenced.
  void Increment(const void *ptr) {
    const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);
    assert(address < kMaxBuffer);
    buffer[address] += 1;
    last = address > last ? address : last;
  }

  // Terminates the string right after the highest address touched and
  // returns the counters as a C string.
  char *Finish() {
    assert(last < kMaxBuffer);
    const size_t terminator = last + 1;
    buffer[terminator] = '\0';
    return buffer;
  }
};
|
|
||
| struct Trace { | ||
| Buffer read; | ||
| Buffer write; | ||
|
|
||
| void Add(char *__restrict dst, const char *__restrict src, size_t count) { | ||
| for (size_t i = 0; i < count; ++i) | ||
| read.Increment(src + i); | ||
| for (size_t i = 0; i < count; ++i) | ||
| write.Increment(dst + i); | ||
| } | ||
|
|
||
| void Clear() { | ||
| read.Clear(); | ||
| write.Clear(); | ||
| } | ||
|
|
||
| char *Read() { return read.Finish(); } | ||
| char *Write() { return write.Finish(); } | ||
| }; | ||
|
|
||
| static Trace &GetTrace() { | ||
| static thread_local Trace events; | ||
| return events; | ||
| } | ||
|
|
||
| extern "C" void LLVM_LIBC_MEMCPY_MONITOR(char *__restrict dst, | ||
| const char *__restrict src, | ||
| size_t count) { | ||
| GetTrace().Add(dst, src, count); | ||
| } | ||
|
|
||
// Turns an integer offset into a fake `char *` so that tests can describe
// addresses without any backing storage. The result must not be dereferenced.
char *I(uintptr_t offset) {
  char *const fake_pointer = reinterpret_cast<char *>(offset);
  return fake_pointer;
}
|
|
||
| TEST(MemcpyUtilsTest, CopyTrivial) { | ||
| auto &trace = GetTrace(); | ||
|
|
||
| trace.Clear(); | ||
| Copy<1>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "1"); | ||
| EXPECT_STREQ(trace.Read(), "1"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<2>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "11"); | ||
| EXPECT_STREQ(trace.Read(), "11"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<4>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "1111"); | ||
| EXPECT_STREQ(trace.Read(), "1111"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<8>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "11111111"); | ||
| EXPECT_STREQ(trace.Read(), "11111111"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<16>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "1111111111111111"); | ||
| EXPECT_STREQ(trace.Read(), "1111111111111111"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<32>(I(0), I(0)); | ||
| EXPECT_STREQ(trace.Write(), "11111111111111111111111111111111"); | ||
| EXPECT_STREQ(trace.Read(), "11111111111111111111111111111111"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<64>(I(0), I(0)); | ||
| EXPECT_STREQ( | ||
| trace.Write(), | ||
| "1111111111111111111111111111111111111111111111111111111111111111"); | ||
| EXPECT_STREQ( | ||
| trace.Read(), | ||
| "1111111111111111111111111111111111111111111111111111111111111111"); | ||
| } | ||
|
|
||
| TEST(MemcpyUtilsTest, CopyOffset) { | ||
| auto &trace = GetTrace(); | ||
|
|
||
| trace.Clear(); | ||
| Copy<1>(I(3), I(1)); | ||
| EXPECT_STREQ(trace.Write(), "0001"); | ||
| EXPECT_STREQ(trace.Read(), "01"); | ||
|
|
||
| trace.Clear(); | ||
| Copy<1>(I(2), I(1)); | ||
| EXPECT_STREQ(trace.Write(), "001"); | ||
| EXPECT_STREQ(trace.Read(), "01"); | ||
| } | ||
|
|
||
| TEST(MemcpyUtilsTest, CopyOverlap) { | ||
| auto &trace = GetTrace(); | ||
|
|
||
| trace.Clear(); | ||
| CopyOverlap<2>(I(0), I(0), 2); | ||
| EXPECT_STREQ(trace.Write(), "22"); | ||
| EXPECT_STREQ(trace.Read(), "22"); | ||
|
|
||
| trace.Clear(); | ||
| CopyOverlap<2>(I(0), I(0), 3); | ||
| EXPECT_STREQ(trace.Write(), "121"); | ||
| EXPECT_STREQ(trace.Read(), "121"); | ||
|
|
||
| trace.Clear(); | ||
| CopyOverlap<2>(I(0), I(0), 4); | ||
| EXPECT_STREQ(trace.Write(), "1111"); | ||
| EXPECT_STREQ(trace.Read(), "1111"); | ||
|
|
||
| trace.Clear(); | ||
| CopyOverlap<4>(I(2), I(1), 7); | ||
| EXPECT_STREQ(trace.Write(), "001112111"); | ||
| EXPECT_STREQ(trace.Read(), "01112111"); | ||
| } | ||
|
|
||
| TEST(MemcpyUtilsTest, CopyAligned) { | ||
| auto &trace = GetTrace(); | ||
| // Destination is aligned already. | ||
| // "1111000000000" | ||
| // + "0000111100000" | ||
| // + "0000000011110" | ||
| // + "0000000001111" | ||
| // = "1111111112221" | ||
| trace.Clear(); | ||
| CopyAligned<4>(I(0), I(0), 13); | ||
| EXPECT_STREQ(trace.Write(), "1111111112221"); | ||
| EXPECT_STREQ(trace.Read(), "1111111112221"); | ||
|
|
||
| // Misaligned destination | ||
| // "01111000000000" | ||
| // + "00001111000000" | ||
| // + "00000000111100" | ||
| // + "00000000001111" | ||
| // = "01112111112211" | ||
| trace.Clear(); | ||
| CopyAligned<4>(I(1), I(0), 13); | ||
| EXPECT_STREQ(trace.Write(), "01112111112211"); | ||
| EXPECT_STREQ(trace.Read(), "1112111112211"); | ||
| } | ||
|
|
||
| TEST(MemcpyUtilsTest, MaxReloads) { | ||
| auto &trace = GetTrace(); | ||
| for (size_t alignment = 0; alignment < 32; ++alignment) { | ||
| for (size_t count = 64; count < 768; ++count) { | ||
| trace.Clear(); | ||
| // We should never reload more than twice when copying from count = 2x32. | ||
| CopyAligned<32>(I(alignment), I(0), count); | ||
| const char *const written = trace.Write(); | ||
| // First bytes are untouched. | ||
| for (size_t i = 0; i < alignment; ++i) | ||
| EXPECT_EQ(written[i], '0'); | ||
| // Next bytes are loaded once or twice but no more. | ||
| for (size_t i = alignment; i < count; ++i) { | ||
| EXPECT_GE(written[i], '1'); | ||
| EXPECT_LE(written[i], '2'); | ||
| } | ||
| } | ||
| } | ||
| } | ||
|
|
||
| } // namespace __llvm_libc |