Skip to content

Commit

Permalink
[libc] Extract an architecture independent copy of memcpy implementat…
Browse files Browse the repository at this point in the history
…ion.

Along the way, the platform-specific options for the memcpy, memset and bzero
builds have been enclosed in conditionals. Also, the optimization level
has been set to -O2 for the memory function builds so that the static
functions are actually inlined.

Reviewers: gchatelet

Differential Revision: https://reviews.llvm.org/D81621
  • Loading branch information
Siva Chandra Reddy committed Jun 15, 2020
1 parent 28a00ac commit 051f0ff
Show file tree
Hide file tree
Showing 2 changed files with 88 additions and 5 deletions.
24 changes: 19 additions & 5 deletions libc/src/string/CMakeLists.txt
Expand Up @@ -56,7 +56,7 @@ function(add_implementation name impl_name)
SRCS ${ADD_IMPL_SRCS}
HDRS ${ADD_IMPL_HDRS}
DEPENDS ${ADD_IMPL_DEPENDS}
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags}
COMPILE_OPTIONS ${ADD_IMPL_COMPILE_OPTIONS} ${flags} -O2
)
get_fq_target_name(${impl_name} fq_target_name)
set_target_properties(${fq_target_name} PROPERTIES REQUIRE_CPU_FEATURES "${ADD_IMPL_REQUIRE}")
Expand All @@ -70,13 +70,15 @@ endfunction()
# include the relevant architecture specific implementations
# Pick the string-function architecture directory and the memcpy source file.
# x86_64 maps to the "x86" directory and its dedicated memcpy.cpp; any other
# target machine keeps its own name and uses the architecture independent
# src/string/memcpy.cpp.
# The expansion is quoted so that an empty or unset LIBC_TARGET_MACHINE is
# compared as an empty string (taking the else() branch) instead of leaving
# if() with a missing left-hand operand, and so that the expanded value is not
# looked up again as a variable name.
if("${LIBC_TARGET_MACHINE}" STREQUAL "x86_64")
  set(LIBC_STRING_TARGET_ARCH "x86")
  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/x86/memcpy.cpp)
else()
  set(LIBC_STRING_TARGET_ARCH ${LIBC_TARGET_MACHINE})
  set(MEMCPY_SRC ${LIBC_SOURCE_DIR}/src/string/memcpy.cpp)
endif()

function(add_memcpy memcpy_name)
add_implementation(memcpy ${memcpy_name}
SRCS ${LIBC_SOURCE_DIR}/src/string/${LIBC_STRING_TARGET_ARCH}/memcpy.cpp
SRCS ${MEMCPY_SRC}
HDRS ${LIBC_SOURCE_DIR}/src/string/memcpy.h
DEPENDS
.memory_utils.memory_utils
Expand All @@ -87,7 +89,11 @@ function(add_memcpy memcpy_name)
)
endfunction()

add_memcpy(memcpy MARCH native)
# Register the default memcpy implementation. The `MARCH native` option is
# only passed for x86; other architectures call add_memcpy without it.
# The expansion is quoted so an empty LIBC_STRING_TARGET_ARCH compares as an
# empty string rather than producing a malformed if() command.
if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
  add_memcpy(memcpy MARCH native)
else()
  add_memcpy(memcpy)
endif()

# ------------------------------------------------------------------------------
# memset
Expand All @@ -106,7 +112,11 @@ function(add_memset memset_name)
)
endfunction()

add_memset(memset MARCH native)
# Register the default memset implementation. The `MARCH native` option is
# only passed for x86; other architectures call add_memset without it.
# The expansion is quoted so an empty LIBC_STRING_TARGET_ARCH compares as an
# empty string rather than producing a malformed if() command.
if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
  add_memset(memset MARCH native)
else()
  add_memset(memset)
endif()

# ------------------------------------------------------------------------------
# bzero
Expand All @@ -126,7 +136,11 @@ function(add_bzero bzero_name)
)
endfunction()

add_bzero(bzero MARCH native)
# Register the default bzero implementation. The `MARCH native` option is
# only passed for x86; other architectures call add_bzero without it.
# The expansion is quoted so an empty LIBC_STRING_TARGET_ARCH compares as an
# empty string rather than producing a malformed if() command.
if("${LIBC_STRING_TARGET_ARCH}" STREQUAL "x86")
  add_bzero(bzero MARCH native)
else()
  add_bzero(bzero)
endif()

# ------------------------------------------------------------------------------
# Add all other relevant implementations for the native target.
Expand Down
69 changes: 69 additions & 0 deletions libc/src/string/memcpy.cpp
@@ -0,0 +1,69 @@
//===-- Implementation of memcpy ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/string/memcpy.h"
#include "src/__support/common.h"
#include "src/string/memory_utils/memcpy_utils.h"

namespace __llvm_libc {

// Design rationale
// ================
//
// Using a profiler to observe size distributions for calls into libc
// functions, it was found most operations act on a small number of bytes.
// This makes it important to favor small sizes.
//
// The tests for `count` are in ascending order so the cost of branching is
// proportional to the cost of copying.
//
// The function is written in C++ for several reasons:
// - The compiler can __see__ the code, this is useful when performing Profile
// Guided Optimization as the optimized code can take advantage of branching
// probabilities.
// - It also allows for easier customization and favors testing multiple
// implementation parameters.
// - As compilers and processors get better, the generated code is improved
// with little change on the code side.
static void memcpy_impl(char *__restrict dst, const char *__restrict src,
                        size_t count) {
  // Size dispatch for memcpy. The conditions are evaluated from the smallest
  // size upwards, so each branch only sees the counts not claimed by an
  // earlier one; the resulting range is noted next to every branch.
  if (count == 0) {
    // Nothing to copy.
  } else if (count == 1) {
    CopyBlock<1>(dst, src);
  } else if (count == 2) {
    CopyBlock<2>(dst, src);
  } else if (count == 3) {
    CopyBlock<3>(dst, src);
  } else if (count == 4) {
    CopyBlock<4>(dst, src);
  } else if (count < 8) {
    // count in [5, 7]
    CopyBlockOverlap<4>(dst, src, count);
  } else if (count == 8) {
    CopyBlock<8>(dst, src);
  } else if (count < 16) {
    // count in [9, 15]
    CopyBlockOverlap<8>(dst, src, count);
  } else if (count == 16) {
    CopyBlock<16>(dst, src);
  } else if (count < 32) {
    // count in [17, 31]
    CopyBlockOverlap<16>(dst, src, count);
  } else if (count < 64) {
    // count in [32, 63]
    CopyBlockOverlap<32>(dst, src, count);
  } else if (count < 128) {
    // count in [64, 127]
    CopyBlockOverlap<64>(dst, src, count);
  } else {
    // count >= 128
    CopyAlignedBlocks<32>(dst, src, count);
  }
}

// Public memcpy entrypoint: views both buffers as bytes, forwards the copy of
// `size` bytes to memcpy_impl, and returns the destination pointer unchanged.
void *LLVM_LIBC_ENTRYPOINT(memcpy)(void *__restrict dst,
                                   const void *__restrict src, size_t size) {
  char *const dst_bytes = static_cast<char *>(dst);
  const char *const src_bytes = static_cast<const char *>(src);
  memcpy_impl(dst_bytes, src_bytes, size);
  return dst;
}

} // namespace __llvm_libc

0 comments on commit 051f0ff

Please sign in to comment.