Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions libc/cmake/modules/LLVMLibCCompileOptionRules.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,8 @@ function(_get_compile_options_from_config output_var)
list(APPEND config_options "-DLIBC_QSORT_IMPL=${LIBC_CONF_QSORT_IMPL}")
endif()

if(LIBC_CONF_STRING_UNSAFE_WIDE_READ)
list(APPEND config_options "-DLIBC_COPT_STRING_UNSAFE_WIDE_READ")
endif()
list(APPEND config_options "-DLIBC_COPT_STRING_LENGTH_IMPL=${LIBC_CONF_STRING_LENGTH_IMPL}")
list(APPEND config_options "-DLIBC_COPT_FIND_FIRST_CHARACTER_IMPL=${LIBC_CONF_FIND_FIRST_CHARACTER_IMPL}")

if(LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING)
list(APPEND config_options "-DLIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING")
Expand Down
11 changes: 8 additions & 3 deletions libc/config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
"value": false,
"doc": "Use an alternative printf float implementation based on 320-bit floats"
},

"LIBC_CONF_PRINTF_DISABLE_FIXED_POINT": {
"value": false,
"doc": "Disable printing fixed point values in printf and friends."
Expand All @@ -64,9 +65,13 @@
}
},
"string": {
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
"value": false,
"doc": "Read more than a byte at a time to perform byte-string operations like strlen."
"LIBC_CONF_STRING_LENGTH_IMPL": {
"value": "element",
"doc": "Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'."
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
"value": "element",
"doc": "Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'."
},
"LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING": {
"value": false,
Expand Down
7 changes: 5 additions & 2 deletions libc/config/linux/arm/config.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{
"string": {
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
"value": false
"LIBC_CONF_STRING_LENGTH_IMPL": {
"value": "element"
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
"value": "element"
}
}
}
7 changes: 5 additions & 2 deletions libc/config/linux/config.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{
"string": {
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
"value": true
"LIBC_CONF_STRING_LENGTH_IMPL": {
"value": "generic"
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
"value": "wide"
}
}
}
7 changes: 5 additions & 2 deletions libc/config/linux/riscv/config.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
{
"string": {
"LIBC_CONF_STRING_UNSAFE_WIDE_READ": {
"value": false
"LIBC_CONF_STRING_LENGTH_IMPL": {
"value": "element"
},
"LIBC_CONF_FIND_FIRST_CHARACTER_IMPL": {
"value": "element"
}
}
}
3 changes: 2 additions & 1 deletion libc/docs/configure.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,9 @@ to learn about the defaults for your platform and target.
* **"setjmp" options**
- ``LIBC_CONF_SETJMP_AARCH64_RESTORE_PLATFORM_REGISTER``: Make setjmp save the value of x18, and longjmp restore it. The AArch64 ABI delegates this register to platform ABIs, which can choose whether to make it caller-saved.
* **"string" options**
- ``LIBC_CONF_FIND_FIRST_CHARACTER_IMPL``: Selects the implementation for find-first-character-related functions: 'element', 'wide', 'generic' (vector), or 'arch'.
- ``LIBC_CONF_MEMSET_X86_USE_SOFTWARE_PREFETCHING``: Inserts prefetch for write instructions (PREFETCHW) for memset on x86 to recover performance when hardware prefetcher is disabled.
- ``LIBC_CONF_STRING_UNSAFE_WIDE_READ``: Read more than a byte at a time to perform byte-string operations like strlen.
- ``LIBC_CONF_STRING_LENGTH_IMPL``: Selects the implementation for string-length: 'element', 'wide', 'generic' (vector), or 'arch'.
* **"threads" options**
- ``LIBC_CONF_THREAD_MODE``: The implementation used for Mutex, acceptable values are LIBC_THREAD_MODE_PLATFORM, LIBC_THREAD_MODE_SINGLE, and LIBC_THREAD_MODE_EXTERNAL.
* **"time" options**
Expand Down
6 changes: 2 additions & 4 deletions libc/src/string/memory_utils/aarch64/inline_strlen.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

namespace LIBC_NAMESPACE_DECL {

namespace neon {
namespace internal::arch {
[[maybe_unused]] LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static size_t
string_length(const char *src) {
using Vector __attribute__((may_alias)) = uint8x8_t;
Expand Down Expand Up @@ -44,9 +44,7 @@ string_length(const char *src) {
(cpp::countr_zero(cmp) >> 3));
}
}
} // namespace neon

namespace string_length_impl = neon;
} // namespace internal::arch

} // namespace LIBC_NAMESPACE_DECL
#endif // __ARM_NEON
Expand Down
5 changes: 2 additions & 3 deletions libc/src/string/memory_utils/generic/inline_strlen.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
#include "src/__support/common.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {
namespace generic {

// Exploit the underlying integer representation to do a variable shift.
LIBC_INLINE constexpr cpp::simd_mask<char> shift_mask(cpp::simd_mask<char> m,
Expand Down Expand Up @@ -46,9 +46,8 @@ LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE size_t string_length(const char *src) {
cpp::find_first_set(mask);
}
}
} // namespace internal
} // namespace generic

namespace string_length_impl = internal;
} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_STRING_MEMORY_UTILS_GENERIC_INLINE_STRLEN_H
14 changes: 9 additions & 5 deletions libc/src/string/memory_utils/x86_64/inline_strlen.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@

namespace LIBC_NAMESPACE_DECL {

namespace string_length_internal {
namespace internal::arch {

// Return a bit-mask with the nth bit set if the nth-byte in block_ptr is zero.
template <typename Vector, typename Mask>
LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE static Mask
Expand Down Expand Up @@ -92,15 +93,18 @@ namespace avx512 {
}
} // namespace avx512
#endif
} // namespace string_length_internal

[[maybe_unused]] LIBC_INLINE size_t string_length(const char *src) {
#if defined(__AVX512F__)
namespace string_length_impl = string_length_internal::avx512;
return avx512::string_length(src);
#elif defined(__AVX2__)
namespace string_length_impl = string_length_internal::avx2;
return avx2::string_length(src);
#else
namespace string_length_impl = string_length_internal::sse2;
return sse2::string_length(src);
#endif
}

} // namespace internal::arch

} // namespace LIBC_NAMESPACE_DECL

Expand Down
138 changes: 84 additions & 54 deletions libc/src/string/string_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,21 +24,56 @@
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
#include "src/string/memory_utils/inline_memcpy.h"

#if defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)
#if LIBC_HAS_VECTOR_TYPE
#include "src/string/memory_utils/generic/inline_strlen.h"
#elif defined(LIBC_TARGET_ARCH_IS_X86)
#endif
#if defined(LIBC_TARGET_ARCH_IS_X86)
#include "src/string/memory_utils/x86_64/inline_strlen.h"
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) && defined(__ARM_NEON)
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
#include "src/string/memory_utils/aarch64/inline_strlen.h"
#else
namespace string_length_impl = LIBC_NAMESPACE::wide_read;
#endif
#endif // defined(LIBC_COPT_STRING_UNSAFE_WIDE_READ)

namespace LIBC_NAMESPACE_DECL {
namespace internal {

#if !LIBC_HAS_VECTOR_TYPE
// Forward any generic vector impls to architecture specific ones
namespace arch {}
namespace generic = arch;
#endif

namespace element {
// Reference implementations that inspect one element per iteration (a byte
// for char strings, a wider unit for wchar strings). They trade speed for
// being straightforward to read and reason about.

// Counts the characters that precede the first null terminator in `src`.
LIBC_INLINE size_t string_length(const char *src) {
  size_t count = 0;
  while (src[count])
    ++count;
  return count;
}

// Same contract as string_length above, but for an arbitrary element type T:
// counts the elements that precede the first one that converts to false.
template <typename T> LIBC_INLINE size_t string_length_element(const T *src) {
  size_t count = 0;
  while (src[count])
    ++count;
  return count;
}

// Scans at most `n` bytes starting at `src` and returns a pointer to the first
// byte equal to `ch`, or nullptr when none of those `n` bytes match. No byte
// is read when `n` is zero.
LIBC_INLINE void *find_first_character(const unsigned char *src,
                                       unsigned char ch, size_t n) {
  for (size_t offset = 0; offset < n; ++offset) {
    if (src[offset] == ch)
      return const_cast<unsigned char *>(src + offset);
  }
  return nullptr;
}
} // namespace element

namespace wide {
// Generic, non-vector, implementations of functions that search for data
// by reading from memory block-by-block.

template <typename Word> LIBC_INLINE constexpr Word repeat_byte(Word byte) {
static_assert(CHAR_BIT == 8, "repeat_byte assumes a byte is 8 bits.");
constexpr size_t BITS_IN_BYTE = CHAR_BIT;
Expand Down Expand Up @@ -74,8 +109,13 @@ template <typename Word> LIBC_INLINE constexpr bool has_zeroes(Word block) {
return (subtracted & inverted & HIGH_BITS) != 0;
}

template <typename Word>
LIBC_INLINE size_t string_length_wide_read(const char *src) {
// Unsigned int is the default size for most processors, and on x86-64 it
// performs better than larger sizes when the src pointer can't be assumed to
// be aligned to a word boundary, so it's the size we use for reading the
// string a block at a time.

LIBC_INLINE size_t string_length(const char *src) {
using Word = unsigned int;
const char *char_ptr = src;
// Step 1: read 1 byte at a time to align to block size
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0;
Expand All @@ -95,37 +135,23 @@ LIBC_INLINE size_t string_length_wide_read(const char *src) {
return static_cast<size_t>(char_ptr - src);
}

namespace wide_read {
LIBC_INLINE size_t string_length(const char *src) {
// Unsigned int is the default size for most processors, and on x86-64 it
// performs better than larger sizes when the src pointer can't be assumed to
// be aligned to a word boundary, so it's the size we use for reading the
// string a block at a time.
return string_length_wide_read<unsigned int>(src);
}

} // namespace wide_read

// Returns the length of a string, denoted by the first occurrence
// of a null terminator.
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
if constexpr (cpp::is_same_v<T, char>)
return string_length_impl::string_length(src);
#endif
size_t length;
for (length = 0; *src; ++src, ++length)
;
return length;
}

template <typename Word>
LIBC_NO_SANITIZE_OOB_ACCESS LIBC_INLINE void *
find_first_character_wide_read(const unsigned char *src, unsigned char ch,
size_t n) {
find_first_character(const unsigned char *src, unsigned char ch,
size_t max_strlen = cpp::numeric_limits<size_t>::max()) {
using Word = unsigned int;
const unsigned char *char_ptr = src;
size_t cur = 0;

// If the maximum size of the string is small, the overhead of aligning to a
// word boundary and generating a bitmask of the appropriate size may be
// greater than the gains from reading larger chunks. Based on some testing,
// the crossover point between when it's faster to just read bytewise and read
// blocks is somewhere between 16 and 32, so 4 times the size of the block
// should be in that range.
if (max_strlen < (sizeof(Word) * 4)) {
return element::find_first_character(src, ch, max_strlen);
}
size_t n = max_strlen;
// Step 1: read 1 byte at a time to align to block size
for (; reinterpret_cast<uintptr_t>(char_ptr) % sizeof(Word) != 0 && cur < n;
++char_ptr, ++cur) {
Expand Down Expand Up @@ -153,31 +179,35 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch,
return const_cast<unsigned char *>(char_ptr);
}

LIBC_INLINE void *find_first_character_byte_read(const unsigned char *src,
unsigned char ch, size_t n) {
for (; n && *src != ch; --n, ++src)
;
return n ? const_cast<unsigned char *>(src) : nullptr;
} // namespace wide

// Dispatch mechanism for implementations of performance-sensitive
// functions. Always measure, but generally from lower- to higher-performance
// order:
//
// 1. element - read char-by-char or wchar-by-wchar
// 2. wide - read word-by-word
// 3. generic - read using clang's internal vector types
// 4. arch - hand-coded per architecture. Possibly in asm, or with intrinsics.
//
// The called implementation is chosen at build-time by setting
// LIBC_CONF_{FUNC}_IMPL in config.json
static constexpr auto &string_length_impl =
LIBC_COPT_STRING_LENGTH_IMPL::string_length;
static constexpr auto &find_first_character_impl =
LIBC_COPT_FIND_FIRST_CHARACTER_IMPL::find_first_character;

// Returns the number of elements in `src` that precede the null terminator.
// Strings of plain `char` are routed to the implementation bound to
// string_length_impl; every other element type uses the element-by-element
// scan from the element namespace.
template <typename T> LIBC_INLINE size_t string_length(const T *src) {
  if constexpr (!cpp::is_same_v<T, char>) {
    return element::string_length_element<T>(src);
  } else {
    return string_length_impl(src);
  }
}

// Returns the first occurrence of 'ch' within the first 'n' characters of
// 'src'. If 'ch' is not found, returns nullptr.
LIBC_INLINE void *find_first_character(const unsigned char *src,
unsigned char ch, size_t max_strlen) {
#ifdef LIBC_COPT_STRING_UNSAFE_WIDE_READ
// If the maximum size of the string is small, the overhead of aligning to a
// word boundary and generating a bitmask of the appropriate size may be
// greater than the gains from reading larger chunks. Based on some testing,
// the crossover point between when it's faster to just read bytewise and read
// blocks is somewhere between 16 and 32, so 4 times the size of the block
// should be in that range.
// Unsigned int is used for the same reason as in strlen.
using BlockType = unsigned int;
if (max_strlen > (sizeof(BlockType) * 4)) {
return find_first_character_wide_read<BlockType>(src, ch, max_strlen);
}
#endif
return find_first_character_byte_read(src, ch, max_strlen);
return find_first_character_impl(src, ch, max_strlen);
}

// Returns the maximum length span that contains only characters not found in
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ LIBC_CONFIGURE_OPTIONS = [
# "LIBC_COPT_SCANF_DISABLE_FLOAT",
# "LIBC_COPT_SCANF_DISABLE_INDEX_MODE",
"LIBC_COPT_STDIO_USE_SYSTEM_FILE",
"LIBC_COPT_STRING_UNSAFE_WIDE_READ",
"LIBC_COPT_STRING_LENGTH_IMPL=generic",
"LIBC_COPT_FIND_FIRST_CHARACTER_IMPL=wide",
# "LIBC_COPT_STRTOFLOAT_DISABLE_CLINGER_FAST_PATH",
# "LIBC_COPT_STRTOFLOAT_DISABLE_EISEL_LEMIRE",
# "LIBC_COPT_STRTOFLOAT_DISABLE_SIMPLE_DECIMAL_CONVERSION",
Expand Down
Loading