diff --git a/libc/src/__support/blockstore.h b/libc/src/__support/blockstore.h index d78e4be5fa9ca4..dc5fdd1b92fc29 100644 --- a/libc/src/__support/blockstore.h +++ b/libc/src/__support/blockstore.h @@ -45,7 +45,7 @@ class BlockStore { struct Pair { Block *first, *second; }; - Pair getLastBlocks() { + Pair get_last_blocks() { if (REVERSE_ORDER) return {current, current->next}; Block *prev = nullptr; @@ -56,20 +56,20 @@ class BlockStore { return {curr, prev}; } - Block *getLastBlock() { return getLastBlocks().first; } + Block *get_last_block() { return get_last_blocks().first; } public: constexpr BlockStore() = default; ~BlockStore() = default; - class iterator { + class Iterator { Block *block; size_t index; public: - constexpr iterator(Block *b, size_t i) : block(b), index(i) {} + constexpr Iterator(Block *b, size_t i) : block(b), index(i) {} - iterator &operator++() { + Iterator &operator++() { if (REVERSE_ORDER) { if (index == 0) return *this; @@ -98,11 +98,11 @@ class BlockStore { return *reinterpret_cast(block->data + sizeof(T) * true_index); } - bool operator==(const iterator &rhs) const { + bool operator==(const Iterator &rhs) const { return block == rhs.block && index == rhs.index; } - bool operator!=(const iterator &rhs) const { + bool operator!=(const Iterator &rhs) const { return block != rhs.block || index != rhs.index; } }; @@ -138,7 +138,7 @@ class BlockStore { } T &back() { - return *reinterpret_cast(getLastBlock()->data + + return *reinterpret_cast(get_last_block()->data + sizeof(T) * (fill_count - 1)); } @@ -146,7 +146,7 @@ class BlockStore { fill_count--; if (fill_count || current == &first) return; - auto [last, prev] = getLastBlocks(); + auto [last, prev] = get_last_blocks(); if (REVERSE_ORDER) { LIBC_ASSERT(last == current); current = current->next; @@ -162,18 +162,18 @@ class BlockStore { bool empty() const { return current == &first && !fill_count; } - iterator begin() { + Iterator begin() { if (REVERSE_ORDER) - return iterator(current, fill_count); + return Iterator(current, fill_count); else - return iterator(&first, 0); + return Iterator(&first, 0); } - iterator end() { + Iterator end() { if (REVERSE_ORDER) - return iterator(&first, 0); + return Iterator(&first, 0); else - return iterator(current, fill_count); + return Iterator(current, fill_count); } }; diff --git a/libc/src/__support/integer_to_string.h b/libc/src/__support/integer_to_string.h index 8d3859c8eb0ca5..81ed21ccfca166 100644 --- a/libc/src/__support/integer_to_string.h +++ b/libc/src/__support/integer_to_string.h @@ -166,7 +166,7 @@ template class IntegerToString { static_assert(cpp::is_integral_v); LIBC_INLINE static constexpr size_t compute_buffer_size() { - constexpr auto max_digits = []() -> size_t { + constexpr auto MAX_DIGITS = []() -> size_t { // We size the string buffer for base 10 using an approximation algorithm: // // size = ceil(sizeof(T) * 5 / 2) @@ -188,19 +188,19 @@ template class IntegerToString { // For other bases, we approximate by rounding down to the nearest power // of two base, since the space needed is easy to calculate and it won't // overestimate by too much. - constexpr auto floor_log_2 = [](size_t num) -> size_t { + constexpr auto FLOOR_LOG_2 = [](size_t num) -> size_t { size_t i = 0; for (; num > 1; num /= 2) ++i; return i; }; - constexpr size_t BITS_PER_DIGIT = floor_log_2(Fmt::BASE); + constexpr size_t BITS_PER_DIGIT = FLOOR_LOG_2(Fmt::BASE); return ((sizeof(T) * 8 + (BITS_PER_DIGIT - 1)) / BITS_PER_DIGIT); }; - constexpr size_t digit_size = cpp::max(max_digits(), Fmt::MIN_DIGITS); - constexpr size_t sign_size = Fmt::BASE == 10 ? 1 : 0; - constexpr size_t prefix_size = Fmt::PREFIX ? 2 : 0; - return digit_size + sign_size + prefix_size; + constexpr size_t DIGIT_SIZE = cpp::max(MAX_DIGITS(), Fmt::MIN_DIGITS); + constexpr size_t SIGN_SIZE = Fmt::BASE == 10 ? 1 : 0; + constexpr size_t PREFIX_SIZE = Fmt::PREFIX ? 2 : 0; + return DIGIT_SIZE + SIGN_SIZE + PREFIX_SIZE; } static constexpr size_t BUFFER_SIZE = compute_buffer_size(); diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index ae367994706c0f..7a89fbb11b2a9e 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -22,21 +22,21 @@ namespace LIBC_NAMESPACE { template LIBC_INLINE constexpr T mask_trailing_ones() { static_assert(cpp::is_unsigned_v); - constexpr unsigned t_bits = CHAR_BIT * sizeof(T); - static_assert(count <= t_bits && "Invalid bit index"); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); // It's important not to initialize T with -1, since T may be BigInt which // will take -1 as a uint64_t and only initialize the low 64 bits. - constexpr T all_zeroes(0); - constexpr T all_ones(~all_zeroes); // bitwise NOT performs integer promotion. - return count == 0 ? 0 : (all_ones >> (t_bits - count)); + constexpr T ALL_ZEROES(0); + constexpr T ALL_ONES(~ALL_ZEROES); // bitwise NOT performs integer promotion. + return count == 0 ? 0 : (ALL_ONES >> (T_BITS - count)); } // Create a bitmask with the count left-most bits set to 1, and all other bits // set to 0. Only unsigned types are allowed. template LIBC_INLINE constexpr T mask_leading_ones() { - constexpr T mask(mask_trailing_ones()); - return T(~mask); // bitwise NOT performs integer promotion. + constexpr T MASK(mask_trailing_ones()); + return T(~MASK); // bitwise NOT performs integer promotion. } // Add with carry diff --git a/libc/src/math/generic/hypotf.cpp b/libc/src/math/generic/hypotf.cpp index 614aa399fcc21b..ffbf706aefaf6b 100644 --- a/libc/src/math/generic/hypotf.cpp +++ b/libc/src/math/generic/hypotf.cpp @@ -48,8 +48,8 @@ LLVM_LIBC_FUNCTION(float, hypotf, (float x, float y)) { // Correct rounding. double r_sq = result.get_val() * result.get_val(); double diff = sum_sq - r_sq; - constexpr uint64_t mask = 0x0000'0000'3FFF'FFFFULL; - uint64_t lrs = result.uintval() & mask; + constexpr uint64_t MASK = 0x0000'0000'3FFF'FFFFULL; + uint64_t lrs = result.uintval() & MASK; if (lrs == 0x0000'0000'1000'0000ULL && err < diff) { result.set_uintval(result.uintval() | 1ULL); diff --git a/libc/src/string/memory_utils/op_generic.h b/libc/src/string/memory_utils/op_generic.h index c7dbd5dd1d6cce..28243c7a18163b 100644 --- a/libc/src/string/memory_utils/op_generic.h +++ b/libc/src/string/memory_utils/op_generic.h @@ -95,10 +95,10 @@ template T load(CPtr src) { return ::LIBC_NAMESPACE::load(src); } else if constexpr (is_array_v) { using value_type = typename T::value_type; - T Value; - for (size_t I = 0; I < array_size_v; ++I) - Value[I] = load(src + (I * sizeof(value_type))); - return Value; + T value; + for (size_t i = 0; i < array_size_v; ++i) + value[i] = load(src + (i * sizeof(value_type))); + return value; } } @@ -108,8 +108,8 @@ template void store(Ptr dst, T value) { ::LIBC_NAMESPACE::store(dst, value); } else if constexpr (is_array_v) { using value_type = typename T::value_type; - for (size_t I = 0; I < array_size_v; ++I) - store(dst + (I * sizeof(value_type)), value[I]); + for (size_t i = 0; i < array_size_v; ++i) + store(dst + (i * sizeof(value_type)), value[i]); } } @@ -118,11 +118,11 @@ template T splat(uint8_t value) { if constexpr (is_scalar_v) return T(~0) / T(0xFF) * T(value); else if constexpr (is_vector_v) { - T Out; + T out; // This for loop is optimized out for vector types. for (size_t i = 0; i < sizeof(T); ++i) - Out[i] = value; - return Out; + out[i] = value; + return out; } } @@ -140,8 +140,8 @@ template struct Memset { } else if constexpr (is_array_v) { using value_type = typename T::value_type; const auto Splat = splat(value); - for (size_t I = 0; I < array_size_v; ++I) - store(dst + (I * sizeof(value_type)), Splat); + for (size_t i = 0; i < array_size_v; ++i) + store(dst + (i * sizeof(value_type)), Splat); } } diff --git a/libc/src/string/memory_utils/op_x86.h b/libc/src/string/memory_utils/op_x86.h index 2852636c48a74d..1afa91f20e6591 100644 --- a/libc/src/string/memory_utils/op_x86.h +++ b/libc/src/string/memory_utils/op_x86.h @@ -40,12 +40,12 @@ namespace LIBC_NAMESPACE::x86 { // A set of constants to check compile time features. -LIBC_INLINE_VAR constexpr bool kSse2 = LLVM_LIBC_IS_DEFINED(__SSE2__); -LIBC_INLINE_VAR constexpr bool kSse41 = LLVM_LIBC_IS_DEFINED(__SSE4_1__); -LIBC_INLINE_VAR constexpr bool kAvx = LLVM_LIBC_IS_DEFINED(__AVX__); -LIBC_INLINE_VAR constexpr bool kAvx2 = LLVM_LIBC_IS_DEFINED(__AVX2__); -LIBC_INLINE_VAR constexpr bool kAvx512F = LLVM_LIBC_IS_DEFINED(__AVX512F__); -LIBC_INLINE_VAR constexpr bool kAvx512BW = LLVM_LIBC_IS_DEFINED(__AVX512BW__); +LIBC_INLINE_VAR constexpr bool K_SSE2 = LLVM_LIBC_IS_DEFINED(__SSE2__); +LIBC_INLINE_VAR constexpr bool K_SSE41 = LLVM_LIBC_IS_DEFINED(__SSE4_1__); +LIBC_INLINE_VAR constexpr bool K_AVX = LLVM_LIBC_IS_DEFINED(__AVX__); +LIBC_INLINE_VAR constexpr bool K_AVX2 = LLVM_LIBC_IS_DEFINED(__AVX2__); +LIBC_INLINE_VAR constexpr bool K_AVX512_F = LLVM_LIBC_IS_DEFINED(__AVX512F__); +LIBC_INLINE_VAR constexpr bool K_AVX512_BW = LLVM_LIBC_IS_DEFINED(__AVX512BW__); /////////////////////////////////////////////////////////////////////////////// // Memcpy repmovsb implementation diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h index 701a84375ea8e7..6e26e0c6a54dda 100644 --- a/libc/src/string/memory_utils/utils.h +++ b/libc/src/string/memory_utils/utils.h @@ -205,9 +205,9 @@ LIBC_INLINE MemcmpReturnType cmp_neq_uint64_t(uint64_t a, uint64_t b) { // Loads bytes from memory (possibly unaligned) and materializes them as // type. template LIBC_INLINE T load(CPtr ptr) { - T Out; - memcpy_inline(&Out, ptr); - return Out; + T out; + memcpy_inline(&out, ptr); + return out; } // Stores a value of type T in memory (possibly unaligned). @@ -228,12 +228,12 @@ LIBC_INLINE ValueType load_aligned(CPtr src) { static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS))); const ValueType value = load(assume_aligned(src)); if constexpr (sizeof...(TS) > 0) { - constexpr size_t shift = sizeof(T) * 8; + constexpr size_t SHIFT = sizeof(T) * 8; const ValueType next = load_aligned(src + sizeof(T)); if constexpr (Endian::IS_LITTLE) - return value | (next << shift); + return value | (next << SHIFT); else if constexpr (Endian::IS_BIG) - return (value << shift) | next; + return (value << SHIFT) | next; else static_assert(cpp::always_false, "Invalid endianness"); } else { @@ -261,16 +261,16 @@ LIBC_INLINE auto load64_aligned(CPtr src, size_t offset) { template LIBC_INLINE void store_aligned(ValueType value, Ptr dst) { static_assert(sizeof(ValueType) >= (sizeof(T) + ... + sizeof(TS))); - constexpr size_t shift = sizeof(T) * 8; + constexpr size_t SHIFT = sizeof(T) * 8; if constexpr (Endian::IS_LITTLE) { store(assume_aligned(dst), value & ~T(0)); if constexpr (sizeof...(TS) > 0) - store_aligned(value >> shift, dst + sizeof(T)); + store_aligned(value >> SHIFT, dst + sizeof(T)); } else if constexpr (Endian::IS_BIG) { constexpr size_t OFFSET = (0 + ... + sizeof(TS)); store(assume_aligned(dst + OFFSET), value & ~T(0)); if constexpr (sizeof...(TS) > 0) - store_aligned(value >> shift, dst); + store_aligned(value >> SHIFT, dst); } else { static_assert(cpp::always_false, "Invalid endianness"); } diff --git a/libc/src/string/memory_utils/x86_64/inline_memcpy.h b/libc/src/string/memory_utils/x86_64/inline_memcpy.h index dd09d4f3e812b0..ae61b1235bd08c 100644 --- a/libc/src/string/memory_utils/x86_64/inline_memcpy.h +++ b/libc/src/string/memory_utils/x86_64/inline_memcpy.h @@ -30,11 +30,11 @@ namespace LIBC_NAMESPACE { namespace x86 { -LIBC_INLINE_VAR constexpr size_t kOneCacheline = 64; -LIBC_INLINE_VAR constexpr size_t kTwoCachelines = 2 * kOneCacheline; -LIBC_INLINE_VAR constexpr size_t kThreeCachelines = 3 * kOneCacheline; +LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE = 64; +LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES = 2 * K_ONE_CACHELINE; +LIBC_INLINE_VAR constexpr size_t K_THREE_CACHELINES = 3 * K_ONE_CACHELINE; -LIBC_INLINE_VAR constexpr bool kUseSoftwarePrefetching = +LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING = LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMCPY_X86_USE_SOFTWARE_PREFETCHING); // Whether to use rep;movsb exclusively (0), not at all (SIZE_MAX), or only @@ -42,7 +42,7 @@ LIBC_INLINE_VAR constexpr bool kUseSoftwarePrefetching = #ifndef LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE #define LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE SIZE_MAX #endif -LIBC_INLINE_VAR constexpr size_t kRepMovsbThreshold = +LIBC_INLINE_VAR constexpr size_t K_REP_MOVSB_THRESHOLD = LIBC_COPT_MEMCPY_X86_USE_REPMOVSB_FROM_SIZE; } // namespace x86 @@ -73,10 +73,10 @@ inline_memcpy_x86_avx_ge64(Ptr __restrict dst, CPtr __restrict src, inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst, CPtr __restrict src, size_t count) { using namespace LIBC_NAMESPACE::x86; - prefetch_to_local_cache(src + kOneCacheline); + prefetch_to_local_cache(src + K_ONE_CACHELINE); if (count <= 128) return builtin::Memcpy<64>::head_tail(dst, src, count); - prefetch_to_local_cache(src + kTwoCachelines); + prefetch_to_local_cache(src + K_TWO_CACHELINES); // Aligning 'dst' on a 32B boundary. builtin::Memcpy<32>::block(dst, src); align_to_next_boundary<32, Arg::Dst>(dst, src, count); @@ -89,22 +89,22 @@ inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst, // - count >= 128. if (count < 352) { // Two cache lines at a time. - while (offset + kTwoCachelines + 32 <= count) { - prefetch_to_local_cache(src + offset + kOneCacheline); - prefetch_to_local_cache(src + offset + kTwoCachelines); - builtin::Memcpy::block_offset(dst, src, offset); - offset += kTwoCachelines; + while (offset + K_TWO_CACHELINES + 32 <= count) { + prefetch_to_local_cache(src + offset + K_ONE_CACHELINE); + prefetch_to_local_cache(src + offset + K_TWO_CACHELINES); + builtin::Memcpy::block_offset(dst, src, offset); + offset += K_TWO_CACHELINES; } } else { // Three cache lines at a time. - while (offset + kThreeCachelines + 32 <= count) { - prefetch_to_local_cache(src + offset + kOneCacheline); - prefetch_to_local_cache(src + offset + kTwoCachelines); - prefetch_to_local_cache(src + offset + kThreeCachelines); + while (offset + K_THREE_CACHELINES + 32 <= count) { + prefetch_to_local_cache(src + offset + K_ONE_CACHELINE); + prefetch_to_local_cache(src + offset + K_TWO_CACHELINES); + prefetch_to_local_cache(src + offset + K_THREE_CACHELINES); // It is likely that this copy will be turned into a 'rep;movsb' on // non-AVX machines. - builtin::Memcpy::block_offset(dst, src, offset); - offset += kThreeCachelines; + builtin::Memcpy::block_offset(dst, src, offset); + offset += K_THREE_CACHELINES; } } return builtin::Memcpy<32>::loop_and_tail_offset(dst, src, count, offset); @@ -114,11 +114,11 @@ inline_memcpy_x86_sse2_ge64_sw_prefetching(Ptr __restrict dst, inline_memcpy_x86_avx_ge64_sw_prefetching(Ptr __restrict dst, CPtr __restrict src, size_t count) { using namespace LIBC_NAMESPACE::x86; - prefetch_to_local_cache(src + kOneCacheline); + prefetch_to_local_cache(src + K_ONE_CACHELINE); if (count <= 128) return builtin::Memcpy<64>::head_tail(dst, src, count); - prefetch_to_local_cache(src + kTwoCachelines); - prefetch_to_local_cache(src + kThreeCachelines); + prefetch_to_local_cache(src + K_TWO_CACHELINES); + prefetch_to_local_cache(src + K_THREE_CACHELINES); if (count < 256) return builtin::Memcpy<128>::head_tail(dst, src, count); // Aligning 'dst' on a 32B boundary. @@ -131,13 +131,13 @@ inline_memcpy_x86_avx_ge64_sw_prefetching(Ptr __restrict dst, // - we prefetched cachelines at 'src + 64', 'src + 128', and 'src + 196' // - 'dst' is 32B aligned, // - count >= 128. - while (offset + kThreeCachelines + 64 <= count) { + while (offset + K_THREE_CACHELINES + 64 <= count) { // Three cache lines at a time. - prefetch_to_local_cache(src + offset + kOneCacheline); - prefetch_to_local_cache(src + offset + kTwoCachelines); - prefetch_to_local_cache(src + offset + kThreeCachelines); - builtin::Memcpy::block_offset(dst, src, offset); - offset += kThreeCachelines; + prefetch_to_local_cache(src + offset + K_ONE_CACHELINE); + prefetch_to_local_cache(src + offset + K_TWO_CACHELINES); + prefetch_to_local_cache(src + offset + K_THREE_CACHELINES); + builtin::Memcpy::block_offset(dst, src, offset); + offset += K_THREE_CACHELINES; } return builtin::Memcpy<64>::loop_and_tail_offset(dst, src, count, offset); } @@ -145,13 +145,13 @@ inline_memcpy_x86_avx_ge64_sw_prefetching(Ptr __restrict dst, [[maybe_unused]] LIBC_INLINE void inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) { #if defined(__AVX512F__) - constexpr size_t vector_size = 64; + constexpr size_t VECTOR_SIZE = 64; #elif defined(__AVX__) - constexpr size_t vector_size = 32; + constexpr size_t VECTOR_SIZE = 32; #elif defined(__SSE2__) - constexpr size_t vector_size = 16; + constexpr size_t VECTOR_SIZE = 16; #else - constexpr size_t vector_size = 8; + constexpr size_t VECTOR_SIZE = 8; #endif if (count == 0) return; @@ -174,20 +174,20 @@ inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) { // But it's not profitable to use larger size if it's not natively // supported: we will both use more instructions and handle fewer // sizes in earlier branches. - if (vector_size >= 16 ? count < 16 : count <= 16) + if (VECTOR_SIZE >= 16 ? count < 16 : count <= 16) return builtin::Memcpy<8>::head_tail(dst, src, count); - if (vector_size >= 32 ? count < 32 : count <= 32) + if (VECTOR_SIZE >= 32 ? count < 32 : count <= 32) return builtin::Memcpy<16>::head_tail(dst, src, count); - if (vector_size >= 64 ? count < 64 : count <= 64) + if (VECTOR_SIZE >= 64 ? count < 64 : count <= 64) return builtin::Memcpy<32>::head_tail(dst, src, count); - if constexpr (x86::kAvx) { - if constexpr (x86::kUseSoftwarePrefetching) { + if constexpr (x86::K_AVX) { + if constexpr (x86::K_USE_SOFTWARE_PREFETCHING) { return inline_memcpy_x86_avx_ge64_sw_prefetching(dst, src, count); } else { return inline_memcpy_x86_avx_ge64(dst, src, count); } } else { - if constexpr (x86::kUseSoftwarePrefetching) { + if constexpr (x86::K_USE_SOFTWARE_PREFETCHING) { return inline_memcpy_x86_sse2_ge64_sw_prefetching(dst, src, count); } else { return inline_memcpy_x86_sse2_ge64(dst, src, count); @@ -198,12 +198,12 @@ inline_memcpy_x86(Ptr __restrict dst, CPtr __restrict src, size_t count) { [[maybe_unused]] LIBC_INLINE void inline_memcpy_x86_maybe_interpose_repmovsb(Ptr __restrict dst, CPtr __restrict src, size_t count) { - if constexpr (x86::kRepMovsbThreshold == 0) { + if constexpr (x86::K_REP_MOVSB_THRESHOLD == 0) { return x86::Memcpy::repmovsb(dst, src, count); - } else if constexpr (x86::kRepMovsbThreshold == SIZE_MAX) { + } else if constexpr (x86::K_REP_MOVSB_THRESHOLD == SIZE_MAX) { return inline_memcpy_x86(dst, src, count); } else { - if (LIBC_UNLIKELY(count >= x86::kRepMovsbThreshold)) + if (LIBC_UNLIKELY(count >= x86::K_REP_MOVSB_THRESHOLD)) return x86::Memcpy::repmovsb(dst, src, count); else return inline_memcpy_x86(dst, src, count); diff --git a/libc/src/string/memory_utils/x86_64/inline_memset.h b/libc/src/string/memory_utils/x86_64/inline_memset.h index 41eadf2dcc00cc..584efcbea4be37 100644 --- a/libc/src/string/memory_utils/x86_64/inline_memset.h +++ b/libc/src/string/memory_utils/x86_64/inline_memset.h @@ -18,11 +18,13 @@ namespace LIBC_NAMESPACE { namespace x86 { // Size of one cache line for software prefetching -LIBC_INLINE_VAR constexpr size_t kOneCachelineSize = 64; -LIBC_INLINE_VAR constexpr size_t kTwoCachelinesSize = kOneCachelineSize * 2; -LIBC_INLINE_VAR constexpr size_t kFiveCachelinesSize = kOneCachelineSize * 5; +LIBC_INLINE_VAR constexpr size_t K_ONE_CACHELINE_SIZE = 64; +LIBC_INLINE_VAR constexpr size_t K_TWO_CACHELINES_SIZE = + K_ONE_CACHELINE_SIZE * 2; +LIBC_INLINE_VAR constexpr size_t K_FIVE_CACHELINES_SIZE = + K_ONE_CACHELINE_SIZE * 5; -LIBC_INLINE_VAR constexpr bool kUseSoftwarePrefetchingMemset = +LIBC_INLINE_VAR constexpr bool K_USE_SOFTWARE_PREFETCHING_MEMSET = LLVM_LIBC_IS_DEFINED(LIBC_COPT_MEMSET_X86_USE_SOFTWARE_PREFETCHING); } // namespace x86 @@ -47,15 +49,15 @@ using uint512_t = cpp::array; [[maybe_unused]] LIBC_INLINE static void inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) { - constexpr size_t PREFETCH_DISTANCE = x86::kFiveCachelinesSize; - constexpr size_t PREFETCH_DEGREE = x86::kTwoCachelinesSize; + constexpr size_t PREFETCH_DISTANCE = x86::K_FIVE_CACHELINES_SIZE; + constexpr size_t PREFETCH_DEGREE = x86::K_TWO_CACHELINES_SIZE; constexpr size_t SIZE = sizeof(uint256_t); // Prefetch one cache line - prefetch_for_write(dst + x86::kOneCachelineSize); + prefetch_for_write(dst + x86::K_ONE_CACHELINE_SIZE); if (count <= 128) return generic::Memset::head_tail(dst, value, count); // Prefetch the second cache line - prefetch_for_write(dst + x86::kTwoCachelinesSize); + prefetch_for_write(dst + x86::K_TWO_CACHELINES_SIZE); // Aligned loop generic::Memset::block(dst, value); align_to_next_boundary<32>(dst, count); @@ -67,7 +69,7 @@ inline_memset_x86_gt64_sw_prefetching(Ptr dst, uint8_t value, size_t count) { while (offset + PREFETCH_DEGREE + SIZE <= count) { prefetch_for_write(dst + offset + PREFETCH_DISTANCE); prefetch_for_write(dst + offset + PREFETCH_DISTANCE + - x86::kOneCachelineSize); + x86::K_ONE_CACHELINE_SIZE); for (size_t i = 0; i < PREFETCH_DEGREE; i += SIZE, offset += SIZE) generic::Memset::block(dst + offset, value); } @@ -93,7 +95,7 @@ inline_memset_x86(Ptr dst, uint8_t value, size_t count) { return generic::Memset::head_tail(dst, value, count); if (count <= 64) return generic::Memset::head_tail(dst, value, count); - if constexpr (x86::kUseSoftwarePrefetchingMemset) + if constexpr (x86::K_USE_SOFTWARE_PREFETCHING_MEMSET) return inline_memset_x86_gt64_sw_prefetching(dst, value, count); if (count <= 128) return generic::Memset::head_tail(dst, value, count);