From 2c3cac28ab48576def27c32d75ca70fc96c81784 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Wed, 5 Nov 2025 08:53:33 -0800 Subject: [PATCH 1/3] Fix stale char_ptr for find_first_character_wide_read On exit from the loop, char_ptr had not been updated to match block_ptr, resulting in erroneous results. Moving all updates out of the loop fixes that. --- libc/src/string/string_utils.h | 10 +++++----- libc/test/src/string/memchr_test.cpp | 5 +++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 7feef56fb3676..c9a720bef98a0 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -136,11 +136,11 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch, const Word ch_mask = repeat_byte(ch); // Step 2: read blocks - for (const Word *block_ptr = reinterpret_cast(char_ptr); - !has_zeroes((*block_ptr) ^ ch_mask) && cur < n; - ++block_ptr, cur += sizeof(Word)) { - char_ptr = reinterpret_cast(block_ptr); - } + const Word *block_ptr = reinterpret_cast(char_ptr); + for (; !has_zeroes((*block_ptr) ^ ch_mask) && cur < n; + ++block_ptr, cur += sizeof(Word)) + ; + char_ptr = reinterpret_cast(block_ptr); // Step 3: find the match in the block for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) { diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp index ede841118fe03..1db5ecaed40cd 100644 --- a/libc/test/src/string/memchr_test.cpp +++ b/libc/test/src/string/memchr_test.cpp @@ -21,6 +21,11 @@ const char *call_memchr(const void *src, int c, size_t size) { return reinterpret_cast(LIBC_NAMESPACE::memchr(src, c, size)); } +TEST(LlvmLibcMemChrTest, FromProtoC) { + const char *src = "protobuf_cpp_version$\n"; + ASSERT_STREQ(call_memchr(src, '$', 22), "$\n"); +} + TEST(LlvmLibcMemChrTest, FindsCharacterAfterNullTerminator) { // memchr should continue searching after a null terminator. 
const size_t size = 5; From 1f8ef4b4e8f490ea1ee828d4ec529513002e51a0 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Wed, 5 Nov 2025 09:44:03 -0800 Subject: [PATCH 2/3] Improve test. --- libc/test/src/string/memchr_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/test/src/string/memchr_test.cpp b/libc/test/src/string/memchr_test.cpp index 1db5ecaed40cd..a92c5fe80be98 100644 --- a/libc/test/src/string/memchr_test.cpp +++ b/libc/test/src/string/memchr_test.cpp @@ -21,8 +21,8 @@ const char *call_memchr(const void *src, int c, size_t size) { return reinterpret_cast(LIBC_NAMESPACE::memchr(src, c, size)); } -TEST(LlvmLibcMemChrTest, FromProtoC) { - const char *src = "protobuf_cpp_version$\n"; +TEST(LlvmLibcMemChrTest, WideReadMultiIteration) { + const char *src = "abcdefghijklmnopqrst$\n"; ASSERT_STREQ(call_memchr(src, '$', 22), "$\n"); } From e55a8138f49f932c5547fbcd7594bc4267cf4147 Mon Sep 17 00:00:00 2001 From: Sterling Augustine Date: Wed, 5 Nov 2025 12:53:16 -0800 Subject: [PATCH 3/3] [libc] Do bounds-check before any dereference --- libc/src/string/string_utils.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index c9a720bef98a0..cbce62ead0328 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -127,8 +127,8 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch, size_t cur = 0; // Step 1: read 1 byte at a time to align to block size - for (; reinterpret_cast(char_ptr) % sizeof(Word) != 0 && cur < n; - ++char_ptr, ++cur) { + for (; cur < n && reinterpret_cast(char_ptr) % sizeof(Word) != 0; + ++cur, ++char_ptr) { if (*char_ptr == ch) return const_cast(char_ptr); } @@ -137,17 +137,17 @@ find_first_character_wide_read(const unsigned char *src, unsigned char ch, // Step 2: read blocks const Word *block_ptr = reinterpret_cast(char_ptr); - for (; !has_zeroes((*block_ptr) ^ ch_mask) 
&& cur < n; - ++block_ptr, cur += sizeof(Word)) + for (; cur < n && !has_zeroes((*block_ptr) ^ ch_mask); + cur += sizeof(Word), ++block_ptr) ; char_ptr = reinterpret_cast(block_ptr); // Step 3: find the match in the block - for (; *char_ptr != ch && cur < n; ++char_ptr, ++cur) { + for (; cur < n && *char_ptr != ch; ++cur, ++char_ptr) { ; } - if (*char_ptr != ch || cur >= n) + if (cur >= n || *char_ptr != ch) return static_cast(nullptr); return const_cast(char_ptr);