diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h index 26e9adde0d66e..10803488b6cf5 100644 --- a/libc/src/string/string_utils.h +++ b/libc/src/string/string_utils.h @@ -212,28 +212,28 @@ LIBC_INLINE char *string_token(char *__restrict src, static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits"); cpp::bitset<256> delims; for (; *delimiter_string != '\0'; ++delimiter_string) - delims.set(static_cast<size_t>(*delimiter_string)); + delims.set(*reinterpret_cast<const unsigned char *>(delimiter_string)); - char *tok_start = src; + unsigned char *tok_start = reinterpret_cast<unsigned char *>(src); if constexpr (SkipDelim) - while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start))) + while (*tok_start != '\0' && delims.test(*tok_start)) ++tok_start; if (*tok_start == '\0' && SkipDelim) { *context = nullptr; return nullptr; } - char *tok_end = tok_start; - while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end))) + unsigned char *tok_end = tok_start; + while (*tok_end != '\0' && !delims.test(*tok_end)) ++tok_end; if (*tok_end == '\0') { *context = nullptr; } else { *tok_end = '\0'; - *context = tok_end + 1; + *context = reinterpret_cast<char *>(tok_end + 1); } - return tok_start; + return reinterpret_cast<char *>(tok_start); } LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src, diff --git a/libc/test/src/string/strcspn_test.cpp b/libc/test/src/string/strcspn_test.cpp index d83b3cf4fdfe8..ec98f72e37113 100644 --- a/libc/test/src/string/strcspn_test.cpp +++ b/libc/test/src/string/strcspn_test.cpp @@ -48,3 +48,7 @@ TEST(LlvmLibcStrCSpnTest, DuplicatedCharactersNotPartOfComplementarySpan) { EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "aa"), size_t{0}); EXPECT_EQ(LIBC_NAMESPACE::strcspn("aaaa", "baa"), size_t{0}); } + +TEST(LlvmLibcStrCSpnTest, TopBitSet) { + EXPECT_EQ(LIBC_NAMESPACE::strcspn("hello\x80world", "\x80"), size_t{5}); +} diff --git a/libc/test/src/string/strpbrk_test.cpp b/libc/test/src/string/strpbrk_test.cpp index fbe14da12ac10..cc802460d10be 100644 
--- a/libc/test/src/string/strpbrk_test.cpp +++ b/libc/test/src/string/strpbrk_test.cpp @@ -60,3 +60,7 @@ TEST(LlvmLibcStrPBrkTest, FindsFirstOfRepeated) { TEST(LlvmLibcStrPBrkTest, FindsFirstInBreakset) { EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("12345", "34"), "345"); } + +TEST(LlvmLibcStrPBrkTest, TopBitSet) { + EXPECT_STREQ(LIBC_NAMESPACE::strpbrk("hello\x80world", "\x80 "), "\x80world"); +} diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp index e2a5d52bbeddb..553edd99604ef 100644 --- a/libc/test/src/string/strsep_test.cpp +++ b/libc/test/src/string/strsep_test.cpp @@ -61,6 +61,14 @@ TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) { ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr); } +TEST(LlvmLibcStrsepTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + char *p = top_bit_set_str; + ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strsep(&p, "\x80"), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strsep(&p, "\x80"), nullptr); +} + #if defined(LIBC_ADD_NULL_CHECKS) TEST(LlvmLibcStrsepTest, CrashOnNullPtr) { diff --git a/libc/test/src/string/strspn_test.cpp b/libc/test/src/string/strspn_test.cpp index 82f9b2aef0dfd..813612f09fc16 100644 --- a/libc/test/src/string/strspn_test.cpp +++ b/libc/test/src/string/strspn_test.cpp @@ -85,6 +85,10 @@ TEST(LlvmLibcStrSpnTest, DuplicatedCharactersToBeSearchedForShouldStillMatch) { EXPECT_EQ(LIBC_NAMESPACE::strspn("aaaa", "aa"), size_t{4}); } +TEST(LlvmLibcStrSpnTest, TopBitSet) { + EXPECT_EQ(LIBC_NAMESPACE::strspn("hello\x80world", "helo\x80rld"), size_t{6}); +} + #if defined(LIBC_ADD_NULL_CHECKS) TEST(LlvmLibcStrSpnTest, CrashOnNullPtr) { diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp index a19390d0b0c2d..8c4d3c362f778 100644 --- a/libc/test/src/string/strtok_r_test.cpp +++ b/libc/test/src/string/strtok_r_test.cpp @@ -131,3 +131,11 @@ TEST(LlvmLibcStrTokReentrantTest, 
SubsequentSearchesReturnNull) { ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr); ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr); } + +TEST(LlvmLibcStrTokReentrantTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + char *p; + ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(top_bit_set_str, "\x80", &p), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, "\x80", &p), nullptr); +} diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp index 76efeddda6f4a..3c097fdee0713 100644 --- a/libc/test/src/string/strtok_test.cpp +++ b/libc/test/src/string/strtok_test.cpp @@ -83,3 +83,10 @@ TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) { ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr); ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr); } + +TEST(LlvmLibcStrTokTest, TopBitSet) { + char top_bit_set_str[] = "hello\x80world"; + ASSERT_STREQ(LIBC_NAMESPACE::strtok(top_bit_set_str, "\x80"), "hello"); + ASSERT_STREQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), "world"); + ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, "\x80"), nullptr); +} diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b2cd3fdd468af..acfd0d96a28bf 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -5251,6 +5251,16 @@ libc_function( ], ) +libc_function( + name = "strtok_r", + srcs = ["src/string/strtok_r.cpp"], + hdrs = ["src/string/strtok_r.h"], + deps = [ + ":__support_common", + ":string_utils", + ], +) + ################################ fcntl targets ################################# libc_function( diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel index d90992417a721..1a95dece8bf20 100644 --- 
a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel @@ -59,6 +59,14 @@ libc_test( ], ) +libc_test( + name = "strpbrk_test", + srcs = ["strpbrk_test.cpp"], + deps = [ + "//libc:strpbrk", + ], +) + libc_test( name = "strsep_test", srcs = ["strsep_test.cpp"], @@ -127,6 +135,14 @@ libc_test( ], ) +libc_test( + name = "strtok_r_test", + srcs = ["strtok_r_test.cpp"], + deps = [ + "//libc:strtok_r", + ], +) + libc_test_library( name = "memory_check_utils", hdrs = ["memory_utils/memory_check_utils.h"],