Skip to content

Conversation

@vonosmas
Copy link
Contributor

ctype_utils/wctype_utils were chaged in 120689e and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth isspace(char and isspace(wchar_t)) to simplify the templatized strtointeger implementation from 315dfe5 and make it easier to potentially add templatized strtofloat implementation.

…t strings more generic.

ctype_utils/wctype_utils were chaged in 120689e
and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names
(e.g. have noth `isspace(char` and `isspace(wchar_t)`) to
simplify the templatized strtointeger implementation
from 315dfe5 and make
it easier to potentially add templatized strtofloat implementation.
@llvmbot
Copy link
Member

llvmbot commented Nov 10, 2025

@llvm/pr-subscribers-libc

Author: Alexey Samsonov (vonosmas)

Changes

ctype_utils/wctype_utils were chaged in 120689e and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth isspace(char and isspace(wchar_t)) to simplify the templatized strtointeger implementation from 315dfe5 and make it easier to potentially add templatized strtofloat implementation.


Full diff: https://github.com/llvm/llvm-project/pull/167417.diff

4 Files Affected:

  • (modified) libc/src/__support/ctype_utils.h (+7)
  • (modified) libc/src/__support/str_to_integer.h (+15-43)
  • (modified) libc/src/__support/wctype_utils.h (+17-10)
  • (modified) libc/src/wctype/iswalpha.cpp (+1-1)
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 61b7a0aeb5b67..b0f9ece5d1a25 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -578,6 +578,13 @@ LIBC_INLINE static constexpr bool isgraph(char ch) {
   return 0x20 < ch && ch < 0x7f;
 }
 
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(char ch, char c_value,
+                                            [[maybe_unused]] wchar_t) {
+  return (ch == c_value);
+}
+
 } // namespace internal
 } // namespace LIBC_NAMESPACE_DECL
 
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index ba3f49fa2f47b..f9154ffc90c7e 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -31,22 +31,14 @@ namespace LIBC_NAMESPACE_DECL {
 namespace internal {
 
 // Returns the idx to the first character in src that is not a whitespace
-// character (as determined by isspace() / iswspace())
+// character (as determined by isspace())
 template <typename CharType>
 LIBC_INLINE size_t
 first_non_whitespace(const CharType *__restrict src,
                      size_t src_len = cpp::numeric_limits<size_t>::max()) {
   size_t src_cur = 0;
-  while (src_cur < src_len) {
-    if constexpr (cpp::is_same_v<CharType, char>) {
-      if (!internal::isspace(src[src_cur]))
-        break;
-    } else {
-      if (!internal::iswspace(src[src_cur]))
-        break;
-    }
-    ++src_cur;
-  }
+  for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur)
+    ;
   return src_cur;
 }
 
@@ -54,11 +46,11 @@ first_non_whitespace(const CharType *__restrict src,
 // plus sign, minus sign, or neither.
 template <typename CharType>
 LIBC_INLINE static int get_sign(const CharType *__restrict src) {
-  if constexpr (cpp::is_same_v<CharType, char>) {
-    return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0);
-  } else {
-    return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0);
-  }
+  if (is_one_of(src[0], '+', L'+'))
+    return 1;
+  if (is_one_of(src[0], '-', L'-'))
+    return -1;
+  return 0;
 }
 
 // checks if the next 3 characters of the string pointer are the start of a
@@ -68,13 +60,9 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
                                      size_t src_len) {
   if (src_len < 3)
     return false;
-  if constexpr (cpp::is_same_v<CharType, char>) {
-    return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
-           b36_char_to_int(src[2]) < 16;
-  } else {
-    return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
-           b36_wchar_to_int(src[2]) < 16;
-  }
+  return is_one_of(src[0], '0', L'0') &&
+         is_one_of(tolower(src[1]), 'x', L'x') && isalnum(src[2]) &&
+         b36_char_to_int(src[2]) < 16;
 }
 
 // Takes the address of the string pointer and parses the base from the start of
@@ -90,14 +78,8 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
   // An octal number is defined as "the prefix 0 optionally followed by a
   // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
   // number that starts with 0, including just 0, is an octal number.
-  if (src_len > 0) {
-    if constexpr (cpp::is_same_v<CharType, char>) {
-      if (src[0] == '0')
-        return 8;
-    } else {
-      if (src[0] == L'0')
-        return 8;
-    }
+  if (src_len > 0 && internal::is_one_of(src[0], '0', L'0')) {
+    return 8;
   }
   // A decimal number is defined as beginning "with a nonzero digit and
   // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
@@ -150,18 +132,8 @@ strtointeger(const CharType *__restrict src, int base,
   bool is_number = false;
   int error_val = 0;
   ResultType result = 0;
-  while (src_cur < src_len) {
-    int cur_digit;
-    if constexpr (cpp::is_same_v<CharType, char>) {
-      if (!isalnum(src[src_cur]))
-        break;
-      cur_digit = b36_char_to_int(src[src_cur]);
-    } else {
-      if (!iswalnum(src[src_cur]))
-        break;
-      cur_digit = b36_wchar_to_int(src[src_cur]);
-    }
-
+  while (src_cur < src_len && isalnum(src[src_cur])) {
+    int cur_digit = b36_char_to_int(src[src_cur]);
     if (cur_digit >= base)
       break;
 
diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
index 60b6afb928475..f04efd21db595 100644
--- a/libc/src/__support/wctype_utils.h
+++ b/libc/src/__support/wctype_utils.h
@@ -31,7 +31,7 @@ namespace internal {
 
 // Similarly, do not change these fumarks to show your new solution is faster,
 // as well as a way to support non-Anctions to use case ranges. e.g.
-//  bool iswlower(wchar_t ch) {
+//  bool islower(wchar_t ch) {
 //    switch(ch) {
 //    case L'a'...L'z':
 //      return true;
@@ -41,7 +41,7 @@ namespace internal {
 // EBCDIC. Technically we could use some smaller ranges, but that's even harder
 // to read.
 
-LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
+LIBC_INLINE static constexpr bool islower(wchar_t wch) {
   switch (wch) {
   case L'a':
   case L'b':
@@ -75,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
   switch (wch) {
   case L'A':
   case L'B':
@@ -109,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
   switch (wch) {
   case L'0':
   case L'1':
@@ -127,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
   switch (wch) {
   case L'A':
     return L'a';
@@ -186,7 +186,7 @@ LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
   switch (wch) {
   case L'a':
     return L'A';
@@ -245,7 +245,7 @@ LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
   switch (wch) {
   case L'a':
   case L'b':
@@ -305,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
   switch (wch) {
   case L'a':
   case L'b':
@@ -375,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
   }
 }
 
-LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) {
+LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
   switch (wch) {
   case L'0':
     return 0;
@@ -563,7 +563,7 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
   }
 }
 
-LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
+LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
   switch (wch) {
   case L' ':
   case L'\t':
@@ -577,6 +577,13 @@ LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
   }
 }
 
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(wchar_t ch, [[maybe_unused]] char,
+                                            wchar_t wc_value) {
+  return (ch == wc_value);
+}
+
 // ------------------------------------------------------
 // Rationale: Since these classification functions are
 // called in other functions, we will avoid the overhead
diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp
index e151363b88d0b..01ceac8f68b23 100644
--- a/libc/src/wctype/iswalpha.cpp
+++ b/libc/src/wctype/iswalpha.cpp
@@ -15,7 +15,7 @@
 namespace LIBC_NAMESPACE_DECL {
 
 LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) {
-  return internal::iswalpha(static_cast<wchar_t>(c));
+  return internal::isalpha(static_cast<wchar_t>(c));
 }
 
 } // namespace LIBC_NAMESPACE_DECL

Copy link
Contributor

@michaelrj-google michaelrj-google left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM, with a nit on naming

@vonosmas vonosmas merged commit 124bfdf into llvm:main Nov 12, 2025
20 checks passed
@vonosmas vonosmas deleted the strtointeger-fix branch November 12, 2025 06:52
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants