-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[libc] Use function overloads to make string parsing code more generic. #167417
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…t strings more generic. ctype_utils/wctype_utils were chaged in 120689e and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth `isspace(char` and `isspace(wchar_t)`) to simplify the templatized strtointeger implementation from 315dfe5 and make it easier to potentially add templatized strtofloat implementation.
|
@llvm/pr-subscribers-libc Author: Alexey Samsonov (vonosmas) Changesctype_utils/wctype_utils were chaged in 120689e and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth Full diff: https://github.com/llvm/llvm-project/pull/167417.diff 4 Files Affected:
diff --git a/libc/src/__support/ctype_utils.h b/libc/src/__support/ctype_utils.h
index 61b7a0aeb5b67..b0f9ece5d1a25 100644
--- a/libc/src/__support/ctype_utils.h
+++ b/libc/src/__support/ctype_utils.h
@@ -578,6 +578,13 @@ LIBC_INLINE static constexpr bool isgraph(char ch) {
return 0x20 < ch && ch < 0x7f;
}
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(char ch, char c_value,
+ [[maybe_unused]] wchar_t) {
+ return (ch == c_value);
+}
+
} // namespace internal
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/__support/str_to_integer.h b/libc/src/__support/str_to_integer.h
index ba3f49fa2f47b..f9154ffc90c7e 100644
--- a/libc/src/__support/str_to_integer.h
+++ b/libc/src/__support/str_to_integer.h
@@ -31,22 +31,14 @@ namespace LIBC_NAMESPACE_DECL {
namespace internal {
// Returns the idx to the first character in src that is not a whitespace
-// character (as determined by isspace() / iswspace())
+// character (as determined by isspace())
template <typename CharType>
LIBC_INLINE size_t
first_non_whitespace(const CharType *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
size_t src_cur = 0;
- while (src_cur < src_len) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (!internal::isspace(src[src_cur]))
- break;
- } else {
- if (!internal::iswspace(src[src_cur]))
- break;
- }
- ++src_cur;
- }
+ for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur)
+ ;
return src_cur;
}
@@ -54,11 +46,11 @@ first_non_whitespace(const CharType *__restrict src,
// plus sign, minus sign, or neither.
template <typename CharType>
LIBC_INLINE static int get_sign(const CharType *__restrict src) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- return (src[0] == '+') ? 1 : (src[0] == '-' ? -1 : 0);
- } else {
- return (src[0] == L'+') ? 1 : (src[0] == L'-' ? -1 : 0);
- }
+ if (is_one_of(src[0], '+', L'+'))
+ return 1;
+ if (is_one_of(src[0], '-', L'-'))
+ return -1;
+ return 0;
}
// checks if the next 3 characters of the string pointer are the start of a
@@ -68,13 +60,9 @@ LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
size_t src_len) {
if (src_len < 3)
return false;
- if constexpr (cpp::is_same_v<CharType, char>) {
- return src[0] == '0' && tolower(src[1]) == 'x' && isalnum(src[2]) &&
- b36_char_to_int(src[2]) < 16;
- } else {
- return src[0] == L'0' && towlower(src[1]) == L'x' && iswalnum(src[2]) &&
- b36_wchar_to_int(src[2]) < 16;
- }
+ return is_one_of(src[0], '0', L'0') &&
+ is_one_of(tolower(src[1]), 'x', L'x') && isalnum(src[2]) &&
+ b36_char_to_int(src[2]) < 16;
}
// Takes the address of the string pointer and parses the base from the start of
@@ -90,14 +78,8 @@ LIBC_INLINE static int infer_base(const CharType *__restrict src,
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
- if (src_len > 0) {
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (src[0] == '0')
- return 8;
- } else {
- if (src[0] == L'0')
- return 8;
- }
+ if (src_len > 0 && internal::is_one_of(src[0], '0', L'0')) {
+ return 8;
}
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
@@ -150,18 +132,8 @@ strtointeger(const CharType *__restrict src, int base,
bool is_number = false;
int error_val = 0;
ResultType result = 0;
- while (src_cur < src_len) {
- int cur_digit;
- if constexpr (cpp::is_same_v<CharType, char>) {
- if (!isalnum(src[src_cur]))
- break;
- cur_digit = b36_char_to_int(src[src_cur]);
- } else {
- if (!iswalnum(src[src_cur]))
- break;
- cur_digit = b36_wchar_to_int(src[src_cur]);
- }
-
+ while (src_cur < src_len && isalnum(src[src_cur])) {
+ int cur_digit = b36_char_to_int(src[src_cur]);
if (cur_digit >= base)
break;
diff --git a/libc/src/__support/wctype_utils.h b/libc/src/__support/wctype_utils.h
index 60b6afb928475..f04efd21db595 100644
--- a/libc/src/__support/wctype_utils.h
+++ b/libc/src/__support/wctype_utils.h
@@ -31,7 +31,7 @@ namespace internal {
// Similarly, do not change these fumarks to show your new solution is faster,
// as well as a way to support non-Anctions to use case ranges. e.g.
-// bool iswlower(wchar_t ch) {
+// bool islower(wchar_t ch) {
// switch(ch) {
// case L'a'...L'z':
// return true;
@@ -41,7 +41,7 @@ namespace internal {
// EBCDIC. Technically we could use some smaller ranges, but that's even harder
// to read.
-LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
+LIBC_INLINE static constexpr bool islower(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -75,7 +75,7 @@ LIBC_INLINE static constexpr bool iswlower(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
+LIBC_INLINE static constexpr bool isupper(wchar_t wch) {
switch (wch) {
case L'A':
case L'B':
@@ -109,7 +109,7 @@ LIBC_INLINE static constexpr bool iswupper(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
+LIBC_INLINE static constexpr bool isdigit(wchar_t wch) {
switch (wch) {
case L'0':
case L'1':
@@ -127,7 +127,7 @@ LIBC_INLINE static constexpr bool iswdigit(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t tolower(wchar_t wch) {
switch (wch) {
case L'A':
return L'a';
@@ -186,7 +186,7 @@ LIBC_INLINE static constexpr wchar_t towlower(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
+LIBC_INLINE static constexpr wchar_t toupper(wchar_t wch) {
switch (wch) {
case L'a':
return L'A';
@@ -245,7 +245,7 @@ LIBC_INLINE static constexpr wchar_t towupper(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalpha(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -305,7 +305,7 @@ LIBC_INLINE static constexpr bool iswalpha(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
+LIBC_INLINE static constexpr bool isalnum(wchar_t wch) {
switch (wch) {
case L'a':
case L'b':
@@ -375,7 +375,7 @@ LIBC_INLINE static constexpr bool iswalnum(wchar_t wch) {
}
}
-LIBC_INLINE static constexpr int b36_wchar_to_int(wchar_t wch) {
+LIBC_INLINE static constexpr int b36_char_to_int(wchar_t wch) {
switch (wch) {
case L'0':
return 0;
@@ -563,7 +563,7 @@ LIBC_INLINE static constexpr wchar_t int_to_b36_wchar(int num) {
}
}
-LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
+LIBC_INLINE static constexpr bool isspace(wchar_t wch) {
switch (wch) {
case L' ':
case L'\t':
@@ -577,6 +577,13 @@ LIBC_INLINE static constexpr bool iswspace(wchar_t wch) {
}
}
+// An overload which provides a way to compare input with specific character
+// values, when input can be of a regular or a wide character type.
+LIBC_INLINE static constexpr bool is_one_of(wchar_t ch, [[maybe_unused]] char,
+ wchar_t wc_value) {
+ return (ch == wc_value);
+}
+
// ------------------------------------------------------
// Rationale: Since these classification functions are
// called in other functions, we will avoid the overhead
diff --git a/libc/src/wctype/iswalpha.cpp b/libc/src/wctype/iswalpha.cpp
index e151363b88d0b..01ceac8f68b23 100644
--- a/libc/src/wctype/iswalpha.cpp
+++ b/libc/src/wctype/iswalpha.cpp
@@ -15,7 +15,7 @@
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(int, iswalpha, (wint_t c)) {
- return internal::iswalpha(static_cast<wchar_t>(c));
+ return internal::isalpha(static_cast<wchar_t>(c));
}
} // namespace LIBC_NAMESPACE_DECL
|
michaelrj-google
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, with a nit on naming
ctype_utils/wctype_utils were chaged in 120689e and e7f7973, respectively to operate on char/wchar_t. Now we can switch to the overloaded names (e.g. have noth
isspace(charandisspace(wchar_t)) to simplify the templatized strtointeger implementation from 315dfe5 and make it easier to potentially add templatized strtofloat implementation.