85 changes: 46 additions & 39 deletions libc/src/__support/str_to_integer.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,15 @@ namespace internal {

// Returns a pointer to the first character in src that is not a whitespace
// character (as determined by isspace())
// NOTE(review): this span is an unmarked diff rendering, so two variants of
// the function are interleaved below: one that walks the pointer itself with
// no bound, and one that indexes with src_cur and never looks past src_len
// characters. They appear to be the before/after of the same change.
LIBC_INLINE const char *first_non_whitespace(const char *__restrict src) {
while (internal::isspace(*src)) {
++src;
// TODO: Change from returning a pointer to returning a length.
LIBC_INLINE const char *
first_non_whitespace(const char *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
// Count of leading whitespace characters seen so far; also the read index.
size_t src_cur = 0;
// The bound is tested before the read, so src[src_len] is never examined.
while (src_cur < src_len && internal::isspace(src[src_cur])) {
++src_cur;
}
return src;
// When all src_len characters are whitespace this is src + src_len.
return src + src_cur;
}

LIBC_INLINE int b36_char_to_int(char input) {
Expand All @@ -38,61 +42,64 @@ LIBC_INLINE int b36_char_to_int(char input) {

// checks if the next 3 characters of the string pointer are the start of a
// hexadecimal number. Does not advance the string pointer.
// NOTE(review): this span is an unmarked diff rendering; the single-argument
// signature and the (src, src_len) signature below share the one body that
// follows.
LIBC_INLINE bool is_hex_start(const char *__restrict src) {
LIBC_INLINE bool
is_hex_start(const char *__restrict src,
size_t src_len = cpp::numeric_limits<size_t>::max()) {
// A prefix plus at least one digit needs three characters, so a shorter
// window can never be a hex start.
if (src_len < 3)
return false;
// OR-ing with 32 sets the bit that separates 'X' from 'x' in ASCII, so both
// spellings of the prefix compare equal to 'x'. The third character must be
// alphanumeric with a digit value below 16.
return *src == '0' && (*(src + 1) | 32) == 'x' && isalnum(*(src + 2)) &&
b36_char_to_int(*(src + 2)) < 16;
}

// Takes the address of the string pointer and parses the base from the start of
// it. This function will advance |src| to the first valid digit in the inferred
// base.
// NOTE(review): this span is an unmarked diff rendering; a pointer-to-pointer
// form (which advances *src past the "0x" prefix) and a (src, src_len) form
// (which only inspects src and never modifies it) are interleaved below. The
// "address of the string pointer" wording only matches the pointer-to-pointer
// form.
LIBC_INLINE int infer_base(const char *__restrict *__restrict src) {
// it.
LIBC_INLINE int infer_base(const char *__restrict src, size_t src_len) {
// A hexadecimal number is defined as "the prefix 0x or 0X followed by a
// sequence of the decimal digits and the letters a (or A) through f (or F)
// with values 10 through 15 respectively." (C standard 6.4.4.1)
if (is_hex_start(*src)) {
(*src) += 2;
if (is_hex_start(src, src_len))
return 16;
} // An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
else if (**src == '0') {
// An octal number is defined as "the prefix 0 optionally followed by a
// sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
// number that starts with 0, including just 0, is an octal number.
// The length guard keeps an empty window from being read; with nothing to
// inspect the function falls through to the decimal default below.
if (src_len > 0 && src[0] == '0')
return 8;
} // A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
else {
return 10;
}
// A decimal number is defined as beginning "with a nonzero digit and
// consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
return 10;
}

// Takes a pointer to a string and the base to convert to. This function is used
// as the backend for all of the string to int functions.
template <class T>
LIBC_INLINE StrToNumResult<T> strtointeger(const char *__restrict src,
int base) {
LIBC_INLINE StrToNumResult<T>
strtointeger(const char *__restrict src, int base,
const size_t src_len = cpp::numeric_limits<size_t>::max()) {
// TODO: Rewrite to support numbers longer than long long
unsigned long long result = 0;
bool is_number = false;
const char *original_src = src;
size_t src_cur = 0;
int error_val = 0;

if (base < 0 || base == 1 || base > 36) {
error_val = EINVAL;
return {0, 0, error_val};
}
if (src_len == 0)
return {0, 0, 0};

if (base < 0 || base == 1 || base > 36)
return {0, 0, EINVAL};

src = first_non_whitespace(src);
src_cur = first_non_whitespace(src, src_len) - src;

char result_sign = '+';
if (*src == '+' || *src == '-') {
result_sign = *src;
++src;
if (src[src_cur] == '+' || src[src_cur] == '-') {
result_sign = src[src_cur];
++src_cur;
}

if (base == 0) {
base = infer_base(&src);
} else if (base == 16 && is_hex_start(src)) {
src = src + 2;
}
if (base == 0)
base = infer_base(src + src_cur, src_len - src_cur);

if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
src_cur = src_cur + 2;

constexpr bool IS_UNSIGNED = (cpp::numeric_limits<T>::min() == 0);
const bool is_positive = (result_sign == '+');
Expand All @@ -103,13 +110,13 @@ LIBC_INLINE StrToNumResult<T> strtointeger(const char *__restrict src,
unsigned long long const abs_max =
(is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
unsigned long long const abs_max_div_by_base = abs_max / base;
while (isalnum(*src)) {
int cur_digit = b36_char_to_int(*src);
while (src_cur < src_len && isalnum(src[src_cur])) {
int cur_digit = b36_char_to_int(src[src_cur]);
if (cur_digit >= base)
break;

is_number = true;
++src;
++src_cur;

// If the number has already hit the maximum value for the current type then
// the result cannot change, but we still need to advance src to the end of
Expand All @@ -133,7 +140,7 @@ LIBC_INLINE StrToNumResult<T> strtointeger(const char *__restrict src,
}
}

ptrdiff_t str_len = is_number ? (src - original_src) : 0;
ptrdiff_t str_len = is_number ? (src_cur) : 0;

if (error_val == ERANGE) {
if (is_positive || IS_UNSIGNED)
Expand Down
4 changes: 3 additions & 1 deletion libc/test/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,6 @@ add_subdirectory(__support)
add_subdirectory(ctype)
add_subdirectory(errno)
add_subdirectory(fenv)
add_subdirectory(inttypes)
add_subdirectory(math)
add_subdirectory(search)
add_subdirectory(stdbit)
Expand All @@ -50,6 +49,9 @@ add_subdirectory(stdlib)
add_subdirectory(string)
add_subdirectory(wchar)

# Depends on utilities in stdlib
add_subdirectory(inttypes)

if(${LIBC_TARGET_OS} STREQUAL "linux")
add_subdirectory(fcntl)
add_subdirectory(sched)
Expand Down
13 changes: 13 additions & 0 deletions libc/test/src/__support/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,19 @@ add_libc_test(
libc.src.errno.errno
)


# Unit test target for the internal str_to_integer helper, built from
# str_to_integer_test.cpp and registered in the libc-support-tests suite.
add_libc_test(
str_to_integer_test
SUITE
libc-support-tests
SRCS
str_to_integer_test.cpp
DEPENDS
libc.src.__support.integer_literals
libc.src.__support.str_to_integer
libc.src.errno.errno
)

add_libc_test(
integer_to_string_test
SUITE
Expand Down
28 changes: 28 additions & 0 deletions libc/test/src/__support/high_precision_decimal_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -406,3 +406,31 @@ TEST(LlvmLibcHighPrecisionDecimalTest, BigExpTest) {
// Same, but since the number is negative the net result is -123456788
EXPECT_EQ(big_negative_hpd.get_decimal_point(), -123456789 + 1);
}

TEST(LlvmLibcHighPrecisionDecimalTest, NumLenExpTest) {
  using LIBC_NAMESPACE::internal::HighPrecisionDecimal;

  // Only the first 5 characters ("1e123") are handed to the parser. The
  // mantissa digit and the "e" take two of them, so just three exponent digits
  // are expected to be read.
  HighPrecisionDecimal positive_exp("1e123456789", 5);
  EXPECT_EQ(positive_exp.get_decimal_point(), 123 + 1);

  // Here the 5-character window is "1e-12": the minus sign occupies a slot
  // too, leaving room for two exponent digits.
  HighPrecisionDecimal negative_exp("1e-123456789", 5);
  EXPECT_EQ(negative_exp.get_decimal_point(), -12 + 1);
}

TEST(LlvmLibcHighPrecisionDecimalTest, NumLenDigitsTest) {
  using LIBC_NAMESPACE::internal::HighPrecisionDecimal;

  // A 5-character window ends in the middle of the mantissa: "12345".
  HighPrecisionDecimal cut_in_digits("123456789e1", 5);
  EXPECT_EQ(cut_in_digits.round_to_integer_type<uint64_t>(), uint64_t(12345));

  // A 10-character window ends right after the "e": the marker is visible but
  // no exponent digit is, so the value is expected to be left unscaled.
  HighPrecisionDecimal cut_after_e("123456789e1", 10);
  EXPECT_EQ(cut_after_e.round_to_integer_type<uint64_t>(),
            uint64_t(123456789));
}
240 changes: 240 additions & 0 deletions libc/test/src/__support/str_to_integer_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,240 @@
//===-- Unittests for str_to_integer --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/str_to_integer.h"
#include "src/errno/libc_errno.h"
#include <stddef.h>

#include "test/UnitTest/Test.h"

// This file is for testing the src_len argument and other internal interface
// features. Primary testing is done in stdlib/StrolTest.cpp through the public
// interface.

TEST(LlvmLibcStrToIntegerTest, SimpleLength) {
  using LIBC_NAMESPACE::internal::strtointeger;

  const char *const digits = "12345";

  { // Limit beyond the end of the string: every digit is expected.
    auto parsed = strtointeger<int>(digits, 10, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
    ASSERT_EQ(parsed.value, 12345);
  }

  { // Limit of two characters: only "12" may be read.
    auto parsed = strtointeger<int>(digits, 10, 2);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(2));
    ASSERT_EQ(parsed.value, 12);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(digits, 10, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

// Checks how the src_len limit interacts with leading whitespace: skipped
// spaces count toward both the limit and the reported parsed length.
TEST(LlvmLibcStrToIntegerTest, LeadingSpaces) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Exactly FIVE leading spaces followed by five digits (10 characters in
  // total). Every expected length below depends on that count: with a single
  // space the full parse would report 6 characters, not 10.
  const char *const padded = "     12345";

  { // Limit beyond the end of the string: spaces and all digits are consumed.
    auto parsed = strtointeger<int>(padded, 10, 15);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(10));
    ASSERT_EQ(parsed.value, 12345);
  }

  { // Limit equal to the string length.
    auto parsed = strtointeger<int>(padded, 10, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(10));
    ASSERT_EQ(parsed.value, 12345);
  }

  { // Limit of 7: five spaces plus the digits "12".
    auto parsed = strtointeger<int>(padded, 10, 7);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
    ASSERT_EQ(parsed.value, 12);
  }

  { // Limit of 5: the window holds only whitespace, so no number is found.
    auto parsed = strtointeger<int>(padded, 10, 5);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(padded, 10, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

TEST(LlvmLibcStrToIntegerTest, LeadingSign) {
  using LIBC_NAMESPACE::internal::strtointeger;

  const char *const plus_input = "+12345";
  const char *const minus_input = "-12345";

  // Limit beyond the end of the string: sign and all digits are consumed.
  {
    auto parsed = strtointeger<int>(plus_input, 10, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 12345);
  }
  {
    auto parsed = strtointeger<int>(minus_input, 10, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, -12345);
  }

  // Limit equal to the string length.
  {
    auto parsed = strtointeger<int>(plus_input, 10, 6);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 12345);
  }
  {
    auto parsed = strtointeger<int>(minus_input, 10, 6);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, -12345);
  }

  // Limit of 3: the sign uses one character, leaving the digits "12".
  {
    auto parsed = strtointeger<int>(plus_input, 10, 3);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(3));
    ASSERT_EQ(parsed.value, 12);
  }
  {
    auto parsed = strtointeger<int>(minus_input, 10, 3);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(3));
    ASSERT_EQ(parsed.value, -12);
  }

  // Limit of 1: only the sign is visible, which is not a number by itself.
  {
    auto parsed = strtointeger<int>(plus_input, 10, 1);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
  {
    auto parsed = strtointeger<int>(minus_input, 10, 1);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }

  // Limit of zero: nothing is parsed and no error is reported.
  {
    auto parsed = strtointeger<int>(plus_input, 10, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
  {
    auto parsed = strtointeger<int>(minus_input, 10, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

TEST(LlvmLibcStrToIntegerTest, Base16PrefixAutoSelect) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Base 0 asks the parser to infer the base from the prefix.
  const char *const hex_input = "0x12345";

  { // Limit beyond the end of the string.
    auto parsed = strtointeger<int>(hex_input, 0, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
    ASSERT_EQ(parsed.value, 0x12345);
  }

  { // Limit equal to the string length.
    auto parsed = strtointeger<int>(hex_input, 0, 7);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
    ASSERT_EQ(parsed.value, 0x12345);
  }

  { // Limit of 5: the prefix plus the digits "123".
    auto parsed = strtointeger<int>(hex_input, 0, 5);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
    ASSERT_EQ(parsed.value, 0x123);
  }

  { // Limit of 2: "0x" with no digit after it is not a hex prefix, so only the
    // leading "0" is expected to be consumed.
    auto parsed = strtointeger<int>(hex_input, 0, 2);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
    ASSERT_EQ(parsed.value, 0);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(hex_input, 0, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

TEST(LlvmLibcStrToIntegerTest, Base16PrefixManualSelect) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Base 16 is requested explicitly; the "0x" prefix is still accepted.
  const char *const hex_input = "0x12345";

  { // Limit beyond the end of the string.
    auto parsed = strtointeger<int>(hex_input, 16, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
    ASSERT_EQ(parsed.value, 0x12345);
  }

  { // Limit equal to the string length.
    auto parsed = strtointeger<int>(hex_input, 16, 7);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
    ASSERT_EQ(parsed.value, 0x12345);
  }

  { // Limit of 5: the prefix plus the digits "123".
    auto parsed = strtointeger<int>(hex_input, 16, 5);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
    ASSERT_EQ(parsed.value, 0x123);
  }

  { // Limit of 2: "0x" with no digit after it is not a hex prefix, so only the
    // leading "0" is expected to be consumed.
    auto parsed = strtointeger<int>(hex_input, 16, 2);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
    ASSERT_EQ(parsed.value, 0);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(hex_input, 16, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

TEST(LlvmLibcStrToIntegerTest, Base8PrefixAutoSelect) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Base 0 asks the parser to infer the base; a leading "0" selects octal.
  const char *const octal_input = "012345";

  { // Limit beyond the end of the string.
    auto parsed = strtointeger<int>(octal_input, 0, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 012345);
  }

  { // Limit equal to the string length.
    auto parsed = strtointeger<int>(octal_input, 0, 6);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 012345);
  }

  { // Limit of 4: "0123".
    auto parsed = strtointeger<int>(octal_input, 0, 4);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(4));
    ASSERT_EQ(parsed.value, 0123);
  }

  { // Limit of 1: the lone "0" is itself a valid number.
    auto parsed = strtointeger<int>(octal_input, 0, 1);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
    ASSERT_EQ(parsed.value, 0);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(octal_input, 0, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

TEST(LlvmLibcStrToIntegerTest, Base8PrefixManualSelect) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Base 8 is requested explicitly; the leading "0" is just another digit.
  const char *const octal_input = "012345";

  { // Limit beyond the end of the string.
    auto parsed = strtointeger<int>(octal_input, 8, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 012345);
  }

  { // Limit equal to the string length.
    auto parsed = strtointeger<int>(octal_input, 8, 6);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 012345);
  }

  { // Limit of 4: "0123".
    auto parsed = strtointeger<int>(octal_input, 8, 4);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(4));
    ASSERT_EQ(parsed.value, 0123);
  }

  { // Limit of 1: the lone "0" is itself a valid number.
    auto parsed = strtointeger<int>(octal_input, 8, 1);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
    ASSERT_EQ(parsed.value, 0);
  }

  { // Limit of zero: nothing is parsed and no error is reported.
    auto parsed = strtointeger<int>(octal_input, 8, 0);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
    ASSERT_EQ(parsed.value, 0);
  }
}

// Exercises whitespace, sign, and hex-prefix handling together under
// progressively tighter src_len limits.
TEST(LlvmLibcStrToIntegerTest, CombinedTests) {
  using LIBC_NAMESPACE::internal::strtointeger;

  // Exactly FOUR leading spaces, then "-0x123" (10 characters in total). The
  // expected lengths below depend on that count: with a single space the full
  // parse would report 7 characters, not 10.
  const char *const combined = "    -0x123";

  { // Whole string visible: spaces, sign, prefix and all three hex digits.
    auto parsed = strtointeger<int>(combined, 0, 10);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(10));
    ASSERT_EQ(parsed.value, -0x123);
  }

  { // Limit of 8: the window is "    -0x1", so a single hex digit is read.
    auto parsed = strtointeger<int>(combined, 0, 8);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(8));
    ASSERT_EQ(parsed.value, -0x1);
  }

  { // Limit of 7: the window is "    -0x". Without a digit after it, "0x" is
    // not a hex prefix, so only the spaces, the sign and the "0" (6
    // characters) are expected to be consumed.
    auto parsed = strtointeger<int>(combined, 0, 7);
    EXPECT_FALSE(parsed.has_error());
    EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
    ASSERT_EQ(parsed.value, 0);
  }
}