diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 8a46a7a1baae3..d3bcad470b3e1 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -398,6 +398,7 @@ set(TARGET_LIBC_ENTRYPOINTS libc.src.wchar.wmemchr libc.src.wchar.wcpcpy libc.src.wchar.wcpncpy + libc.src.wchar.wcstof libc.src.wchar.wcstok libc.src.wchar.wcstol libc.src.wchar.wcstoll diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index fb5b19b523b31..faceb9bb4e12d 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -360,3 +360,10 @@ functions: - type: const wchar_t *__restrict - type: wchar_t **__restrict - type: int + - name: wcstof + standards: + - stdc + return_type: float + arguments: + - type: const wchar_t *__restrict + - type: wchar_t **__restrict diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt index 96874702b1fdf..d33e7ae45c068 100644 --- a/libc/src/__support/CMakeLists.txt +++ b/libc/src/__support/CMakeLists.txt @@ -221,7 +221,9 @@ add_header_library( HDRS high_precision_decimal.h DEPENDS + .ctype_utils .str_to_integer + .wctype_utils libc.hdr.stdint_proxy ) @@ -236,6 +238,7 @@ add_header_library( .str_to_integer .str_to_num_result .uint128 + .wctype_utils libc.hdr.errno_macros libc.hdr.stdint_proxy libc.src.__support.common diff --git a/libc/src/__support/high_precision_decimal.h b/libc/src/__support/high_precision_decimal.h index 08af78602d2ab..75f2a7607b425 100644 --- a/libc/src/__support/high_precision_decimal.h +++ b/libc/src/__support/high_precision_decimal.h @@ -20,6 +20,7 @@ #include "src/__support/ctype_utils.h" #include "src/__support/macros/config.h" #include "src/__support/str_to_integer.h" +#include "src/__support/wctype_utils.h" namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -38,6 +39,24 @@ struct LShiftTableEntry { // TODO: Figure out where to put this. enum class RoundDirection { Up, Down, Nearest }; +// These constants are used in both this file and in the main str_to_float.h. +// TODO: Figure out where to put this. +template struct constants; +template <> struct constants { + static constexpr char DECIMAL_POINT = '.'; + static constexpr char DECIMAL_EXPONENT_MARKER = 'e'; + static constexpr char HEX_EXPONENT_MARKER = 'p'; + static constexpr char INF_STRING[] = "infinity"; + static constexpr char NAN_STRING[] = "nan"; +}; +template <> struct constants { + static constexpr wchar_t DECIMAL_POINT = L'.'; + static constexpr wchar_t DECIMAL_EXPONENT_MARKER = L'e'; + static constexpr wchar_t HEX_EXPONENT_MARKER = L'p'; + static constexpr wchar_t INF_STRING[] = L"infinity"; + static constexpr wchar_t NAN_STRING[] = L"nan"; +}; + // This is based on the HPD data structure described as part of the Simple // Decimal Conversion algorithm by Nigel Tao, described at this link: // https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html @@ -314,9 +333,9 @@ class HighPrecisionDecimal { public: // num_string is assumed to be a string of numeric characters. It doesn't // handle leading spaces. - LIBC_INLINE - HighPrecisionDecimal( - const char *__restrict num_string, + template + LIBC_INLINE HighPrecisionDecimal( + const CharType *__restrict num_string, const size_t num_len = cpp::numeric_limits::max()) { bool saw_dot = false; size_t num_cur = 0; @@ -324,25 +343,26 @@ class HighPrecisionDecimal { // them all. uint32_t total_digits = 0; while (num_cur < num_len && - (isdigit(num_string[num_cur]) || num_string[num_cur] == '.')) { - if (num_string[num_cur] == '.') { + (isdigit(num_string[num_cur]) || + num_string[num_cur] == constants::DECIMAL_POINT)) { + if (num_string[num_cur] == constants::DECIMAL_POINT) { if (saw_dot) { break; } this->decimal_point = static_cast(total_digits); saw_dot = true; } else { - if (num_string[num_cur] == '0' && this->num_digits == 0) { + int digit = b36_char_to_int(num_string[num_cur]); + if (digit == 0 && this->num_digits == 0) { --this->decimal_point; ++num_cur; continue; } ++total_digits; if (this->num_digits < MAX_NUM_DIGITS) { - this->digits[this->num_digits] = static_cast( - internal::b36_char_to_int(num_string[num_cur])); + this->digits[this->num_digits] = static_cast(digit); ++this->num_digits; - } else if (num_string[num_cur] != '0') { + } else if (digit != 0) { this->truncated = true; } } @@ -352,11 +372,10 @@ class HighPrecisionDecimal { if (!saw_dot) this->decimal_point = static_cast(total_digits); - if (num_cur < num_len && - (num_string[num_cur] == 'e' || num_string[num_cur] == 'E')) { + if (num_cur < num_len && tolower(num_string[num_cur]) == + constants::DECIMAL_EXPONENT_MARKER) { ++num_cur; - if (isdigit(num_string[num_cur]) || num_string[num_cur] == '+' || - num_string[num_cur] == '-') { + if (isdigit(num_string[num_cur]) || get_sign(num_string + num_cur) != 0) { auto result = strtointeger(num_string + num_cur, 10, num_len - num_cur); if (result.has_error()) { diff --git a/libc/src/__support/str_to_float.h b/libc/src/__support/str_to_float.h index 3d35d8a30afff..873a113780650 100644 --- a/libc/src/__support/str_to_float.h +++ b/libc/src/__support/str_to_float.h @@ -33,6 +33,7 @@ #include "src/__support/str_to_integer.h" #include "src/__support/str_to_num_result.h" #include "src/__support/uint128.h" +#include "src/__support/wctype_utils.h" namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -334,9 +335,9 @@ constexpr int32_t NUM_POWERS_OF_TWO = // the Eisel-Lemire algorithm fails, it's slower but more accurate. It's based // on the Simple Decimal Conversion algorithm by Nigel Tao, described at this // link: https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html -template +template LIBC_INLINE FloatConvertReturn simple_decimal_conversion( - const char *__restrict numStart, + const CharType *__restrict numStart, const size_t num_len = cpp::numeric_limits::max(), RoundDirection round = RoundDirection::Nearest) { using FPBits = typename fputil::FPBits; @@ -676,12 +677,11 @@ template <> LIBC_INLINE constexpr int32_t get_lower_bound() { // Takes a mantissa and base 10 exponent and converts it into its closest // floating point type T equivalient. First we try the Eisel-Lemire algorithm, // then if that fails then we fall back to a more accurate algorithm for -// accuracy. The resulting mantissa and exponent are placed in outputMantissa -// and outputExp2. -template +// accuracy. +template LIBC_INLINE FloatConvertReturn decimal_exp_to_float( ExpandedFloat init_num, bool truncated, RoundDirection round, - const char *__restrict numStart, + const CharType *__restrict numStart, const size_t num_len = cpp::numeric_limits::max()) { using FPBits = typename fputil::FPBits; using StorageType = typename FPBits::StorageType; @@ -860,36 +860,42 @@ LIBC_INLINE FloatConvertReturn binary_exp_to_float(ExpandedFloat init_num, return output; } -// checks if the next 4 characters of the string pointer are the start of a +// Checks if the first characters of the string pointer are the start of a // hexadecimal floating point number. Does not advance the string pointer. -LIBC_INLINE bool is_float_hex_start(const char *__restrict src, - const char decimalPoint) { - if (!(src[0] == '0' && tolower(src[1]) == 'x')) { +template +LIBC_INLINE static bool is_float_hex_start(const CharType *__restrict src) { + if (!is_char_or_wchar(src[0], '0', L'0') || + !is_char_or_wchar(tolower(src[1]), 'x', L'x')) { return false; } size_t first_digit = 2; - if (src[2] == decimalPoint) { + if (src[2] == constants::DECIMAL_POINT) { ++first_digit; } return isalnum(src[first_digit]) && b36_char_to_int(src[first_digit]) < 16; } -// Takes the start of a string representing a decimal float, as well as the -// local decimalPoint. It returns if it suceeded in parsing any digits, and if -// the return value is true then the outputs are pointer to the end of the -// number, and the mantissa and exponent for the closest float T representation. -// If the return value is false, then it is assumed that there is no number -// here. -template -LIBC_INLINE StrToNumResult> -decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, - RoundDirection round) { +// Verifies that first prefix_len characters of str, when lowercased, match the +// specified prefix. +template +LIBC_INLINE static bool tolower_starts_with(const CharType *str, + size_t prefix_len, + const CharType *prefix) { + for (size_t i = 0; i < prefix_len; ++i) { + if (tolower(str[i]) != prefix[i]) + return false; + } + return true; +} + +// Attempts parsing a decimal floating point number at the start of the string. +template +LIBC_INLINE static StrToNumResult> +decimal_string_to_float(const CharType *__restrict src, RoundDirection round) { using FPBits = typename fputil::FPBits; using StorageType = typename FPBits::StorageType; constexpr uint32_t BASE = 10; - constexpr char EXPONENT_MARKER = 'e'; - bool truncated = false; bool seen_digit = false; bool after_decimal = false; @@ -926,7 +932,7 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, ++index; continue; } - if (src[index] == DECIMAL_POINT) { + if (src[index] == constants::DECIMAL_POINT) { if (after_decimal) { break; // this means that src[index] points to a second decimal point, // ending the number. @@ -943,13 +949,10 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, return output; // TODO: When adding max length argument, handle the case of a trailing - // EXPONENT MARKER, see scanf for more details. - if (tolower(src[index]) == EXPONENT_MARKER) { - bool has_sign = false; - if (src[index + 1] == '+' || src[index + 1] == '-') { - has_sign = true; - } - if (isdigit(src[index + 1 + static_cast(has_sign)])) { + // exponent marker, see scanf for more details. + if (tolower(src[index]) == constants::DECIMAL_EXPONENT_MARKER) { + int sign = get_sign(src + index + 1); + if (isdigit(src[index + 1 + static_cast(sign != 0)])) { ++index; auto result = strtointeger(src + index, 10); if (result.has_error()) @@ -985,22 +988,16 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT, return output; } -// Takes the start of a string representing a hexadecimal float, as well as the -// local decimal point. It returns if it suceeded in parsing any digits, and if -// the return value is true then the outputs are pointer to the end of the -// number, and the mantissa and exponent for the closest float T representation. -// If the return value is false, then it is assumed that there is no number -// here. -template -LIBC_INLINE StrToNumResult> -hexadecimal_string_to_float(const char *__restrict src, - const char DECIMAL_POINT, RoundDirection round) { +// Attempts parsing a hexadecimal floating point number at the start of the +// string. +template +LIBC_INLINE static StrToNumResult> +hexadecimal_string_to_float(const CharType *__restrict src, + RoundDirection round) { using FPBits = typename fputil::FPBits; using StorageType = typename FPBits::StorageType; constexpr uint32_t BASE = 16; - constexpr char EXPONENT_MARKER = 'p'; - bool truncated = false; bool seen_digit = false; bool after_decimal = false; @@ -1038,7 +1035,7 @@ hexadecimal_string_to_float(const char *__restrict src, ++index; continue; } - if (src[index] == DECIMAL_POINT) { + if (src[index] == constants::DECIMAL_POINT) { if (after_decimal) { break; // this means that src[index] points to a second decimal point, // ending the number. @@ -1057,12 +1054,9 @@ hexadecimal_string_to_float(const char *__restrict src, // Convert the exponent from having a base of 16 to having a base of 2. exponent *= 4; - if (tolower(src[index]) == EXPONENT_MARKER) { - bool has_sign = false; - if (src[index + 1] == '+' || src[index + 1] == '-') { - has_sign = true; - } - if (isdigit(src[index + 1 + static_cast(has_sign)])) { + if (tolower(src[index]) == constants::HEX_EXPONENT_MARKER) { + int sign = get_sign(src + index + 1); + if (isdigit(src[index + 1 + static_cast(sign != 0)])) { ++index; auto result = strtointeger(src + index, 10); if (result.has_error()) @@ -1098,21 +1092,21 @@ hexadecimal_string_to_float(const char *__restrict src, return output; } -template +template LIBC_INLINE typename fputil::FPBits::StorageType -nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) { +nan_mantissa_from_ncharseq(const CharType *str, size_t len) { using FPBits = typename fputil::FPBits; using StorageType = typename FPBits::StorageType; StorageType nan_mantissa = 0; - if (ncharseq.data() != nullptr && isdigit(ncharseq[0])) { + if (len > 0 && isdigit(str[0])) { StrToNumResult strtoint_result = - strtointeger(ncharseq.data(), 0); + strtointeger(str, 0, len); if (!strtoint_result.has_error()) nan_mantissa = strtoint_result.value; - if (strtoint_result.parsed_len != static_cast(ncharseq.size())) + if (strtoint_result.parsed_len != static_cast(len)) nan_mantissa = 0; } @@ -1123,59 +1117,44 @@ nan_mantissa_from_ncharseq(const cpp::string_view ncharseq) { // is used as the backend for all of the string to float functions. // TODO: Add src_len member to match strtointeger. // TODO: Next, move from char* and length to string_view -template -LIBC_INLINE StrToNumResult strtofloatingpoint(const char *__restrict src) { +template +LIBC_INLINE StrToNumResult +strtofloatingpoint(const CharType *__restrict src) { using FPBits = typename fputil::FPBits; using StorageType = typename FPBits::StorageType; FPBits result = FPBits(); bool seen_digit = false; - char sign = '+'; - int error = 0; size_t index = first_non_whitespace(src); + int sign = get_sign(src + index); + bool is_positive = (sign >= 0); + index += (sign != 0); - if (src[index] == '+' || src[index] == '-') { - sign = src[index]; - ++index; - } - - if (sign == '-') { + if (sign < 0) { result.set_sign(Sign::NEG); } - static constexpr char DECIMAL_POINT = '.'; - static const char *inf_string = "infinity"; - static const char *nan_string = "nan"; - - if (isdigit(src[index]) || src[index] == DECIMAL_POINT) { // regular number + if (isdigit(src[index]) || + src[index] == constants::DECIMAL_POINT) { // regular number int base = 10; - if (is_float_hex_start(src + index, DECIMAL_POINT)) { + if (is_float_hex_start(src + index)) { base = 16; index += 2; seen_digit = true; } RoundDirection round_direction = RoundDirection::Nearest; - switch (fputil::quick_get_round()) { case FE_TONEAREST: round_direction = RoundDirection::Nearest; break; case FE_UPWARD: - if (sign == '+') { - round_direction = RoundDirection::Up; - } else { - round_direction = RoundDirection::Down; - } + round_direction = is_positive ? RoundDirection::Up : RoundDirection::Down; break; case FE_DOWNWARD: - if (sign == '+') { - round_direction = RoundDirection::Down; - } else { - round_direction = RoundDirection::Up; - } + round_direction = is_positive ? RoundDirection::Down : RoundDirection::Up; break; case FE_TOWARDZERO: round_direction = RoundDirection::Down; @@ -1184,58 +1163,53 @@ LIBC_INLINE StrToNumResult strtofloatingpoint(const char *__restrict src) { StrToNumResult> parse_result({0, 0}); if (base == 16) { - parse_result = hexadecimal_string_to_float(src + index, DECIMAL_POINT, - round_direction); + parse_result = + hexadecimal_string_to_float(src + index, round_direction); } else { // base is 10 - parse_result = decimal_string_to_float(src + index, DECIMAL_POINT, - round_direction); + parse_result = decimal_string_to_float(src + index, round_direction); } seen_digit = parse_result.parsed_len != 0; result.set_mantissa(parse_result.value.mantissa); result.set_biased_exponent(parse_result.value.exponent); index += parse_result.parsed_len; error = parse_result.error; - } else if (tolower(src[index]) == 'n') { // NaN - if (tolower(src[index + 1]) == nan_string[1] && - tolower(src[index + 2]) == nan_string[2]) { - seen_digit = true; - index += 3; - StorageType nan_mantissa = 0; - // this handles the case of `NaN(n-character-sequence)`, where the - // n-character-sequence is made of 0 or more letters, numbers, or - // underscore characters in any order. - if (src[index] == '(') { - size_t left_paren = index; + } else if (tolower_starts_with(src + index, 3, + constants::NAN_STRING)) { + // NAN + seen_digit = true; + index += 3; + StorageType nan_mantissa = 0; + // this handles the case of `NaN(n-character-sequence)`, where the + // n-character-sequence is made of 0 or more letters, numbers, or + // underscore characters in any order. + if (is_char_or_wchar(src[index], '(', L'(')) { + size_t left_paren = index; + ++index; + while (isalnum(src[index]) || is_char_or_wchar(src[index], '_', L'_')) ++index; - while (isalnum(src[index]) || src[index] == '_') - ++index; - if (src[index] == ')') { - ++index; - nan_mantissa = nan_mantissa_from_ncharseq( - cpp::string_view(src + (left_paren + 1), index - left_paren - 2)); - } else { - index = left_paren; - } - } - result = FPBits(result.quiet_nan(result.sign(), nan_mantissa)); - } - } else if (tolower(src[index]) == 'i') { // INF - if (tolower(src[index + 1]) == inf_string[1] && - tolower(src[index + 2]) == inf_string[2]) { - seen_digit = true; - result = FPBits(result.inf(result.sign())); - if (tolower(src[index + 3]) == inf_string[3] && - tolower(src[index + 4]) == inf_string[4] && - tolower(src[index + 5]) == inf_string[5] && - tolower(src[index + 6]) == inf_string[6] && - tolower(src[index + 7]) == inf_string[7]) { - // if the string is "INFINITY" then consume 8 characters. - index += 8; + if (is_char_or_wchar(src[index], ')', L')')) { + ++index; + nan_mantissa = nan_mantissa_from_ncharseq(src + (left_paren + 1), + index - left_paren - 2); } else { - index += 3; + index = left_paren; } } + result = FPBits(result.quiet_nan(result.sign(), nan_mantissa)); + } else if (tolower_starts_with(src + index, 8, + constants::INF_STRING)) { + // INFINITY + seen_digit = true; + result = FPBits(result.inf(result.sign())); + index += 8; + } else if (tolower_starts_with(src + index, 3, + constants::INF_STRING)) { + // INF + seen_digit = true; + result = FPBits(result.inf(result.sign())); + index += 3; } + if (!seen_digit) { // If there is nothing to actually parse, then return 0. return {T(0), 0, error}; } @@ -1262,7 +1236,7 @@ template LIBC_INLINE StrToNumResult strtonan(const char *arg) { ++index; if (arg[index] == '\0') - nan_mantissa = nan_mantissa_from_ncharseq(cpp::string_view(arg, index)); + nan_mantissa = nan_mantissa_from_ncharseq(arg, index); result = FPBits::quiet_nan(Sign::POS, nan_mantissa); return {result.get_val(), 0, error}; diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index ba27cd77f6bac..e3fac9fb80529 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -99,6 +99,17 @@ add_entrypoint_object( libc.src.__support.str_to_integer ) +add_entrypoint_object( + wcstof + SRCS + wcstof.cpp + HDRS + wcstof.h + DEPENDS + libc.src.__support.str_to_float + libc.src.errno.errno +) + add_entrypoint_object( wcstok SRCS diff --git a/libc/src/wchar/wcstof.cpp b/libc/src/wchar/wcstof.cpp new file mode 100644 index 0000000000000..162a60a4cb4e1 --- /dev/null +++ b/libc/src/wchar/wcstof.cpp @@ -0,0 +1,30 @@ +//===-- Implementation of wcstof ------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcstof.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/str_to_float.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(float, wcstof, + (const wchar_t *__restrict str, + wchar_t **__restrict str_end)) { + auto result = internal::strtofloatingpoint(str); + if (result.has_error()) + libc_errno = result.error; + + if (str_end != nullptr) + *str_end = const_cast(str + result.parsed_len); + + return result.value; +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcstof.h b/libc/src/wchar/wcstof.h new file mode 100644 index 0000000000000..1aa7a30b8bff4 --- /dev/null +++ b/libc/src/wchar/wcstof.h @@ -0,0 +1,21 @@ +//===-- Implementation header for wcstof ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCSTOF_H +#define LLVM_LIBC_SRC_WCHAR_WCSTOF_H + +#include "hdr/types/wchar_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +float wcstof(const wchar_t *__restrict str, wchar_t **__restrict str_end); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCSTOF_H diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 8e1e8543a0cad..122cad2575327 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -528,3 +528,15 @@ add_libc_test( libc.src.wchar.wcstoull .wcstol_test_support ) + +add_libc_test( + wcstof_test + SUITE + libc_wchar_unittests + SRCS + wcstof_test.cpp + DEPENDS + libc.src.wchar.wcstof + libc.test.UnitTest.ErrnoCheckingTest + libc.test.UnitTest.LibcFPTestHelpers +) diff --git a/libc/test/src/wchar/wcstof_test.cpp b/libc/test/src/wchar/wcstof_test.cpp new file mode 100644 index 0000000000000..6c23f8c328a14 --- /dev/null +++ b/libc/test/src/wchar/wcstof_test.cpp @@ -0,0 +1,199 @@ +//===-- Unittests for wcstof ----------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcstof.h" + +#include "test/UnitTest/ErrnoCheckingTest.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/RoundingModeUtils.h" +#include "test/UnitTest/Test.h" + +using LIBC_NAMESPACE::fputil::testing::ForceRoundingModeTest; +using LIBC_NAMESPACE::fputil::testing::RoundingMode; + +class LlvmLibcWcstofTest : public LIBC_NAMESPACE::testing::ErrnoCheckingTest, + ForceRoundingModeTest { +public: + void run_test(const wchar_t *inputString, const ptrdiff_t expectedStrLen, + const uint32_t expectedRawData, const int expectedErrno = 0) { + // expectedRawData is the expected float result as a uint32_t, organized + // according to IEEE754: + // + // +-- 1 Sign Bit +-- 23 Mantissa bits + // | | + // | +----------+----------+ + // | | | + // SEEEEEEEEMMMMMMMMMMMMMMMMMMMMMMM + // | | + // +--+---+ + // | + // +-- 8 Exponent Bits + // + // This is so that the result can be compared in parts. + wchar_t *str_end = nullptr; + + LIBC_NAMESPACE::fputil::FPBits expected_fp = + LIBC_NAMESPACE::fputil::FPBits(expectedRawData); + + float result = LIBC_NAMESPACE::wcstof(inputString, &str_end); + + EXPECT_EQ(str_end - inputString, expectedStrLen); + EXPECT_FP_EQ(result, expected_fp.get_val()); + ASSERT_ERRNO_EQ(expectedErrno); + } +}; + +TEST_F(LlvmLibcWcstofTest, BasicDecimalTests) { + run_test(L"1", 1, 0x3f800000); + run_test(L"123", 3, 0x42f60000); + run_test(L"1234567890", 10, 0x4e932c06u); + run_test(L"123456789012345678901", 21, 0x60d629d4); + run_test(L"0.1", 3, 0x3dcccccdu); + run_test(L".1", 2, 0x3dcccccdu); + run_test(L"-0.123456789", 12, 0xbdfcd6eau); + run_test(L"0.11111111111111111111", 22, 0x3de38e39u); + run_test(L"0.0000000000000000000000001", 27, 0x15f79688u); +} + +TEST_F(LlvmLibcWcstofTest, DecimalOutOfRangeTests) { + run_test(L"555E36", 6, 0x7f800000, ERANGE); + run_test(L"1e-10000", 8, 0x0, ERANGE); +} + +TEST_F(LlvmLibcWcstofTest, DecimalsWithRoundingProblems) { + run_test(L"20040229", 8, 0x4b98e512); + run_test(L"20040401", 8, 0x4b98e568); + run_test(L"9E9", 3, 0x50061c46); +} + +TEST_F(LlvmLibcWcstofTest, DecimalSubnormals) { + run_test(L"1.4012984643248170709237295832899161312802619418765e-45", 55, 0x1, + ERANGE); +} + +TEST_F(LlvmLibcWcstofTest, DecimalWithLongExponent) { + run_test(L"1e2147483648", 12, 0x7f800000, ERANGE); + run_test(L"1e2147483646", 12, 0x7f800000, ERANGE); + run_test(L"100e2147483646", 14, 0x7f800000, ERANGE); + run_test(L"1e-2147483647", 13, 0x0, ERANGE); + run_test(L"1e-2147483649", 13, 0x0, ERANGE); +} + +TEST_F(LlvmLibcWcstofTest, BasicHexadecimalTests) { + run_test(L"0x1", 3, 0x3f800000); + run_test(L"0x10", 4, 0x41800000); + run_test(L"0x11", 4, 0x41880000); + run_test(L"0x0.1234", 8, 0x3d91a000); +} + +TEST_F(LlvmLibcWcstofTest, HexadecimalSubnormalTests) { + run_test(L"0x0.0000000000000000000000000000000002", 38, 0x4000, ERANGE); + + // This is the largest subnormal number as represented in hex + run_test(L"0x0.00000000000000000000000000000003fffff8", 42, 0x7fffff, ERANGE); +} + +TEST_F(LlvmLibcWcstofTest, HexadecimalSubnormalRoundingTests) { + // This is the largest subnormal number that gets rounded down to 0 (as a + // float) + run_test(L"0x0.00000000000000000000000000000000000004", 42, 0x0, ERANGE); + + // This is slightly larger, and thus rounded up + run_test(L"0x0.000000000000000000000000000000000000041", 43, 0x00000001, + ERANGE); + + // These check that we're rounding to even properly + run_test(L"0x0.0000000000000000000000000000000000000b", 42, 0x00000001, + ERANGE); + run_test(L"0x0.0000000000000000000000000000000000000c", 42, 0x00000002, + ERANGE); + + // These check that we're rounding to even properly even when the input bits + // are longer than the bit fields can contain. + run_test(L"0x1.000000000000000000000p-150", 30, 0x00000000, ERANGE); + run_test(L"0x1.000010000000000001000p-150", 30, 0x00000001, ERANGE); + run_test(L"0x1.000100000000000001000p-134", 30, 0x00008001, ERANGE); + run_test(L"0x1.FFFFFC000000000001000p-127", 30, 0x007FFFFF, ERANGE); + run_test(L"0x1.FFFFFE000000000000000p-127", 30, 0x00800000); +} + +TEST_F(LlvmLibcWcstofTest, HexadecimalNormalRoundingTests) { + // This also checks the round to even behavior by checking three adjacent + // numbers. + // This gets rounded down to even + run_test(L"0x123456500", 11, 0x4f91a2b2); + // This doesn't get rounded at all + run_test(L"0x123456600", 11, 0x4f91a2b3); + // This gets rounded up to even + run_test(L"0x123456700", 11, 0x4f91a2b4); + // Correct rounding for long input + run_test(L"0x1.000001000000000000000", 25, 0x3f800000); + run_test(L"0x1.000001000000000000100", 25, 0x3f800001); +} + +TEST_F(LlvmLibcWcstofTest, HexadecimalsWithRoundingProblems) { + run_test(L"0xFFFFFFFF", 10, 0x4f800000); +} + +TEST_F(LlvmLibcWcstofTest, HexadecimalOutOfRangeTests) { + run_test(L"0x123456789123456789123456789123456789", 38, 0x7f800000, ERANGE); + run_test(L"-0x123456789123456789123456789123456789", 39, 0xff800000, ERANGE); + run_test(L"0x0.00000000000000000000000000000000000001", 42, 0x0, ERANGE); +} + +TEST_F(LlvmLibcWcstofTest, InfTests) { + run_test(L"INF", 3, 0x7f800000); + run_test(L"INFinity", 8, 0x7f800000); + run_test(L"infnity", 3, 0x7f800000); + run_test(L"infinit", 3, 0x7f800000); + run_test(L"infinfinit", 3, 0x7f800000); + run_test(L"innf", 0, 0x0); + run_test(L"-inf", 4, 0xff800000); + run_test(L"-iNfInItY", 9, 0xff800000); +} + +TEST_F(LlvmLibcWcstofTest, SimpleNaNTests) { + run_test(L"NaN", 3, 0x7fc00000); + run_test(L"-nAn", 4, 0xffc00000); +} + +// These NaNs are of the form `NaN(n-character-sequence)` where the +// n-character-sequence is 0 or more letters or numbers. If there is anything +// other than a letter or a number, then the valid number is just `NaN`. If +// the sequence is valid, then the interpretation of them is implementation +// defined, in this case it's passed to strtoll with an automatic base, and +// the result is put into the mantissa if it takes up the whole width of the +// parentheses. +TEST_F(LlvmLibcWcstofTest, NaNWithParenthesesEmptyTest) { + run_test(L"NaN()", 5, 0x7fc00000); +} + +TEST_F(LlvmLibcWcstofTest, NaNWithParenthesesValidNumberTests) { + run_test(L"NaN(1234)", 9, 0x7fc004d2); + run_test(L"NaN(0x1234)", 11, 0x7fc01234); + run_test(L"NaN(01234)", 10, 0x7fc0029c); +} + +TEST_F(LlvmLibcWcstofTest, NaNWithParenthesesInvalidSequenceTests) { + run_test(L"NaN( 1234)", 3, 0x7fc00000); + run_test(L"NaN(-1234)", 3, 0x7fc00000); + run_test(L"NaN(asd&f)", 3, 0x7fc00000); + run_test(L"NaN(123 )", 3, 0x7fc00000); + run_test(L"NaN(123+asdf)", 3, 0x7fc00000); + run_test(L"NaN(123", 3, 0x7fc00000); +} + +TEST_F(LlvmLibcWcstofTest, NaNWithParenthesesValidSequenceInvalidNumberTests) { + run_test(L"NaN(1a)", 7, 0x7fc00000); + run_test(L"NaN(asdf)", 9, 0x7fc00000); + run_test(L"NaN(1A1)", 8, 0x7fc00000); + run_test(L"NaN(underscores_are_ok)", 23, 0x7fc00000); + run_test( + L"NaN(1234567890qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM_)", + 68, 0x7fc00000); +} diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index ecd11b91d0d86..788c6570081a2 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1025,6 +1025,7 @@ libc_support_library( ":__support_str_to_integer", ":__support_str_to_num_result", ":__support_uint128", + ":__support_wctype_utils", ":hdr_errno_macros", ], )