Expand Up
@@ -9,6 +9,7 @@
#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
#define LLVM_LIBC_SRC___SUPPORT_STR_TO_FLOAT_H
#include " src/__support/CPP/bit.h"
#include " src/__support/CPP/limits.h"
#include " src/__support/CPP/optional.h"
#include " src/__support/FPUtil/FEnvImpl.h"
Expand Down
Expand Up
@@ -37,45 +38,6 @@ template <class T> struct FloatConvertReturn {
int error = 0 ;
};
// Returns how many zero bits sit above the most significant set bit of
// inputNumber, where the width of T is taken to be sizeof(T) * 8 bits. An
// input of zero yields that full width.
// NOTE(review): presumably T is an unsigned integer-like type supporting `>>`
// and comparison against integer literals -- confirm against callers.
template <class T> LIBC_INLINE uint32_t leading_zeroes(T inputNumber) {
  constexpr uint32_t BITS_IN_T = sizeof(T) * 8;
  if (inputNumber == 0)
    return BITS_IN_T;

  // Bisect on the shift amount: `pos` starts in the middle of the word and is
  // moved by a step that halves on every pass, until the step is used up.
  uint32_t pos = BITS_IN_T / 2;
  for (uint32_t step = BITS_IN_T / 4; step != 0; step /= 2) {
    // Bits still remain after shifting by `pos`, so the top set bit is higher.
    const bool bits_remain = (inputNumber >> pos) > 0;
    // Shifting by `pos` clears everything but shifting by one less does not:
    // the top set bit has been located, so stop searching.
    if (!bits_remain && (inputNumber >> (pos - 1)) != 0)
      break;
    pos = bits_remain ? pos + step : pos - step;
  }

  // If the search ran out of steps while bits were still left above `pos`,
  // account for that one remaining bit.
  const uint32_t used_bits = ((inputNumber >> pos) > 0) ? pos + 1 : pos;
  return BITS_IN_T - used_bits;
}
// Explicit specialization for uint32_t inputs: the count is obtained from
// cpp::countl_zero instead of the generic search.
template <>
LIBC_INLINE uint32_t leading_zeroes<uint32_t>(uint32_t inputNumber) {
  const auto zero_count = cpp::countl_zero(inputNumber);
  return static_cast<uint32_t>(zero_count);
}
// Explicit specialization for uint64_t inputs: the count is obtained from
// cpp::countl_zero instead of the generic search.
template <>
LIBC_INLINE uint32_t leading_zeroes<uint64_t>(uint64_t inputNumber) {
  const auto zero_count = cpp::countl_zero(inputNumber);
  return static_cast<uint32_t>(zero_count);
}
// Extracts the least significant 64 bits of a 128-bit value by masking off
// everything above bit 63 and narrowing the result to uint64_t.
LIBC_INLINE uint64_t low64(const UInt128 &num) {
  const UInt128 lower_half = num & 0xffffffffffffffff;
  return static_cast<uint64_t>(lower_half);
}
Expand Down
Expand Up
@@ -108,10 +70,11 @@ template <class T>
LIBC_INLINE cpp::optional<ExpandedFloat<T>>
eisel_lemire (ExpandedFloat<T> init_num,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename FPBits::FloatProp;
using UIntType = typename FPBits::UIntType;
using BitsType = typename fputil::FPBits<T>::UIntType;
BitsType mantissa = init_num.mantissa ;
UIntType mantissa = init_num.mantissa ;
int32_t exp10 = init_num.exponent ;
constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
Expand All
@@ -128,12 +91,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
}
// Normalization
uint32_t clz = leading_zeroes<BitsType >(mantissa);
uint32_t clz = cpp::countl_zero<UIntType >(mantissa);
mantissa <<= clz;
uint32_t exp2 = static_cast <uint32_t >(exp10_to_exp2 (exp10)) +
BITS_IN_MANTISSA + fputil::FloatProperties<T>::EXPONENT_BIAS -
clz;
BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
Expand All
@@ -150,9 +112,7 @@ eisel_lemire(ExpandedFloat<T> init_num,
// accuracy, and the most significant bit is ignored.) = 9 bits. Similarly,
// it's 6 bits for floats in this case.
const uint64_t halfway_constant =
(uint64_t (1 ) << (BITS_IN_MANTISSA -
fputil::FloatProperties<T>::MANTISSA_WIDTH - 3 )) -
1 ;
(uint64_t (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
if ((high64 (first_approx) & halfway_constant) == halfway_constant &&
low64 (first_approx) + mantissa < mantissa) {
UInt128 low_bits =
Expand All
@@ -171,12 +131,11 @@ eisel_lemire(ExpandedFloat<T> init_num,
}
// Shifting to 54 bits for doubles and 25 bits for floats
BitsType msb =
static_cast <BitsType>(high64 (final_approx) >> (BITS_IN_MANTISSA - 1 ));
BitsType final_mantissa =
static_cast <BitsType>(high64 (final_approx) >>
(msb + BITS_IN_MANTISSA -
(fputil::FloatProperties<T>::MANTISSA_WIDTH + 3 )));
UIntType msb =
static_cast <UIntType>(high64 (final_approx) >> (BITS_IN_MANTISSA - 1 ));
UIntType final_mantissa = static_cast <UIntType>(
high64 (final_approx) >>
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 )));
exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
Expand All
@@ -202,15 +161,14 @@ eisel_lemire(ExpandedFloat<T> init_num,
// From 54 to 53 bits for doubles and 25 to 24 bits for floats
final_mantissa >>= 1 ;
if ((final_mantissa >> (fputil::FloatProperties<T>::MANTISSA_WIDTH + 1 )) >
0 ) {
if ((final_mantissa >> (FloatProp::MANTISSA_WIDTH + 1 )) > 0 ) {
final_mantissa >>= 1 ;
++exp2 ;
}
// The if block is equivalent to (but has fewer branches than):
// if exp2 <= 0 || exp2 >= 0x7FF { etc }
if (exp2 - 1 >= (1 << fputil::FloatProperties<T> ::EXPONENT_WIDTH) - 2 ) {
if (exp2 - 1 >= (1 << FloatProp ::EXPONENT_WIDTH) - 2 ) {
return cpp::nullopt;
}
Expand All
@@ -225,9 +183,11 @@ template <>
LIBC_INLINE cpp::optional<ExpandedFloat<long double >>
eisel_lemire<long double >(ExpandedFloat<long double > init_num,
RoundDirection round) {
using BitsType = typename fputil::FPBits<long double >::UIntType;
using FPBits = typename fputil::FPBits<long double >;
using FloatProp = typename FPBits::FloatProp;
using UIntType = typename FPBits::UIntType;
BitsType mantissa = init_num.mantissa ;
UIntType mantissa = init_num.mantissa ;
int32_t exp10 = init_num.exponent ;
constexpr uint32_t BITS_IN_MANTISSA = sizeof (mantissa) * 8 ;
Expand All
@@ -248,12 +208,11 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
}
// Normalization
uint32_t clz = leading_zeroes<BitsType >(mantissa);
uint32_t clz = cpp::countl_zero<UIntType >(mantissa);
mantissa <<= clz;
uint32_t exp2 = static_cast <uint32_t >(exp10_to_exp2 (exp10)) +
BITS_IN_MANTISSA +
fputil::FloatProperties<long double >::EXPONENT_BIAS - clz;
BITS_IN_MANTISSA + FloatProp::EXPONENT_BIAS - clz;
// Multiplication
const uint64_t *power_of_ten =
Expand Down
Expand Up
@@ -290,10 +249,7 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// accuracy, and the most significant bit is ignored.) = 61 bits. Similarly,
// it's 12 bits for 128 bit floats in this case.
constexpr UInt128 HALFWAY_CONSTANT =
(UInt128 (1 ) << (BITS_IN_MANTISSA -
fputil::FloatProperties<long double >::MANTISSA_WIDTH -
3 )) -
1 ;
(UInt128 (1 ) << (BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ))) - 1 ;
if ((final_approx_upper & HALFWAY_CONSTANT) == HALFWAY_CONSTANT &&
final_approx_lower + mantissa < mantissa) {
Expand All
@@ -303,10 +259,9 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// Shifting to 65 bits for 80 bit floats and 113 bits for 128 bit floats
uint32_t msb =
static_cast <uint32_t >(final_approx_upper >> (BITS_IN_MANTISSA - 1 ));
BitsType final_mantissa =
UIntType final_mantissa =
final_approx_upper >>
(msb + BITS_IN_MANTISSA -
(fputil::FloatProperties<long double >::MANTISSA_WIDTH + 3 ));
(msb + BITS_IN_MANTISSA - (FloatProp::MANTISSA_WIDTH + 3 ));
exp2 -= static_cast <uint32_t >(1 ^ msb); // same as !msb
if (round == RoundDirection::Nearest) {
Expand All
@@ -331,16 +286,14 @@ eisel_lemire<long double>(ExpandedFloat<long double> init_num,
// From 65 to 64 bits for 80 bit floats and 113 to 112 bits for 128 bit
// floats
final_mantissa >>= 1 ;
if ((final_mantissa >>
(fputil::FloatProperties<long double >::MANTISSA_WIDTH + 1 )) > 0 ) {
if ((final_mantissa >> (FloatProp::MANTISSA_WIDTH + 1 )) > 0 ) {
final_mantissa >>= 1 ;
++exp2 ;
}
// The if block is equivalent to (but has fewer branches than):
// if exp2 <= 0 || exp2 >= MANTISSA_MAX { etc }
if (exp2 - 1 >=
(1 << fputil::FloatProperties<long double >::EXPONENT_WIDTH) - 2 ) {
if (exp2 - 1 >= (1 << FloatProp::EXPONENT_WIDTH) - 2 ) {
return cpp::nullopt;
}
Expand Down
Expand Up
@@ -368,6 +321,9 @@ template <class T>
LIBC_INLINE FloatConvertReturn<T>
simple_decimal_conversion (const char *__restrict numStart,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename FPBits::FloatProp;
using UIntType = typename FPBits::UIntType;
int32_t exp2 = 0 ;
HighPrecisionDecimal hpd = HighPrecisionDecimal (numStart);
Expand All
@@ -383,16 +339,16 @@ simple_decimal_conversion(const char *__restrict numStart,
// float, return inf.
if (hpd.get_decimal_point () > 0 &&
exp10_to_exp2 (hpd.get_decimal_point () - 1 ) >
static_cast <int64_t >(fputil::FloatProperties<T> ::EXPONENT_BIAS)) {
output.num = {0 , fputil:: FPBits<T> ::MAX_EXPONENT};
static_cast <int64_t >(FloatProp ::EXPONENT_BIAS)) {
output.num = {0 , FPBits::MAX_EXPONENT};
output.error = ERANGE;
return output;
}
// If the exponent is too small even for a subnormal, return 0.
if (hpd.get_decimal_point () < 0 &&
exp10_to_exp2 (-hpd.get_decimal_point ()) >
static_cast <int64_t >(fputil::FloatProperties<T> ::EXPONENT_BIAS +
fputil::FloatProperties<T> ::MANTISSA_WIDTH)) {
static_cast <int64_t >(FloatProp ::EXPONENT_BIAS +
FloatProp ::MANTISSA_WIDTH)) {
output.num = {0 , 0 };
output.error = ERANGE;
return output;
Expand Down
Expand Up
@@ -431,19 +387,18 @@ simple_decimal_conversion(const char *__restrict numStart,
hpd.shift (1 );
// Get the biased exponent
exp2 += fputil::FloatProperties<T> ::EXPONENT_BIAS;
exp2 += FloatProp ::EXPONENT_BIAS;
// Handle the exponent being too large (and return inf).
if (exp2 >= fputil:: FPBits<T> ::MAX_EXPONENT) {
output.num = {0 , fputil:: FPBits<T> ::MAX_EXPONENT};
if (exp2 >= FPBits::MAX_EXPONENT) {
output.num = {0 , FPBits::MAX_EXPONENT};
output.error = ERANGE;
return output;
}
// Shift left to fill the mantissa
hpd.shift (fputil::FloatProperties<T>::MANTISSA_WIDTH);
typename fputil::FPBits<T>::UIntType final_mantissa =
hpd.round_to_integer_type <typename fputil::FPBits<T>::UIntType>();
hpd.shift (FloatProp::MANTISSA_WIDTH);
UIntType final_mantissa = hpd.round_to_integer_type <UIntType>();
// Handle subnormals
if (exp2 <= 0 ) {
Expand All
@@ -455,25 +410,23 @@ simple_decimal_conversion(const char *__restrict numStart,
// Shift right one more time to compensate for the left shift to get it
// between 1 and 2.
hpd.shift (-1 );
final_mantissa =
hpd.round_to_integer_type <typename fputil::FPBits<T>::UIntType>(round );
final_mantissa = hpd.round_to_integer_type <UIntType>(round );
// Check if by shifting right we've caused this to round to a normal number.
if ((final_mantissa >> fputil::FloatProperties<T> ::MANTISSA_WIDTH) != 0 ) {
if ((final_mantissa >> FloatProp ::MANTISSA_WIDTH) != 0 ) {
++exp2 ;
}
}
// Check if rounding added a bit, and shift down if that's the case.
if (final_mantissa == typename fputil::FPBits<T>::UIntType (2 )
<< fputil::FloatProperties<T>::MANTISSA_WIDTH) {
if (final_mantissa == UIntType (2 ) << FloatProp::MANTISSA_WIDTH) {
final_mantissa >>= 1 ;
++exp2 ;
// Check if this rounding causes exp2 to go out of range and make the result
// INF. If this is the case, then finalMantissa and exp2 are already the
// correct values for an INF result.
if (exp2 >= fputil:: FPBits<T> ::MAX_EXPONENT) {
if (exp2 >= FPBits::MAX_EXPONENT) {
output.error = ERANGE;
}
}
Expand Down
Expand Up
@@ -563,18 +516,20 @@ template <class T>
LIBC_INLINE cpp::optional<ExpandedFloat<T>>
clinger_fast_path (ExpandedFloat<T> init_num,
RoundDirection round = RoundDirection::Nearest) {
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename FPBits::FloatProp;
using UIntType = typename FPBits::UIntType;
typename fputil::FPBits<T>:: UIntType mantissa = init_num.mantissa ;
UIntType mantissa = init_num.mantissa ;
int32_t exp10 = init_num.exponent ;
if (mantissa >> fputil::FloatProperties<T>:: MANTISSA_WIDTH > 0 ) {
if (( mantissa >> FloatProp:: MANTISSA_WIDTH) > 0 ) {
return cpp::nullopt;
}
fputil:: FPBits<T> result;
FPBits result;
T float_mantissa;
if constexpr (cpp::is_same_v<typename fputil::FPBits<T>::UIntType,
cpp::UInt<128 >>) {
if constexpr (cpp::is_same_v<UIntType, cpp::UInt<128 >>) {
float_mantissa = static_cast <T>(fputil::DyadicFloat<128 >(
false , 0 ,
fputil::DyadicFloat<128 >::MantissaType (
Expand All
@@ -584,7 +539,7 @@ clinger_fast_path(ExpandedFloat<T> init_num,
}
if (exp10 == 0 ) {
result = fputil:: FPBits<T> (float_mantissa);
result = FPBits (float_mantissa);
}
if (exp10 > 0 ) {
if (exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN +
Expand All
@@ -600,30 +555,30 @@ clinger_fast_path(ExpandedFloat<T> init_num,
if (float_mantissa > ClingerConsts<T>::MAX_EXACT_INT) {
return cpp::nullopt;
}
result = fputil::FPBits<T>(float_mantissa *
ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
result =
FPBits (float_mantissa * ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
} else if (exp10 < 0 ) {
if (-exp10 > ClingerConsts<T>::EXACT_POWERS_OF_TEN) {
return cpp::nullopt;
}
result = fputil::FPBits<T>(float_mantissa /
ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]);
result =
FPBits (float_mantissa / ClingerConsts<T>::POWERS_OF_TEN_ARRAY[-exp10]);
}
// If the rounding mode is not nearest, then the sign of the number may affect
// the result. To make sure the rounding mode is respected properly, the
// calculation is redone with a negative result, and the rounding mode is used
// to select the correct result.
if (round != RoundDirection::Nearest) {
fputil:: FPBits<T> negative_result;
FPBits negative_result;
// I'm 99% sure this will break under fast math optimizations.
negative_result = fputil:: FPBits<T>(
(-float_mantissa) * ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
negative_result = FPBits ((-float_mantissa) *
ClingerConsts<T>::POWERS_OF_TEN_ARRAY[exp10]);
// If the results are equal, then we don't need to use the rounding mode.
if (T (result) != -T (negative_result)) {
fputil:: FPBits<T> lower_result;
fputil:: FPBits<T> higher_result;
FPBits lower_result;
FPBits higher_result;
if (T (result) < -T (negative_result)) {
lower_result = result;
Expand Down
Expand Up
@@ -691,8 +646,10 @@ template <class T>
LIBC_INLINE FloatConvertReturn<T>
decimal_exp_to_float (ExpandedFloat<T> init_num, const char *__restrict numStart,
bool truncated, RoundDirection round) {
using FPBits = typename fputil::FPBits<T>;
using UIntType = typename FPBits::UIntType;
typename fputil::FPBits<T>:: UIntType mantissa = init_num.mantissa ;
UIntType mantissa = init_num.mantissa ;
int32_t exp10 = init_num.exponent ;
FloatConvertReturn<T> output;
Expand All
@@ -702,7 +659,7 @@ decimal_exp_to_float(ExpandedFloat<T> init_num, const char *__restrict numStart,
// float, return inf. These bounds are relatively loose, but are mostly
// serving as a first pass. Some close numbers getting through is okay.
if (exp10 > get_upper_bound<T>()) {
output.num = {0 , fputil:: FPBits<T> ::MAX_EXPONENT};
output.num = {0 , FPBits::MAX_EXPONENT};
output.error = ERANGE;
return output;
}
Expand Down
Expand Up
@@ -766,40 +723,39 @@ template <class T>
LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float (ExpandedFloat<T> init_num,
bool truncated,
RoundDirection round) {
using BitsType = typename fputil::FPBits<T>::UIntType;
using FPBits = typename fputil::FPBits<T>;
using FloatProp = typename FPBits::FloatProp;
using UIntType = typename FPBits::UIntType;
BitsType mantissa = init_num.mantissa ;
UIntType mantissa = init_num.mantissa ;
int32_t exp2 = init_num.exponent ;
FloatConvertReturn<T> output;
// This is the number of leading zeroes a properly normalized float of type T
// should have.
constexpr int32_t NUMBITS = sizeof (BitsType) * 8 ;
constexpr int32_t INF_EXP =
(1 << fputil::FloatProperties<T>::EXPONENT_WIDTH) - 1 ;
constexpr int32_t NUMBITS = sizeof (UIntType) * 8 ;
constexpr int32_t INF_EXP = (1 << FloatProp::EXPONENT_WIDTH) - 1 ;
// Normalization step 1: Bring the leading bit to the highest bit of BitsType .
uint32_t amount_to_shift_left = leading_zeroes<BitsType >(mantissa);
// Normalization step 1: Bring the leading bit to the highest bit of UIntType .
uint32_t amount_to_shift_left = cpp::countl_zero<UIntType >(mantissa);
mantissa <<= amount_to_shift_left;
// Keep exp2 representing the exponent of the lowest bit of BitsType .
// Keep exp2 representing the exponent of the lowest bit of UIntType .
exp2 -= amount_to_shift_left;
// biasedExponent represents the biased exponent of the most significant bit.
int32_t biased_exponent =
exp2 + NUMBITS + fputil::FPBits<T>::EXPONENT_BIAS - 1 ;
int32_t biased_exponent = exp2 + NUMBITS + FPBits::EXPONENT_BIAS - 1 ;
// Handle numbers that're too large and get squashed to inf
if (biased_exponent >= INF_EXP) {
// This indicates an overflow, so we make the result INF and set errno.
output.num = {0 , (1 << fputil::FloatProperties<T> ::EXPONENT_WIDTH) - 1 };
output.num = {0 , (1 << FloatProp ::EXPONENT_WIDTH) - 1 };
output.error = ERANGE;
return output;
}
uint32_t amount_to_shift_right =
NUMBITS - fputil::FloatProperties<T>::MANTISSA_WIDTH - 1 ;
uint32_t amount_to_shift_right = NUMBITS - FloatProp::MANTISSA_WIDTH - 1 ;
// Handle subnormals.
if (biased_exponent <= 0 ) {
Expand All
@@ -814,19 +770,19 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
}
}
BitsType round_bit_mask = BitsType (1 ) << (amount_to_shift_right - 1 );
BitsType sticky_mask = round_bit_mask - 1 ;
UIntType round_bit_mask = UIntType (1 ) << (amount_to_shift_right - 1 );
UIntType sticky_mask = round_bit_mask - 1 ;
bool round_bit = static_cast <bool >(mantissa & round_bit_mask);
bool sticky_bit = static_cast <bool >(mantissa & sticky_mask) || truncated;
if (amount_to_shift_right < NUMBITS) {
// Shift the mantissa and clear the implicit bit.
mantissa >>= amount_to_shift_right;
mantissa &= fputil::FloatProperties<T> ::MANTISSA_MASK;
mantissa &= FloatProp ::MANTISSA_MASK;
} else {
mantissa = 0 ;
}
bool least_significant_bit = static_cast <bool >(mantissa & BitsType (1 ));
bool least_significant_bit = static_cast <bool >(mantissa & UIntType (1 ));
// TODO: check that this rounding behavior is correct.
Expand All
@@ -845,7 +801,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
}
}
if (mantissa > fputil::FloatProperties<T> ::MANTISSA_MASK) {
if (mantissa > FloatProp ::MANTISSA_MASK) {
// Rounding causes the exponent to increase.
++biased_exponent;
Expand All
@@ -858,8 +814,7 @@ LIBC_INLINE FloatConvertReturn<T> binary_exp_to_float(ExpandedFloat<T> init_num,
output.error = ERANGE;
}
output.num = {mantissa & fputil::FloatProperties<T>::MANTISSA_MASK,
biased_exponent};
output.num = {mantissa & FloatProp::MANTISSA_MASK, biased_exponent};
return output;
}
Expand Down
Expand Up
@@ -887,14 +842,16 @@ template <class T>
LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
decimal_string_to_float (const char *__restrict src, const char DECIMAL_POINT,
RoundDirection round) {
using BitsType = typename fputil::FPBits<T>::UIntType;
using FPBits = typename fputil::FPBits<T>;
using UIntType = typename FPBits::UIntType;
constexpr uint32_t BASE = 10 ;
constexpr char EXPONENT_MARKER = ' e' ;
bool truncated = false ;
bool seen_digit = false ;
bool after_decimal = false ;
BitsType mantissa = 0 ;
UIntType mantissa = 0 ;
int32_t exponent = 0 ;
size_t index = 0 ;
Expand All
@@ -905,8 +862,8 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
// the format mantissa * (base ^ exponent)
// The loop fills the mantissa with as many digits as it can hold
const BitsType bitstype_max_div_by_base =
cpp::numeric_limits<BitsType >::max () / BASE;
const UIntType bitstype_max_div_by_base =
cpp::numeric_limits<UIntType >::max () / BASE;
while (true ) {
if (isdigit (src[index ])) {
uint32_t digit = src[index ] - ' 0' ;
Expand Down
Expand Up
@@ -962,10 +919,10 @@ decimal_string_to_float(const char *__restrict src, const char DECIMAL_POINT,
// If the result is in the valid range, then we use it. The valid range is
// also within the int32 range, so this prevents overflow issues.
if (temp_exponent > fputil:: FPBits<T> ::MAX_EXPONENT) {
exponent = fputil:: FPBits<T> ::MAX_EXPONENT;
} else if (temp_exponent < -fputil:: FPBits<T> ::MAX_EXPONENT) {
exponent = -fputil:: FPBits<T> ::MAX_EXPONENT;
if (temp_exponent > FPBits::MAX_EXPONENT) {
exponent = FPBits::MAX_EXPONENT;
} else if (temp_exponent < -FPBits::MAX_EXPONENT) {
exponent = -FPBits::MAX_EXPONENT;
} else {
exponent = static_cast <int32_t >(temp_exponent);
}
Expand Down
Expand Up
@@ -994,14 +951,16 @@ template <class T>
LIBC_INLINE StrToNumResult<ExpandedFloat<T>>
hexadecimal_string_to_float (const char *__restrict src,
const char DECIMAL_POINT, RoundDirection round) {
using BitsType = typename fputil::FPBits<T>::UIntType;
using FPBits = typename fputil::FPBits<T>;
using UIntType = typename FPBits::UIntType;
constexpr uint32_t BASE = 16 ;
constexpr char EXPONENT_MARKER = ' p' ;
bool truncated = false ;
bool seen_digit = false ;
bool after_decimal = false ;
BitsType mantissa = 0 ;
UIntType mantissa = 0 ;
int32_t exponent = 0 ;
size_t index = 0 ;
Expand All
@@ -1012,8 +971,8 @@ hexadecimal_string_to_float(const char *__restrict src,
// the format mantissa * (base ^ exponent)
// The loop fills the mantissa with as many digits as it can hold
const BitsType bitstype_max_div_by_base =
cpp::numeric_limits<BitsType >::max () / BASE;
const UIntType bitstype_max_div_by_base =
cpp::numeric_limits<UIntType >::max () / BASE;
while (true ) {
if (isalnum (src[index ])) {
uint32_t digit = b36_char_to_int (src[index ]);
Expand Down
Expand Up
@@ -1074,10 +1033,10 @@ hexadecimal_string_to_float(const char *__restrict src,
// If the result is in the valid range, then we use it. The valid range is
// also within the int32 range, so this prevents overflow issues.
if (temp_exponent > fputil:: FPBits<T> ::MAX_EXPONENT) {
exponent = fputil:: FPBits<T> ::MAX_EXPONENT;
} else if (temp_exponent < -fputil:: FPBits<T> ::MAX_EXPONENT) {
exponent = -fputil:: FPBits<T> ::MAX_EXPONENT;
if (temp_exponent > FPBits::MAX_EXPONENT) {
exponent = FPBits::MAX_EXPONENT;
} else if (temp_exponent < -FPBits::MAX_EXPONENT) {
exponent = -FPBits::MAX_EXPONENT;
} else {
exponent = static_cast <int32_t >(temp_exponent);
}
Expand All
@@ -1099,8 +1058,10 @@ hexadecimal_string_to_float(const char *__restrict src,
// is used as the backend for all of the string to float functions.
template <class T >
LIBC_INLINE StrToNumResult<T> strtofloatingpoint (const char *__restrict src) {
using BitsType = typename fputil::FPBits<T>::UIntType;
fputil::FPBits<T> result = fputil::FPBits<T>();
using FPBits = typename fputil::FPBits<T>;
using UIntType = typename FPBits::UIntType;
FPBits result = FPBits ();
bool seen_digit = false ;
char sign = ' +' ;
Expand Down
Expand Up
@@ -1172,7 +1133,7 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
tolower (src[index + 2 ]) == nan_string[2 ]) {
seen_digit = true ;
index += 3 ;
BitsType nan_mantissa = 0 ;
UIntType nan_mantissa = 0 ;
// this handles the case of `NaN(n-character-sequence)`, where the
// n-character-sequence is made of 0 or more letters and numbers in any
// order.
Expand All
@@ -1186,7 +1147,7 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
if (src[index ] == ' )' ) {
++index ;
if (isdigit (src[left_paren + 1 ])) {
// This is to prevent errors when BitsType is larger than 64 bits,
// This is to prevent errors when UIntType is larger than 64 bits,
// since strtointeger only supports up to 64 bits. This is actually
// more than is required by the specification, which says for the
// input type "NAN(n-char-sequence)" that "the meaning of
Expand All
@@ -1197,7 +1158,7 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
if (strtoint_result.has_error ()) {
error = strtoint_result.error ;
}
nan_mantissa = static_cast <BitsType >(strtoint_result.value );
nan_mantissa = static_cast <UIntType >(strtoint_result.value );
if (src[left_paren + 1 + strtoint_result.parsed_len ] != ' )' )
nan_mantissa = 0 ;
}
Expand All
@@ -1207,21 +1168,21 @@ LIBC_INLINE StrToNumResult<T> strtofloatingpoint(const char *__restrict src) {
}
nan_mantissa |= fputil::FloatProperties<T>::QUIET_NAN_MASK;
if (result.get_sign ()) {
result = fputil:: FPBits<T> (result.build_quiet_nan (nan_mantissa));
result = FPBits (result.build_quiet_nan (nan_mantissa));
result.set_sign (true );
} else {
result.set_sign (false );
result = fputil:: FPBits<T> (result.build_quiet_nan (nan_mantissa));
result = FPBits (result.build_quiet_nan (nan_mantissa));
}
}
} else if (tolower (src[index ]) == ' i' ) { // INF
if (tolower (src[index + 1 ]) == inf_string[1 ] &&
tolower (src[index + 2 ]) == inf_string[2 ]) {
seen_digit = true ;
if (result.get_sign ())
result = fputil:: FPBits<T> (result.neg_inf ());
result = FPBits (result.neg_inf ());
else
result = fputil:: FPBits<T> (result.inf ());
result = FPBits (result.inf ());
if (tolower (src[index + 3 ]) == inf_string[3 ] &&
tolower (src[index + 4 ]) == inf_string[4 ] &&
tolower (src[index + 5 ]) == inf_string[5 ] &&
Expand Down