diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h index 7b3882dde1b72..93766efdb0988 100644 --- a/libc/src/__support/FPUtil/FPBits.h +++ b/libc/src/__support/FPUtil/FPBits.h @@ -772,6 +772,183 @@ struct FPRepImpl : public FPRepSem { } return RetT(result.uintval()); } + + // 'Number' represents a finite (non-inf, non-NaN) floating point number. It + // is independent of the underlying encoding and allows for easy manipulation + // of sign, exponent and significand. This format's precision is larger than + // the encoded form. There is no notion of subnormal for a 'Number'. + struct Number { + // The number of extra bits for the significand compared to the normal + // encoded form. + // When the encoded form is subnormal we have (EXTRA_PRECISION + 1) bits of + // extra precision. + LIBC_INLINE_VAR static constexpr int EXTRA_PRECISION = + UP::STORAGE_LEN - UP::FRACTION_LEN - 1; + + Sign sign = Sign::POS; + int32_t exponent = 0; + StorageType significand = 0; + + // Whether this Number represents the zero value. + LIBC_INLINE constexpr bool is_zero() const { return significand == 0; } + + // Moves the leading one of the significand to StorageType's MSB position + // and changes the exponent accordingly. This changes the internal + // representation to maximize the precision of the Number but it doesn't + // change its value. + LIBC_INLINE constexpr Number maximize_precision() const { + if (is_zero()) + return *this; + return get_scaled(-cpp::countl_zero(significand)); + } + + // Moves the trailing one of the significand to StorageType's LSB position + // and changes the exponent accordingly. This changes the internal + // representation to minimize the precision of the Number but it doesn't + // change its value. 
+ LIBC_INLINE constexpr Number minimize_precision() const { + if (is_zero()) + return *this; + return get_scaled(cpp::countr_zero(significand)); + } + + // If non-zero, normalizes this number by moving the leading bit of the + // significand to StorageType's MSB position (maximize_precision). If zero + // also makes the exponent 0. + LIBC_INLINE constexpr Number normalize() const { + if (is_zero()) + return {sign, 0, significand}; + return maximize_precision(); + } + + // The rounding mode to use when materializing a Number (see below). + enum Rounding { TOWARDZERO, AWAYZERO, TONEAREST }; + + // The precision to use when materializing a Number (see below). + // - EXACT means this Number contains all the information, + // - TRUNCATED means that the significand was truncated. + enum Precision { TRUNCATED, EXACT }; + + // Creates a 'RetT' from the number representation. + // - When this 'Number' is too large to be represented 'infinity' or + // 'max_normal' is returned depending on the rounding mode. + // - When this 'Number' is too small to be represented 'zero' or + // 'min_subnormal' is returned depending on the rounding mode. + LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO, + Precision precision = EXACT) const { + if (is_zero()) + return precision == TRUNCATED && rounding == AWAYZERO + ? RetT::min_subnormal(sign) + : RetT::zero(sign); + + const auto underflow = [this, rounding]() -> RetT { + return rounding == AWAYZERO ? RetT::min_subnormal(sign) + : RetT::zero(sign); + }; + const auto overflow = [this, rounding]() -> RetT { + return rounding == TOWARDZERO ? RetT::max_normal(sign) + : RetT::inf(sign); + }; + + const int leading_zeroes = cpp::countl_zero(significand); + LIBC_ASSERT(leading_zeroes <= UP::STORAGE_LEN); + // If 'exponent' is too small 'exponent - leading_zeroes' below can + // overflow which is undefined behavior for signed integers. If exponent + // is too close to INT32_MIN we bail out and return the appropriate + // underflow value. 
+ constexpr int32_t smallest_exponent = INT32_MIN + UP::STORAGE_LEN; + if (exponent <= smallest_exponent) + return underflow(); + + // The exponent when the leading bit is at its final position. + int32_t rep_exponent = exponent - leading_zeroes; + + constexpr int32_t EXP_MAX(Exponent::MAX()); + constexpr int32_t EXP_MIN(Exponent::MIN()); + constexpr int32_t EXP_SUBNORMAL(Exponent::SUBNORMAL()); + + int lshift = leading_zeroes - EXTRA_PRECISION; + + // Adjust shift and exponent when the number is subnormal. + if (rep_exponent < EXP_MIN) { + lshift -= EXP_MIN - rep_exponent; + rep_exponent = EXP_SUBNORMAL; + } + + // The number of extra precision bits we have in 'significand'. + const int extra_len = -lshift; + + // Bail out before shifting: a shift by more than the width of StorageType + // is undefined behavior for built-in integer types. + if (extra_len > UP::STORAGE_LEN) + return underflow(); + + if (rep_exponent > EXP_MAX) + return overflow(); + + // The final significand shifted accordingly. When all bits are extra bits + // the result is zero; shifting by the full width would also be undefined. + StorageType rep_significand = significand; + if (lshift > 0) + rep_significand <<= lshift; + else if (extra_len == UP::STORAGE_LEN) + rep_significand = StorageType(0); + else if (lshift < 0) + rep_significand >>= extra_len; + + // When rounding is AWAYZERO or TONEAREST we need to consider extra + // precision bits. + LIBC_ASSERT(extra_len <= UP::STORAGE_LEN); + const bool has_extra_len = extra_len > 0; + StorageType extra_bits_mask{}; + StorageType extra_bits_midpoint{}; + if (has_extra_len) { + if (extra_len == UP::STORAGE_LEN) + extra_bits_mask = StorageType(~(StorageType(0))); // subnormals + else + extra_bits_mask = (StorageType(1) << extra_len) - StorageType(1); + extra_bits_midpoint = (extra_bits_mask >> 1) + StorageType(1); + } + const StorageType extra_bits = significand & extra_bits_mask; + const bool round_toward_inf = + (rounding == AWAYZERO && + ((extra_bits > 0) || (precision == TRUNCATED))) || + (rounding == TONEAREST && + ((extra_bits > extra_bits_midpoint) || + ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED)))); + const RetT rep( + encode(sign, Exponent(rep_exponent), Significand(rep_significand))); + return round_toward_inf ? 
rep.next_toward_inf() : rep; + } + + private: + // This operation changes the scale of the Number by offsetting the exponent + // and shifting the significand. + LIBC_INLINE constexpr Number get_scaled(int offset) const { + if (offset == 0) + return *this; + Number num; + num.sign = sign; + num.exponent = exponent + offset; + // 'offset' is non-zero at this point: shift right when it is positive, + // left when it is negative. + num.significand = + offset > 0 ? significand >> offset : significand << -offset; + return num; + } + }; + + // Returns a 'Number' representation of the number, the returned number + // may or may not be normalized (leading bit of the significand at MSB + // position). Only valid to call when is_finite(). + LIBC_INLINE constexpr Number get_number() const { + LIBC_ASSERT(is_finite()); + Number num; + num.sign = sign(); + num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION; + num.significand = get_explicit_mantissa(); + return num; + } }; // A generic class to manipulate floating point formats. diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp index f5c27d4fc0302..30903824db299 100644 --- a/libc/test/src/__support/FPUtil/fpbits_test.cpp +++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp @@ -212,10 +212,13 @@ constexpr FP all_fp_values[] = { constexpr Sign all_signs[] = {Sign::POS, Sign::NEG}; -using FPTypes = LIBC_NAMESPACE::testing::TypeList< - FPRep, FPRep, - FPRep, FPRep, - FPRep>; +using FPTypes = + LIBC_NAMESPACE::testing::TypeList, // + FPRep, // + FPRep, // + FPRep, // + FPRep // + >; template constexpr auto make(Sign sign, FP fp) { switch (fp) { @@ -238,6 +241,7 @@ template constexpr auto make(Sign sign, FP fp) { case FP::QUIET_NAN: return T::quiet_nan(sign); } + __builtin_unreachable(); } // Tests all properties for all types of float. 
@@ -300,6 +304,283 @@ TYPED_TEST(LlvmLibcFPBitsTest, NextTowardInf, FPTypes) { } } +TYPED_TEST(LlvmLibcFPBitsTest, NumberConstruction, FPTypes) { + using LIBC_NAMESPACE::cpp::countl_zero; + using LIBC_NAMESPACE::cpp::countr_zero; + using Number = typename T::Number; + + // When using get_number() the significand is transferred as-is and the + // exponent is adjusted to reflect the extra precision (now the significand + // uses (STORAGE_LEN - 1) bits instead of FRACTION_LEN bits). + + // e.g., with IEEE754_Binary16 + // 1.0 in IEEE754_Binary16 : 0b0011110000000000 + // SEEEEEMMMMMMMMMM + // number's significand : 0b0000010000000000 + // EXTRA_PRECISION : ^^^^^ + // number's exponent : EXTRA_PRECISION + + const T one = T::one(); + + const Number num = one.get_number(); + + // "num" and "one" have the same sign. + ASSERT_EQ(num.sign.is_pos(), one.is_pos()); + + // For 'one', the leading one of the significand is at position FRACTION_LEN. + // So we have FRACTION_LEN zeroes after it. + ASSERT_EQ(countr_zero(num.significand), T::FRACTION_LEN); + + // The exponent is increased by EXTRA_PRECISION. + // Since the exponent for 'one' is '0' the number's exponent is just + // EXTRA_PRECISION. + ASSERT_EQ(num.exponent, Number::EXTRA_PRECISION); + + // Because the significand is now stored in 'StorageType' we have extra + // precision bits available at the left of the leading one. + ASSERT_GT(Number::EXTRA_PRECISION, 0); + ASSERT_EQ(countl_zero(num.significand), Number::EXTRA_PRECISION); + + // In maximized precision form, the leading one is moved to StorageType's MSB. + // number's significand : 0b1000000000000000 + // number's exponent : 0 + const Number max_precision = one.get_number().maximize_precision(); + ASSERT_TRUE(max_precision.sign.is_pos()); + // The leading bit is now in the MSB of the storage. 
+ ASSERT_EQ(countl_zero(max_precision.significand), 0); + ASSERT_EQ(max_precision.exponent, 0); + + // In minimized precision form, the leading one is moved to StorageType's LSB. + // number's significand : 0b0000000000000001 + // number's exponent : FRACTION_LEN + EXTRA_PRECISION + const Number min_precision = one.get_number().minimize_precision(); + ASSERT_TRUE(min_precision.sign.is_pos()); + // The trailing bit is now in the LSB of the storage. + ASSERT_EQ(countr_zero(min_precision.significand), 0); + ASSERT_EQ(min_precision.exponent, T::FRACTION_LEN + Number::EXTRA_PRECISION); +} + +#define ASSERT_MATERIALIZE_AS(NUMBER, ROUNDING, PRECISION, REP) \ + ASSERT_SAME_REP(NUMBER.materialize(ROUNDING, PRECISION), REP) + +// For all 'FPType' and all finite 'FP' values, we check that we can convert the +// 'FPRep' to a 'Number' and back to the original 'FPRep' without loss. +// We also check that changing the scale of the intermediary 'Number' has no +// effect. +TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) { + // using StorageType = typename T::StorageType; + using Number = typename T::Number; + for (Sign sign : all_signs) { + for (FP fp : all_fp_values) { + const T rep = make(sign, fp); + if (!rep.is_finite()) + continue; + // We test numbers at different scales. + // Note: changing scale changes the internal representation but not the + // Number's value. + const Number scaled_numbers[] = { + rep.get_number(), + rep.get_number().maximize_precision(), + rep.get_number().minimize_precision(), + }; + for (const Number &num : scaled_numbers) { + // When numbers are exact (i.e., not truncated) they should materialize + // back exactly whatever the rounding mode. 
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep); + } + } + } +} + +// Here we test materialization of a 'Number' back to an 'FPRep' with the +// 'TOWARDZERO' rounding mode. This rounding mode corresponds to C++ cast +// semantics and simply discards the extra precision. +// That is, whatever the values of the extra bits, 'Number' will materialize +// back as 'FPRep' exactly. +TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) { + using StorageType = typename T::StorageType; + using Number = typename T::Number; + constexpr auto set_last_bits = [](StorageType value, int bits) { + return value | ((StorageType(1) << bits) - StorageType(1)); + }; + for (Sign sign : all_signs) { + for (FP fp : all_fp_values) { + const T rep = make(sign, fp); + if (!rep.is_finite()) + continue; + // Number with extra precision bits. + Number num = rep.get_number().maximize_precision(); + const int extra_bits = Number::EXTRA_PRECISION + rep.is_subnormal(); + + // Exact number converts back to rep. + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep); + // Non-exact numbers convert back to rep. + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep); + + if (rep.is_zero()) + continue; // extra bits are only present for non-zero numbers. + + const auto sig = num.significand; + num.significand = set_last_bits(sig, 1); // Smallest extra value. + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep); + num.significand = set_last_bits(sig, extra_bits); // Largest extra value. 
+ ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep); + } + } +} + +// Here we test materialization of a 'Number' back to an 'FPRep' with the +// 'AWAYZERO' rounding mode. This rounding mode will convert back to 'FPRep' +// only if there is no extra bit set and Precision is 'EXACT', otherwise it +// will materialize as the next representable number. +TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) { + using StorageType = typename T::StorageType; + using Number = typename T::Number; + constexpr auto set_last_bits = [](StorageType value, int bits) { + return value | ((StorageType(1) << bits) - StorageType(1)); + }; + const struct { + FP initial; + FP rounded; + } TESTS[] = { + {FP::ZERO, FP::MIN_SUBNORMAL}, // + {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, // + {FP::MAX_NORMAL, FP::INF}, // + }; + for (Sign sign : all_signs) { + for (auto tc : TESTS) { + const T rep = make(sign, tc.initial); + const T rounded = make(sign, tc.rounded); + // Number with extra precision bits. + Number num = rep.get_number().maximize_precision(); + const int extra_bits = Number::EXTRA_PRECISION + rep.is_subnormal(); + + // Exact number converts back to rep. + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep); + // Non-exact numbers get rounded toward infinity. + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded); + + if (rep.is_zero()) + continue; // extra bits are only present for non-zero numbers. + + const auto sig = num.significand; + num.significand = set_last_bits(sig, 1); // Smallest extra value. + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded); + num.significand = set_last_bits(sig, extra_bits); // Largest extra value. 
+ ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded); + } + } +} + +// Here we test materialization of a 'Number' back to an 'FPRep' with the +// 'TONEAREST' rounding mode. Extra bits below the midpoint, or exactly at the +// midpoint when Precision is 'EXACT', materialize back as 'FPRep'; otherwise +// 'Number' materializes as the next representable number. +TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) { + using StorageType = typename T::StorageType; + using Number = typename T::Number; + constexpr auto set_last_bits = [](StorageType value, int bits) { + return value | ((StorageType(1) << bits) - StorageType(1)); + }; + constexpr auto set_bit_at = [](StorageType value, int pos) { + return value | (StorageType(1) << (pos - 1)); + }; + const struct { + FP initial; + FP rounded; + } TESTS[] = { + {FP::ZERO, FP::MIN_SUBNORMAL}, // + {FP::MAX_SUBNORMAL, FP::MIN_NORMAL}, // + {FP::MAX_NORMAL, FP::INF}, // + }; + for (Sign sign : all_signs) { + for (auto tc : TESTS) { + const T rep = make(sign, tc.initial); + const T rounded = make(sign, tc.rounded); + Number num = rep.get_number().maximize_precision(); + const int extra_bits = Number::EXTRA_PRECISION + rep.is_subnormal(); + + // Exact number converts back to rep. + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep); + // Non-exact numbers convert back to rep. + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep); + + if (rep.is_zero()) + continue; // extra bits are only present for non-zero numbers. + + const auto sig = num.significand; + num.significand = set_last_bits(sig, 1); // Smallest extra value. + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep); + num.significand = set_last_bits(sig, extra_bits); // Largest extra value. 
+ ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded); + num.significand = set_bit_at(sig, extra_bits); // Half extra value. + // We're exactly half-way between two numbers. + // If exact we round toward zero. + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep); + // If truncated we round toward infinity. + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded); + // The next value will always round toward infinity. + ++num.significand; + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded); + } + } +} + +TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) { + using StorageType = typename T::StorageType; + using Number = typename T::Number; + constexpr int32_t exponents[] = {INT32_MIN, INT32_MIN / 2}; + for (Sign sign : all_signs) { + for (int32_t exponent : exponents) { + Number num; + num.sign = sign; + num.exponent = exponent; + num.significand = StorageType(1); + + const T zero = make(sign, FP::ZERO); + const T min = make(sign, FP::MIN_SUBNORMAL); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero); + } + } +} + +TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) { + using StorageType = typename T::StorageType; + using Number = typename T::Number; + constexpr int32_t exponents[] = {INT32_MAX, INT32_MAX / 2}; + for (Sign sign : all_signs) { + for (int32_t exponent : exponents) { + Number num; + num.sign = sign; + num.exponent = exponent; + num.significand = 
~StorageType(0); + + const T max = make(sign, FP::MAX_NORMAL); + const T inf = make(sign, FP::INF); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max); + ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf); + ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf); + ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf); + } + } +} + TEST(LlvmLibcFPBitsTest, FloatType) { using FloatBits = FPBits;