Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[libc] Add conversions between FPBits and greater precision floating point representations #80709

Open
wants to merge 7 commits into
base: main
Choose a base branch
from

Conversation

gchatelet
Copy link
Contributor

No description provided.

@llvmbot
Copy link

llvmbot commented Feb 5, 2024

@llvm/pr-subscribers-libc

Author: Guillaume Chatelet (gchatelet)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/80709.diff

2 Files Affected:

  • (modified) libc/src/__support/FPUtil/FPBits.h (+130)
  • (modified) libc/test/src/__support/FPUtil/fpbits_test.cpp (+258)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
index 6665c90845683b..6ce3a1bb08ffa5 100644
--- a/libc/src/__support/FPUtil/FPBits.h
+++ b/libc/src/__support/FPUtil/FPBits.h
@@ -769,6 +769,136 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
     }
     return RetT(result.uintval());
   }
+
+  // 'Number' represents a finite (non-inf, non-NaN) floating point number. It
+  // is independant of the underlying encoding and allows for easy manipulation
+  // of sign, exponent and significand. This format's precision is larger than
+  // the encoded form. There is no notion of subnormal for a 'Number'.
+  struct Number {
+
+    // The number of extra bits for the significand compared to the encoded
+    // form.
+    LIBC_INLINE_VAR static constexpr int EXTRA_PRECISION =
+        UP::STORAGE_LEN - UP::FRACTION_LEN - 1;
+
+    Sign sign = Sign::POS;
+    int32_t exponent = 0;
+    StorageType significand = 0;
+
+    LIBC_INLINE constexpr bool is_zero() const { return significand == 0; }
+
+    // Moves the leading one of the significand to StorageType's MSB position
+    // and changes the exponent accordingly. This changes the internal
+    // representation to maximize the precision of the Number but it doesn't
+    // change its value.
+    LIBC_INLINE constexpr Number maximize_precision() const {
+      return get_scaled(-cpp::countl_zero(significand));
+    }
+
+    // Moves the trailing one of the significand to StorageType's LSB position
+    // and changes the exponent accordingly. This changes the internal
+    // representation to minimize the precision of the Number but it doesn't
+    // change its value.
+    LIBC_INLINE constexpr Number minimize_precision() const {
+      return get_scaled(cpp::countr_zero(significand));
+    }
+
+    // If non-zero, normalizes this number by moving the leading bit of the
+    // significand to StorageType's MSB position (maximize_precision). If zero
+    // also makes the exponent 0.
+    LIBC_INLINE constexpr Number normalize() const {
+      if (is_zero())
+        return {sign, 0, significand};
+      return maximize_precision();
+    }
+
+    // The rounding mode to use when materializing a Number (see below).
+    enum Rounding { TOWARDZERO, AWAYZERO, TONEAREST };
+
+    // The precision to use when materializing a Number (see below).
+    // - EXACT means this Number contains all the information,
+    // - TRUNCATED means that the significand was truncated.
+    enum Precision { TRUNCATED, EXACT };
+
+    // Creates a 'RetT' from the number representation.
+    // When this Number is too large to be represented 'infinity' is returned.
+    // When this Number is too small to be represented 'zero' or 'min_subnormal'
+    // is returned depending on the rounding mode.
+    LIBC_INLINE constexpr RetT materialize(Rounding rounding = TOWARDZERO,
+                                           Precision precision = EXACT) const {
+      if (exponent <= (INT32_MIN + UP::STORAGE_LEN))
+        return rounding == AWAYZERO ? RetT::min_subnormal(sign)
+                                    : RetT::zero(sign);
+      if (is_zero())
+        return precision == TRUNCATED && rounding == AWAYZERO
+                   ? RetT::min_subnormal(sign)
+                   : RetT::zero(sign);
+      if (exponent >= (INT32_MAX - UP::STORAGE_LEN))
+        return rounding == TOWARDZERO ? RetT::max_normal(sign)
+                                      : RetT::inf(sign);
+
+      const int leading_zeroes = cpp::countl_zero(significand);
+      const int extra_len = EXTRA_PRECISION - leading_zeroes;
+      // 'extra_len' is smaller than 'STORAGE_LEN' by definition.
+      static_assert(EXTRA_PRECISION < UP::STORAGE_LEN);
+      const StorageType extra_bits_mask =
+          extra_len <= 0 ? StorageType(0)
+                         : (StorageType(1) << extra_len) - StorageType(1);
+      const StorageType extra_bits = significand & extra_bits_mask;
+      const StorageType extra_bits_midpoint = extra_bits_mask >> 1;
+      const bool round_toward_inf =
+          (rounding == AWAYZERO && (extra_bits || precision == TRUNCATED)) ||
+          (rounding == TONEAREST &&
+           ((extra_bits > extra_bits_midpoint) ||
+            ((extra_bits == extra_bits_midpoint) && (precision == TRUNCATED))));
+      int32_t rep_exponent = exponent - leading_zeroes;
+      constexpr int32_t EXP_MIN = (int32_t)Exponent::MIN();
+      constexpr int32_t EXP_SUBNORMAL = (int32_t)Exponent::SUBNORMAL();
+
+      int lshift = leading_zeroes - EXTRA_PRECISION;
+      if (rep_exponent < EXP_MIN) {
+        lshift -= EXP_MIN - rep_exponent;
+        rep_exponent = EXP_SUBNORMAL;
+      }
+
+      StorageType rep_significand = significand;
+      if (lshift > 0)
+        rep_significand <<= lshift;
+      else if (lshift < 0)
+        rep_significand >>= -lshift;
+
+      const RetT rep(
+          encode(sign, Exponent(rep_exponent), Significand(rep_significand)));
+
+      return round_toward_inf ? rep.next_toward_inf() : rep;
+    }
+
+  private:
+    // This operation changes the scale of the Number by offsetting the exponent
+    // and shift the significand.
+    LIBC_INLINE constexpr Number get_scaled(int offset) const {
+      if (offset == 0)
+        return *this;
+      Number num;
+      num.sign = sign;
+      num.exponent = exponent + offset;
+      num.significand = offset == 0 ? significand
+                                    : (offset > 0 ? significand >> offset
+                                                  : significand << -offset);
+      return num;
+    }
+  };
+
+  // Returns a 'Number' representation of the number, the returned number
+  // may or may not be normalized (leading bit of the significant at MSB
+  // position). Only valid to call when is_finite().
+  LIBC_INLINE constexpr Number get_number() const {
+    Number num;
+    num.sign = sign();
+    num.exponent = get_explicit_exponent() + Number::EXTRA_PRECISION;
+    num.significand = get_explicit_mantissa();
+    return num;
+  }
 };
 
 // A generic class to manipulate floating point formats.
diff --git a/libc/test/src/__support/FPUtil/fpbits_test.cpp b/libc/test/src/__support/FPUtil/fpbits_test.cpp
index 4504a4f0cfcc7d..2305eed3866640 100644
--- a/libc/test/src/__support/FPUtil/fpbits_test.cpp
+++ b/libc/test/src/__support/FPUtil/fpbits_test.cpp
@@ -327,6 +327,264 @@ TYPED_TEST(LlvmLibcFPBitsTest, NextTowardInf, FPTypes) {
   }
 }
 
+TYPED_TEST(LlvmLibcFPBitsTest, NumberConstruction, FPTypes) {
+  using LIBC_NAMESPACE::cpp::countl_zero;
+  using LIBC_NAMESPACE::cpp::countr_zero;
+  using Number = typename T::Number;
+
+  // When using get_number() the significand is transfered as-is and the
+  // exponent is adjusted to reflect the extra precision (now the significand
+  // uses (STORAGE_LEN - 1) bits instead of FRACTION_LEN bits).
+
+  // e.g., with IEEE754_Binary16
+  // 1.0 in IEEE754_Binary16 : 0b0011110000000000
+  //                             SEEEEEMMMMMMMMMM
+  // number's significand    : 0b0000010000000000
+  // EXTRA_PRECISION         :   ^^^^^
+  // number's exponent       : EXTRA_PRECISION
+
+  const T one = T::one();
+
+  const Number num = one.get_number();
+
+  // "num" and "one" have the same sign.
+  ASSERT_EQ(num.sign.is_pos(), one.is_pos());
+
+  // For 'one', the leading one of the significant is at position FRACTION_LEN.
+  // So we have FRACTION_LEN zeroes after it.
+  ASSERT_EQ(countr_zero(num.significand), T::FRACTION_LEN);
+
+  // The exponent is increased by EXTRA_PRECISION.
+  // Since the exponent for 'one' is '0' the number's exponent is just
+  // EXTRA_PRECISION.
+  ASSERT_EQ(num.exponent, Number::EXTRA_PRECISION);
+
+  // Because the significant is now stored in 'StorageType' we have extra
+  // precisions bits available at the left of the leading one.
+  ASSERT_GT(Number::EXTRA_PRECISION, 0);
+  ASSERT_EQ(countl_zero(num.significand), Number::EXTRA_PRECISION);
+
+  // In maximized precision form, the leading one is moved at StorageType's MSB.
+  // number's significand    : 0b1000000000000000
+  // number's exponent       : 0
+  const Number max_precision = one.get_number().maximize_precision();
+  ASSERT_TRUE(max_precision.sign.is_pos());
+  // The leading bit is now in the MSB of the storage.
+  ASSERT_EQ(countl_zero(max_precision.significand), 0);
+  ASSERT_EQ(max_precision.exponent, 0);
+
+  // In minimized precision form, the leading one is moved at StorageType's LSB.
+  // number's significand    : 0b0000000000000001
+  // number's exponent       : FRACTION_LEN + EXTRA_PRECISION
+  const Number min_precision = one.get_number().minimize_precision();
+  ASSERT_TRUE(min_precision.sign.is_pos());
+  // The leading bit is now in the MSB of the storage.
+  ASSERT_EQ(countr_zero(min_precision.significand), 0);
+  ASSERT_EQ(min_precision.exponent, T::FRACTION_LEN + Number::EXTRA_PRECISION);
+}
+
+#define ASSERT_MATERIALIZE_AS(NUMBER, ROUNDING, PRECISION, REP)                \
+  ASSERT_SAME_REP(NUMBER.materialize(ROUNDING, PRECISION), REP)
+
+// For all 'FPType' and all finite 'FP' values, we check that we can convert the
+// 'FPRep' to a 'Number' and back to the original 'FPRep' without loss.
+// We also check that changing the scale of the intermediary 'Number' has no
+// effect.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberBackAndForth, FPTypes) {
+  // using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    for (FP fp : all_fp_values) {
+      const T rep = make<T>(sign, fp);
+      if (!rep.is_finite())
+        continue;
+      // We test numbers at different scales.
+      // Note: changing scale changes the internal representation but not the
+      // Number's value.
+      const Number scaled_numbers[] = {
+          rep.get_number(),
+          rep.get_number().maximize_precision(),
+          rep.get_number().minimize_precision(),
+      };
+      for (const Number &num : scaled_numbers) {
+        // When numbers are exact (i.e., not truncated) they should materialize
+        // back exactly whatever the rounding mode.
+        ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+        ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+        ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      }
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TOWARDZERO' rounding mode. This rounding mode corresponds to C++ cast
+// semantics and simply discards the extra precision.
+// That is, whatever the values of the extra bits, 'Number' will materialize
+// back as 'FPRep' exactly.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundTowardZero, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  for (Sign sign : all_signs) {
+    for (FP fp : all_fp_values) {
+      const T rep = make<T>(sign, fp);
+      if (!rep.is_finite())
+        continue;
+      // Number with EXTRA_PRECISION bits.
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      // Non-exact numbers converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, rep);
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'AWAYZERO' rounding mode. This rounding mode will convert back to 'FPRep'
+// only if there is no extra bit set and Truncation is 'EXACT', otherwise it
+// will materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundAwayZero, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  const struct {
+    FP initial;
+    FP rounded;
+  } TESTS[] = {
+      {FP::ZERO, FP::MIN_SUBNORMAL},          //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
+      {FP::MAX_NORMAL, FP::INF},              //
+      {FP::INF, FP::INF},                     //
+      {FP::QUIET_NAN, FP::QUIET_NAN},         //
+      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+  };
+  for (Sign sign : all_signs) {
+    for (auto tc : TESTS) {
+      const T rep = make<T>(sign, tc.initial);
+      const T rounded = make<T>(sign, tc.rounded);
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rep);
+      // Non-exact numbers get rounded toward infinity.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, rounded);
+    }
+  }
+}
+
+// Here we test materialization of a 'Number' back to an 'FPRep' with the
+// 'TONEAREST' rounding mode. This rounding mode will convert back to 'FPRep'
+// only if there is no extra bit set and Truncation is 'EXACT', otherwise it
+// will materialize as the next representable number.
+TYPED_TEST(LlvmLibcFPBitsTest, NumberRoundToNearest, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  static constexpr StorageType EXTRA_BITS_MASK =
+      LIBC_NAMESPACE::mask_trailing_ones<StorageType,
+                                         Number::EXTRA_PRECISION>();
+  const struct {
+    FP initial;
+    FP rounded;
+  } TESTS[] = {
+      {FP::ZERO, FP::MIN_SUBNORMAL},          //
+      {FP::MAX_SUBNORMAL, FP::MIN_NORMAL},    //
+      {FP::MAX_NORMAL, FP::INF},              //
+      {FP::INF, FP::INF},                     //
+      {FP::QUIET_NAN, FP::QUIET_NAN},         //
+      {FP::SIGNALING_NAN, FP::SIGNALING_NAN}, //
+  };
+  for (Sign sign : all_signs) {
+    for (auto tc : TESTS) {
+      const T rep = make<T>(sign, tc.initial);
+      const T rounded = make<T>(sign, tc.rounded);
+      Number num = rep.get_number().maximize_precision();
+
+      // Exact number converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      // Non-exact numbers converts back to rep.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+
+      if (rep.is_zero())
+        continue; // extra bits are only present for non-zero numbers.
+
+      ++num.significand; // Smallest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rep);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rep);
+      num.significand |= EXTRA_BITS_MASK; // Largest extra bits value.
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, rounded);
+      ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, rounded);
+    }
+  }
+}
+
+// We test the materialization of
+TYPED_TEST(LlvmLibcFPBitsTest, SmallestNumber, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    Number num;
+    num.sign = sign;
+    num.exponent = INT32_MIN;
+    num.significand = StorageType(1);
+
+    const T zero = make<T>(sign, FP::ZERO);
+    const T min = make<T>(sign, FP::MIN_SUBNORMAL);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, min);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, min);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, zero);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, zero);
+  }
+}
+
+TYPED_TEST(LlvmLibcFPBitsTest, LargestNumber, FPTypes) {
+  using StorageType = typename T::StorageType;
+  using Number = typename T::Number;
+  for (Sign sign : all_signs) {
+    Number num;
+    num.sign = sign;
+    num.exponent = INT32_MAX;
+    num.significand = ~StorageType(0);
+
+    const T inf = make<T>(sign, FP::INF);
+    const T max = make<T>(sign, FP::MAX_NORMAL);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::EXACT, max);
+    ASSERT_MATERIALIZE_AS(num, Number::TOWARDZERO, Number::TRUNCATED, max);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::EXACT, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::AWAYZERO, Number::TRUNCATED, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::EXACT, inf);
+    ASSERT_MATERIALIZE_AS(num, Number::TONEAREST, Number::TRUNCATED, inf);
+  }
+}
+
 TEST(LlvmLibcFPBitsTest, FloatType) {
   using FloatBits = FPBits<float>;
 

@gchatelet gchatelet marked this pull request as draft February 5, 2024 20:50
@gchatelet gchatelet removed the request for review from legrosbuffle February 5, 2024 20:51
@michaelrj-google
Copy link
Contributor

This number struct looks very similar to DyadicFloat. Would it be possible to use that instead?

@gchatelet
Copy link
Contributor Author

This number struct looks very similar to DyadicFloat. Would it be possible to use that instead?

It is indeed quite similar, thx for noticing.
I've converted this PR to draft as I still need to fix a few corner cases. Once all of my tests pass I'll integrate it into DyadicFloat.

@gchatelet
Copy link
Contributor Author

Making this patch ready for review to get pre-commit CI running

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants