[libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conv…

…ersions for subnormal ranges and 80-bit floating points. (#81780)
llvm · Feb 15, 2024 · ff409d3 · ff409d3
1 parent 8ce1448
commit ff409d3
Show file tree

Hide file tree

Showing 20 changed files with 222 additions and 63 deletions.
diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
     libc.src.math.fmaxf128
     libc.src.math.fminf128
     libc.src.math.frexpf128
+    libc.src.math.ldexpf128
     libc.src.math.roundf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128

diff --git a/libc/config/linux/riscv/entrypoints.txt b/libc/config/linux/riscv/entrypoints.txt
@@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
     libc.src.math.fmaxf128
     libc.src.math.fminf128
     libc.src.math.frexpf128
+    libc.src.math.ldexpf128
     libc.src.math.roundf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
     libc.src.math.fmaxf128
     libc.src.math.fminf128
     libc.src.math.frexpf128
+    libc.src.math.ldexpf128
     libc.src.math.roundf128
     libc.src.math.sqrtf128
     libc.src.math.truncf128

diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst
@@ -191,6 +191,8 @@ Basic Operations
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | ldexpl       | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
+| ldexpf128    | |check| | |check| |         | |check| |         |         |         |         |         |         |         |         |
++--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | llrint       | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |
 +--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
 | llrintf      | |check| | |check| | |check| | |check| | |check| |         |         | |check| | |check| | |check| |         |         |

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> {
           FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
           FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
           FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
+          GuardedFunctionSpec<"ldexpf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_COMPILER_HAS_FLOAT128">,
 
           FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

diff --git a/libc/src/__support/FPUtil/CMakeLists.txt b/libc/src/__support/FPUtil/CMakeLists.txt
@@ -75,24 +75,6 @@ add_header_library(
     libc.src.__support.common
 )
 
-add_header_library(
-  manipulation_functions
-  HDRS
-    ManipulationFunctions.h
-  DEPENDS
-    .fenv_impl
-    .fp_bits
-    .nearest_integer_operations
-    .normal_float
-    libc.src.__support.CPP.bit
-    libc.src.__support.CPP.limits
-    libc.src.__support.CPP.type_traits
-    libc.src.__support.common
-    libc.src.__support.macros.optimization
-    libc.include.math
-    libc.src.errno.errno
-)
-
 add_header_library(
   basic_operations
   HDRS
@@ -221,4 +203,23 @@ add_header_library(
     libc.src.__support.macros.optimization
 )
 
+add_header_library(
+  manipulation_functions
+  HDRS
+    ManipulationFunctions.h
+  DEPENDS
+    .fenv_impl
+    .fp_bits
+    .dyadic_float
+    .nearest_integer_operations
+    .normal_float
+    libc.src.__support.CPP.bit
+    libc.src.__support.CPP.limits
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.common
+    libc.src.__support.macros.optimization
+    libc.include.math
+    libc.src.errno.errno
+)
+
 add_subdirectory(generic)
diff --git a/libc/src/__support/FPUtil/FPBits.h b/libc/src/__support/FPUtil/FPBits.h
@@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
   using typename UP::Significand;
 
   using UP::FP_MASK;
-  using UP::SIG_LEN;
 
 public:
   // Constants.
   using UP::EXP_BIAS;
   using UP::EXP_MASK;
   using UP::FRACTION_MASK;
+  using UP::SIG_LEN;
   using UP::SIGN_MASK;
   LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
       (1 << UP::EXP_LEN) - 1;
@@ -732,11 +732,14 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
   // Unsafe function to create a floating point representation.
   // It simply packs the sign, biased exponent and mantissa values without
   // checking bound nor normalization.
+  //
+  // WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
+  // in the 'mantissa' by the caller.  This function will not check for its
+  // validity.
+  //
   // FIXME: Use an uint32_t for 'biased_exp'.
   LIBC_INLINE static constexpr RetT
   create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
-    static_assert(fp_type != FPType::X86_Binary80,
-                  "This function is not tested for X86 Extended Precision");
     return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
                        Significand(mantissa)));
   }

diff --git a/libc/src/__support/FPUtil/ManipulationFunctions.h b/libc/src/__support/FPUtil/ManipulationFunctions.h
@@ -12,6 +12,8 @@
 #include "FPBits.h"
 #include "NearestIntegerOperations.h"
 #include "NormalFloat.h"
+#include "dyadic_float.h"
+#include "rounding_mode.h"
 
 #include "src/__support/CPP/bit.h"
 #include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
@@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) {
 
 template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
 LIBC_INLINE T ldexp(T x, int exp) {
-  if (LIBC_UNLIKELY(exp == 0))
-    return x;
   FPBits<T> bits(x);
-  if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan()))
+  if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
     return x;
 
   // NormalFloat uses int32_t to store the true exponent value. We should ensure
@@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) {
   // early. Because the result of the ldexp operation can be a subnormal number,
   // we need to accommodate the (mantissaWidth + 1) worth of shift in
   // calculating the limit.
-  int exp_limit = FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
-  if (exp > exp_limit)
-    return FPBits<T>::inf(bits.sign()).get_val();
+  constexpr int EXP_LIMIT =
+      FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
+  if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
+    int rounding_mode = quick_get_round();
+    Sign sign = bits.sign();
+
+    if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) ||
+        (sign == Sign::NEG && rounding_mode == FE_UPWARD) ||
+        (rounding_mode == FE_TOWARDZERO))
+      return FPBits<T>::max_normal(sign).get_val();
+
+    set_errno_if_required(ERANGE);
+    raise_except_if_required(FE_OVERFLOW);
+    return FPBits<T>::inf(sign).get_val();
+  }
 
   // Similarly on the negative side we return zero early if |exp| is too small.
-  if (exp < -exp_limit)
-    return FPBits<T>::zero(bits.sign()).get_val();
+  if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) {
+    int rounding_mode = quick_get_round();
+    Sign sign = bits.sign();
+
+    if ((sign == Sign::POS && rounding_mode == FE_UPWARD) ||
+        (sign == Sign::NEG && rounding_mode == FE_DOWNWARD))
+      return FPBits<T>::min_subnormal(sign).get_val();
+
+    set_errno_if_required(ERANGE);
+    raise_except_if_required(FE_UNDERFLOW);
+    return FPBits<T>::zero(sign).get_val();
+  }
 
   // For all other values, NormalFloat to T conversion handles it the right way.
-  NormalFloat<T> normal(bits);
+  DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
   normal.exponent += exp;
-  return normal;
+  return static_cast<T>(normal);
 }
 
 template <typename T, typename U,

diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h
@@ -44,7 +44,7 @@ template <size_t Bits> struct DyadicFloat {
     static_assert(FPBits<T>::FRACTION_LEN < Bits);
     FPBits<T> x_bits(x);
     sign = x_bits.sign();
-    exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
+    exponent = x_bits.get_explicit_exponent() - FPBits<T>::FRACTION_LEN;
     mantissa = MantissaType(x_bits.get_explicit_mantissa());
     normalize();
   }
@@ -79,25 +79,32 @@ template <size_t Bits> struct DyadicFloat {
     return *this;
   }
 
-  // Assume that it is already normalized and output is not underflow.
+  // Assume that it is already normalized.
   // Output is rounded correctly with respect to the current rounding mode.
-  // TODO(lntue): Add support for underflow.
-  // TODO(lntue): Test or add specialization for x86 long double.
   template <typename T,
             typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
                                             (FPBits<T>::FRACTION_LEN < Bits),
                                         void>>
   explicit operator T() const {
-    // TODO(lntue): Do we need to treat signed zeros properly?
-    if (mantissa.is_zero())
-      return 0.0;
+    if (LIBC_UNLIKELY(mantissa.is_zero()))
+      return FPBits<T>::zero(sign).get_val();
 
     // Assume that it is normalized, and output is also normal.
     constexpr uint32_t PRECISION = FPBits<T>::FRACTION_LEN + 1;
     using output_bits_t = typename FPBits<T>::StorageType;
+    constexpr output_bits_t IMPLICIT_MASK =
+        FPBits<T>::SIG_MASK - FPBits<T>::FRACTION_MASK;
 
     int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);
 
+    if (LIBC_UNLIKELY(exp_hi > 2 * FPBits<T>::EXP_BIAS)) {
+      // Results overflow.
+      T d_hi =
+          FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
+              .get_val();
+      return T(2) * d_hi;
+    }
+
     bool denorm = false;
     uint32_t shift = Bits - PRECISION;
     if (LIBC_UNLIKELY(exp_hi <= 0)) {
@@ -112,49 +119,57 @@ template <size_t Bits> struct DyadicFloat {
 
     MantissaType m_hi(mantissa >> shift);
 
-    T d_hi = FPBits<T>::create_value(sign, exp_hi,
-                                     static_cast<output_bits_t>(m_hi) &
-                                         FPBits<T>::FRACTION_MASK)
+    T d_hi = FPBits<T>::create_value(
+                 sign, exp_hi,
+                 (static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
+                     IMPLICIT_MASK)
                  .get_val();
 
-    const MantissaType round_mask = MantissaType(1) << (shift - 1);
-    const MantissaType sticky_mask = round_mask - MantissaType(1);
+    MantissaType round_mask = MantissaType(1) << (shift - 1);
+    MantissaType sticky_mask = round_mask - MantissaType(1);
 
     bool round_bit = !(mantissa & round_mask).is_zero();
     bool sticky_bit = !(mantissa & sticky_mask).is_zero();
     int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);
 
     T d_lo;
+
     if (LIBC_UNLIKELY(exp_lo <= 0)) {
       // d_lo is denormal, but the output is normal.
       int scale_up_exponent = 2 * PRECISION;
       T scale_up_factor =
           FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
-                                  output_bits_t(0))
+                                  IMPLICIT_MASK)
               .get_val();
       T scale_down_factor =
           FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
-                                  output_bits_t(0))
+                                  IMPLICIT_MASK)
               .get_val();
 
       d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
-                                     output_bits_t(0))
+                                     IMPLICIT_MASK)
                  .get_val();
 
       return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
              scale_down_factor;
     }
 
-    d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
+    d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();
 
     // Still correct without FMA instructions if `d_lo` is not underflow.
     T r = multiply_add(d_lo, T(round_and_sticky), d_hi);
 
     if (LIBC_UNLIKELY(denorm)) {
-      // Output is denormal, simply clear the exponent field.
-      output_bits_t clear_exp = output_bits_t(exp_hi)
-                                << FPBits<T>::FRACTION_LEN;
+      // Exponent before rounding is in denormal range, simply clear the
+      // exponent field.
+      output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
       output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
+      if (!(r_bits & FPBits<T>::EXP_MASK)) {
+        // Output is denormal after rounding, clear the implicit bit for 80-bit
+        // long double.
+        r_bits -= IMPLICIT_MASK;
+      }
+
       return FPBits<T>(r_bits).get_val();
     }
 

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl)
 add_math_entrypoint_object(ldexp)
 add_math_entrypoint_object(ldexpf)
 add_math_entrypoint_object(ldexpl)
+add_math_entrypoint_object(ldexpf128)
 
 add_math_entrypoint_object(log10)
 add_math_entrypoint_object(log10f)

diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -1001,10 +1001,10 @@ add_entrypoint_object(
     ldexp.cpp
   HDRS
     ../ldexp.h
+  COMPILE_OPTIONS
+    -O3
   DEPENDS
     libc.src.__support.FPUtil.manipulation_functions
-  COMPILE_OPTIONS
-    -O2
 )
 
 add_entrypoint_object(
@@ -1013,10 +1013,10 @@ add_entrypoint_object(
     ldexpf.cpp
   HDRS
     ../ldexpf.h
+  COMPILE_OPTIONS
+    -O3
   DEPENDS
     libc.src.__support.FPUtil.manipulation_functions
-  COMPILE_OPTIONS
-    -O2
 )
 
 add_entrypoint_object(
@@ -1025,10 +1025,23 @@ add_entrypoint_object(
     ldexpl.cpp
   HDRS
     ../ldexpl.h
+  COMPILE_OPTIONS
+    -O3
   DEPENDS
     libc.src.__support.FPUtil.manipulation_functions
+)
+
+add_entrypoint_object(
+  ldexpf128
+  SRCS
+    ldexpf128.cpp
+  HDRS
+    ../ldexpf128.h
   COMPILE_OPTIONS
-    -O2
+    -O3
+  DEPENDS
+  libc.src.__support.macros.properties.float
+  libc.src.__support.FPUtil.manipulation_functions
 )
 
 add_object_library(

diff --git a/libc/src/math/generic/ldexpf128.cpp b/libc/src/math/generic/ldexpf128.cpp
@@ -0,0 +1,19 @@
+//===-- Implementation of ldexpf128 function ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/math/ldexpf128.h"
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+
+namespace LIBC_NAMESPACE {
+
+LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) {
+  return fputil::ldexp(x, exp);
+}
+
+} // namespace LIBC_NAMESPACE