Skip to content

Commit

Permalink
[libc][math] Add C23 ldexpf128 math function and fix DyadicFloat conv…
Browse files Browse the repository at this point in the history
…ersions for subnormal ranges and 80-bit floating points. (#81780)
  • Loading branch information
lntue committed Feb 15, 2024
1 parent 8ce1448 commit ff409d3
Show file tree
Hide file tree
Showing 20 changed files with 222 additions and 63 deletions.
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/riscv/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,7 @@ if(LIBC_COMPILER_HAS_FLOAT128)
libc.src.math.fmaxf128
libc.src.math.fminf128
libc.src.math.frexpf128
libc.src.math.ldexpf128
libc.src.math.roundf128
libc.src.math.sqrtf128
libc.src.math.truncf128
Expand Down
2 changes: 2 additions & 0 deletions libc/docs/math/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,8 @@ Basic Operations
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| ldexpf128 | |check| | |check| | | |check| | | | | | | | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
+--------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+
| llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | |
Expand Down
1 change: 1 addition & 0 deletions libc/spec/stdc.td
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,7 @@ def StdC : StandardSpec<"stdc"> {
FunctionSpec<"ldexp", RetValSpec<DoubleType>, [ArgSpec<DoubleType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpf", RetValSpec<FloatType>, [ArgSpec<FloatType>, ArgSpec<IntType>]>,
FunctionSpec<"ldexpl", RetValSpec<LongDoubleType>, [ArgSpec<LongDoubleType>, ArgSpec<IntType>]>,
GuardedFunctionSpec<"ldexpf128", RetValSpec<Float128Type>, [ArgSpec<Float128Type>, ArgSpec<IntType>], "LIBC_COMPILER_HAS_FLOAT128">,

FunctionSpec<"log10", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log10f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
Expand Down
37 changes: 19 additions & 18 deletions libc/src/__support/FPUtil/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -75,24 +75,6 @@ add_header_library(
libc.src.__support.common
)

add_header_library(
manipulation_functions
HDRS
ManipulationFunctions.h
DEPENDS
.fenv_impl
.fp_bits
.nearest_integer_operations
.normal_float
libc.src.__support.CPP.bit
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.common
libc.src.__support.macros.optimization
libc.include.math
libc.src.errno.errno
)

add_header_library(
basic_operations
HDRS
Expand Down Expand Up @@ -221,4 +203,23 @@ add_header_library(
libc.src.__support.macros.optimization
)

add_header_library(
manipulation_functions
HDRS
ManipulationFunctions.h
DEPENDS
.fenv_impl
.fp_bits
.dyadic_float
.nearest_integer_operations
.normal_float
libc.src.__support.CPP.bit
libc.src.__support.CPP.limits
libc.src.__support.CPP.type_traits
libc.src.__support.common
libc.src.__support.macros.optimization
libc.include.math
libc.src.errno.errno
)

add_subdirectory(generic)
9 changes: 6 additions & 3 deletions libc/src/__support/FPUtil/FPBits.h
Original file line number Diff line number Diff line change
Expand Up @@ -633,13 +633,13 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
using typename UP::Significand;

using UP::FP_MASK;
using UP::SIG_LEN;

public:
// Constants.
using UP::EXP_BIAS;
using UP::EXP_MASK;
using UP::FRACTION_MASK;
using UP::SIG_LEN;
using UP::SIGN_MASK;
LIBC_INLINE_VAR static constexpr int MAX_BIASED_EXPONENT =
(1 << UP::EXP_LEN) - 1;
Expand Down Expand Up @@ -732,11 +732,14 @@ struct FPRepImpl : public FPRepSem<fp_type, RetT> {
// Unsafe function to create a floating point representation.
// It simply packs the sign, biased exponent and mantissa values without
// checking bound nor normalization.
//
// WARNING: For X86 Extended Precision, implicit bit needs to be set correctly
// in the 'mantissa' by the caller. This function will not check for its
// validity.
//
// FIXME: Use an uint32_t for 'biased_exp'.
LIBC_INLINE static constexpr RetT
create_value(Sign sign, StorageType biased_exp, StorageType mantissa) {
static_assert(fp_type != FPType::X86_Binary80,
"This function is not tested for X86 Extended Precision");
return RetT(encode(sign, BiasedExponent(static_cast<uint32_t>(biased_exp)),
Significand(mantissa)));
}
Expand Down
42 changes: 32 additions & 10 deletions libc/src/__support/FPUtil/ManipulationFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#include "FPBits.h"
#include "NearestIntegerOperations.h"
#include "NormalFloat.h"
#include "dyadic_float.h"
#include "rounding_mode.h"

#include "src/__support/CPP/bit.h"
#include "src/__support/CPP/limits.h" // INT_MAX, INT_MIN
Expand Down Expand Up @@ -117,10 +119,8 @@ LIBC_INLINE T logb(T x) {

template <typename T, cpp::enable_if_t<cpp::is_floating_point_v<T>, int> = 0>
LIBC_INLINE T ldexp(T x, int exp) {
if (LIBC_UNLIKELY(exp == 0))
return x;
FPBits<T> bits(x);
if (LIBC_UNLIKELY(bits.is_zero() || bits.is_inf_or_nan()))
if (LIBC_UNLIKELY((exp == 0) || bits.is_zero() || bits.is_inf_or_nan()))
return x;

// NormalFloat uses int32_t to store the true exponent value. We should ensure
Expand All @@ -129,18 +129,40 @@ LIBC_INLINE T ldexp(T x, int exp) {
// early. Because the result of the ldexp operation can be a subnormal number,
// we need to accommodate the (mantissaWidth + 1) worth of shift in
// calculating the limit.
int exp_limit = FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
if (exp > exp_limit)
return FPBits<T>::inf(bits.sign()).get_val();
constexpr int EXP_LIMIT =
FPBits<T>::MAX_BIASED_EXPONENT + FPBits<T>::FRACTION_LEN + 1;
if (LIBC_UNLIKELY(exp > EXP_LIMIT)) {
int rounding_mode = quick_get_round();
Sign sign = bits.sign();

if ((sign == Sign::POS && rounding_mode == FE_DOWNWARD) ||
(sign == Sign::NEG && rounding_mode == FE_UPWARD) ||
(rounding_mode == FE_TOWARDZERO))
return FPBits<T>::max_normal(sign).get_val();

set_errno_if_required(ERANGE);
raise_except_if_required(FE_OVERFLOW);
return FPBits<T>::inf(sign).get_val();
}

// Similarly on the negative side we return zero early if |exp| is too small.
if (exp < -exp_limit)
return FPBits<T>::zero(bits.sign()).get_val();
if (LIBC_UNLIKELY(exp < -EXP_LIMIT)) {
int rounding_mode = quick_get_round();
Sign sign = bits.sign();

if ((sign == Sign::POS && rounding_mode == FE_UPWARD) ||
(sign == Sign::NEG && rounding_mode == FE_DOWNWARD))
return FPBits<T>::min_subnormal(sign).get_val();

set_errno_if_required(ERANGE);
raise_except_if_required(FE_UNDERFLOW);
return FPBits<T>::zero(sign).get_val();
}

// For all other values, NormalFloat to T conversion handles it the right way.
NormalFloat<T> normal(bits);
DyadicFloat<FPBits<T>::STORAGE_LEN> normal(bits.get_val());
normal.exponent += exp;
return normal;
return static_cast<T>(normal);
}

template <typename T, typename U,
Expand Down
53 changes: 34 additions & 19 deletions libc/src/__support/FPUtil/dyadic_float.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ template <size_t Bits> struct DyadicFloat {
static_assert(FPBits<T>::FRACTION_LEN < Bits);
FPBits<T> x_bits(x);
sign = x_bits.sign();
exponent = x_bits.get_exponent() - FPBits<T>::FRACTION_LEN;
exponent = x_bits.get_explicit_exponent() - FPBits<T>::FRACTION_LEN;
mantissa = MantissaType(x_bits.get_explicit_mantissa());
normalize();
}
Expand Down Expand Up @@ -79,25 +79,32 @@ template <size_t Bits> struct DyadicFloat {
return *this;
}

// Assume that it is already normalized and output is not underflow.
// Assume that it is already normalized.
// Output is rounded correctly with respect to the current rounding mode.
// TODO(lntue): Add support for underflow.
// TODO(lntue): Test or add specialization for x86 long double.
template <typename T,
typename = cpp::enable_if_t<cpp::is_floating_point_v<T> &&
(FPBits<T>::FRACTION_LEN < Bits),
void>>
explicit operator T() const {
// TODO(lntue): Do we need to treat signed zeros properly?
if (mantissa.is_zero())
return 0.0;
if (LIBC_UNLIKELY(mantissa.is_zero()))
return FPBits<T>::zero(sign).get_val();

// Assume that it is normalized, and output is also normal.
constexpr uint32_t PRECISION = FPBits<T>::FRACTION_LEN + 1;
using output_bits_t = typename FPBits<T>::StorageType;
constexpr output_bits_t IMPLICIT_MASK =
FPBits<T>::SIG_MASK - FPBits<T>::FRACTION_MASK;

int exp_hi = exponent + static_cast<int>((Bits - 1) + FPBits<T>::EXP_BIAS);

if (LIBC_UNLIKELY(exp_hi > 2 * FPBits<T>::EXP_BIAS)) {
// Results overflow.
T d_hi =
FPBits<T>::create_value(sign, 2 * FPBits<T>::EXP_BIAS, IMPLICIT_MASK)
.get_val();
return T(2) * d_hi;
}

bool denorm = false;
uint32_t shift = Bits - PRECISION;
if (LIBC_UNLIKELY(exp_hi <= 0)) {
Expand All @@ -112,49 +119,57 @@ template <size_t Bits> struct DyadicFloat {

MantissaType m_hi(mantissa >> shift);

T d_hi = FPBits<T>::create_value(sign, exp_hi,
static_cast<output_bits_t>(m_hi) &
FPBits<T>::FRACTION_MASK)
T d_hi = FPBits<T>::create_value(
sign, exp_hi,
(static_cast<output_bits_t>(m_hi) & FPBits<T>::SIG_MASK) |
IMPLICIT_MASK)
.get_val();

const MantissaType round_mask = MantissaType(1) << (shift - 1);
const MantissaType sticky_mask = round_mask - MantissaType(1);
MantissaType round_mask = MantissaType(1) << (shift - 1);
MantissaType sticky_mask = round_mask - MantissaType(1);

bool round_bit = !(mantissa & round_mask).is_zero();
bool sticky_bit = !(mantissa & sticky_mask).is_zero();
int round_and_sticky = int(round_bit) * 2 + int(sticky_bit);

T d_lo;

if (LIBC_UNLIKELY(exp_lo <= 0)) {
// d_lo is denormal, but the output is normal.
int scale_up_exponent = 2 * PRECISION;
T scale_up_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS + scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();
T scale_down_factor =
FPBits<T>::create_value(sign, FPBits<T>::EXP_BIAS - scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();

d_lo = FPBits<T>::create_value(sign, exp_lo + scale_up_exponent,
output_bits_t(0))
IMPLICIT_MASK)
.get_val();

return multiply_add(d_lo, T(round_and_sticky), d_hi * scale_up_factor) *
scale_down_factor;
}

d_lo = FPBits<T>::create_value(sign, exp_lo, output_bits_t(0)).get_val();
d_lo = FPBits<T>::create_value(sign, exp_lo, IMPLICIT_MASK).get_val();

// Still correct without FMA instructions if `d_lo` is not underflow.
T r = multiply_add(d_lo, T(round_and_sticky), d_hi);

if (LIBC_UNLIKELY(denorm)) {
// Output is denormal, simply clear the exponent field.
output_bits_t clear_exp = output_bits_t(exp_hi)
<< FPBits<T>::FRACTION_LEN;
// Exponent before rounding is in denormal range, simply clear the
// exponent field.
output_bits_t clear_exp = (output_bits_t(exp_hi) << FPBits<T>::SIG_LEN);
output_bits_t r_bits = FPBits<T>(r).uintval() - clear_exp;
if (!(r_bits & FPBits<T>::EXP_MASK)) {
// Output is denormal after rounding, clear the implicit bit for 80-bit
// long double.
r_bits -= IMPLICIT_MASK;
}

return FPBits<T>(r_bits).get_val();
}

Expand Down
1 change: 1 addition & 0 deletions libc/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ add_math_entrypoint_object(ilogbl)
add_math_entrypoint_object(ldexp)
add_math_entrypoint_object(ldexpf)
add_math_entrypoint_object(ldexpl)
add_math_entrypoint_object(ldexpf128)

add_math_entrypoint_object(log10)
add_math_entrypoint_object(log10f)
Expand Down
23 changes: 18 additions & 5 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1001,10 +1001,10 @@ add_entrypoint_object(
ldexp.cpp
HDRS
../ldexp.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O2
)

add_entrypoint_object(
Expand All @@ -1013,10 +1013,10 @@ add_entrypoint_object(
ldexpf.cpp
HDRS
../ldexpf.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
COMPILE_OPTIONS
-O2
)

add_entrypoint_object(
Expand All @@ -1025,10 +1025,23 @@ add_entrypoint_object(
ldexpl.cpp
HDRS
../ldexpl.h
COMPILE_OPTIONS
-O3
DEPENDS
libc.src.__support.FPUtil.manipulation_functions
)

add_entrypoint_object(
ldexpf128
SRCS
ldexpf128.cpp
HDRS
../ldexpf128.h
COMPILE_OPTIONS
-O2
-O3
DEPENDS
libc.src.__support.macros.properties.float
libc.src.__support.FPUtil.manipulation_functions
)

add_object_library(
Expand Down
19 changes: 19 additions & 0 deletions libc/src/math/generic/ldexpf128.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
//===-- Implementation of ldexpf128 function ------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/math/ldexpf128.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/common.h"

namespace LIBC_NAMESPACE {

LLVM_LIBC_FUNCTION(float128, ldexpf128, (float128 x, int exp)) {
return fputil::ldexp(x, exp);
}

} // namespace LIBC_NAMESPACE
Loading

0 comments on commit ff409d3

Please sign in to comment.