diff --git a/libc/shared/math.h b/libc/shared/math.h index a5c9de4fb7029..663900b1d3519 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -55,6 +55,7 @@ #include "math/expf.h" #include "math/expf16.h" #include "math/expm1.h" +#include "math/expm1f.h" #include "math/frexpf.h" #include "math/frexpf128.h" #include "math/frexpf16.h" diff --git a/libc/shared/math/expm1f.h b/libc/shared/math/expm1f.h new file mode 100644 index 0000000000000..e0cf6a846f116 --- /dev/null +++ b/libc/shared/math/expm1f.h @@ -0,0 +1,23 @@ +//===-- Shared expm1f function ----------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_EXPM1F_H +#define LLVM_LIBC_SHARED_MATH_EXPM1F_H + +#include "shared/libc_common.h" +#include "src/__support/math/expm1f.h" + +namespace LIBC_NAMESPACE_DECL { +namespace shared { + +using math::expm1f; + +} // namespace shared +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SHARED_MATH_EXPM1F_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 74f17b9fd8099..00b44c5725565 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -906,6 +906,23 @@ add_header_library( libc.src.errno.errno ) +add_header_library( + expm1f + HDRS + expm1f.h + DEPENDS + .common_constants + libc.src.__support.FPUtil.basic_operations + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.nearest_integer + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + libc.src.errno.errno +) + add_header_library( range_reduction_double HDRS diff --git a/libc/src/__support/math/expm1f.h b/libc/src/__support/math/expm1f.h new file mode 100644 index 0000000000000..43e79ae3112dc --- /dev/null +++ b/libc/src/__support/math/expm1f.h @@ -0,0 +1,182 @@ +//===-- Implementation header for expm1f ------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H + +#include "common_constants.h" // Lookup tables EXP_M1 and EXP_M2. +#include "src/__support/FPUtil/BasicOperations.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FMA.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/nearest_integer.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +LIBC_INLINE static constexpr float expm1f(float x) { + using namespace common_constants_internal; + using FPBits = typename fputil::FPBits; + FPBits xbits(x); + + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Exceptional value + if (LIBC_UNLIKELY(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f + int round_mode = fputil::quick_get_round(); + if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) + return 0x1.8dbe64p-3f; + return 0x1.8dbe62p-3f; + } +#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) + if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f + int round_mode = fputil::quick_get_round(); + if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD) + return -0x1.71c884p-4f; + return -0x1.71c882p-4f; + } +#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + + // When |x| > 25*log(2), or nan + if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) { + // x < log(2^-25) + if (xbits.is_neg()) { + // exp(-Inf) = 0 + if (xbits.is_inf()) + return -1.0f; + // exp(nan) = nan + if (xbits.is_nan()) + return x; + int round_mode = fputil::quick_get_round(); + if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO) + return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f + return -1.0f; + } else { + // x >= 89 or nan + if (xbits.uintval() >= 0x42b2'0000) { + if (xbits.uintval() < 0x7f80'0000U) { + int rounding = fputil::quick_get_round(); + if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) + return FPBits::max_normal().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + } + return x + FPBits::inf().get_val(); + } + } + } + + // |x| < 2^-4 + if (x_abs < 0x3d80'0000U) { + // |x| < 2^-25 + if (x_abs < 0x3300'0000U) { + // x = -0.0f + if (LIBC_UNLIKELY(xbits.uintval() == 0x8000'0000U)) + return x; + // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x + // is: + // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| + // = |x| + // < 2^-25 + // < epsilon(1)/2. + // So the correctly rounded values of expm1(x) are: + // = x + eps(x) if rounding mode = FE_UPWARD, + // or (rounding mode = FE_TOWARDZERO and x is + // negative), + // = x otherwise. + // To simplify the rounding decision and make it more efficient, we use + // fma(x, x, x) ~ x + x^2 instead. + // Note: to use the formula x + x^2 to decide the correct rounding, we + // do need fma(x, x, x) to prevent underflow caused by x*x when |x| < + // 2^-76. For targets without FMA instructions, we simply use double for + // intermediate results as it is more efficient than using an emulated + // version of FMA. +#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) + return fputil::multiply_add(x, x, x); +#else + double xd = x; + return static_cast(fputil::multiply_add(xd, xd, xd)); +#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT + } + + constexpr double COEFFS[] = {0x1p-1, + 0x1.55555555557ddp-3, + 0x1.55555555552fap-5, + 0x1.111110fcd58b7p-7, + 0x1.6c16c1717660bp-10, + 0x1.a0241f0006d62p-13, + 0x1.a01e3f8d3c06p-16}; + + // 2^-25 <= |x| < 2^-4 + double xd = static_cast(x); + double xsq = xd * xd; + // Degree-8 minimax polynomial generated by Sollya with: + // > display = hexadecimal; + // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]); + + double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]); + double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]); + double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); + + double r = fputil::polyeval(xsq, c0, c1, c2, COEFFS[6]); + return static_cast(fputil::multiply_add(r, xsq, xd)); + } + + // For -18 < x < 89, to compute expm1(x), we perform the following range + // reduction: find hi, mid, lo such that: + // x = hi + mid + lo, in which + // hi is an integer, + // mid * 2^7 is an integer + // -2^(-8) <= lo < 2^-8. + // In particular, + // hi + mid = round(x * 2^7) * 2^(-7). + // Then, + // expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1. + // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 + // respectively. exp(lo) is computed using a degree-4 minimax polynomial + // generated by Sollya. + + // x_hi = hi + mid. + float kf = fputil::nearest_integer(x * 0x1.0p7f); + int x_hi = static_cast(kf); + // Subtract (hi + mid) from x to get lo. + double xd = static_cast(fputil::multiply_add(kf, -0x1.0p-7f, x)); + x_hi += 104 << 7; + // hi = x_hi >> 7 + double exp_hi = EXP_M1[x_hi >> 7]; + // lo = x_hi & 0x0000'007fU; + double exp_mid = EXP_M2[x_hi & 0x7f]; + double exp_hi_mid = exp_hi * exp_mid; + // Degree-4 minimax polynomial generated by Sollya with the following + // commands: + // > display = hexadecimal; + // > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]); + // > Q; + double exp_lo = + fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1, + 0x1.555566668e5e7p-3, 0x1.55555555ef243p-5); + return static_cast(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0)); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_EXPM1F_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index d81b29396c425..2a2f4e7567c71 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1561,16 +1561,7 @@ add_entrypoint_object( HDRS ../expm1f.h DEPENDS - libc.src.__support.FPUtil.basic_operations - libc.src.__support.FPUtil.fenv_impl - libc.src.__support.FPUtil.fp_bits - libc.src.__support.FPUtil.multiply_add - libc.src.__support.FPUtil.nearest_integer - libc.src.__support.FPUtil.polyeval - libc.src.__support.FPUtil.rounding_mode - libc.src.__support.macros.optimization - libc.src.__support.math.common_constants - libc.src.errno.errno + libc.src.__support.math.expm1f ) add_entrypoint_object( diff --git a/libc/src/math/generic/expm1f.cpp b/libc/src/math/generic/expm1f.cpp index 72c8aa358d618..60d3bfe814963 100644 --- a/libc/src/math/generic/expm1f.cpp +++ b/libc/src/math/generic/expm1f.cpp @@ -7,168 +7,10 @@ //===----------------------------------------------------------------------===// #include "src/math/expm1f.h" -#include "src/__support/FPUtil/BasicOperations.h" -#include "src/__support/FPUtil/FEnvImpl.h" -#include "src/__support/FPUtil/FMA.h" -#include "src/__support/FPUtil/FPBits.h" -#include "src/__support/FPUtil/PolyEval.h" -#include "src/__support/FPUtil/multiply_add.h" -#include "src/__support/FPUtil/nearest_integer.h" -#include "src/__support/FPUtil/rounding_mode.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/cpu_features.h" // LIBC_TARGET_CPU_HAS_FMA -#include "src/__support/math/common_constants.h" // Lookup tables EXP_M1 and EXP_M2. +#include "src/__support/math/expm1f.h" namespace LIBC_NAMESPACE_DECL { -LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { - using namespace common_constants_internal; - using FPBits = typename fputil::FPBits; - FPBits xbits(x); - - uint32_t x_u = xbits.uintval(); - uint32_t x_abs = x_u & 0x7fff'ffffU; - -#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS - // Exceptional value - if (LIBC_UNLIKELY(x_u == 0x3e35'bec5U)) { // x = 0x1.6b7d8ap-3f - int round_mode = fputil::quick_get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_UPWARD) - return 0x1.8dbe64p-3f; - return 0x1.8dbe62p-3f; - } -#if !defined(LIBC_TARGET_CPU_HAS_FMA_DOUBLE) - if (LIBC_UNLIKELY(x_u == 0xbdc1'c6cbU)) { // x = -0x1.838d96p-4f - int round_mode = fputil::quick_get_round(); - if (round_mode == FE_TONEAREST || round_mode == FE_DOWNWARD) - return -0x1.71c884p-4f; - return -0x1.71c882p-4f; - } -#endif // LIBC_TARGET_CPU_HAS_FMA_DOUBLE -#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS - - // When |x| > 25*log(2), or nan - if (LIBC_UNLIKELY(x_abs >= 0x418a'a123U)) { - // x < log(2^-25) - if (xbits.is_neg()) { - // exp(-Inf) = 0 - if (xbits.is_inf()) - return -1.0f; - // exp(nan) = nan - if (xbits.is_nan()) - return x; - int round_mode = fputil::quick_get_round(); - if (round_mode == FE_UPWARD || round_mode == FE_TOWARDZERO) - return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f - return -1.0f; - } else { - // x >= 89 or nan - if (xbits.uintval() >= 0x42b2'0000) { - if (xbits.uintval() < 0x7f80'0000U) { - int rounding = fputil::quick_get_round(); - if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) - return FPBits::max_normal().get_val(); - - fputil::set_errno_if_required(ERANGE); - fputil::raise_except_if_required(FE_OVERFLOW); - } - return x + FPBits::inf().get_val(); - } - } - } - - // |x| < 2^-4 - if (x_abs < 0x3d80'0000U) { - // |x| < 2^-25 - if (x_abs < 0x3300'0000U) { - // x = -0.0f - if (LIBC_UNLIKELY(xbits.uintval() == 0x8000'0000U)) - return x; - // When |x| < 2^-25, the relative error of the approximation e^x - 1 ~ x - // is: - // |(e^x - 1) - x| / |e^x - 1| < |x^2| / |x| - // = |x| - // < 2^-25 - // < epsilon(1)/2. - // So the correctly rounded values of expm1(x) are: - // = x + eps(x) if rounding mode = FE_UPWARD, - // or (rounding mode = FE_TOWARDZERO and x is - // negative), - // = x otherwise. - // To simplify the rounding decision and make it more efficient, we use - // fma(x, x, x) ~ x + x^2 instead. - // Note: to use the formula x + x^2 to decide the correct rounding, we - // do need fma(x, x, x) to prevent underflow caused by x*x when |x| < - // 2^-76. For targets without FMA instructions, we simply use double for - // intermediate results as it is more efficient than using an emulated - // version of FMA. -#if defined(LIBC_TARGET_CPU_HAS_FMA_FLOAT) - return fputil::multiply_add(x, x, x); -#else - double xd = x; - return static_cast(fputil::multiply_add(xd, xd, xd)); -#endif // LIBC_TARGET_CPU_HAS_FMA_FLOAT - } - - constexpr double COEFFS[] = {0x1p-1, - 0x1.55555555557ddp-3, - 0x1.55555555552fap-5, - 0x1.111110fcd58b7p-7, - 0x1.6c16c1717660bp-10, - 0x1.a0241f0006d62p-13, - 0x1.a01e3f8d3c06p-16}; - - // 2^-25 <= |x| < 2^-4 - double xd = static_cast(x); - double xsq = xd * xd; - // Degree-8 minimax polynomial generated by Sollya with: - // > display = hexadecimal; - // > P = fpminimax((expm1(x) - x)/x^2, 6, [|D...|], [-2^-4, 2^-4]); - - double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]); - double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]); - double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); - - double r = fputil::polyeval(xsq, c0, c1, c2, COEFFS[6]); - return static_cast(fputil::multiply_add(r, xsq, xd)); - } - - // For -18 < x < 89, to compute expm1(x), we perform the following range - // reduction: find hi, mid, lo such that: - // x = hi + mid + lo, in which - // hi is an integer, - // mid * 2^7 is an integer - // -2^(-8) <= lo < 2^-8. - // In particular, - // hi + mid = round(x * 2^7) * 2^(-7). - // Then, - // expm1(x) = exp(hi + mid + lo) - 1 = exp(hi) * exp(mid) * exp(lo) - 1. - // We store exp(hi) and exp(mid) in the lookup tables EXP_M1 and EXP_M2 - // respectively. exp(lo) is computed using a degree-4 minimax polynomial - // generated by Sollya. - - // x_hi = hi + mid. - float kf = fputil::nearest_integer(x * 0x1.0p7f); - int x_hi = static_cast(kf); - // Subtract (hi + mid) from x to get lo. - double xd = static_cast(fputil::multiply_add(kf, -0x1.0p-7f, x)); - x_hi += 104 << 7; - // hi = x_hi >> 7 - double exp_hi = EXP_M1[x_hi >> 7]; - // lo = x_hi & 0x0000'007fU; - double exp_mid = EXP_M2[x_hi & 0x7f]; - double exp_hi_mid = exp_hi * exp_mid; - // Degree-4 minimax polynomial generated by Sollya with the following - // commands: - // > display = hexadecimal; - // > Q = fpminimax(expm1(x)/x, 3, [|D...|], [-2^-8, 2^-8]); - // > Q; - double exp_lo = - fputil::polyeval(xd, 0x1.0p0, 0x1.ffffffffff777p-1, 0x1.000000000071cp-1, - 0x1.555566668e5e7p-3, 0x1.55555555ef243p-5); - return static_cast(fputil::multiply_add(exp_hi_mid, exp_lo, -1.0)); -} +LLVM_LIBC_FUNCTION(float, expm1f, (float x)) { return math::expm1f(x); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/test/shared/CMakeLists.txt b/libc/test/shared/CMakeLists.txt index 7d438223db027..64d000d6fc445 100644 --- a/libc/test/shared/CMakeLists.txt +++ b/libc/test/shared/CMakeLists.txt @@ -46,6 +46,7 @@ add_fp_unittest( libc.src.__support.math.exp2m1f libc.src.__support.math.exp2m1f16 libc.src.__support.math.expm1 + libc.src.__support.math.expm1f libc.src.__support.math.exp10 libc.src.__support.math.exp10f libc.src.__support.math.exp10f16 diff --git a/libc/test/shared/shared_math_test.cpp b/libc/test/shared/shared_math_test.cpp index 40a1c0c7632c6..bbe1fde87592b 100644 --- a/libc/test/shared/shared_math_test.cpp +++ b/libc/test/shared/shared_math_test.cpp @@ -65,6 +65,7 @@ TEST(LlvmLibcSharedMathTest, AllFloat) { EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::exp2m1f(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::expf(0.0f)); EXPECT_FP_EQ(0x1p+0f, LIBC_NAMESPACE::shared::exp2f(0.0f)); + EXPECT_FP_EQ(0x0p+0f, LIBC_NAMESPACE::shared::expm1f(0.0f)); EXPECT_FP_EQ_ALL_ROUNDING(0.75f, LIBC_NAMESPACE::shared::frexpf(24.0f, &exponent)); diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 18f8e966cbfad..7c563ff109fb8 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -3038,6 +3038,21 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_expm1f", + hdrs = ["src/__support/math/expm1f.h"], + deps = [ + ":__support_fputil_fma", + ":__support_fputil_multiply_add", + ":__support_fputil_nearest_integer", + ":__support_fputil_polyeval", + ":__support_fputil_rounding_mode", + ":__support_macros_optimization", + ":__support_macros_properties_cpu_features", + ":__support_math_common_constants", + ], +) + libc_support_library( name = "__support_range_reduction_double", hdrs = [ @@ -3777,14 +3792,7 @@ libc_math_function( libc_math_function( name = "expm1f", additional_deps = [ - ":__support_fputil_fma", - ":__support_fputil_multiply_add", - ":__support_fputil_nearest_integer", - ":__support_fputil_polyeval", - ":__support_fputil_rounding_mode", - ":__support_macros_optimization", - ":__support_macros_properties_cpu_features", - ":__support_math_common_constants", + ":__support_math_expm1f", ], )