-
Notifications
You must be signed in to change notification settings - Fork 10.8k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[libc][math] Improve atanf performance. #85463
Conversation
@llvm/pr-subscribers-libc Author: None (lntue) ChangesSimplify the range reduction logic and computations. Performance test using Before:
After:
Full diff: https://github.com/llvm/llvm-project/pull/85463.diff 6 Files Affected:
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 87f53105a1b317..a8f3f6be36022a 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -2057,16 +2057,9 @@ add_object_library(
SRCS
inv_trigf_utils.cpp
DEPENDS
- .math_utils
- libc.src.__support.FPUtil.fp_bits
- libc.src.__support.FPUtil.fenv_impl
- libc.src.__support.FPUtil.nearest_integer
- libc.src.__support.FPUtil.nearest_integer_operations
+ libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.common
- libc.include.errno
- libc.src.errno.errno
- libc.include.math
)
add_entrypoint_object(
@@ -2112,15 +2105,33 @@ add_entrypoint_object(
HDRS
../atanf.h
DEPENDS
- .inv_trigf_utils
- .math_utils
+ libc.src.__support.FPUtil.except_value_utils
libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.multiply_add
+ libc.src.__support.FPUtil.nearest_integer
+ libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.rounding_mode
libc.src.__support.macros.optimization
COMPILE_OPTIONS
-O3
)
+add_entrypoint_object(
+ atan2f
+ SRCS
+ atan2f.cpp
+ HDRS
+ ../atan2f.h
+ COMPILE_OPTIONS
+ -O3
+ DEPENDS
+ .inv_trigf_utils
+ .math_utils
+ libc.src.__support.FPUtil.fp_bits
+ libc.src.__support.FPUtil.rounding_mode
+ libc.src.__support.macros.optimization
+)
+
add_entrypoint_object(
scalbn
SRCS
diff --git a/libc/src/math/generic/atanf.cpp b/libc/src/math/generic/atanf.cpp
index e0f8a1bfc2ecc9..2ed5135c669348 100644
--- a/libc/src/math/generic/atanf.cpp
+++ b/libc/src/math/generic/atanf.cpp
@@ -7,11 +7,14 @@
//===----------------------------------------------------------------------===//
#include "src/math/atanf.h"
-#include "math_utils.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/PolyEval.h"
+#include "src/__support/FPUtil/except_value_utils.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
#include "src/__support/FPUtil/rounding_mode.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
-#include "src/math/generic/inv_trigf_utils.h"
+#include "inv_trigf_utils.h"
namespace LIBC_NAMESPACE {
@@ -19,48 +22,95 @@ LLVM_LIBC_FUNCTION(float, atanf, (float x)) {
using FPBits = typename fputil::FPBits<float>;
using Sign = fputil::Sign;
- // x == 0.0
- if (LIBC_UNLIKELY(x == 0.0f))
- return x;
+ constexpr double FINAL_SIGN[2] = {1.0, -1.0};
+ constexpr double SIGNED_PI_OVER_2[2] = {0x1.921fb54442d18p0,
+ -0x1.921fb54442d18p0};
- FPBits xbits(x);
- Sign sign = xbits.sign();
- xbits.set_sign(Sign::POS);
+ FPBits x_bits(x);
+ Sign sign = x_bits.sign();
+ x_bits.set_sign(Sign::POS);
+ uint32_t x_abs = x_bits.uintval();
- if (LIBC_UNLIKELY(xbits.is_inf_or_nan())) {
- if (xbits.is_inf())
- return static_cast<float>(
- opt_barrier(sign.is_neg() ? -M_MATH_PI_2 : M_MATH_PI_2));
- else
+ // x is inf or nan, |x| < 2^-4 or |x|= > 16.
+ if (LIBC_UNLIKELY(x_abs <= 0x3d80'0000U || x_abs >= 0x4180'0000U)) {
+ double x_d = static_cast<double>(x);
+ double const_term = 0.0;
+ if (LIBC_UNLIKELY(x_abs >= 0x4180'0000)) {
+ // atan(+-Inf) = +-pi/2.
+ if (x_bits.is_inf())
+ return static_cast<float>(SIGNED_PI_OVER_2[sign.is_neg()]);
+ if (x_bits.is_nan())
+ return x;
+ // x >= 16
+ x_d = -1.0 / x_d;
+ const_term = SIGNED_PI_OVER_2[sign.is_neg()];
+ }
+ // 0 <= x < 1/16;
+ if (LIBC_UNLIKELY(x_bits.is_zero()))
return x;
- }
- // |x| == 0.06905200332403183
- if (LIBC_UNLIKELY(xbits.uintval() == 0x3d8d6b23U)) {
- if (fputil::fenv_is_round_to_nearest()) {
- // 0.06894256919622421
- FPBits br(0x3d8d31c3U);
- br.set_sign(sign);
- return br.get_val();
+ // x <= 2^-12;
+ if (LIBC_UNLIKELY(x_abs < 0x3980'0000)) {
+#if defined(LIBC_TARGET_CPU_HAS_FMA)
+ return fputil::multiply_add(x, -0x1.0p-25f, x);
+#else
+ double x_d = static_cast<double>(x);
+ return static_cast<float>(fputil::multiply_add(x_d, -0x1.0p-25, x_d));
+#endif // LIBC_TARGET_CPU_HAS_FMA
}
+ // Use Taylor polynomial:
+ // atan(x) ~ x * (1 - x^2 / 3 + x^4 / 5 - x^6 / 7 + x^8 / 9 - x^10 / 11).
+ double x2 = x_d * x_d;
+ double x4 = x2 * x2;
+ double c0 = fputil::multiply_add(x2, ATAN_COEFFS[0][1], ATAN_COEFFS[0][0]);
+ double c1 = fputil::multiply_add(x2, ATAN_COEFFS[0][3], ATAN_COEFFS[0][2]);
+ double c2 = fputil::multiply_add(x2, ATAN_COEFFS[0][5], ATAN_COEFFS[0][4]);
+ double p = fputil::polyeval(x4, c0, c1, c2);
+ double r = fputil::multiply_add(x_d, p, const_term);
+ return static_cast<float>(r);
}
- // |x| == 1.8670953512191772
- if (LIBC_UNLIKELY(xbits.uintval() == 0x3feefcfbU)) {
- int rounding_mode = fputil::quick_get_round();
- if (sign.is_neg()) {
- if (rounding_mode == FE_DOWNWARD) {
- // -1.0790828466415405
- return FPBits(0xbf8a1f63U).get_val();
- }
- } else {
- if (rounding_mode == FE_UPWARD) {
- // 1.0790828466415405
- return FPBits(0x3f8a1f63U).get_val();
- }
+ // Range reduction steps:
+ // 1) atan(x) = sign(x) * atan(|x|)
+ // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+ // 3) For 1/16 < x <= 1, we find k such that: |x - k/16| <= 1/32.
+ // 4) Then we use polynomial approximation:
+ // atan(x) ~ atan((k/16) + (x - (k/16)) * Q(x - k/16)
+ // = P(x - k/16)
+ double x_d, const_term, final_sign;
+ int idx;
+
+ if (x_abs > 0x3f80'0000U) {
+ // |x| > 1, we need to invert x, so we will perform range reduction in
+ // double precision.
+ x_d = 1.0 / static_cast<double>(x_bits.get_val());
+ double k_d = fputil::nearest_integer(x_d * 0x1.0p4);
+ x_d = fputil::multiply_add(k_d, -0x1.0p-4, x_d);
+ idx = static_cast<int>(k_d);
+ final_sign = FINAL_SIGN[sign.is_pos()];
+ // Adjust constant term of the polynomial by +- pi/2.
+ const_term = fputil::multiply_add(final_sign, ATAN_COEFFS[idx][0],
+ SIGNED_PI_OVER_2[sign.is_neg()]);
+ } else {
+ // Exceptional value:
+ if (LIBC_UNLIKELY(x_abs == 0x3dbb'6ac7U)) {
+ return sign.is_pos()
+ ? fputil::round_result_slightly_up(0x1.75cb06p-4f)
+ : fputil::round_result_slightly_down(-0x1.75cb06p-4f);
}
+ // Perform range reduction in single precision.
+ float x_f = x_bits.get_val();
+ float k_f = fputil::nearest_integer(x_f * 0x1.0p4f);
+ x_f = fputil::multiply_add(k_f, -0x1.0p-4f, x_f);
+ x_d = static_cast<double>(x_f);
+ idx = static_cast<int>(k_f);
+ final_sign = FINAL_SIGN[sign.is_neg()];
+ const_term = final_sign * ATAN_COEFFS[idx][0];
}
- return static_cast<float>(atan_eval(x));
+ double p = atan_eval(x_d, idx);
+ double r = fputil::multiply_add(final_sign * x_d, p, const_term);
+
+ return static_cast<float>(r);
}
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/inv_trigf_utils.cpp b/libc/src/math/generic/inv_trigf_utils.cpp
index 8013c0470affb1..93d5bcbf7b567d 100644
--- a/libc/src/math/generic/inv_trigf_utils.cpp
+++ b/libc/src/math/generic/inv_trigf_utils.cpp
@@ -10,19 +10,71 @@
namespace LIBC_NAMESPACE {
-// N[Table[ArcTan[x], {x, 1/16, 16/16, 1/16}], 40]
-alignas(64) const double ATAN_T[ATAN_T_SIZE] = {
- 0x1.ff55bb72cfdeap-5, 0x1.fd5ba9aac2f6ep-4, 0x1.7b97b4bce5b02p-3,
- 0x1.f5b75f92c80ddp-3, 0x1.362773707ebccp-2, 0x1.6f61941e4def1p-2,
- 0x1.a64eec3cc23fdp-2, 0x1.dac670561bb4fp-2, 0x1.0657e94db30d0p-1,
- 0x1.1e00babdefeb4p-1, 0x1.345f01cce37bbp-1, 0x1.4978fa3269ee1p-1,
- 0x1.5d58987169b18p-1, 0x1.700a7c5784634p-1, 0x1.819d0b7158a4dp-1,
- 0x1.921fb54442d18p-1};
-
-// for(int i = 0; i < 5; i++)
-// printf("%.13a,\n", (-2 * (i % 2) + 1) * 1.0 / (2 * i + 1));
-alignas(64) const double ATAN_K[5] = {
- 0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
- -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4};
+// Polynomial approximation for 0 <= x <= 1:
+// atan(x) ~ atan((i/16) + (x - (i/16)) * Q(x - i/16)
+// = P(x - i/16)
+// Generated by Sollya with:
+// > for i from 1 to 16 do {
+// mid_point = i/16;
+// P = fpminimax(atan(mid_point + x), 7, [|D...|], [-1/32, 1/32]);
+// print("{", coeff(P, 0), ",", coeff(P, 1), ",", coeff(P, 2), ",",
+// coeff(P, 3), ",", coeff(P, 4), ",", coeff(P, 5), ",", coeff(P, 6),
+// ",", coeff(P, 7), "},");
+// };
+// For i = 0, ATAN_COEFFS[0][j] = (-1)^j * (1/(2*j + 1)) is the odd coefficients
+// of the Taylor polynomial of atan(x).
+double ATAN_COEFFS[17][8] = {
+ {0x1.0000000000000p+0, -0x1.5555555555555p-2, 0x1.999999999999ap-3,
+ -0x1.2492492492492p-3, 0x1.c71c71c71c71cp-4, -0x1.745d1745d1746p-4,
+ 0x1.3b13b13b13b14p-4, -0x1.1111111111111p-4},
+ {0x1.ff55bb72cfdb1p-5, 0x1.fe01fe01fe1bp-1, -0x1.fc05f80821d1ap-5,
+ -0x1.4d6930419fc5fp-2, 0x1.f61b9f6d69313p-5, 0x1.8208a32f4346cp-3,
+ -0x1.ecb8fc53d04efp-5, -0x1.060710cb59cbcp-3},
+ {0x1.fd5ba9aac2f3cp-4, 0x1.f81f81f81f96ap-1, -0x1.f05e09cf4c1b2p-4,
+ -0x1.368c3aac7543ep-2, 0x1.d9b14bddfac55p-4, 0x1.4048e55ec725ep-3,
+ -0x1.b98ca3c1594b5p-4, -0x1.664eabaabbc16p-4},
+ {0x1.7b97b4bce5ae7p-3, 0x1.ee9c7f8458f06p-1, -0x1.665c226c8dc69p-3,
+ -0x1.1344bb77961b7p-2, 0x1.42ac97745d3ccp-3, 0x1.c32e142047ec1p-4,
+ -0x1.137ae41ab96cbp-3, -0x1.1a6ae8c09a4b6p-5},
+ {0x1.f5b75f92c80c6p-3, 0x1.e1e1e1e1e1ed4p-1, -0x1.c5894d101ad4p-3,
+ -0x1.ce6de02b38c38p-3, 0x1.78a3920c336b9p-3, 0x1.dd5ff94a9d499p-5,
+ -0x1.1ac2d3f9d072ep-3, 0x1.0af9735dff373p-6},
+ {0x1.362773707ebc5p-2, 0x1.d272ca3fc5b8bp-1, -0x1.0997e8ae90cb6p-2,
+ -0x1.6cf6667146798p-3, 0x1.8dd1dff17f3d3p-3, 0x1.24860eced656fp-7,
+ -0x1.f4220e8f18ed5p-4, 0x1.b700aed7cdc34p-5},
+ {0x1.6f61941e4deeep-2, 0x1.c0e070381c115p-1, -0x1.2726dd1347c7ep-2,
+ -0x1.09f37b3ad010dp-3, 0x1.85eaca5196f5cp-3, -0x1.04d640117852ap-5,
+ -0x1.802c2956871c7p-4, 0x1.2992b45df0ee7p-4},
+ {0x1.a64eec3cc23fep-2, 0x1.adbe87f94906bp-1, -0x1.3b9d8eab5eae5p-2,
+ -0x1.57c09646faabbp-4, 0x1.6795330e73aep-3, -0x1.f2d89a702a652p-5,
+ -0x1.f3afd90a9d4d7p-5, 0x1.3261723d3f153p-4},
+ {0x1.dac670561bb53p-2, 0x1.999999999998fp-1, -0x1.47ae147afd8cap-2,
+ -0x1.5d867c3dfd72ap-5, 0x1.3a92a76cba833p-3, -0x1.3ec460286928ap-4,
+ -0x1.ed02ff86892acp-6, 0x1.0a674c8f05727p-4},
+ {0x1.0657e94db30d2p-1, 0x1.84f00c27805ffp-1, -0x1.4c62cb564f677p-2,
+ -0x1.e6495b262dfe7p-8, 0x1.063c34eca262bp-3, -0x1.58b78dc79b5aep-4,
+ -0x1.4623815233be1p-8, 0x1.93afe94328089p-5},
+ {0x1.1e00babdefeb6p-1, 0x1.702e05c0b8159p-1, -0x1.4af2b78236bd6p-2,
+ 0x1.5d0b7ea46ed08p-6, 0x1.a124870236935p-4, -0x1.519e1ec133a88p-4,
+ 0x1.a54632a3f48c7p-7, 0x1.099ca0945096dp-5},
+ {0x1.345f01cce37bdp-1, 0x1.5babcc647fa7ep-1, -0x1.449db09443a67p-2,
+ 0x1.655caac78a0fcp-5, 0x1.3bbbdb0d09efap-4, -0x1.34a306c27e021p-4,
+ 0x1.83fe749c7966p-6, 0x1.2057cc96d9edcp-6},
+ {0x1.4978fa3269ee2p-1, 0x1.47ae147ae146bp-1, -0x1.3a92a305652e1p-2,
+ 0x1.ec21b5172657fp-5, 0x1.c2f8c45d2f4eep-5, -0x1.0ba99c4aeb8acp-4,
+ 0x1.d716a4af4d1d6p-6, 0x1.97fba0a9696dep-8},
+ {0x1.5d58987169b19p-1, 0x1.34679ace0133cp-1, -0x1.2ddfb03920e2fp-2,
+ 0x1.2491307c0fa0bp-4, 0x1.29c7eca0136fp-5, -0x1.bca792caa6f1cp-5,
+ 0x1.e5d92545576bcp-6, -0x1.8ca76fcf5ccd2p-10},
+ {0x1.700a7c5784634p-1, 0x1.21fb78121fb71p-1, -0x1.1f6a8499ea541p-2,
+ 0x1.41b15e5e77bcfp-4, 0x1.59bc9bf54fb02p-6, -0x1.63b54ff058e0fp-5,
+ 0x1.c8da01221306fp-6, -0x1.906b2c274c39cp-8},
+ {0x1.819d0b7158a4dp-1, 0x1.107fbbe01107cp-1, -0x1.0feeb40897d4ep-2,
+ 0x1.50e5afb95f5d6p-4, 0x1.2a7c2f0c7495dp-7, -0x1.12bd2bb5062cdp-5,
+ 0x1.93e8ceb89afebp-6, -0x1.10da9b8c6b731p-7},
+ {0x1.921fb54442d18p-1, 0x1.fffffffffffebp-2, -0x1.fffffffffcbbcp-3,
+ 0x1.555555564e2fep-4, -0x1.20b17d5dd89dcp-30, -0x1.9999c5ad71711p-6,
+ 0x1.5558b76e7aaf9p-6, -0x1.236e803c6c1f6p-7},
+};
} // namespace LIBC_NAMESPACE
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index 20f912de2ac008..308bdc44e3f8ac 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -9,85 +9,32 @@
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
#define LLVM_LIBC_SRC_MATH_GENERIC_INV_TRIGF_UTILS_H
-#include "math_utils.h"
-#include "src/__support/FPUtil/FEnvImpl.h"
-#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/PolyEval.h"
-#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/FPUtil/multiply_add.h"
#include "src/__support/common.h"
-#include <errno.h>
-
namespace LIBC_NAMESPACE {
// PI and PI / 2
constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
-// atan table size
-constexpr int ATAN_T_BITS = 4;
-constexpr int ATAN_T_SIZE = 1 << ATAN_T_BITS;
-
-// N[Table[ArcTan[x], {x, 1/8, 8/8, 1/8}], 40]
-extern const double ATAN_T[ATAN_T_SIZE];
-extern const double ATAN_K[5];
-
-// The main idea of the function is to use formula
-// atan(u) + atan(v) = atan((u+v)/(1-uv))
-
-// x should be positive, normal finite value
-LIBC_INLINE double atan_eval(double x) {
- using FPB = fputil::FPBits<double>;
- using Sign = fputil::Sign;
- // Added some small value to umin and umax mantissa to avoid possible rounding
- // errors.
- FPB::StorageType umin =
- FPB::create_value(Sign::POS, FPB::EXP_BIAS - ATAN_T_BITS - 1,
- 0x100000000000UL)
- .uintval();
- FPB::StorageType umax =
- FPB::create_value(Sign::POS, FPB::EXP_BIAS + ATAN_T_BITS,
- 0xF000000000000UL)
- .uintval();
-
- FPB bs(x);
- auto x_abs = bs.abs().uintval();
-
- if (x_abs <= umin) {
- double pe = LIBC_NAMESPACE::fputil::polyeval(
- x * x, 0.0, ATAN_K[1], ATAN_K[2], ATAN_K[3], ATAN_K[4]);
- return fputil::multiply_add(pe, x, x);
- }
-
- if (x_abs >= umax) {
- double one_over_x_m = -1.0 / x;
- double one_over_x2 = one_over_x_m * one_over_x_m;
- double pe = LIBC_NAMESPACE::fputil::polyeval(
- one_over_x2, ATAN_K[0], ATAN_K[1], ATAN_K[2], ATAN_K[3]);
- return fputil::multiply_add(pe, one_over_x_m,
- bs.is_neg() ? (-M_MATH_PI_2) : (M_MATH_PI_2));
- }
+extern double ATAN_COEFFS[17][8];
- double pos_x = FPB(x_abs).get_val();
- bool one_over_x = pos_x > 1.0;
- if (one_over_x) {
- pos_x = 1.0 / pos_x;
- }
+// For |x| <= 1/32 and 1 <= i <= 16, return Q(x) such that:
+// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
+LIBC_INLINE constexpr double atan_eval(double x, int i) {
+ double x2 = x * x;
- double near_x = fputil::nearest_integer(pos_x * ATAN_T_SIZE);
- int val = static_cast<int>(near_x);
- near_x *= 1.0 / ATAN_T_SIZE;
+ double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
+ double c1 = fputil::multiply_add(x, ATAN_COEFFS[i][4], ATAN_COEFFS[i][3]);
+ double c2 = fputil::multiply_add(x, ATAN_COEFFS[i][6], ATAN_COEFFS[i][5]);
- double v = (pos_x - near_x) / fputil::multiply_add(near_x, pos_x, 1.0);
- double v2 = v * v;
- double pe = LIBC_NAMESPACE::fputil::polyeval(v2, ATAN_K[0], ATAN_K[1],
- ATAN_K[2], ATAN_K[3], ATAN_K[4]);
- double result;
- if (one_over_x)
- result = M_MATH_PI_2 - fputil::multiply_add(pe, v, ATAN_T[val - 1]);
- else
- result = fputil::multiply_add(pe, v, ATAN_T[val - 1]);
- return bs.is_neg() ? -result : result;
+ double x4 = x2 * x2;
+ double d1 = fputil::multiply_add(x2, c1, c0);
+ double d2 = fputil::multiply_add(x2, ATAN_COEFFS[i][7], c2);
+ double p = fputil::multiply_add(x4, d2, d1);
+ return p;
}
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
@@ -99,7 +46,7 @@ constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
-0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE constexpr double asin_eval(double xsq) {
double x4 = xsq * xsq;
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
diff --git a/libc/test/src/math/exhaustive/atanf_test.cpp b/libc/test/src/math/exhaustive/atanf_test.cpp
index 508c288b050c55..6f23231d426741 100644
--- a/libc/test/src/math/exhaustive/atanf_test.cpp
+++ b/libc/test/src/math/exhaustive/atanf_test.cpp
@@ -25,7 +25,7 @@ TEST_F(LlvmLibcAtanfExhaustiveTest, PostiveRange) {
}
// Range: [-Inf, 0];
-static constexpr uint32_t NEG_START = 0xb000'0000U;
+static constexpr uint32_t NEG_START = 0x8000'0000U;
static constexpr uint32_t NEG_STOP = 0xff80'0000U;
TEST_F(LlvmLibcAtanfExhaustiveTest, NegativeRange) {
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index 073353a89c8907..fcc78c6f21544d 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -1234,13 +1234,9 @@ libc_support_library(
hdrs = ["src/math/generic/inv_trigf_utils.h"],
deps = [
":__support_common",
- ":__support_fputil_fenv_impl",
":__support_fputil_fma",
- ":__support_fputil_fp_bits",
":__support_fputil_multiply_add",
- ":__support_fputil_nearest_integer",
":__support_fputil_polyeval",
- ":math_utils",
],
)
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
I get an error with the CORE-MATH exhaustive tests:
Can you confirm? |
Thanks Paul! It is indeed the exceptional value that I missed. I've updated the patch accordingly. @zimmermann6 |
with the added exceptional case, all tests do pass with the CORE-MATH test suite. And the performance is almost the same as that of CORE-MATH. Good work! |
libc/test/src/math/atanf_test.cpp
Outdated
0xbfeefcfbU, 0x7F800000U, 0xFF800000U}; | ||
uint32_t val_arr[] = { | ||
0x3d8d6b23U, 0x3feefcfbU, 0xbd8d6b23U, 0xbfeefcfbU, | ||
0x7F800000U, 0xFF800000U, 0xbffe2ec1U /*-0x1.fc5d82p+0*/}; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What's up with this commented out value? Did you mean to include it in the array, or is it meant to mean something else?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The array elements are uint32_t
bit patterns, and I added the corresponding float value to the comment. Probably I should add the corresponding float values for the rest of them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
Simplify the range reduction logic and computations. Performance test using `perf.sh` from CORE-MATH project on Ryzen 5900X: Before: ``` $ ./perf.sh atanf --rdtsc --path1 LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 14.369 + 0.556 clc/call; Median-Min = 0.613 clc/call; Max = 15.061 clc/call; -- System LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 26.180 + 0.015 clc/call; Median-Min = 0.014 clc/call; Max = 26.260 clc/call; -- LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 21.237 + 0.022 clc/call; Median-Min = 0.020 clc/call; Max = 21.272 clc/call; $ ./perf.sh atanf --rdtsc --path1 --latency LIBC-location: /home/lnt/experiment/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- [####################] 100 % Ntrial = 20 ; Min = 50.505 + 0.045 clc/call; Median-Min = 0.037 clc/call; Max = 50.579 clc/call; -- System LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 62.438 + 0.836 clc/call; Median-Min = 0.049 clc/call; Max = 64.498 clc/call; -- LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 67.781 + 0.546 clc/call; Median-Min = 0.028 clc/call; Max = 68.844 clc/call; ``` After: ``` $ ./perf.sh atanf --rdtsc --path2 LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 14.400 + 0.353 clc/call; Median-Min = 0.404 clc/call; Max = 14.863 clc/call; -- System LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 25.247 + 0.783 clc/call; Median-Min = 0.714 clc/call; Max = 26.238 clc/call; -- LIBC reciprocal throughput -- [####################] 100 % Ntrial = 20 ; Min = 13.751 + 0.158 clc/call; Median-Min = 0.140 clc/call; Max = 14.006 clc/call; $ ./perf.sh atanf --rdtsc --path2 --latency LIBC-location: /home/lnt/experiment/llvm/llvm-project/build/projects/libc/lib/libllvmlibc.a GNU libc version: 2.35 GNU libc release: stable -- CORE-MATH latency -- [####################] 100 % Ntrial = 20 ; Min = 51.837 + 0.073 clc/call; Median-Min = 0.058 clc/call; Max = 52.000 clc/call; -- System LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 62.487 + 1.878 clc/call; Median-Min = 1.965 clc/call; Max = 64.569 clc/call; OK -- LIBC latency -- [####################] 100 % Ntrial = 20 ; Min = 55.414 + 1.312 clc/call; Median-Min = 0.345 clc/call; Max = 58.362 clc/call; ```
Simplify the range reduction logic and computations. Performance test using
perf.sh
from CORE-MATH project on Ryzen 5900X:Before:
After: