From 49d44084418255ce78ac0aa6f890f9cc436a920d Mon Sep 17 00:00:00 2001 From: Tue Ly Date: Tue, 3 Dec 2024 19:14:23 +0000 Subject: [PATCH] [libc][math] Add small code size options for atan2f. --- libc/src/__support/macros/optimization.h | 4 +- libc/src/math/generic/atan2f.cpp | 21 ++++++-- libc/src/math/generic/inv_trigf_utils.h | 66 ++++++++++++++++++++---- 3 files changed, 76 insertions(+), 15 deletions(-) diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h index 41ecd2bd6d719..a2634950d431b 100644 --- a/libc/src/__support/macros/optimization.h +++ b/libc/src/__support/macros/optimization.h @@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) { #ifndef LIBC_MATH #define LIBC_MATH 0 -#else +#endif // LIBC_MATH #if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS) #define LIBC_MATH_HAS_SKIP_ACCURATE_PASS @@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) { #define LIBC_MATH_HAS_SMALL_TABLES #endif -#endif // LIBC_MATH - #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp index a2e5499809a34..db7639396cdd7 100644 --- a/libc/src/math/generic/atan2f.cpp +++ b/libc/src/math/generic/atan2f.cpp @@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL { namespace { +#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + // Look up tables for accurate pass: // atan(i/16) with i = 0..16, generated by Sollya with: @@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx, return static_cast(cpp::bit_cast(rr_bits)); } +#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS + } // anonymous namespace // There are several range reduction steps we can take for atan2(y, x) as @@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip]; double q_d = num_d / den_d; - double k_d = fputil::nearest_integer(q_d * 0x1.0p4f); + double k_d = fputil::nearest_integer(q_d * 0x1.0p4); int idx = static_cast(k_d); + double r; + +#ifdef LIBC_MATH_HAS_SMALL_TABLES + double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4); + r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx])); +#else q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d); double p = atan_eval(q_d, idx); - double r = final_sign * - fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]); + r = final_sign * + fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]); +#endif // LIBC_MATH_HAS_SMALL_TABLES +#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS + return static_cast(r); +#else constexpr uint32_t LOWER_ERR = 4; // Mask sticky bits in double precision before rounding to single precision. constexpr uint32_t MASK = @@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) { return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign, const_term); +#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h index bdabec9c1b6bb..8b47aba342995 100644 --- a/libc/src/math/generic/inv_trigf_utils.h +++ b/libc/src/math/generic/inv_trigf_utils.h @@ -17,14 +17,35 @@ namespace LIBC_NAMESPACE_DECL { // PI and PI / 2 -constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; -constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; +static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1; +static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0; extern double ATAN_COEFFS[17][9]; +// Look-up table for atan(k/16) with k = 0..16. +static constexpr double ATAN_K_OVER_16[17] = { + 0.0, + 0x1.ff55bb72cfdeap-5, + 0x1.fd5ba9aac2f6ep-4, + 0x1.7b97b4bce5b02p-3, + 0x1.f5b75f92c80ddp-3, + 0x1.362773707ebccp-2, + 0x1.6f61941e4def1p-2, + 0x1.a64eec3cc23fdp-2, + 0x1.dac670561bb4fp-2, + 0x1.0657e94db30dp-1, + 0x1.1e00babdefeb4p-1, + 0x1.345f01cce37bbp-1, + 0x1.4978fa3269ee1p-1, + 0x1.5d58987169b18p-1, + 0x1.700a7c5784634p-1, + 0x1.819d0b7158a4dp-1, + 0x1.921fb54442d18p-1, +}; + // For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that: // Q(x) ~ (atan(x + i/16) - atan(i/16)) / x. -LIBC_INLINE double atan_eval(double x, int i) { +LIBC_INLINE static double atan_eval(double x, unsigned i) { double x2 = x * x; double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]); @@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) { return p; } +// Evaluate atan without big lookup table. +// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16))) +// = atan((n - d * k/16)) / (d + n * k/16)) +// So we let q = (n - d * k/16) / (d + n * k/16), +// and approximate with Taylor polynomial: +// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9 +LIBC_INLINE static double atan_eval_no_table(double num, double den, + double k_over_16) { + double num_r = fputil::multiply_add(den, -k_over_16, num); + double den_r = fputil::multiply_add(num, k_over_16, den); + double q = num_r / den_r; + + constexpr double ATAN_TAYLOR[] = { + -0x1.5555555555555p-2, + 0x1.999999999999ap-3, + -0x1.2492492492492p-3, + 0x1.c71c71c71c71cp-4, + }; + double q2 = q * q; + double q3 = q2 * q; + double q4 = q2 * q2; + double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]); + double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]); + double d = fputil::multiply_add(q4, c1, c0); + return fputil::multiply_add(q3, d, q); +} + // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|], // [|1, D...|], [0, 0.5]); -constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, - 0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6, - 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6, - 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, - -0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5}; +static constexpr double ASIN_COEFFS[10] = { + 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5, + 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6, + 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8, + 0x1.02311ecf99c28p-5}; // Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x -LIBC_INLINE double asin_eval(double xsq) { +LIBC_INLINE static double asin_eval(double xsq) { double x4 = xsq * xsq; double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2], ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);