From 49d44084418255ce78ac0aa6f890f9cc436a920d Mon Sep 17 00:00:00 2001
From: Tue Ly <lntue.h@gmail.com>
Date: Tue, 3 Dec 2024 19:14:23 +0000
Subject: [PATCH] [libc][math] Add small code size options for atan2f.

---
 libc/src/__support/macros/optimization.h |  4 +-
 libc/src/math/generic/atan2f.cpp         | 21 ++++++--
 libc/src/math/generic/inv_trigf_utils.h  | 66 ++++++++++++++++++++----
 3 files changed, 76 insertions(+), 15 deletions(-)
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index 41ecd2bd6d719..a2634950d431b 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
 
 #ifndef LIBC_MATH
 #define LIBC_MATH 0
-#else
+#endif // LIBC_MATH
 
 #if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
 #define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
 #define LIBC_MATH_HAS_SMALL_TABLES
 #endif
 
-#endif // LIBC_MATH
-
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index a2e5499809a34..db7639396cdd7 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace {
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 // Look up tables for accurate pass:
 
 // atan(i/16) with i = 0..16, generated by Sollya with:
@@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
   return static_cast<float>(cpp::bit_cast<double>(rr_bits));
 }
 
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 } // anonymous namespace
 
 // There are several range reduction steps we can take for atan2(y, x) as
@@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
   fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
   double q_d = num_d / den_d;
 
-  double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
+  double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
   int idx = static_cast<int>(k_d);
+  double r;
+
+#ifdef LIBC_MATH_HAS_SMALL_TABLES
+  double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
+  r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
+#else
   q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);
 
   double p = atan_eval(q_d, idx);
-  double r = final_sign *
-             fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+  r = final_sign *
+      fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+#endif // LIBC_MATH_HAS_SMALL_TABLES
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return static_cast<float>(r);
+#else
   constexpr uint32_t LOWER_ERR = 4;
   // Mask sticky bits in double precision before rounding to single precision.
   constexpr uint32_t MASK =
@@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
 
   return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
                               const_term);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index bdabec9c1b6bb..8b47aba342995 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -17,14 +17,35 @@
 namespace LIBC_NAMESPACE_DECL {
 
 // PI and PI / 2
-constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
-constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
+static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
+static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
 
 extern double ATAN_COEFFS[17][9];
 
+// Look-up table for atan(k/16) with k = 0..16.
+static constexpr double ATAN_K_OVER_16[17] = {
+    0.0,
+    0x1.ff55bb72cfdeap-5,
+    0x1.fd5ba9aac2f6ep-4,
+    0x1.7b97b4bce5b02p-3,
+    0x1.f5b75f92c80ddp-3,
+    0x1.362773707ebccp-2,
+    0x1.6f61941e4def1p-2,
+    0x1.a64eec3cc23fdp-2,
+    0x1.dac670561bb4fp-2,
+    0x1.0657e94db30dp-1,
+    0x1.1e00babdefeb4p-1,
+    0x1.345f01cce37bbp-1,
+    0x1.4978fa3269ee1p-1,
+    0x1.5d58987169b18p-1,
+    0x1.700a7c5784634p-1,
+    0x1.819d0b7158a4dp-1,
+    0x1.921fb54442d18p-1,
+};
+
 // For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
 //   Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
-LIBC_INLINE double atan_eval(double x, int i) {
+LIBC_INLINE static double atan_eval(double x, unsigned i) {
   double x2 = x * x;
 
   double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
@@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
   return p;
 }
 
+// Evaluate atan without big lookup table.
+//   atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
+//                          = atan((n - d * k/16)) / (d + n * k/16))
+// So we let q = (n - d * k/16) / (d + n * k/16),
+// and approximate with Taylor polynomial:
+//   atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
+LIBC_INLINE static double atan_eval_no_table(double num, double den,
+                                             double k_over_16) {
+  double num_r = fputil::multiply_add(den, -k_over_16, num);
+  double den_r = fputil::multiply_add(num, k_over_16, den);
+  double q = num_r / den_r;
+
+  constexpr double ATAN_TAYLOR[] = {
+      -0x1.5555555555555p-2,
+      0x1.999999999999ap-3,
+      -0x1.2492492492492p-3,
+      0x1.c71c71c71c71cp-4,
+  };
+  double q2 = q * q;
+  double q3 = q2 * q;
+  double q4 = q2 * q2;
+  double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+  double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+  double d = fputil::multiply_add(q4, c1, c0);
+  return fputil::multiply_add(q3, d, q);
+}
+
 // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
 //                 [|1, D...|], [0, 0.5]);
-constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
-                                    0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
-                                    0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
-                                    0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
-                                    -0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
+static constexpr double ASIN_COEFFS[10] = {
+    0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
+    0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
+    0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
+    0x1.02311ecf99c28p-5};
 
 // Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE static double asin_eval(double xsq) {
   double x4 = xsq * xsq;
   double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
                                ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);