Skip to content

Conversation

lntue
Copy link
Contributor

@lntue lntue commented Dec 3, 2024

No description provided.

@llvmbot llvmbot added the libc label Dec 3, 2024
@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2024

@llvm/pr-subscribers-libc

Author: None (lntue)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/118532.diff

3 Files Affected:

  • (modified) libc/src/__support/macros/optimization.h (+1-3)
  • (modified) libc/src/math/generic/atan2f.cpp (+18-3)
  • (modified) libc/src/math/generic/inv_trigf_utils.h (+57-9)
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index 41ecd2bd6d7191..a2634950d431bc 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
 
 #ifndef LIBC_MATH
 #define LIBC_MATH 0
-#else
+#endif // LIBC_MATH
 
 #if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
 #define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
 #define LIBC_MATH_HAS_SMALL_TABLES
 #endif
 
-#endif // LIBC_MATH
-
 #endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index a2e5499809a340..db7639396cdd7a 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {
 
 namespace {
 
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 // Look up tables for accurate pass:
 
 // atan(i/16) with i = 0..16, generated by Sollya with:
@@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
   return static_cast<float>(cpp::bit_cast<double>(rr_bits));
 }
 
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 } // anonymous namespace
 
 // There are several range reduction steps we can take for atan2(y, x) as
@@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
   fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
   double q_d = num_d / den_d;
 
-  double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
+  double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
   int idx = static_cast<int>(k_d);
+  double r;
+
+#ifdef LIBC_MATH_HAS_SMALL_TABLES
+  double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
+  r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
+#else
   q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);
 
   double p = atan_eval(q_d, idx);
-  double r = final_sign *
-             fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+  r = final_sign *
+      fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+#endif // LIBC_MATH_HAS_SMALL_TABLES
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+  return static_cast<float>(r);
+#else
   constexpr uint32_t LOWER_ERR = 4;
   // Mask sticky bits in double precision before rounding to single precision.
   constexpr uint32_t MASK =
@@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
 
   return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
                               const_term);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
 }
 
 } // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index bdabec9c1b6bb9..8b47aba342995b 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -17,14 +17,35 @@
 namespace LIBC_NAMESPACE_DECL {
 
 // PI and PI / 2
-constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
-constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
+static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
+static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
 
 extern double ATAN_COEFFS[17][9];
 
+// Look-up table for atan(k/16) with k = 0..16.
+static constexpr double ATAN_K_OVER_16[17] = {
+    0.0,
+    0x1.ff55bb72cfdeap-5,
+    0x1.fd5ba9aac2f6ep-4,
+    0x1.7b97b4bce5b02p-3,
+    0x1.f5b75f92c80ddp-3,
+    0x1.362773707ebccp-2,
+    0x1.6f61941e4def1p-2,
+    0x1.a64eec3cc23fdp-2,
+    0x1.dac670561bb4fp-2,
+    0x1.0657e94db30dp-1,
+    0x1.1e00babdefeb4p-1,
+    0x1.345f01cce37bbp-1,
+    0x1.4978fa3269ee1p-1,
+    0x1.5d58987169b18p-1,
+    0x1.700a7c5784634p-1,
+    0x1.819d0b7158a4dp-1,
+    0x1.921fb54442d18p-1,
+};
+
 // For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
 //   Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
-LIBC_INLINE double atan_eval(double x, int i) {
+LIBC_INLINE static double atan_eval(double x, unsigned i) {
   double x2 = x * x;
 
   double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
@@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
   return p;
 }
 
+// Evaluate atan without big lookup table.
+//   atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
+//                          = atan((n - d * k/16)) / (d + n * k/16))
+// So we let q = (n - d * k/16) / (d + n * k/16),
+// and approximate with Taylor polynomial:
+//   atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
+LIBC_INLINE static double atan_eval_no_table(double num, double den,
+                                             double k_over_16) {
+  double num_r = fputil::multiply_add(den, -k_over_16, num);
+  double den_r = fputil::multiply_add(num, k_over_16, den);
+  double q = num_r / den_r;
+
+  constexpr double ATAN_TAYLOR[] = {
+      -0x1.5555555555555p-2,
+      0x1.999999999999ap-3,
+      -0x1.2492492492492p-3,
+      0x1.c71c71c71c71cp-4,
+  };
+  double q2 = q * q;
+  double q3 = q2 * q;
+  double q4 = q2 * q2;
+  double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+  double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+  double d = fputil::multiply_add(q4, c1, c0);
+  return fputil::multiply_add(q3, d, q);
+}
+
 // > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
 //                 [|1, D...|], [0, 0.5]);
-constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
-                                    0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
-                                    0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
-                                    0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
-                                    -0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
+static constexpr double ASIN_COEFFS[10] = {
+    0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
+    0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
+    0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
+    0x1.02311ecf99c28p-5};
 
 // Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE static double asin_eval(double xsq) {
   double x4 = xsq * xsq;
   double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
                                ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);

@lntue lntue merged commit a7da702 into llvm:main Dec 4, 2024
9 checks passed
@lntue lntue deleted the atan2f branch December 4, 2024 21:37
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants