-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[libc][math] Add small code size options for atan2f. #118532
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@llvm/pr-subscribers-libc Author: None (lntue) ChangesFull diff: https://github.com/llvm/llvm-project/pull/118532.diff 3 Files Affected:
diff --git a/libc/src/__support/macros/optimization.h b/libc/src/__support/macros/optimization.h
index 41ecd2bd6d7191..a2634950d431bc 100644
--- a/libc/src/__support/macros/optimization.h
+++ b/libc/src/__support/macros/optimization.h
@@ -48,7 +48,7 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#ifndef LIBC_MATH
#define LIBC_MATH 0
-#else
+#endif // LIBC_MATH
#if (LIBC_MATH & LIBC_MATH_SKIP_ACCURATE_PASS)
#define LIBC_MATH_HAS_SKIP_ACCURATE_PASS
@@ -58,6 +58,4 @@ LIBC_INLINE constexpr bool expects_bool_condition(T value, T expected) {
#define LIBC_MATH_HAS_SMALL_TABLES
#endif
-#endif // LIBC_MATH
-
#endif // LLVM_LIBC_SRC___SUPPORT_MACROS_OPTIMIZATION_H
diff --git a/libc/src/math/generic/atan2f.cpp b/libc/src/math/generic/atan2f.cpp
index a2e5499809a340..db7639396cdd7a 100644
--- a/libc/src/math/generic/atan2f.cpp
+++ b/libc/src/math/generic/atan2f.cpp
@@ -21,6 +21,8 @@ namespace LIBC_NAMESPACE_DECL {
namespace {
+#ifndef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
// Look up tables for accurate pass:
// atan(i/16) with i = 0..16, generated by Sollya with:
@@ -163,6 +165,8 @@ float atan2f_double_double(double num_d, double den_d, double q_d, int idx,
return static_cast<float>(cpp::bit_cast<double>(rr_bits));
}
+#endif // !LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
} // anonymous namespace
// There are several range reduction steps we can take for atan2(y, x) as
@@ -283,14 +287,24 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
fputil::DoubleDouble const_term = CONST_ADJ[x_sign][y_sign][recip];
double q_d = num_d / den_d;
- double k_d = fputil::nearest_integer(q_d * 0x1.0p4f);
+ double k_d = fputil::nearest_integer(q_d * 0x1.0p4);
int idx = static_cast<int>(k_d);
+ double r;
+
+#ifdef LIBC_MATH_HAS_SMALL_TABLES
+ double p = atan_eval_no_table(num_d, den_d, k_d * 0x1.0p-4);
+ r = final_sign * (p + (const_term.hi + ATAN_K_OVER_16[idx]));
+#else
q_d = fputil::multiply_add(k_d, -0x1.0p-4, q_d);
double p = atan_eval(q_d, idx);
- double r = final_sign *
- fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+ r = final_sign *
+ fputil::multiply_add(q_d, p, const_term.hi + ATAN_COEFFS[idx][0]);
+#endif // LIBC_MATH_HAS_SMALL_TABLES
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+ return static_cast<float>(r);
+#else
constexpr uint32_t LOWER_ERR = 4;
// Mask sticky bits in double precision before rounding to single precision.
constexpr uint32_t MASK =
@@ -306,6 +320,7 @@ LLVM_LIBC_FUNCTION(float, atan2f, (float y, float x)) {
return atan2f_double_double(num_d, den_d, q_d, idx, k_d, final_sign,
const_term);
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/libc/src/math/generic/inv_trigf_utils.h b/libc/src/math/generic/inv_trigf_utils.h
index bdabec9c1b6bb9..8b47aba342995b 100644
--- a/libc/src/math/generic/inv_trigf_utils.h
+++ b/libc/src/math/generic/inv_trigf_utils.h
@@ -17,14 +17,35 @@
namespace LIBC_NAMESPACE_DECL {
// PI and PI / 2
-constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
-constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
+static constexpr double M_MATH_PI = 0x1.921fb54442d18p+1;
+static constexpr double M_MATH_PI_2 = 0x1.921fb54442d18p+0;
extern double ATAN_COEFFS[17][9];
+// Look-up table for atan(k/16) with k = 0..16.
+static constexpr double ATAN_K_OVER_16[17] = {
+ 0.0,
+ 0x1.ff55bb72cfdeap-5,
+ 0x1.fd5ba9aac2f6ep-4,
+ 0x1.7b97b4bce5b02p-3,
+ 0x1.f5b75f92c80ddp-3,
+ 0x1.362773707ebccp-2,
+ 0x1.6f61941e4def1p-2,
+ 0x1.a64eec3cc23fdp-2,
+ 0x1.dac670561bb4fp-2,
+ 0x1.0657e94db30dp-1,
+ 0x1.1e00babdefeb4p-1,
+ 0x1.345f01cce37bbp-1,
+ 0x1.4978fa3269ee1p-1,
+ 0x1.5d58987169b18p-1,
+ 0x1.700a7c5784634p-1,
+ 0x1.819d0b7158a4dp-1,
+ 0x1.921fb54442d18p-1,
+};
+
// For |x| <= 1/32 and 0 <= i <= 16, return Q(x) such that:
// Q(x) ~ (atan(x + i/16) - atan(i/16)) / x.
-LIBC_INLINE double atan_eval(double x, int i) {
+LIBC_INLINE static double atan_eval(double x, unsigned i) {
double x2 = x * x;
double c0 = fputil::multiply_add(x, ATAN_COEFFS[i][2], ATAN_COEFFS[i][1]);
@@ -39,16 +60,43 @@ LIBC_INLINE double atan_eval(double x, int i) {
return p;
}
+// Evaluate atan without big lookup table.
+// atan(n/d) - atan(k/16) = atan((n/d - k/16) / (1 + (n/d) * (k/16)))
+// = atan((n - d * k/16)) / (d + n * k/16))
+// So we let q = (n - d * k/16) / (d + n * k/16),
+// and approximate with Taylor polynomial:
+// atan(q) ~ q - q^3/3 + q^5/5 - q^7/7 + q^9/9
+LIBC_INLINE static double atan_eval_no_table(double num, double den,
+ double k_over_16) {
+ double num_r = fputil::multiply_add(den, -k_over_16, num);
+ double den_r = fputil::multiply_add(num, k_over_16, den);
+ double q = num_r / den_r;
+
+ constexpr double ATAN_TAYLOR[] = {
+ -0x1.5555555555555p-2,
+ 0x1.999999999999ap-3,
+ -0x1.2492492492492p-3,
+ 0x1.c71c71c71c71cp-4,
+ };
+ double q2 = q * q;
+ double q3 = q2 * q;
+ double q4 = q2 * q2;
+ double c0 = fputil::multiply_add(q2, ATAN_TAYLOR[1], ATAN_TAYLOR[0]);
+ double c1 = fputil::multiply_add(q2, ATAN_TAYLOR[3], ATAN_TAYLOR[2]);
+ double d = fputil::multiply_add(q4, c1, c0);
+ return fputil::multiply_add(q3, d, q);
+}
+
// > Q = fpminimax(asin(x)/x, [|0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20|],
// [|1, D...|], [0, 0.5]);
-constexpr double ASIN_COEFFS[10] = {0x1.5555555540fa1p-3, 0x1.333333512edc2p-4,
- 0x1.6db6cc1541b31p-5, 0x1.f1caff324770ep-6,
- 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
- 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6,
- -0x1.df946fa875ddp-8, 0x1.02311ecf99c28p-5};
+static constexpr double ASIN_COEFFS[10] = {
+ 0x1.5555555540fa1p-3, 0x1.333333512edc2p-4, 0x1.6db6cc1541b31p-5,
+ 0x1.f1caff324770ep-6, 0x1.6e43899f5f4f4p-6, 0x1.1f847cf652577p-6,
+ 0x1.9b60f47f87146p-7, 0x1.259e2634c494fp-6, -0x1.df946fa875ddp-8,
+ 0x1.02311ecf99c28p-5};
// Evaluate P(x^2) - 1, where P(x^2) ~ asin(x)/x
-LIBC_INLINE double asin_eval(double xsq) {
+LIBC_INLINE static double asin_eval(double xsq) {
double x4 = xsq * xsq;
double r1 = fputil::polyeval(x4, ASIN_COEFFS[0], ASIN_COEFFS[2],
ASIN_COEFFS[4], ASIN_COEFFS[6], ASIN_COEFFS[8]);
|
jhuber6
approved these changes
Dec 4, 2024
michaelrj-google
approved these changes
Dec 4, 2024
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
No description provided.