-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[libc][math] Add float-only implementation for atanf. #167004
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Member
|
@llvm/pr-subscribers-libc Author: None (lntue) ChangesAlgorithm: With all the computations are done in single precision (float), the total of approximation errors and rounding errors is bounded by 4 ULPs. Full diff: https://github.com/llvm/llvm-project/pull/167004.diff 6 Files Affected:
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index ddc0159b10ce4..13163ec0fd252 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -248,6 +248,7 @@ add_header_library(
add_header_library(
atanf
HDRS
+ atanf_float.h
atanf.h
DEPENDS
.inv_trigf_utils
diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h
index 92799dc8db3cc..a16e386d58106 100644
--- a/libc/src/__support/math/atanf.h
+++ b/libc/src/__support/math/atanf.h
@@ -18,6 +18,14 @@
#include "src/__support/macros/config.h"
#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) && \
+ defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atanf_float.h"
+
+#else
+
namespace LIBC_NAMESPACE_DECL {
namespace math {
@@ -126,4 +134,6 @@ LIBC_INLINE static constexpr float atanf(float x) {
} // namespace LIBC_NAMESPACE_DECL
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
#endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
diff --git a/libc/src/__support/math/atanf_float.h b/libc/src/__support/math/atanf_float.h
new file mode 100644
index 0000000000000..3e5adf30d37b0
--- /dev/null
+++ b/libc/src/__support/math/atanf_float.h
@@ -0,0 +1,168 @@
+//===-- Single-precision atanf float function -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
+#define LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atanf_internal {
+
+using fputil::FloatFloat;
+// atan(i/64) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(atan(i/16), SG, RN);
+// b = round(atan(i/16) - a, SG, RN);
+// print("{", b, ",", a, "},");
+// };
+static constexpr FloatFloat ATAN_I[17] = {
+ {0.0f, 0.0f},
+ {-0x1.1a6042p-30f, 0x1.ff55bcp-5f},
+ {-0x1.54f424p-30f, 0x1.fd5baap-4f},
+ {0x1.79cb6p-28f, 0x1.7b97b4p-3f},
+ {-0x1.b4dfc8p-29f, 0x1.f5b76p-3f},
+ {-0x1.1f0286p-27f, 0x1.362774p-2f},
+ {0x1.e4defp-30f, 0x1.6f6194p-2f},
+ {0x1.e611fep-29f, 0x1.a64eecp-2f},
+ {0x1.586ed4p-28f, 0x1.dac67p-2f},
+ {-0x1.6499e6p-26f, 0x1.0657eap-1f},
+ {0x1.7bdfd6p-26f, 0x1.1e00bap-1f},
+ {-0x1.98e422p-28f, 0x1.345f02p-1f},
+ {0x1.934f7p-28f, 0x1.4978fap-1f},
+ {0x1.c5a6c6p-27f, 0x1.5d5898p-1f},
+ {0x1.5e118cp-27f, 0x1.700a7cp-1f},
+ {-0x1.1d4eb6p-26f, 0x1.819d0cp-1f},
+ {-0x1.777a5cp-26f, 0x1.921fb6p-1f},
+};
+
+// 1 / (1 + (i/16)^2) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+// a = round(1 / (1 + (i/16)^2), SG, RN);
+// print(a, ",");
+// };
+static constexpr float ATANF_REDUCED_ARG[17] = {
+ 0x1.0p0f, 0x1.fe01fep-1f, 0x1.f81f82p-1f, 0x1.ee9c8p-1f,
+ 0x1.e1e1e2p-1f, 0x1.d272cap-1f, 0x1.c0e07p-1f, 0x1.adbe88p-1f,
+ 0x1.99999ap-1f, 0x1.84f00cp-1f, 0x1.702e06p-1f, 0x1.5babccp-1f,
+ 0x1.47ae14p-1f, 0x1.34679ap-1f, 0x1.21fb78p-1f, 0x1.107fbcp-1f,
+ 0x1p-1f,
+};
+
+// Approximating atan( u / (1 + u * k/16) )
+// atan( u / (1 + u * k/16) ) / u ~ 1 - k/16 * u + (k^2/256 - 1/3) * u^2 +
+// + (k/16 - (k/16)^3) * u^3 + O(u^4)
+LIBC_INLINE static float atanf_eval(float u, float k_over_16) {
+ // (k/16)^2
+ float c2 = k_over_16 * k_over_16;
+ // -(k/16)^3
+ float c3 = fputil::multiply_add(-k_over_16, c2, k_over_16);
+ float u2 = u * u;
+ // (k^2/256 - 1/3) + u * (k/16 - (k/16)^3)
+ float a0 = fputil::multiply_add(c3, u, c2 - 0x1.555556p-2f);
+ // -k/16 + u*(k^2/256 - 1/3) + u^2 * (k/16 - (k/16)^3)
+ float a1 = fputil::multiply_add(u, a0, -k_over_16);
+ // u - u^2 * k/16 + u^3 * ((k^2/256 - 1/3) + u^4 * (k/16 - (k/16)^3))
+ return fputil::multiply_add(u2, a1, u);
+}
+
+} // namespace atanf_internal
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+LIBC_INLINE static float atanf(float x) {
+ using namespace atanf_internal;
+ using FPBits = typename fputil::FPBits<float>;
+ using FPBits = typename fputil::FPBits<float>;
+
+ constexpr float SIGN[2] = {1.0f, -1.0f};
+ constexpr FloatFloat PI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f};
+
+ FPBits x_bits(x);
+ Sign s = x_bits.sign();
+ float sign = SIGN[s.is_neg()];
+ uint32_t x_abs = x_bits.uintval() & 0x7fff'ffffU;
+
+ // x is inf or nan, |x| <= 2^-11 or |x|= > 2^11.
+ if (LIBC_UNLIKELY(x_abs <= 0x3a00'0000U || x_abs >= 0x4500'0000U)) {
+ if (LIBC_UNLIKELY(x_bits.is_inf()))
+ return sign * PI_OVER_2.hi;
+ // atan(NaN) = NaN
+ if (LIBC_UNLIKELY(x_bits.is_nan())) {
+ if (x_bits.is_signaling_nan()) {
+ fputil::raise_except_if_required(FE_INVALID);
+ return FPBits::quiet_nan().get_val();
+ }
+
+ return x;
+ }
+ // |x| >= 2^11:
+ // atan(x) = sign(x) * pi/2 - atan(1/x)
+ // ~ sign(x) * pi/2 - 1/x
+ if (LIBC_UNLIKELY(x_abs >= 0x4200'0000))
+ return fputil::multiply_add(sign, PI_OVER_2.hi, -1.0f / x);
+ // x <= 2^-11:
+ // atan(x) ~ x
+ return x;
+ }
+
+ // Range reduction steps:
+ // 1) atan(x) = sign(x) * atan(|x|)
+ // 2) If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+ // 3) For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32.
+ // Let y = |x| - k/16, then using the angle summation formula, we have:
+ // atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
+ // = atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
+ // = atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )
+ // 4) Let u = y / (1 + k^2/256), then we can rewritten the above as:
+ // atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
+ // ~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
+ // + (k/16 - (k/16)^3) * u^4) + O(u^5)
+ float x_a = cpp::bit_cast<float>(x_abs);
+ // |x| > 1 + 1/32, we need to invert x, so we will perform the division in
+ // float-float.
+ if (x_abs > 0x3f84'0000U)
+ x_a = 1.0f / x_a;
+ // Perform range reduction.
+ // k = nearestint(x * 16)
+ float k_f = fputil::nearest_integer(x_a * 0x1.0p4f);
+ unsigned idx = static_cast<unsigned>(k_f);
+ float k_over_16 = k_f * 0x1.0p-4f;
+ float y = x_a - k_over_16;
+ // u = (x - k/16) / (1 + (k/16)^2)
+ float u = y * ATANF_REDUCED_ARG[idx];
+
+ // atan(x) = sign(x) * atan(|x|)
+ // = sign(x) * (atan(k/16) + atan(|))
+ // p ~ atan(u)
+ float p = atanf_eval(u, k_over_16);
+ // |x| > 1 + 1/32: q ~ (pi/2 - atan(1/|x|))
+ // |x| <= 1 + 1/32: q ~ atan(|x|)
+ float q = (p + ATAN_I[idx].lo) + ATAN_I[idx].hi;
+ if (x_abs > 0x3f84'0000U)
+ q = PI_OVER_2.hi + (PI_OVER_2.lo - q);
+ // |x| > 1 + 1/32: sign(x) * (pi/2 - atan(1/|x|))
+ // |x| <= 1 + 1/32: sign(x) * atan(|x|)
+ return sign * q;
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp
index 30b42b57231ff..a342fc0f02305 100644
--- a/libc/test/src/math/atanf_test.cpp
+++ b/libc/test/src/math/atanf_test.cpp
@@ -9,11 +9,18 @@
#include "hdr/math_macros.h"
#include "hdr/stdint_proxy.h"
#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/optimization.h"
#include "src/math/atanf.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+#define TOLERANCE 4
+#else
+#define TOLERANCE 0
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
@@ -49,9 +56,9 @@ TEST_F(LlvmLibcAtanfTest, InFloatRange) {
for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
float x = FPBits(v).get_val();
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, x,
- LIBC_NAMESPACE::atanf(x), 0.5);
+ LIBC_NAMESPACE::atanf(x), TOLERANCE + 0.5);
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, -x,
- LIBC_NAMESPACE::atanf(-x), 0.5);
+ LIBC_NAMESPACE::atanf(-x), TOLERANCE + 0.5);
}
}
@@ -72,6 +79,6 @@ TEST_F(LlvmLibcAtanfTest, SpecialValues) {
for (uint32_t v : val_arr) {
float x = FPBits(v).get_val();
EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, x,
- LIBC_NAMESPACE::atanf(x), 0.5);
+ LIBC_NAMESPACE::atanf(x), TOLERANCE + 0.5);
}
}
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
index 2ff4f02041776..9ca4f93e6c411 100644
--- a/libc/test/src/math/exhaustive/CMakeLists.txt
+++ b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -550,6 +550,21 @@ add_fp_unittest(
-lpthread
)
+add_fp_unittest(
+ atanf_float_test
+ NO_RUN_POSTBUILD
+ NEED_MPFR
+ SUITE
+ libc_math_exhaustive_tests
+ SRCS
+ atanf_float_test.cpp
+ LINK_LIBRARIES
+ -lpthread
+ DEPENDS
+ .exhaustive_test
+ libc.src.__support.math.atanf
+)
+
add_fp_unittest(
asinf_test
NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/atanf_float_test.cpp b/libc/test/src/math/exhaustive/atanf_float_test.cpp
new file mode 100644
index 0000000000000..16f85f8a6839f
--- /dev/null
+++ b/libc/test/src/math/exhaustive/atanf_float_test.cpp
@@ -0,0 +1,41 @@
+//===-- Exhaustive test for atanf - float-only ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/math/atanf_float.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float atanf_fast(float x) { return LIBC_NAMESPACE::math::atanf(x); }
+
+using LlvmLibcAtanfFloatExhaustiveTest =
+ LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Atan, atanf_fast,
+ 4>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcAtanfFloatExhaustiveTest, PostiveRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcAtanfFloatExhaustiveTest, NegativeRange) {
+ std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+ << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+ << std::endl;
+ test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}
|
petrhosek
approved these changes
Nov 20, 2025
aadeshps-mcw
pushed a commit
to aadeshps-mcw/llvm-project
that referenced
this pull request
Nov 26, 2025
Algorithm:
```
1) atan(x) = sign(x) * atan(|x|)
2) If |x| > 1 + 1/32, atan(|x|) = pi/2 - atan(1/|x|)
3) For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32.
Let y = |x| - k/16, then using the angle summation formula, we have:
atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
= atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
= atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )
4) Let u = y / (1 + k^2/256), then we can rewritten the above as:
atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
+ (k/16 - (k/16)^3) * u^4) + O(u^5)
```
With all the computations are done in single precision (float), the
total of approximation errors and rounding errors is bounded by 4 ULPs.
Priyanshu3820
pushed a commit
to Priyanshu3820/llvm-project
that referenced
this pull request
Nov 26, 2025
Algorithm:
```
1) atan(x) = sign(x) * atan(|x|)
2) If |x| > 1 + 1/32, atan(|x|) = pi/2 - atan(1/|x|)
3) For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32.
Let y = |x| - k/16, then using the angle summation formula, we have:
atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
= atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
= atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )
4) Let u = y / (1 + k^2/256), then we can rewritten the above as:
atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
+ (k/16 - (k/16)^3) * u^4) + O(u^5)
```
With all the computations are done in single precision (float), the
total of approximation errors and rounding errors is bounded by 4 ULPs.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Algorithm:
With all the computations are done in single precision (float), the total of approximation errors and rounding errors is bounded by 4 ULPs.