[libc][math] Add float-only implementation for atanf. (#167004)

lntue · web-flow · commit aa3f930931e6 · 2025-11-20T09:42:13.000-05:00
Algorithm:
```
  1)  atan(x) = sign(x) * atan(|x|)

  2)  If |x| &gt; 1 + 1/32, atan(|x|) = pi/2 - atan(1/|x|)

  3)  For 1/16 &lt; |x| &lt; 1 + 1/32, we find k such that: | |x| - k/16 | &lt;= 1/32.
      Let y = |x| - k/16, then using the angle summation formula, we have:
    atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
              = atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
              = atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )

  4)  Let u = y / (1 + k^2/256), then we can rewritten the above as:
    atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
              ~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
                              + (k/16 - (k/16)^3) * u^4) + O(u^5)
```

With all the computations are done in single precision (float), the
total of approximation errors and rounding errors is bounded by 4 ULPs.
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
@@ -248,6 +248,7 @@ add_header_library(
 add_header_library(
   atanf
   HDRS
+    atanf_float.h
     atanf.h
   DEPENDS
     .inv_trigf_utils
diff --git a/libc/src/__support/math/atanf.h b/libc/src/__support/math/atanf.h
@@ -18,6 +18,14 @@
 #include "src/__support/macros/config.h"
 #include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
 
+#if defined(LIBC_MATH_HAS_SKIP_ACCURATE_PASS) &&                               \
+    defined(LIBC_MATH_HAS_INTERMEDIATE_COMP_IN_FLOAT)
+
+// We use float-float implementation to reduce size.
+#include "atanf_float.h"
+
+#else
+
 namespace LIBC_NAMESPACE_DECL {
 
 namespace math {
@@ -126,4 +134,6 @@ LIBC_INLINE static constexpr float atanf(float x) {
 
 } // namespace LIBC_NAMESPACE_DECL
 
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 #endif // LLVM_LIBC_SRC___SUPPORT_MATH_ATANF_H
diff --git a/libc/src/__support/math/atanf_float.h b/libc/src/__support/math/atanf_float.h
@@ -0,0 +1,168 @@
+//===-- Single-precision atanf float function -----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
+#define LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
+
+#include "src/__support/FPUtil/FEnvImpl.h"
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/FPUtil/double_double.h"
+#include "src/__support/FPUtil/multiply_add.h"
+#include "src/__support/FPUtil/nearest_integer.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+namespace atanf_internal {
+
+using fputil::FloatFloat;
+// atan(i/64) with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+//     a = round(atan(i/16), SG, RN);
+//     b = round(atan(i/16) - a, SG, RN);
+//     print("{", b, ",", a, "},");
+//   };
+static constexpr FloatFloat ATAN_I[17] = {
+    {0.0f, 0.0f},
+    {-0x1.1a6042p-30f, 0x1.ff55bcp-5f},
+    {-0x1.54f424p-30f, 0x1.fd5baap-4f},
+    {0x1.79cb6p-28f, 0x1.7b97b4p-3f},
+    {-0x1.b4dfc8p-29f, 0x1.f5b76p-3f},
+    {-0x1.1f0286p-27f, 0x1.362774p-2f},
+    {0x1.e4defp-30f, 0x1.6f6194p-2f},
+    {0x1.e611fep-29f, 0x1.a64eecp-2f},
+    {0x1.586ed4p-28f, 0x1.dac67p-2f},
+    {-0x1.6499e6p-26f, 0x1.0657eap-1f},
+    {0x1.7bdfd6p-26f, 0x1.1e00bap-1f},
+    {-0x1.98e422p-28f, 0x1.345f02p-1f},
+    {0x1.934f7p-28f, 0x1.4978fap-1f},
+    {0x1.c5a6c6p-27f, 0x1.5d5898p-1f},
+    {0x1.5e118cp-27f, 0x1.700a7cp-1f},
+    {-0x1.1d4eb6p-26f, 0x1.819d0cp-1f},
+    {-0x1.777a5cp-26f, 0x1.921fb6p-1f},
+};
+
+// 1 / (1 + (i/16)^2)  with i = 0..16, generated by Sollya with:
+// > for i from 0 to 16 do {
+//     a = round(1 / (1 + (i/16)^2), SG, RN);
+//     print(a, ",");
+//   };
+static constexpr float ATANF_REDUCED_ARG[17] = {
+    0x1.0p0f,       0x1.fe01fep-1f, 0x1.f81f82p-1f, 0x1.ee9c8p-1f,
+    0x1.e1e1e2p-1f, 0x1.d272cap-1f, 0x1.c0e07p-1f,  0x1.adbe88p-1f,
+    0x1.99999ap-1f, 0x1.84f00cp-1f, 0x1.702e06p-1f, 0x1.5babccp-1f,
+    0x1.47ae14p-1f, 0x1.34679ap-1f, 0x1.21fb78p-1f, 0x1.107fbcp-1f,
+    0x1p-1f,
+};
+
+// Approximating atan( u / (1 + u * k/16) )
+//   atan( u / (1 + u * k/16) ) / u ~ 1 - k/16 * u + (k^2/256 - 1/3) * u^2 +
+//                                    + (k/16 - (k/16)^3) * u^3 + O(u^4)
+LIBC_INLINE static float atanf_eval(float u, float k_over_16) {
+  // (k/16)^2
+  float c2 = k_over_16 * k_over_16;
+  // -(k/16)^3
+  float c3 = fputil::multiply_add(-k_over_16, c2, k_over_16);
+  float u2 = u * u;
+  // (k^2/256 - 1/3) + u * (k/16 - (k/16)^3)
+  float a0 = fputil::multiply_add(c3, u, c2 - 0x1.555556p-2f);
+  // -k/16 + u*(k^2/256 - 1/3) + u^2 * (k/16 - (k/16)^3)
+  float a1 = fputil::multiply_add(u, a0, -k_over_16);
+  // u - u^2 * k/16 + u^3 * ((k^2/256 - 1/3) + u^4 * (k/16 - (k/16)^3))
+  return fputil::multiply_add(u2, a1, u);
+}
+
+} // namespace atanf_internal
+
+// There are several range reduction steps we can take for atan2(y, x) as
+// follow:
+
+LIBC_INLINE static float atanf(float x) {
+  using namespace atanf_internal;
+  using FPBits = typename fputil::FPBits<float>;
+  using FPBits = typename fputil::FPBits<float>;
+
+  constexpr float SIGN[2] = {1.0f, -1.0f};
+  constexpr FloatFloat PI_OVER_2 = {-0x1.777a5cp-25f, 0x1.921fb6p0f};
+
+  FPBits x_bits(x);
+  Sign s = x_bits.sign();
+  float sign = SIGN[s.is_neg()];
+  uint32_t x_abs = x_bits.uintval() & 0x7fff'ffffU;
+
+  // x is inf or nan, |x| <= 2^-11 or |x|= > 2^11.
+  if (LIBC_UNLIKELY(x_abs <= 0x3a00'0000U || x_abs >= 0x4500'0000U)) {
+    if (LIBC_UNLIKELY(x_bits.is_inf()))
+      return sign * PI_OVER_2.hi;
+    // atan(NaN) = NaN
+    if (LIBC_UNLIKELY(x_bits.is_nan())) {
+      if (x_bits.is_signaling_nan()) {
+        fputil::raise_except_if_required(FE_INVALID);
+        return FPBits::quiet_nan().get_val();
+      }
+
+      return x;
+    }
+    // |x| >= 2^11:
+    // atan(x) = sign(x) * pi/2 - atan(1/x)
+    //         ~ sign(x) * pi/2 - 1/x
+    if (LIBC_UNLIKELY(x_abs >= 0x4200'0000))
+      return fputil::multiply_add(sign, PI_OVER_2.hi, -1.0f / x);
+    // x <= 2^-11:
+    // atan(x) ~ x
+    return x;
+  }
+
+  // Range reduction steps:
+  // 1)  atan(x) = sign(x) * atan(|x|)
+  // 2)  If |x| > 1, atan(|x|) = pi/2 - atan(1/|x|)
+  // 3)  For 1/16 < |x| < 1 + 1/32, we find k such that: | |x| - k/16 | <= 1/32.
+  //     Let y = |x| - k/16, then using the angle summation formula, we have:
+  //   atan(|x|) = atan(k/16) + atan( (|x| - k/16) / (1 + |x| * k/16) )
+  //             = atan(k/16) + atan( y / (1 + (y + k/16) * k/16 )
+  //             = atan(k/16) + atan( y / ((1 + k^2/256) + y * k/16) )
+  // 4)  Let u = y / (1 + k^2/256), then we can rewritten the above as:
+  //   atan(|x|) = atan(k/16) + atan( u / (1 + u * k/16) )
+  //             ~ atan(k/16) + (u - k/16 * u^2 + (k^2/256 - 1/3) * u^3 +
+  //                             + (k/16 - (k/16)^3) * u^4) + O(u^5)
+  float x_a = cpp::bit_cast<float>(x_abs);
+  // |x| > 1 + 1/32, we need to invert x, so we will perform the division in
+  // float-float.
+  if (x_abs > 0x3f84'0000U)
+    x_a = 1.0f / x_a;
+  // Perform range reduction.
+  //   k = nearestint(x * 16)
+  float k_f = fputil::nearest_integer(x_a * 0x1.0p4f);
+  unsigned idx = static_cast<unsigned>(k_f);
+  float k_over_16 = k_f * 0x1.0p-4f;
+  float y = x_a - k_over_16;
+  // u = (x - k/16) / (1 + (k/16)^2)
+  float u = y * ATANF_REDUCED_ARG[idx];
+
+  // atan(x) = sign(x) * atan(|x|)
+  //         = sign(x) * (atan(k/16) + atan(|))
+  // p ~ atan(u)
+  float p = atanf_eval(u, k_over_16);
+  // |x|  > 1 + 1/32: q ~ (pi/2 - atan(1/|x|))
+  // |x| <= 1 + 1/32: q ~ atan(|x|)
+  float q = (p + ATAN_I[idx].lo) + ATAN_I[idx].hi;
+  if (x_abs > 0x3f84'0000U)
+    q = PI_OVER_2.hi + (PI_OVER_2.lo - q);
+  // |x|  > 1 + 1/32: sign(x) * (pi/2 - atan(1/|x|))
+  // |x| <= 1 + 1/32: sign(x) * atan(|x|)
+  return sign * q;
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_SRC___SUPPORT_MATH_ATANF_FLOAT_H
diff --git a/libc/test/src/math/atanf_test.cpp b/libc/test/src/math/atanf_test.cpp
@@ -9,11 +9,18 @@
 #include "hdr/math_macros.h"
 #include "hdr/stdint_proxy.h"
 #include "src/__support/FPUtil/FPBits.h"
+#include "src/__support/macros/optimization.h"
 #include "src/math/atanf.h"
 #include "test/UnitTest/FPMatcher.h"
 #include "test/UnitTest/Test.h"
 #include "utils/MPFRWrapper/MPFRUtils.h"
 
+#ifdef LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+#define TOLERANCE 4
+#else
+#define TOLERANCE 0
+#endif // LIBC_MATH_HAS_SKIP_ACCURATE_PASS
+
 using LlvmLibcAtanfTest = LIBC_NAMESPACE::testing::FPTest<float>;
 
 namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
@@ -49,9 +56,9 @@ TEST_F(LlvmLibcAtanfTest, InFloatRange) {
   for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) {
     float x = FPBits(v).get_val();
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, x,
-                                   LIBC_NAMESPACE::atanf(x), 0.5);
+                                   LIBC_NAMESPACE::atanf(x), TOLERANCE + 0.5);
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, -x,
-                                   LIBC_NAMESPACE::atanf(-x), 0.5);
+                                   LIBC_NAMESPACE::atanf(-x), TOLERANCE + 0.5);
   }
 }
 
@@ -72,6 +79,6 @@ TEST_F(LlvmLibcAtanfTest, SpecialValues) {
   for (uint32_t v : val_arr) {
     float x = FPBits(v).get_val();
     EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Atan, x,
-                                   LIBC_NAMESPACE::atanf(x), 0.5);
+                                   LIBC_NAMESPACE::atanf(x), TOLERANCE + 0.5);
   }
 }
diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt
@@ -550,6 +550,21 @@ add_fp_unittest(
     -lpthread
 )
 
+add_fp_unittest(
+  atanf_float_test
+  NO_RUN_POSTBUILD
+  NEED_MPFR
+  SUITE
+    libc_math_exhaustive_tests
+  SRCS
+    atanf_float_test.cpp
+  LINK_LIBRARIES
+    -lpthread
+  DEPENDS
+    .exhaustive_test
+    libc.src.__support.math.atanf
+)
+
 add_fp_unittest(
   asinf_test
   NO_RUN_POSTBUILD
diff --git a/libc/test/src/math/exhaustive/atanf_float_test.cpp b/libc/test/src/math/exhaustive/atanf_float_test.cpp
@@ -0,0 +1,41 @@
+//===-- Exhaustive test for atanf - float-only ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "exhaustive_test.h"
+#include "src/__support/math/atanf_float.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+
+namespace mpfr = LIBC_NAMESPACE::testing::mpfr;
+
+float atanf_fast(float x) { return LIBC_NAMESPACE::math::atanf(x); }
+
+using LlvmLibcAtanfFloatExhaustiveTest =
+    LlvmLibcUnaryOpExhaustiveMathTest<float, mpfr::Operation::Atan, atanf_fast,
+                                      4>;
+
+// Range: [0, Inf];
+static constexpr uint32_t POS_START = 0x0000'0000U;
+static constexpr uint32_t POS_STOP = 0x7f80'0000U;
+
+TEST_F(LlvmLibcAtanfFloatExhaustiveTest, PostiveRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << POS_START << ", 0x" << POS_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, POS_START, POS_STOP);
+}
+
+// Range: [-Inf, 0];
+static constexpr uint32_t NEG_START = 0x8000'0000U;
+static constexpr uint32_t NEG_STOP = 0xff80'0000U;
+
+TEST_F(LlvmLibcAtanfFloatExhaustiveTest, NegativeRange) {
+  std::cout << "-- Testing for FE_TONEAREST in range [0x" << std::hex
+            << NEG_START << ", 0x" << NEG_STOP << ") --" << std::dec
+            << std::endl;
+  test_full_range(mpfr::RoundingMode::Nearest, NEG_START, NEG_STOP);
+}