[libc][math] Implement double precision log2 function correctly rounded to all rounding modes.

Implement double precision log2 function correctly rounded to all rounding modes.

See https://reviews.llvm.org/D150014 for a more detailed description of the algorithm.

**Performance**

- For `0.5 <= x <= 2`, the fast pass hitting rate is about 99.91%.

- Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log2
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 15.458 + 0.204 clc/call; Median-Min = 0.224 clc/call; Max = 15.867 clc/call;

-- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 23.711 + 0.524 clc/call; Median-Min = 0.443 clc/call; Max = 25.307 clc/call;

-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 14.807 + 0.199 clc/call; Median-Min = 0.211 clc/call; Max = 15.137 clc/call;

-- LIBC reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 17.666 + 0.274 clc/call; Median-Min = 0.298 clc/call; Max = 18.531 clc/call;

-- LIBC reciprocal throughput -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 26.534 + 0.418 clc/call; Median-Min = 0.462 clc/call; Max = 27.327 clc/call;
```

- Latency from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log2 --latency
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 46.048 + 1.643 clc/call; Median-Min = 1.694 clc/call; Max = 48.018 clc/call;

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 62.333 + 0.138 clc/call; Median-Min = 0.119 clc/call; Max = 62.583 clc/call;

-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 45.206 + 1.503 clc/call; Median-Min = 1.467 clc/call; Max = 47.229 clc/call;

-- LIBC latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 43.042 + 0.454 clc/call; Median-Min = 0.484 clc/call; Max = 43.912 clc/call;

-- LIBC latency -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 57.016 + 1.636 clc/call; Median-Min = 1.655 clc/call; Max = 58.816 clc/call;
```

- Accurate pass latency:
```
$ ./perf.sh log2 --latency --simple_stat
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
177.632

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
231.332

-- LIBC latency -- with FMA
459.751

-- LIBC latency -- without FMA
463.850
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D150374
llvm · May 23, 2023 · 111d274 · 111d274
1 parent 34c7f2a
commit 111d274
Show file tree

Hide file tree

Showing 13 changed files with 1,158 additions and 5 deletions.
diff --git a/libc/config/darwin/arm/entrypoints.txt b/libc/config/darwin/arm/entrypoints.txt
@@ -177,6 +177,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.log10
     libc.src.math.log10f
     libc.src.math.log1pf
+    libc.src.math.log2
     libc.src.math.log2f
     libc.src.math.log
     libc.src.math.logf

diff --git a/libc/config/linux/aarch64/entrypoints.txt b/libc/config/linux/aarch64/entrypoints.txt
@@ -288,6 +288,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.log10
     libc.src.math.log10f
     libc.src.math.log1pf
+    libc.src.math.log2
     libc.src.math.log2f
     libc.src.math.log
     libc.src.math.logf

diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt
@@ -293,6 +293,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.log10
     libc.src.math.log10f
     libc.src.math.log1pf
+    libc.src.math.log2
     libc.src.math.log2f
     libc.src.math.log
     libc.src.math.logf

diff --git a/libc/config/windows/entrypoints.txt b/libc/config/windows/entrypoints.txt
@@ -170,6 +170,7 @@ set(TARGET_LIBM_ENTRYPOINTS
     libc.src.math.log10
     libc.src.math.log10f
     libc.src.math.log1pf
+    libc.src.math.log2
     libc.src.math.log2f
     libc.src.math.log
     libc.src.math.logf

diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td
@@ -410,6 +410,7 @@ def StdC : StandardSpec<"stdc"> {
 
           FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
+          FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
           FunctionSpec<"log2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,
 
           FunctionSpec<"log", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,

diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt
@@ -116,6 +116,7 @@ add_math_entrypoint_object(log10f)
 
 add_math_entrypoint_object(log1pf)
 
+add_math_entrypoint_object(log2)
 add_math_entrypoint_object(log2f)
 
 add_math_entrypoint_object(log)

diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
@@ -832,6 +832,26 @@ add_entrypoint_object(
     -O3
 )
 
+add_entrypoint_object( # Entrypoint target for the double-precision log2 added by this commit.
+  log2
+  SRCS
+    log2.cpp
+  HDRS
+    ../log2.h
+  DEPENDS
+    .common_constants
+    .log_range_reduction
+    libc.src.__support.FPUtil.fenv_impl
+    libc.src.__support.FPUtil.fp_bits
+    libc.src.__support.FPUtil.multiply_add
+    libc.src.__support.FPUtil.polyeval
+    libc.src.__support.FPUtil.double_double
+    libc.src.__support.FPUtil.dyadic_float
+    libc.src.__support.macros.optimization
+  COMPILE_OPTIONS
+    -O3
+)
+
 add_entrypoint_object(
   log2f
   SRCS

diff --git a/libc/src/math/generic/log2.cpp b/libc/src/math/generic/log2.cpp
diff --git a/libc/src/math/log2.h b/libc/src/math/log2.h
@@ -0,0 +1,18 @@
+//===-- Implementation header for log2 --------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC_MATH_LOG2_H
+#define LLVM_LIBC_SRC_MATH_LOG2_H
+
+namespace __llvm_libc {
+
+double log2(double x); // Double-precision base-2 logarithm; per the commit description, correctly rounded in all rounding modes.
+
+} // namespace __llvm_libc
+
+#endif // LLVM_LIBC_SRC_MATH_LOG2_H
diff --git a/libc/src/string/memory_utils/utils.h b/libc/src/string/memory_utils/utils.h
@@ -39,20 +39,20 @@ static constexpr bool is_power2(size_t value) {
 }
 
 // Compile time version of log2 that handles 0.
-static constexpr size_t log2(size_t value) {
-  return (value == 0 || value == 1) ? 0 : 1 + log2(value / 2);
+static constexpr size_t log2s(size_t value) {
+  return (value == 0 || value == 1) ? 0 : 1 + log2s(value / 2);
 }
 
 // Returns the first power of two preceding value or value if it is already a
 // power of two (or 0 when value is 0).
 static constexpr size_t le_power2(size_t value) {
-  return value == 0 ? value : 1ULL << log2(value);
+  return value == 0 ? value : 1ULL << log2s(value);
 }
 
 // Returns the first power of two following value or value if it is already a
 // power of two (or 0 when value is 0).
 static constexpr size_t ge_power2(size_t value) {
-  return is_power2_or_zero(value) ? value : 1ULL << (log2(value) + 1);
+  return is_power2_or_zero(value) ? value : 1ULL << (log2s(value) + 1);
 }
 
 // Returns the number of bytes to substract from ptr to get to the previous

diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt
@@ -1295,6 +1295,20 @@ add_fp_unittest(
     libc.src.__support.FPUtil.fp_bits
 )
 
+add_fp_unittest( # Unit-test target for the double-precision log2 entrypoint.
+log2_test
+ NEED_MPFR # log2_test.cpp checks results through the MPFR matchers.
+ SUITE
+   libc_math_unittests
+ SRCS
+   log2_test.cpp
+ DEPENDS
+   libc.src.errno.errno
+   libc.include.math
+   libc.src.math.log2
+   libc.src.__support.FPUtil.fp_bits
+)
+
 add_fp_unittest(
   log2f_test
   NEED_MPFR

diff --git a/libc/test/src/math/log2_test.cpp b/libc/test/src/math/log2_test.cpp
@@ -0,0 +1,138 @@
+//===-- Unittests for log2 ------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/FPUtil/FPBits.h"
+#include "src/errno/libc_errno.h"
+#include "src/math/log2.h"
+#include "test/UnitTest/FPMatcher.h"
+#include "test/UnitTest/Test.h"
+#include "utils/MPFRWrapper/MPFRUtils.h"
+#include <math.h>
+
+#include <errno.h>
+#include <stdint.h>
+
+namespace mpfr = __llvm_libc::testing::mpfr;
+using __llvm_libc::testing::tlog;
+
+DECLARE_SPECIAL_CONSTANTS(double)
+
+TEST(LlvmLibcLog2Test, SpecialNumbers) { // Expected results of log2 at NaN, the infinities, the zeros, -1.0 and 1.0.
+  EXPECT_FP_EQ(aNaN, __llvm_libc::log2(aNaN)); // NaN input is expected to give NaN.
+  EXPECT_FP_EQ(inf, __llvm_libc::log2(inf)); // +inf is expected to map to +inf.
+  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log2(neg_inf), FE_INVALID); // -inf: NaN together with FE_INVALID.
+  EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log2(0.0), FE_DIVBYZERO); // +0.0: -inf together with FE_DIVBYZERO.
+  EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log2(-0.0), FE_DIVBYZERO); // -0.0: same expectation as +0.0.
+  EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log2(-1.0), FE_INVALID); // Negative finite input: NaN together with FE_INVALID.
+  EXPECT_FP_EQ_ALL_ROUNDING(zero, __llvm_libc::log2(1.0)); // log2(1.0) is expected to equal `zero` in every rounding mode checked by the macro.
+}
+
+TEST(LlvmLibcLog2Test, TrickyInputs) { // Compares log2 with MPFR on a fixed list of hand-picked bit patterns.
+  constexpr int N = 30; // Must match the number of entries listed in INPUTS.
+  constexpr uint64_t INPUTS[N] = {
+      0x3ff0000000000000, // x = 1.0
+      0x4024000000000000, // x = 10.0
+      0x4059000000000000, // x = 10^2
+      0x408f400000000000, // x = 10^3
+      0x40c3880000000000, // x = 10^4
+      0x40f86a0000000000, // x = 10^5
+      0x412e848000000000, // x = 10^6
+      0x416312d000000000, // x = 10^7
+      0x4197d78400000000, // x = 10^8
+      0x41cdcd6500000000, // x = 10^9
+      0x4202a05f20000000, // x = 10^10
+      0x42374876e8000000, // x = 10^11
+      0x426d1a94a2000000, // x = 10^12
+      0x42a2309ce5400000, // x = 10^13
+      0x42d6bcc41e900000, // x = 10^14
+      0x430c6bf526340000, // x = 10^15
+      0x4341c37937e08000, // x = 10^16
+      0x4376345785d8a000, // x = 10^17
+      0x43abc16d674ec800, // x = 10^18
+      0x43e158e460913d00, // x = 10^19
+      0x4415af1d78b58c40, // x = 10^20
+      0x444b1ae4d6e2ef50, // x = 10^21
+      0x4480f0cf064dd592, // x = 10^22
+      0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
+      0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
+      0x3fefbfdaa448ed98, // The last seven entries are unannotated; presumably hard-to-round cases - TODO confirm.
+  };
+  for (int i = 0; i < N; ++i) {
+    double x = double(FPBits(INPUTS[i])); // Build the double from its raw 64-bit pattern.
+    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
+                                   __llvm_libc::log2(x), 0.5); // 0.5 is the tolerance handed to the MPFR matcher.
+  }
+}
+
+TEST(LlvmLibcLog2Test, AllExponents) { // Checks log2 at every power of two from 2^-1074 up to 2^1023.
+  double x = 0x1.0p-1074; // Starting value; doubled at the end of each iteration.
+  for (int i = -1074; i < 1024; ++i, x *= 2.0) { // i tracks the exponent of the current x.
+    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
+                                   __llvm_libc::log2(x), 0.5); // 0.5 is the tolerance handed to the MPFR matcher.
+  }
+}
+
+TEST(LlvmLibcLog2Test, InDoubleRange) { // Sweeps evenly spaced bit patterns between 0.25 and 4.0, once per rounding mode.
+  constexpr uint64_t COUNT = 1234561; // Number of steps; the loop below visits COUNT + 1 bit patterns.
+  constexpr uint64_t START = 0x3FD0'0000'0000'0000ULL; // 0.25
+  constexpr uint64_t STOP = 0x4010'0000'0000'0000ULL;  // 4.0
+  // constexpr uint64_t START = 0x3FF0'0000'0000'0000ULL;  // 1.0
+  // constexpr uint64_t STOP = 0x4000'0000'0000'0000ULL;   // 2.0
+  constexpr uint64_t STEP = (STOP - START) / COUNT; // Integer stride between consecutive sampled bit patterns.
+
+  auto test = [&](mpfr::RoundingMode rounding_mode) { // Runs one full sweep under the given rounding mode.
+    mpfr::ForceRoundingMode __r(rounding_mode); // NOTE(review): presumably applies the mode for this scope - confirm; `__r` also uses a reserved `__` identifier.
+    uint64_t fails = 0; // Results that missed the fixed 0.5 tolerance.
+    uint64_t count = 0; // Results that were neither NaN nor infinite and were therefore compared.
+    uint64_t cc = 0; // Inputs on which log2 was actually called.
+    double mx, mr = 0.0; // Input/result of the latest mismatch that widened tol; mx is assigned only inside that loop.
+    double tol = 0.5; // Doubled on mismatches and never reset during a sweep.
+
+    for (uint64_t i = 0, v = START; i <= COUNT; ++i, v += STEP) {
+      double x = FPBits(v).get_val();
+      if (isnan(x) || isinf(x) || x < 0.0) // Skip NaN, infinite and negative inputs.
+        continue;
+      libc_errno = 0; // Reset errno before calling the function under test.
+      double result = __llvm_libc::log2(x);
+      ++cc;
+      if (isnan(result) || isinf(result)) // NaN or infinite results are not compared against MPFR here.
+        continue;
+
+      ++count;
+      // ASSERT_MPFR_MATCH(mpfr::Operation::Log2, x, result, 0.5);
+      if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log2, x, result,
+                                               0.5, rounding_mode)) {
+        ++fails;
+        while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
+            mpfr::Operation::Log2, x, result, tol, rounding_mode)) { // Keep widening tol until this result is accepted.
+          mx = x;
+          mr = result;
+          tol *= 2.0;
+        }
+      }
+    }
+    tlog << " Log2 failed: " << fails << "/" << count << "/" << cc
+         << " tests.\n"; // Printed as failed / compared / called.
+    tlog << "   Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
+    if (fails) { // Re-check one recorded mismatch with the non-silent matcher (presumably so it is reported - confirm).
+      EXPECT_MPFR_MATCH(mpfr::Operation::Log2, mx, mr, 0.5, rounding_mode);
+    }
+  };
+
+  tlog << " Test Rounding To Nearest...\n";
+  test(mpfr::RoundingMode::Nearest);
+
+  tlog << " Test Rounding Downward...\n";
+  test(mpfr::RoundingMode::Downward);
+
+  tlog << " Test Rounding Upward...\n";
+  test(mpfr::RoundingMode::Upward);
+
+  tlog << " Test Rounding Toward Zero...\n";
+  test(mpfr::RoundingMode::TowardZero);
+}
diff --git a/libc/test/src/string/memory_utils/utils_test.cpp b/libc/test/src/string/memory_utils/utils_test.cpp
@@ -45,7 +45,7 @@ TEST(LlvmLibcUtilsTest, Log2) {
       6                                               // 64
   };
   for (size_t i = 0; i < kExpectedValues.size(); ++i)
-    EXPECT_EQ(log2(i), kExpectedValues[i]);
+    EXPECT_EQ(log2s(i), kExpectedValues[i]);
 }
 
 TEST(LlvmLibcUtilsTest, LEPowerOf2) {