Skip to content

Commit

Permalink
[libc][math] Implement double precision log2 function correctly round…
Browse files Browse the repository at this point in the history
…ed to all rounding modes.

Implement double precision log2 function correctly rounded to all
rounding modes.

See https://reviews.llvm.org/D150014 for a more detailed description of the algorithm.

**Performance**

  - For `0.5 <= x <= 2`, the fast pass hit rate is about 99.91%.

  - Reciprocal throughput from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log2
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 15.458 + 0.204 clc/call; Median-Min = 0.224 clc/call; Max = 15.867 clc/call;

-- CORE-MATH reciprocal throughput -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 23.711 + 0.524 clc/call; Median-Min = 0.443 clc/call; Max = 25.307 clc/call;

-- System LIBC reciprocal throughput --
[####################] 100 %
Ntrial = 20 ; Min = 14.807 + 0.199 clc/call; Median-Min = 0.211 clc/call; Max = 15.137 clc/call;

-- LIBC reciprocal throughput -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 17.666 + 0.274 clc/call; Median-Min = 0.298 clc/call; Max = 18.531 clc/call;

-- LIBC reciprocal throughput -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 26.534 + 0.418 clc/call; Median-Min = 0.462 clc/call; Max = 27.327 clc/call;

```
  - Latency from CORE-MATH's perf tool on Ryzen 5900X:
```
$ ./perf.sh log2 --latency
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 46.048 + 1.643 clc/call; Median-Min = 1.694 clc/call; Max = 48.018 clc/call;

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
[####################] 100 %
Ntrial = 20 ; Min = 62.333 + 0.138 clc/call; Median-Min = 0.119 clc/call; Max = 62.583 clc/call;

-- System LIBC latency --
[####################] 100 %
Ntrial = 20 ; Min = 45.206 + 1.503 clc/call; Median-Min = 1.467 clc/call; Max = 47.229 clc/call;

-- LIBC latency -- with FMA
[####################] 100 %
Ntrial = 20 ; Min = 43.042 + 0.454 clc/call; Median-Min = 0.484 clc/call; Max = 43.912 clc/call;

-- LIBC latency -- without FMA
[####################] 100 %
Ntrial = 20 ; Min = 57.016 + 1.636 clc/call; Median-Min = 1.655 clc/call; Max = 58.816 clc/call;
```
  - Accurate pass latency:
```
$ ./perf.sh log2 --latency --simple_stat
GNU libc version: 2.35
GNU libc release: stable

-- CORE-MATH latency -- with FMA
177.632

-- CORE-MATH latency -- without FMA (-march=x86-64-v2)
231.332

-- LIBC latency -- with FMA
459.751

-- LIBC latency -- without FMA
463.850
```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D150374
  • Loading branch information
lntue committed May 23, 2023
1 parent 34c7f2a commit 111d274
Show file tree
Hide file tree
Showing 13 changed files with 1,158 additions and 5 deletions.
1 change: 1 addition & 0 deletions libc/config/darwin/arm/entrypoints.txt
Expand Up @@ -177,6 +177,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f
libc.src.math.log
libc.src.math.logf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/aarch64/entrypoints.txt
Expand Up @@ -288,6 +288,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f
libc.src.math.log
libc.src.math.logf
Expand Down
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Expand Up @@ -293,6 +293,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f
libc.src.math.log
libc.src.math.logf
Expand Down
1 change: 1 addition & 0 deletions libc/config/windows/entrypoints.txt
Expand Up @@ -170,6 +170,7 @@ set(TARGET_LIBM_ENTRYPOINTS
libc.src.math.log10
libc.src.math.log10f
libc.src.math.log1pf
libc.src.math.log2
libc.src.math.log2f
libc.src.math.log
libc.src.math.logf
Expand Down
1 change: 1 addition & 0 deletions libc/spec/stdc.td
Expand Up @@ -410,6 +410,7 @@ def StdC : StandardSpec<"stdc"> {

FunctionSpec<"log1pf", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

FunctionSpec<"log2", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
FunctionSpec<"log2f", RetValSpec<FloatType>, [ArgSpec<FloatType>]>,

FunctionSpec<"log", RetValSpec<DoubleType>, [ArgSpec<DoubleType>]>,
Expand Down
1 change: 1 addition & 0 deletions libc/src/math/CMakeLists.txt
Expand Up @@ -116,6 +116,7 @@ add_math_entrypoint_object(log10f)

add_math_entrypoint_object(log1pf)

add_math_entrypoint_object(log2)
add_math_entrypoint_object(log2f)

add_math_entrypoint_object(log)
Expand Down
20 changes: 20 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Expand Up @@ -832,6 +832,26 @@ add_entrypoint_object(
-O3
)

# Entrypoint for the double precision log2 function, built from log2.cpp
# with -O3 and declared in ../log2.h.
add_entrypoint_object(
log2
SRCS
log2.cpp
HDRS
../log2.h
DEPENDS
.common_constants
.log_range_reduction
libc.src.__support.FPUtil.fenv_impl
libc.src.__support.FPUtil.fp_bits
libc.src.__support.FPUtil.multiply_add
libc.src.__support.FPUtil.polyeval
libc.src.__support.FPUtil.double_double
libc.src.__support.FPUtil.dyadic_float
libc.src.__support.macros.optimization
COMPILE_OPTIONS
-O3
)

add_entrypoint_object(
log2f
SRCS
Expand Down
957 changes: 957 additions & 0 deletions libc/src/math/generic/log2.cpp

Large diffs are not rendered by default.

18 changes: 18 additions & 0 deletions libc/src/math/log2.h
@@ -0,0 +1,18 @@
//===-- Implementation header for log2 --------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_LOG2_H
#define LLVM_LIBC_SRC_MATH_LOG2_H

namespace __llvm_libc {

// Base-2 logarithm of a double precision value. The implementation is
// intended to be correctly rounded in all rounding modes.
double log2(double x);

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_MATH_LOG2_H
8 changes: 4 additions & 4 deletions libc/src/string/memory_utils/utils.h
Expand Up @@ -39,20 +39,20 @@ static constexpr bool is_power2(size_t value) {
}

// Compile time version of log2 that handles 0.
static constexpr size_t log2(size_t value) {
return (value == 0 || value == 1) ? 0 : 1 + log2(value / 2);
// Floor of the base-2 logarithm of value, usable in constant expressions.
// Both 0 and 1 yield 0.
static constexpr size_t log2s(size_t value) {
  size_t exponent = 0;
  // Each halving that still leaves more than 1 contributes one to the result.
  for (size_t rest = value; rest > 1; rest /= 2)
    ++exponent;
  return exponent;
}

// Returns the first power of two preceding value or value if it is already a
// power of two (or 0 when value is 0).
static constexpr size_t le_power2(size_t value) {
return value == 0 ? value : 1ULL << log2(value);
return value == 0 ? value : 1ULL << log2s(value);
}

// Returns the first power of two following value or value if it is already a
// power of two (or 0 when value is 0).
static constexpr size_t ge_power2(size_t value) {
return is_power2_or_zero(value) ? value : 1ULL << (log2(value) + 1);
return is_power2_or_zero(value) ? value : 1ULL << (log2s(value) + 1);
}

// Returns the number of bytes to subtract from ptr to get to the previous
Expand Down
14 changes: 14 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Expand Up @@ -1295,6 +1295,20 @@ add_fp_unittest(
libc.src.__support.FPUtil.fp_bits
)

# MPFR-backed unit test for the double precision log2 entrypoint.
add_fp_unittest(
log2_test
NEED_MPFR
SUITE
libc_math_unittests
SRCS
log2_test.cpp
DEPENDS
libc.src.errno.errno
libc.include.math
libc.src.math.log2
libc.src.__support.FPUtil.fp_bits
)

add_fp_unittest(
log2f_test
NEED_MPFR
Expand Down
138 changes: 138 additions & 0 deletions libc/test/src/math/log2_test.cpp
@@ -0,0 +1,138 @@
//===-- Unittests for log2 ------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/FPUtil/FPBits.h"
#include "src/errno/libc_errno.h"
#include "src/math/log2.h"
#include "test/UnitTest/FPMatcher.h"
#include "test/UnitTest/Test.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
#include <math.h>

#include <errno.h>
#include <stdint.h>

namespace mpfr = __llvm_libc::testing::mpfr;
using __llvm_libc::testing::tlog;

DECLARE_SPECIAL_CONSTANTS(double)

// Checks the results (and, where stated, the raised floating point
// exceptions) for NaN, infinities, zeros, a negative input and 1.0.
TEST(LlvmLibcLog2Test, SpecialNumbers) {
// NaN and +inf inputs are expected to come back as NaN and +inf.
EXPECT_FP_EQ(aNaN, __llvm_libc::log2(aNaN));
EXPECT_FP_EQ(inf, __llvm_libc::log2(inf));
// -inf is outside the domain: NaN result with FE_INVALID.
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log2(neg_inf), FE_INVALID);
// Both signed zeros give -inf with FE_DIVBYZERO.
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log2(0.0), FE_DIVBYZERO);
EXPECT_FP_EQ_WITH_EXCEPTION(neg_inf, __llvm_libc::log2(-0.0), FE_DIVBYZERO);
// A finite negative input: NaN result with FE_INVALID.
EXPECT_FP_IS_NAN_WITH_EXCEPTION(__llvm_libc::log2(-1.0), FE_INVALID);
// log2(1.0) must compare equal to zero under every rounding mode.
EXPECT_FP_EQ_ALL_ROUNDING(zero, __llvm_libc::log2(1.0));
}

// Compares log2 against MPFR, within 0.5 ULP and in all rounding modes, for a
// fixed list of inputs given as raw 64-bit patterns.
TEST(LlvmLibcLog2Test, TrickyInputs) {
  // The array bound is deduced from the initializer list (30 entries).
  constexpr uint64_t INPUTS[] = {
      0x3ff0000000000000, // x = 1.0
      0x4024000000000000, // x = 10.0
      0x4059000000000000, // x = 10^2
      0x408f400000000000, // x = 10^3
      0x40c3880000000000, // x = 10^4
      0x40f86a0000000000, // x = 10^5
      0x412e848000000000, // x = 10^6
      0x416312d000000000, // x = 10^7
      0x4197d78400000000, // x = 10^8
      0x41cdcd6500000000, // x = 10^9
      0x4202a05f20000000, // x = 10^10
      0x42374876e8000000, // x = 10^11
      0x426d1a94a2000000, // x = 10^12
      0x42a2309ce5400000, // x = 10^13
      0x42d6bcc41e900000, // x = 10^14
      0x430c6bf526340000, // x = 10^15
      0x4341c37937e08000, // x = 10^16
      0x4376345785d8a000, // x = 10^17
      0x43abc16d674ec800, // x = 10^18
      0x43e158e460913d00, // x = 10^19
      0x4415af1d78b58c40, // x = 10^20
      0x444b1ae4d6e2ef50, // x = 10^21
      0x4480f0cf064dd592, // x = 10^22
      // The remaining patterns are not annotated in the list; presumably
      // hard-to-round cases -- TODO confirm.
      0x3fefffffffef06ad, 0x3fefde0f22c7d0eb, 0x225e7812faadb32f,
      0x3fee1076964c2903, 0x3fdfe93fff7fceb0, 0x3ff012631ad8df10,
      0x3fefbfdaa448ed98,
  };

  for (uint64_t bits : INPUTS) {
    double x = double(FPBits(bits));
    EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
                                   __llvm_libc::log2(x), 0.5);
  }
}

// Compares log2 against MPFR, within 0.5 ULP and in all rounding modes, for
// every power of two from 2^-1074 up to 2^1023.
TEST(LlvmLibcLog2Test, AllExponents) {
  double x = 0x1.0p-1074;
  int exponent = -1074;
  while (exponent < 1024) {
    ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Log2, x,
                                   __llvm_libc::log2(x), 0.5);
    // Step to the next power of two, keeping x == 2^exponent.
    x *= 2.0;
    ++exponent;
  }
}

// Sweeps evenly spaced bit patterns between START and STOP and compares log2
// against MPFR once per rounding mode. For each mode it logs the number of
// inputs that missed the 0.5 ULP bound and an upper bound on the worst error.
TEST(LlvmLibcLog2Test, InDoubleRange) {
  // Number of steps between START and STOP; COUNT + 1 patterns are sampled.
  constexpr uint64_t COUNT = 1234561;
  constexpr uint64_t START = 0x3FD0'0000'0000'0000ULL; // 0.25
  constexpr uint64_t STOP = 0x4010'0000'0000'0000ULL;  // 4.0
  // Alternative, narrower range:
  // constexpr uint64_t START = 0x3FF0'0000'0000'0000ULL; // 1.0
  // constexpr uint64_t STOP = 0x4000'0000'0000'0000ULL; // 2.0
  constexpr uint64_t STEP = (STOP - START) / COUNT;

  auto test = [&](mpfr::RoundingMode rounding_mode) {
    // Named (not `__r`): identifiers containing a double underscore are
    // reserved. The object must stay alive for the whole sweep.
    mpfr::ForceRoundingMode force_rounding(rounding_mode);
    uint64_t fails = 0; // inputs that missed the 0.5 ULP bound
    uint64_t count = 0; // inputs whose result was finite and was compared
    uint64_t cc = 0;    // inputs for which log2 was called
    // Input/result pair recorded the last time the tolerance had to grow.
    // Both are initialised so neither is ever read while indeterminate.
    double mx = 0.0;
    double mr = 0.0;
    // Running tolerance; doubled until every failing input seen so far passes.
    double tol = 0.5;

    for (uint64_t i = 0, v = START; i <= COUNT; ++i, v += STEP) {
      double x = FPBits(v).get_val();
      if (isnan(x) || isinf(x) || x < 0.0)
        continue;
      libc_errno = 0;
      double result = __llvm_libc::log2(x);
      ++cc;
      if (isnan(result) || isinf(result))
        continue;

      ++count;
      if (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(mpfr::Operation::Log2, x, result,
                                               0.5, rounding_mode)) {
        ++fails;
        // Grow the tolerance until this input passes, remembering the pair
        // that forced the growth.
        while (!EXPECT_MPFR_MATCH_ROUNDING_SILENTLY(
            mpfr::Operation::Log2, x, result, tol, rounding_mode)) {
          mx = x;
          mr = result;
          tol *= 2.0;
        }
      }
    }
    tlog << " Log2 failed: " << fails << "/" << count << "/" << cc
         << " tests.\n";
    tlog << " Max ULPs is at most: " << static_cast<uint64_t>(tol) << ".\n";
    if (fails) {
      // Re-check the recorded pair with the non-silent matcher so the
      // failure is reported.
      EXPECT_MPFR_MATCH(mpfr::Operation::Log2, mx, mr, 0.5, rounding_mode);
    }
  };

  tlog << " Test Rounding To Nearest...\n";
  test(mpfr::RoundingMode::Nearest);

  tlog << " Test Rounding Downward...\n";
  test(mpfr::RoundingMode::Downward);

  tlog << " Test Rounding Upward...\n";
  test(mpfr::RoundingMode::Upward);

  tlog << " Test Rounding Toward Zero...\n";
  test(mpfr::RoundingMode::TowardZero);
}
2 changes: 1 addition & 1 deletion libc/test/src/string/memory_utils/utils_test.cpp
Expand Up @@ -45,7 +45,7 @@ TEST(LlvmLibcUtilsTest, Log2) {
6 // 64
};
for (size_t i = 0; i < kExpectedValues.size(); ++i)
EXPECT_EQ(log2(i), kExpectedValues[i]);
EXPECT_EQ(log2s(i), kExpectedValues[i]);
}

TEST(LlvmLibcUtilsTest, LEPowerOf2) {
Expand Down

0 comments on commit 111d274

Please sign in to comment.