Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc] Implement sinf function that is correctly rounded to all rounding modes.

Implement sinf function that is correctly rounded to all rounding modes.

- We use a simple range reduction for `pi/16 < |x|`:
  Let `k = round(x / pi)` and `y = (x/pi) - k`, so that `k` is an integer and `-0.5 <= y <= 0.5`. Then
  ```
  sin(x) = sin(y*pi + k*pi)
         = (-1)^(k & 1) * sin(y*pi)
         ~ (-1)^(k & 1) * y * P(y^2)
  ```
  where `y*P(y^2)` is a degree-15 minimax polynomial generated by Sollya with:
  ```
  > P = fpminimax(sin(x*pi)/x, [|0, 2, 4, 6, 8, 10, 12, 14|], [|D...|], [0, 0.5]);
  ```
- Performance benchmark using the perf tool from the CORE-MATH project (https://gitlab.inria.fr/core-math/core-math/-/tree/master) on Ryzen 1700:

  Before this patch (not correctly rounded):
  ```
  $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh sinf
  CORE-MATH reciprocal throughput   : 17.892
  System LIBC reciprocal throughput : 25.559
  LIBC reciprocal throughput        : 29.381
  ```
  After this patch (correctly rounded):
  ```
  $ CORE_MATH_PERF_MODE="rdtsc" ./perf.sh sinf
  CORE-MATH reciprocal throughput   : 17.896
  System LIBC reciprocal throughput : 25.740
  LIBC reciprocal throughput        : 27.872
  LIBC reciprocal throughput        : 20.012 (with `-msse4.2` flag)
  LIBC reciprocal throughput        : 14.244 (with `-mfma` flag)
  ```

Reviewed By: zimmermann6

Differential Revision: https://reviews.llvm.org/D123154
- Loading branch information
Showing
12 changed files
with
669 additions
and
75 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
//===-- Common header for helpers to set exceptional values -----*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H | ||
#define LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H | ||
|
||
#include "FEnvImpl.h" | ||
#include "FPBits.h" | ||
|
||
namespace __llvm_libc { | ||
|
||
namespace fputil { | ||
|
||
// Table of N exceptional inputs for a function on floating point type T, | ||
// together with their precomputed outputs. Everything is stored as raw bit | ||
// patterns of type FPBits<T>::UIntType. | ||
template <typename T, int N> struct ExceptionalValues { | ||
using UIntType = typename FPBits<T>::UIntType; | ||
// Number of table entries. | ||
static constexpr int SIZE = N; | ||
// Input bits. ExceptionChecker::check_odd_func compares these against the | ||
// x_abs bits it is given. | ||
UIntType inputs[SIZE]; | ||
// Each row of outputs contains 4 values: | ||
// outputs[i][0]: output bits corresponding to FE_TOWARDZERO | ||
// outputs[i][1]: offset for FE_UPWARD | ||
// outputs[i][2]: offset for FE_DOWNWARD | ||
// outputs[i][3]: offset for FE_TONEAREST | ||
// The offset selected by the current rounding mode is added to the bits in | ||
// outputs[i][0]. check_odd_func swaps the FE_UPWARD and FE_DOWNWARD columns | ||
// when its sign argument is set. | ||
UIntType outputs[SIZE][4]; | ||
}; | ||
|
||
// Looks up inputs in an ExceptionalValues<T, N> table and, on a hit, produces | ||
// the precomputed result adjusted for the current rounding mode. | ||
template <typename T, int N> struct ExceptionChecker { | ||
  // The namespace-scope templates are named with an explicit `fputil::` | ||
  // qualifier because the aliases below reuse their names. Using a name | ||
  // unqualified inside a class and then redeclaring it as a member changes | ||
  // the meaning of that name within the class, which is ill-formed (no | ||
  // diagnostic required) and is rejected by GCC. | ||
  using UIntType = typename fputil::FPBits<T>::UIntType; | ||
  using FPBits = fputil::FPBits<T>; | ||
  using ExceptionalValues = fputil::ExceptionalValues<T, N>; | ||
|
||
  // Searches ExceptVals.inputs for x_abs. | ||
  // | ||
  // ExceptVals: table of exceptional input bits and output bits/offsets. | ||
  // x_abs:      bit pattern to look up (by its name, presumably the bits of | ||
  //             |x| -- confirm against callers). | ||
  // sign:       when true, the FE_UPWARD / FE_DOWNWARD offset columns are | ||
  //             swapped and the final value is negated. | ||
  // result:     written only when a match is found. | ||
  // | ||
  // Returns true if x_abs matched an entry, false otherwise. | ||
  static bool check_odd_func(const ExceptionalValues &ExceptVals, | ||
                             UIntType x_abs, bool sign, T &result) { | ||
    for (int i = 0; i < N; ++i) { | ||
      if (unlikely(x_abs == ExceptVals.inputs[i])) { | ||
        UIntType out_bits = ExceptVals.outputs[i][0]; // FE_TOWARDZERO | ||
        // Any rounding mode not listed below (e.g. FE_TOWARDZERO) keeps the | ||
        // base bits unchanged. | ||
        switch (fputil::get_round()) { | ||
        case FE_UPWARD: | ||
          out_bits += | ||
              sign ? ExceptVals.outputs[i][2] : ExceptVals.outputs[i][1]; | ||
          break; | ||
        case FE_DOWNWARD: | ||
          out_bits += | ||
              sign ? ExceptVals.outputs[i][1] : ExceptVals.outputs[i][2]; | ||
          break; | ||
        case FE_TONEAREST: | ||
          out_bits += ExceptVals.outputs[i][3]; | ||
          break; | ||
        } | ||
        result = FPBits(out_bits).get_val(); | ||
        if (sign) | ||
          result = -result; | ||
|
||
        return true; | ||
      } | ||
    } | ||
    return false; | ||
  } | ||
}; | ||
|
||
} // namespace fputil | ||
|
||
} // namespace __llvm_libc | ||
|
||
#endif // LLVM_LIBC_SRC_SUPPORT_FPUTIL_EXCEPT_VALUE_UTILS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,131 @@ | ||
//===-- Utilities for trigonometric functions -------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H | ||
#define LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H | ||
|
||
#include "src/__support/FPUtil/FPBits.h" | ||
#include "src/__support/FPUtil/except_value_utils.h" | ||
#include "src/__support/FPUtil/multiply_add.h" | ||
#include "src/__support/FPUtil/nearest_integer.h" | ||
|
||
namespace __llvm_libc { | ||
|
||
namespace generic { | ||
|
||
// 0x4c80'0000 is the IEEE-754 single precision bit pattern of 2^26. | ||
// NOTE(review): presumably the threshold on the input bits that selects | ||
// between small_range_reduction and large_range_reduction -- the code that | ||
// uses it is not part of this header; confirm in the caller. | ||
static constexpr uint32_t FAST_PASS_BOUND = 0x4c80'0000U; // 2^26 | ||
|
||
// Number of pieces stored in ONE_OVER_PI_28 and ONE_OVER_PI_28_LSB_EXP. | ||
static constexpr int N_ENTRIES = 8; | ||
|
||
// We choose to split bits of 1/pi into 28-bit precision pieces, so that the | ||
// product of x * ONE_OVER_PI_28[i] is exact. | ||
// (Assumes x carries at most a float's 24-bit significand, so that a | ||
// 24-bit by 28-bit product fits in a double's 53 bits -- TODO confirm that | ||
// callers only pass values converted from float.) | ||
// These are generated by Sollya with: | ||
// > a1 = D(round(1/pi, 28, RN)); a1; | ||
// > a2 = D(round(1/pi - a1, 28, RN)); a2; | ||
// > a3 = D(round(1/pi - a1 - a2, 28, RN)); a3; | ||
// > a4 = D(round(1/pi - a1 - a2 - a3, 28, RN)); a4; | ||
// ... | ||
static constexpr double ONE_OVER_PI_28[N_ENTRIES] = { | ||
0x1.45f306ep-2, -0x1.b1bbeaep-33, 0x1.3f84ebp-62, -0x1.7056592p-92, | ||
0x1.c0db62ap-121, -0x1.4cd8778p-150, -0x1.bef806cp-179, 0x1.63abdecp-209}; | ||
|
||
// Exponents of the least significant bits of the corresponding entries in | ||
// ONE_OVER_PI_28. | ||
// Example: 0x1.45f306ep-2 has its lowest set bit 27 places below the leading | ||
// bit, giving -2 - 27 = -29. | ||
static constexpr int ONE_OVER_PI_28_LSB_EXP[N_ENTRIES] = { | ||
-29, -60, -86, -119, -148, -175, -205, -235}; | ||
|
||
// Return (k mod 2) and y, where | ||
// k = round(x / pi) and y = (x / pi) - k. | ||
static inline int64_t small_range_reduction(double x, double &y) { | ||
double prod = x * ONE_OVER_PI_28[0]; | ||
double kd = fputil::nearest_integer(prod); | ||
y = prod - kd; | ||
y = fputil::multiply_add(x, ONE_OVER_PI_28[1], y); | ||
y = fputil::multiply_add(x, ONE_OVER_PI_28[2], y); | ||
return static_cast<int64_t>(kd); | ||
} | ||
|
||
// Return k and y, where | ||
// k = round(x / pi) and y = (x / pi) - k. | ||
// For large range, there are at most 2 parts of ONE_OVER_PI_28 contributing to | ||
// the unit binary digit (k & 1). If the least significant bit of x * the least | ||
// significant bit of ONE_OVER_PI_28[i] > 1, we can completely ignore | ||
// ONE_OVER_PI_28[i]. | ||
// | ||
// x:     the input value. | ||
// x_exp: used as the exponent of x when locating its least significant bit | ||
//        (presumably the unbiased binary exponent -- confirm with callers). | ||
// y:     output parameter receiving the fractional part. | ||
// The returned integer is built only from the two pieces starting at the | ||
// first non-skipped index, so it is meant to be used through (k & 1). | ||
static inline int64_t large_range_reduction(double x, int x_exp, double &y) { | ||
int idx = 0; | ||
// Early store to the output; y is assigned again below before it is read. | ||
y = 0; | ||
// Exponent of the lowest bit of x, taking x to have a float-width mantissa. | ||
int x_lsb_exp = x_exp - fputil::FloatProperties<float>::MANTISSA_WIDTH; | ||
|
||
// Skipping the first parts of 1/pi such that: | ||
// LSB of x * LSB of ONE_OVER_PI_28[i] > 1. | ||
// NOTE(review): idx is not bounds-checked here, and entries up to idx + 3 | ||
// are read below; this relies on x_exp being small enough that | ||
// idx + 3 < N_ENTRIES -- confirm for the full input range of callers. | ||
while (x_lsb_exp + ONE_OVER_PI_28_LSB_EXP[idx] > 0) | ||
++idx; | ||
|
||
double prod_hi = x * ONE_OVER_PI_28[idx]; | ||
// Get the integral part of x * ONE_OVER_PI_28[idx] | ||
double k_hi = fputil::nearest_integer(prod_hi); | ||
// Get the fractional part of x * ONE_OVER_PI_28[idx] | ||
double frac = prod_hi - k_hi; | ||
// Add the next piece's contribution to that fractional part and split off | ||
// its integral part as well. | ||
double prod_lo = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 1], frac); | ||
double k_lo = fputil::nearest_integer(prod_lo); | ||
|
||
// Now y is the fractional parts. | ||
y = prod_lo - k_lo; | ||
// Fold in two more pieces of 1/pi. | ||
y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 2], y); | ||
y = fputil::multiply_add(x, ONE_OVER_PI_28[idx + 3], y); | ||
|
||
return static_cast<int64_t>(k_hi + k_lo); | ||
} | ||
|
||
// Exceptional cases. | ||
// Number of entries in SmallExcepts. | ||
static constexpr int N_EXCEPT_SMALL = 4; | ||
|
||
// Exceptional float inputs whose bit patterns are all below FAST_PASS_BOUND, | ||
// with their precomputed sin(x) results. Inputs are float bit patterns; each | ||
// output row is {bits of the result rounded toward zero, RU offset, | ||
// RD offset, RN offset}, the layout defined by fputil::ExceptionalValues. | ||
static constexpr fputil::ExceptionalValues<float, N_EXCEPT_SMALL> SmallExcepts{ | ||
/* inputs */ { | ||
0x3fa7832a, // x = 0x1.4f0654p0 | ||
0x46199998, // x = 0x1.33333p13 | ||
0x4afdece4, // x = 0x1.fbd9c8p22 | ||
0x4c2332e9, // x = 0x1.4665d2p25 | ||
}, | ||
/* outputs (RZ, RU offset, RD offset, RN offset) */ | ||
{ | ||
{0x3f7741b5, 1, 0, 1}, // x = 0x1.4f0654p0, sin(x) = 0x1.ee836ap-1 (RZ) | ||
{0xbeb1fa5d, 0, 1, 0}, // x = 0x1.33333p13, sin(x) = -0x1.63f4bap-2 (RZ) | ||
{0xbf7fb6e0, 0, 1, 1}, // x = 0x1.fbd9c8p22, sin(x) = -0x1.ff6dcp-1 (RZ) | ||
{0xbf7fffff, 0, 1, | ||
1}, // x = 0x1.4665d2p25, sin(x) = -0x1.fffffep-1 (RZ) | ||
}}; | ||
|
||
// Number of entries in LargeExcepts. | ||
static constexpr int N_EXCEPT_LARGE = 5; | ||
|
||
// Exceptional float inputs whose bit patterns are all above FAST_PASS_BOUND, | ||
// with their precomputed sin(x) results. Inputs are float bit patterns; each | ||
// output row is {bits of the result rounded toward zero, RU offset, | ||
// RD offset, RN offset}, the layout defined by fputil::ExceptionalValues. | ||
static constexpr fputil::ExceptionalValues<float, N_EXCEPT_LARGE> LargeExcepts{ | ||
/* inputs */ { | ||
0x523947f6, // x = 0x1.728fecp37 | ||
0x53b146a6, // x = 0x1.628d4cp40 | ||
0x55cafb2a, // x = 0x1.95f654p44 | ||
0x6a1976f1, // x = 0x1.32ede2p85 | ||
0x77584625, // x = 0x1.b08c4ap111 | ||
}, | ||
/* outputs (RZ, RU offset, RD offset, RN offset) */ | ||
{ | ||
{0xbf12791d, 0, 1, | ||
1}, // x = 0x1.728fecp37, sin(x) = -0x1.24f23ap-1 (RZ) | ||
{0xbf7fffff, 0, 1, | ||
1}, // x = 0x1.628d4cp40, sin(x) = -0x1.fffffep-1 (RZ) | ||
{0xbf7e7a16, 0, 1, | ||
1}, // x = 0x1.95f654p44, sin(x) = -0x1.fcf42cp-1 (RZ) | ||
{0x3f7fffff, 1, 0, 1}, // x = 0x1.32ede2p85, sin(x) = 0x1.fffffep-1 (RZ) | ||
{0xbf7fffff, 0, 1, | ||
1}, // x = 0x1.b08c4ap111, sin(x) = -0x1.fffffep-1 (RZ) | ||
}}; | ||
|
||
} // namespace generic | ||
|
||
} // namespace __llvm_libc | ||
|
||
#endif // LLVM_LIBC_SRC_MATH_GENERIC_RANGE_REDUCTION_H |
Oops, something went wrong.