180 changes: 180 additions & 0 deletions libc/src/__support/FPUtil/XFloat.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
//===-- Utility class to manipulate wide floats. ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "FPBits.h"
#include "NormalFloat.h"
#include "UInt.h"

#include <stdint.h>

namespace __llvm_libc {
namespace fputil {

// Store and manipulate positive double precision numbers at |Precision| bits.
template <size_t Precision> class XFloat {
static constexpr uint64_t OneMask = (uint64_t(1) << 63);
UInt<Precision> man;
static constexpr uint64_t WordCount = Precision / 64;
int exp;

size_t bit_width(uint64_t x) {
if (x == 0)
return 0;
size_t shift = 0;
while ((OneMask & x) == 0) {
++shift;
x <<= 1;
}
return 64 - shift;
}

public:
XFloat() : exp(0) {
for (int i = 0; i < WordCount; ++i)
man[i] = 0;
}

XFloat(const XFloat &other) : exp(other.exp) {
for (int i = 0; i < WordCount; ++i)
man[i] = other.man[i];
}

explicit XFloat(double x) {
auto xn = NormalFloat<double>(x);
exp = xn.exponent;
man[WordCount - 1] = xn.mantissa << 11;
for (int i = 0; i < WordCount - 1; ++i)
man[i] = 0;
}

XFloat(int e, const UInt<Precision> &bits) : exp(e) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = bits[i];
}

// Multiply this number with x and store the result in this number.
void mul(double x) {
auto xn = NormalFloat<double>(x);
exp += xn.exponent;
uint64_t carry = man.mul(xn.mantissa << 11);
size_t carry_width = bit_width(carry);
carry_width = (carry_width == 64 ? 64 : 63);
man.shift_right(carry_width);
man[WordCount - 1] = man[WordCount - 1] + (carry << (64 - carry_width));
exp += carry_width == 64 ? 1 : 0;
normalize();
}

void drop_int() {
if (exp < 0)
return;
if (exp > int(Precision - 1)) {
for (size_t i = 0; i < WordCount; ++i)
man[i] = 0;
return;
}

man.shift_left(exp + 1);
man.shift_right(exp + 1);

normalize();
}

double mul(const XFloat<Precision> &other) {
constexpr size_t row_words = 2 * WordCount + 1;
constexpr size_t row_precision = row_words * 64;
constexpr size_t result_bits = 2 * Precision;
UInt<row_precision> rows[WordCount];

for (size_t r = 0; r < WordCount; ++r) {
for (size_t i = 0; i < row_words; ++i) {
if (i < WordCount)
rows[r][i] = man[i];
else
rows[r][i] = 0;
}
rows[r].mul(other.man[r]);
rows[r].shift_left(r * 64);
}

for (size_t r = 1; r < WordCount; ++r) {
rows[0].add(rows[r]);
}
int result_exp = exp + other.exp;
uint64_t carry = rows[0][row_words - 1];
if (carry) {
size_t carry_width = bit_width(carry);
rows[0].shift_right(carry_width);
rows[0][row_words - 2] =
rows[0][row_words - 2] + (carry << (64 - carry_width));
result_exp += carry_width;
}

if (rows[0][row_words - 2] & OneMask) {
++result_exp;
} else {
rows[0].shift_left(1);
}

UInt<result_bits> result_man;
for (size_t i = 0; i < result_bits / 64; ++i)
result_man[i] = rows[0][i];
XFloat<result_bits> result(result_exp, result_man);
result.normalize();
return double(result);
}

explicit operator double() {
normalize();

constexpr uint64_t one = uint64_t(1) << 10;
constexpr uint64_t excess_mask = (one << 1) - 1;
uint64_t excess = man[WordCount - 1] & excess_mask;
uint64_t new_man = man[WordCount - 1] >> 11;
if (excess > one) {
// We have to round up.
++new_man;
} else if (excess == one) {
bool greater_than_one = false;
for (size_t i = 0; i < WordCount - 1; ++i) {
greater_than_one = (man[i] != 0);
if (greater_than_one)
break;
}
if (greater_than_one || (new_man & 1) != 0) {
++new_man;
}
}

if (new_man == (uint64_t(1) << 53))
++exp;

// We use NormalFloat as it can produce subnormal numbers or underflow to 0
// if necessary.
NormalFloat<double> d(exp, new_man, 0);
return double(d);
}

// Normalizes this number.
void normalize() {
uint64_t man_bits = 0;
for (size_t i = 0; i < WordCount; ++i)
man_bits |= man[i];

if (man_bits == 0)
return;

while ((man[WordCount - 1] & OneMask) == 0) {
man.shift_left(1);
--exp;
}
}
};

} // namespace fputil
} // namespace __llvm_libc
12 changes: 12 additions & 0 deletions libc/src/math/generic/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -992,3 +992,15 @@ add_entrypoint_object(
COMPILE_OPTIONS
-O2
)

add_object_library(
dp_trig
SRCS
dp_trig.cpp
HDRS
dp_trig.h
DEPENDS
libc.src.__support.FPUtil.fputil
COMPILE_OPTIONS
-O3
)
105 changes: 105 additions & 0 deletions libc/src/math/generic/dp_trig.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
//===-- Utilities for double precision trigonometric functions ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/FPUtil/FPBits.h"
#include "src/__support/FPUtil/ManipulationFunctions.h"
#include "src/__support/FPUtil/UInt.h"
#include "src/__support/FPUtil/XFloat.h"

using FPBits = __llvm_libc::fputil::FPBits<double>;

namespace __llvm_libc {

// Implementation is based on the Payne and Hanek range reduction algorithm.
// The caller should ensure that x is positive.
// Consider:
// x/y = x * 1/y = I + F
// I is the integral part and F the fractional part of the result of the
// division operation. Then M = mod(x, y) = F * y. In order to compute M, we
// first compute F. We do it by dropping bits from 1/y which would only
// contribute integral results in the operation x * 1/y. This helps us get
// accurate values of F even when x is a very large number.
//
// Internal operations are performed at 192 bits of precision.
static double mod_impl(double x, const uint64_t y_bits[3],
const uint64_t inv_y_bits[20], int y_exponent,
int inv_y_exponent) {
FPBits bits(x);
int exponent = bits.getExponent();
int bit_drop = (exponent - 52) + inv_y_exponent + 1;
bit_drop = bit_drop >= 0 ? bit_drop : 0;
int word_drop = bit_drop / 64;
bit_drop %= 64;
fputil::UInt<256> man4;
for (size_t i = 0; i < 4; ++i) {
man4[3 - i] = inv_y_bits[word_drop + i];
}
man4.shift_left(bit_drop);
fputil::UInt<192> man_bits;
for (size_t i = 0; i < 3; ++i)
man_bits[i] = man4[i + 1];
fputil::XFloat<192> result(inv_y_exponent - word_drop * 64 - bit_drop,
man_bits);
result.mul(x);
result.drop_int(); // |result| now holds fractional part of x/y.

fputil::UInt<192> y_man;
for (size_t i = 0; i < 3; ++i)
y_man[i] = y_bits[2 - i];
fputil::XFloat<192> y_192(y_exponent, y_man);
return result.mul(y_192);
}

static constexpr int TwoPIExponent = 2;

// The mantissa bits of 2 * PI.
// The most signification bits are in the first uint64_t word
// and the least signification bits are in the last word. The
// first word includes the implicit '1' bit.
static constexpr uint64_t TwoPI[] = {0xc90fdaa22168c234, 0xc4c6628b80dc1cd1,
0x29024e088a67cc74};

static constexpr int InvTwoPIExponent = -3;

// The mantissa bits of 1/(2 * PI).
// The most signification bits are in the first uint64_t word
// and the least signification bits are in the last word. The
// first word includes the implicit '1' bit.
static constexpr uint64_t InvTwoPI[] = {
0xa2f9836e4e441529, 0xfc2757d1f534ddc0, 0xdb6295993c439041,
0xfe5163abdebbc561, 0xb7246e3a424dd2e0, 0x6492eea09d1921c,
0xfe1deb1cb129a73e, 0xe88235f52ebb4484, 0xe99c7026b45f7e41,
0x3991d639835339f4, 0x9c845f8bbdf9283b, 0x1ff897ffde05980f,
0xef2f118b5a0a6d1f, 0x6d367ecf27cb09b7, 0x4f463f669e5fea2d,
0x7527bac7ebe5f17b, 0x3d0739f78a5292ea, 0x6bfb5fb11f8d5d08,
0x56033046fc7b6bab, 0xf0cfbc209af4361e};

double mod_2pi(double x) {
static constexpr double _2pi = 6.283185307179586;
if (x < _2pi)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent, InvTwoPIExponent);
}

// Returns mod(x, pi/2)
double mod_pi_over_2(double x) {
static constexpr double pi_over_2 = 1.5707963267948966;
if (x < pi_over_2)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 2, InvTwoPIExponent + 2);
}

// Returns mod(x, pi/4)
double mod_pi_over_4(double x) {
static constexpr double pi_over_4 = 0.7853981633974483;
if (x < pi_over_4)
return x;
return mod_impl(x, TwoPI, InvTwoPI, TwoPIExponent - 3, InvTwoPIExponent + 3);
}

} // namespace __llvm_libc
22 changes: 22 additions & 0 deletions libc/src/math/generic/dp_trig.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Utilities for double precision trigonometric functions --*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
#define LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H

namespace __llvm_libc {

double mod_2pi(double);

double mod_pi_over_2(double);

double mod_pi_over_4(double);

} // namespace __llvm_libc

#endif // LLVM_LIBC_SRC_MATH_GENERIC_DP_TRIG_H
12 changes: 12 additions & 0 deletions libc/test/src/math/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1169,6 +1169,18 @@ add_fp_unittest(
libc.src.__support.FPUtil.fputil
)

add_fp_unittest(
mod_k_pi_test
NEED_MPFR
SUITE
libc-long-running-tests
SRCS
mod_k_pi_test.cpp
DEPENDS
libc.src.math.generic.dp_trig
libc.src.__support.FPUtil.fputil
)

add_subdirectory(generic)
add_subdirectory(exhaustive)
add_subdirectory(differential_testing)
56 changes: 56 additions & 0 deletions libc/test/src/math/mod_k_pi_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
//===-- Unittests mod_2pi, mod_pi_over_4 and mod_pi_over_2 ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/FPUtil/TestHelpers.h"
#include "src/math/generic/dp_trig.h"
#include "utils/MPFRWrapper/MPFRUtils.h"
#include "utils/UnitTest/Test.h"

#include <math.h>

namespace mpfr = __llvm_libc::testing::mpfr;
using FPBits = __llvm_libc::fputil::FPBits<double>;
using UIntType = FPBits::UIntType;

TEST(LlvmLibcMod2PITest, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;

ASSERT_MPFR_MATCH(mpfr::Operation::Mod2PI, x, __llvm_libc::mod_2pi(x), 0);
}
}

TEST(LlvmLibcModPIOver2Test, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;

ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver2, x,
__llvm_libc::mod_pi_over_2(x), 0);
}
}

TEST(LlvmLibcModPIOver4Test, Range) {
constexpr UIntType count = 1000000000;
constexpr UIntType step = UIntType(-1) / count;
for (UIntType i = 0, v = 0; i <= count; ++i, v += step) {
double x = double(FPBits(v));
if (isnan(x) || isinf(x) || x <= 0.0)
continue;

ASSERT_MPFR_MATCH(mpfr::Operation::ModPIOver4, x,
__llvm_libc::mod_pi_over_4(x), 0);
}
}
38 changes: 37 additions & 1 deletion libc/utils/MPFRWrapper/MPFRUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class MPFRNumber {
mpfr_t value;

public:
MPFRNumber() : mpfrPrecision(128) { mpfr_init2(value, mpfrPrecision); }
MPFRNumber() : mpfrPrecision(256) { mpfr_init2(value, mpfrPrecision); }

// We use explicit EnableIf specializations to disallow implicit
// conversions. Implicit conversions can potentially lead to loss of
Expand Down Expand Up @@ -202,6 +202,33 @@ class MPFRNumber {
return result;
}

MPFRNumber mod_2pi() const {
MPFRNumber result(0.0, 1280);
MPFRNumber _2pi(0.0, 1280);
mpfr_const_pi(_2pi.value, MPFR_RNDN);
mpfr_mul_si(_2pi.value, _2pi.value, 2, MPFR_RNDN);
mpfr_fmod(result.value, value, _2pi.value, MPFR_RNDN);
return result;
}

MPFRNumber mod_pi_over_2() const {
MPFRNumber result(0.0, 1280);
MPFRNumber pi_over_2(0.0, 1280);
mpfr_const_pi(pi_over_2.value, MPFR_RNDN);
mpfr_mul_d(pi_over_2.value, pi_over_2.value, 0.5, MPFR_RNDN);
mpfr_fmod(result.value, value, pi_over_2.value, MPFR_RNDN);
return result;
}

MPFRNumber mod_pi_over_4() const {
MPFRNumber result(0.0, 1280);
MPFRNumber pi_over_4(0.0, 1280);
mpfr_const_pi(pi_over_4.value, MPFR_RNDN);
mpfr_mul_d(pi_over_4.value, pi_over_4.value, 0.25, MPFR_RNDN);
mpfr_fmod(result.value, value, pi_over_4.value, MPFR_RNDN);
return result;
}

MPFRNumber sin() const {
MPFRNumber result;
mpfr_sin(result.value, value, MPFR_RNDN);
Expand Down Expand Up @@ -281,6 +308,9 @@ class MPFRNumber {
template <typename T>
cpp::EnableIfType<cpp::IsFloatingPointType<T>::Value, double> ulp(T input) {
T thisAsT = as<T>();
if (thisAsT == input)
return T(0.0);

int thisExponent = fputil::FPBits<T>(thisAsT).getExponent();
int inputExponent = fputil::FPBits<T>(input).getExponent();
if (thisAsT * input < 0 || thisExponent == inputExponent) {
Expand Down Expand Up @@ -339,6 +369,12 @@ unaryOperation(Operation op, InputType input) {
return mpfrInput.expm1();
case Operation::Floor:
return mpfrInput.floor();
case Operation::Mod2PI:
return mpfrInput.mod_2pi();
case Operation::ModPIOver2:
return mpfrInput.mod_pi_over_2();
case Operation::ModPIOver4:
return mpfrInput.mod_pi_over_4();
case Operation::Round:
return mpfrInput.round();
case Operation::Sin:
Expand Down
3 changes: 3 additions & 0 deletions libc/utils/MPFRWrapper/MPFRUtils.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ enum class Operation : int {
Exp2,
Expm1,
Floor,
Mod2PI,
ModPIOver2,
ModPIOver4,
Round,
Sin,
Sqrt,
Expand Down