Expand Up
@@ -21,25 +21,54 @@
namespace __llvm_libc {
// Number of index bits of the exp lookup table.
static constexpr int EXP_bits_p{5};
// Number of sampling points: 2^EXP_bits_p.
static constexpr int EXP_num_p{1 << EXP_bits_p};
// EXP_num_p expressed as a double.
constexpr double mlp{static_cast<double>(EXP_num_p)};
// Negated reciprocal of mlp, i.e. -1 / EXP_num_p.
constexpr double mmld{-1.0 / mlp};
// Wolfram alpha: N[Table[2^x-1,{x,-16/32,15/32,1/32}],27]
// printf("%.13a,\n", d[i]);
extern const double EXP_2_POW[EXP_num_p];
// Bit fields of the double-precision values 2^(i/16), i = 0..15.
// Generated by Sollya with:
//   > for i from 0 to 15 do printdouble(round(2^(i/16), D, RN));
inline constexpr int64_t EXP_2_M[16] = {
    0x3ff0000000000000, 0x3ff0b5586cf9890f, // i =  0,  1
    0x3ff172b83c7d517b, 0x3ff2387a6e756238, // i =  2,  3
    0x3ff306fe0a31b715, 0x3ff3dea64c123422, // i =  4,  5
    0x3ff4bfdad5362a27, 0x3ff5ab07dd485429, // i =  6,  7
    0x3ff6a09e667f3bcd, 0x3ff7a11473eb0187, // i =  8,  9
    0x3ff8ace5422aa0db, 0x3ff9c49182a3f090, // i = 10, 11
    0x3ffae89f995ad3ad, 0x3ffc199bdd85529c, // i = 12, 13
    0x3ffd5818dcfba487, 0x3ffea4afa2a490da, // i = 14, 15
};
// Constants and the low-part polynomial used to compute e^x with a range
// reduction step of 2^(-MID_BITS).
struct ExpBase {
  // Base = e
  static constexpr int MID_BITS = 5;
  static constexpr int MID_MASK = (1 << MID_BITS) - 1;
  // log2(e) * 2^5
  static constexpr double LOG2_B = 0x1.71547652b82fep+0 * (1 << MID_BITS);
  // High and low parts of -log(2) * 2^(-5)
  static constexpr double M_LOGB_2_HI = -0x1.62e42fefa0000p-1 / (1 << MID_BITS);
  static constexpr double M_LOGB_2_LO =
      -0x1.cf79abc9e3b3ap-40 / (1 << MID_BITS);
  // Look up table for bit fields of 2^(i/32) for i = 0..31, generated by Sollya
  // with:
  // > for i from 0 to 31 do printdouble(round(2^(i/32), D, RN));
  static constexpr int64_t EXP_2_MID[1 << MID_BITS] = {
      0x3ff0000000000000, 0x3ff059b0d3158574, 0x3ff0b5586cf9890f,
      0x3ff11301d0125b51, 0x3ff172b83c7d517b, 0x3ff1d4873168b9aa,
      0x3ff2387a6e756238, 0x3ff29e9df51fdee1, 0x3ff306fe0a31b715,
      0x3ff371a7373aa9cb, 0x3ff3dea64c123422, 0x3ff44e086061892d,
      0x3ff4bfdad5362a27, 0x3ff5342b569d4f82, 0x3ff5ab07dd485429,
      0x3ff6247eb03a5585, 0x3ff6a09e667f3bcd, 0x3ff71f75e8ec5f74,
      0x3ff7a11473eb0187, 0x3ff82589994cce13, 0x3ff8ace5422aa0db,
      0x3ff93737b0cdc5e5, 0x3ff9c49182a3f090, 0x3ffa5503b23e255d,
      0x3ffae89f995ad3ad, 0x3ffb7f76f2fb5e47, 0x3ffc199bdd85529c,
      0x3ffcb720dcef9069, 0x3ffd5818dcfba487, 0x3ffdfc97337b9b5f,
      0x3ffea4afa2a490da, 0x3fff50765b6e4540,
  };
  // Approximating e^dx with degree-5 minimax polynomial generated by Sollya:
  // > Q = fpminimax(expm1(x)/x, 4, [|1, D...|], [-log(2)/64, log(2)/64]);
  // Then:
  //   e^dx ~ P(dx) = 1 + dx * Q(dx)
  //                = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5.
  static constexpr double COEFFS[4] = {
      0x1.ffffffffe5bc8p-2, 0x1.555555555cd67p-3, 0x1.5555c2a9b48b4p-5,
      0x1.11112a0e34bdbp-7};
  // Evaluates the polynomial P(dx) ~ e^dx described above.
  // dx is the reduced argument ("lo") left over after range reduction.
  static constexpr double powb_lo(double dx) {
    using fputil::multiply_add;
    double dx2 = dx * dx;
    // c0 = 1 + dx
    double c0 = 1.0 + dx;
    // c1 = COEFFS[0] + COEFFS[1] * dx
    double c1 = multiply_add(dx, ExpBase::COEFFS[1], ExpBase::COEFFS[0]);
    // c2 = COEFFS[2] + COEFFS[3] * dx
    double c2 = multiply_add(dx, ExpBase::COEFFS[3], ExpBase::COEFFS[2]);
    // r = c0 + c1 * dx^2 + c2 * dx^4
    //   = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[3] * dx^5
    return fputil::polyeval(dx2, c0, c1, c2);
  }
};
// Number of index bits for the LOG_P1 lookup tables.
constexpr int LOG_P1_BITS{6};
// Number of entries in those tables: 2^LOG_P1_BITS.
constexpr int LOG_P1_SIZE{1 << LOG_P1_BITS};
Expand All
@@ -55,65 +84,50 @@ extern const double LOG_P1_1_OVER[LOG_P1_SIZE];
extern const double K_LOG2_ODD[4 ];
extern const double K_LOG2_EVEN[4 ];
// The algorithm represents exp(x) as
//   exp(x) = 2^i * 2^(j / NUM_P) * exp(dx)
// where i is an integer and j is an integer in the range [-NUM_P/2, NUM_P/2).
// 2^(j / NUM_P) is taken from a table of values: 1.0 + EXP_M.
// exp(dx) is calculated by Taylor expansion.
// Inversion of ln(2). Multiplication by EXP_num_p due to sampling by 1 /
// EXP_num_p Precise value of the constant is not needed.
static constexpr double LN2_INV = 0x1 .71547652b82fep+0 * EXP_num_p;
// log2(e) * 2^4
static constexpr double LOG2_E_4 = 0x1.71547652b82fep+4;
// LN2_HIGH + LN2_LOW = -ln(2) / EXP_num_p, split into a high and a low part so
// that the sum carries more precision than a single double.
// The minus sign is there so the constants can be used in an FMA directly.
static constexpr double LN2_HIGH = -0x1.62e42fefa0000p-1 / EXP_num_p;
static constexpr double LN2_LOW = -0x1.cf79abc9e3b3ap-40 / EXP_num_p;
// High and low parts of -log(2) * 2^(-4)
static constexpr double M_LN2_4_HI = -0x1.62e42fefa0000p-5;
static constexpr double M_LN2_4_LO = -0x1.cf79abc9e3b3ap-44;
struct exe_eval_result_t {
// exp(x) = 2^MULT_POWER2 * mult_exp * (r + 1.0)
// where
// MULT_POWER2 template parameter;
// mult_exp = 2^e;
// r in range [~-0.3, ~0.41]
double mult_exp;
double r;
// Result of the range reduction for exp_b. For
//   b^x = 2^(mid + hi) * b^lo
// it carries the pair (2^(mid + hi), lo).
struct exp_b_reduc_t {
  // The factor 2^(mid + hi).
  double mh;
  // The reduced argument lo.
  double lo;
};
// The function correctly calculates the exp value with at least float
// precision in a range no narrower than [-log(2^-150), 90]
template <int MULT_POWER2 = 0 >
inline static exe_eval_result_t exp_eval (double x) {
double ps_dbl = fputil::nearest_integer (LN2_INV * x);
// Negative sign due to multiply_add optimization
double mult_e1, ml;
{
int ps =
static_cast <int >(ps_dbl) + (1 << (EXP_bits_p - 1 )) +
((fputil::FPBits<double >::EXPONENT_BIAS + MULT_POWER2) << EXP_bits_p);
int table_index = ps & (EXP_num_p - 1 );
fputil::FPBits<double > bs;
bs.set_unbiased_exponent (ps >> EXP_bits_p);
ml = EXP_2_POW[table_index];
mult_e1 = bs.get_val ();
}
double dx = fputil::multiply_add (ps_dbl, LN2_LOW,
fputil::multiply_add (ps_dbl, LN2_HIGH, x));
// Taylor series coefficients
double pe = dx * fputil::polyeval (dx, 1.0 , 0x1 .0p-1 , 0x1 .5555555555555p-3 ,
0x1 .5555555555555p-5 , 0x1 .1111111111111p-7 ,
0x1 .6c16c16c16c17p-10 );
double r = fputil::multiply_add (ml, pe, pe) + ml;
return {mult_e1, r};
// Range reduction step for b^x; the reduction supports computing b^x with at
// least float precision in a limited range.
// Range reduction:
//   b^x = 2^(hi + mid) * b^lo
// where:
//   x = (hi + mid) * log_b(2) + lo
//   hi is an integer,
//   0 <= mid * 2^MID_BITS < 2^MID_BITS is an integer
//   -2^(-MID_BITS - 1) <= lo * log2(b) <= 2^(-MID_BITS - 1)
// Base class needs to provide the following constants:
//   - MID_BITS    : number of bits after decimal points used for mid
//   - MID_MASK    : 2^MID_BITS - 1, mask to extract mid bits
//   - LOG2_B      : log2(b) * 2^MID_BITS for scaling
//   - M_LOGB_2_HI : high part of -log_b(2) * 2^(-MID_BITS)
//   - M_LOGB_2_LO : low part of -log_b(2) * 2^(-MID_BITS)
//   - EXP_2_MID   : look up table for bit fields of 2^mid
// Return:
//   { 2^(hi + mid), lo }
// NOTE(review): kd is converted to int without a range check; presumably
// callers filter out NaN, infinities and very large |x| beforehand -- confirm.
template <class Base> static inline exp_b_reduc_t exp_b_range_reduc(float x) {
  double xd = static_cast<double>(x);
  // kd = round(x * log2(b) * 2^MID_BITS) = (hi + mid) * 2^MID_BITS
  double kd = fputil::nearest_integer(Base::LOG2_B * xd);
  // k = kd as an integer
  int k = static_cast<int>(kd);
  // hi = floor(k * 2^(-MID_BITS))
  // exp_hi = hi shifted to the exponent field of double precision.
  // hi is negative for negative x, and left-shifting a negative signed value
  // is undefined before C++20, so the shift is done on the unsigned
  // (two's complement) representation instead.
  uint64_t exp_hi = static_cast<uint64_t>(k >> Base::MID_BITS)
                    << fputil::FloatProperties<double>::MANTISSA_WIDTH;
  // mh = 2^hi * 2^mid
  // mh_bits = bit field of mh; adding exp_hi modulo 2^64 adjusts the exponent
  // field of the table entry 2^mid by hi.
  uint64_t mh_bits =
      static_cast<uint64_t>(Base::EXP_2_MID[k & Base::MID_MASK]) + exp_hi;
  double mh = fputil::FPBits<double>(mh_bits).get_val();
  // dx = lo = x - (hi + mid) * log_b(2)
  //         = x + kd * (M_LOGB_2_HI + M_LOGB_2_LO)
  double dx = fputil::multiply_add(
      kd, Base::M_LOGB_2_LO, fputil::multiply_add(kd, Base::M_LOGB_2_HI, xd));
  return {mh, dx};
}
// The function correctly calculates sinh(x) and cosh(x) by calculating exp(x)
Expand All
@@ -122,17 +136,17 @@ inline static exe_eval_result_t exp_eval(double x) {
// reduction: find hi, mid, lo such that:
// x = (hi + mid) * log(2) + lo, in which
// hi is an integer,
// 0 <= mid * 2^4 < 16 is an integer
// -2^(-5 ) <= lo * log2(e) <= 2^-5 .
// 0 <= mid * 2^5 < 32 is an integer
// -2^(-6 ) <= lo * log2(e) <= 2^-6 .
// In particular,
// hi + mid = round(x * log2(e) * 2^4 ) * 2^(-4 ).
// hi + mid = round(x * log2(e) * 2^5 ) * 2^(-5 ).
// Then,
// e^x = 2^(hi + mid) * e^lo = 2^hi * 2^mid * e^lo.
// 2^mid is stored in the lookup table EXP_2_M of 16 elements.
// e^lo is computed using a degree-6 minimax polynomial
// 2^mid is stored in the lookup table of 32 elements.
// e^lo is computed using a degree-5 minimax polynomial
// generated by Sollya:
// e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c6 * lo^6
// = (1 + c2*lo^2 + c4*lo^4 + c6*lo^6 ) + lo * (1 + c3*lo^2 + c5*lo^4)
// e^lo ~ P(lo) = 1 + lo + c2 * lo^2 + ... + c5 * lo^5
// = (1 + c2*lo^2 + c4*lo^4) + lo * (1 + c3*lo^2 + c5*lo^4)
// = P_even + lo * P_odd
// We perform 2^hi * 2^mid by simply add hi to the exponent field
// of 2^mid.
Expand All
@@ -156,24 +170,25 @@ inline static exe_eval_result_t exp_eval(double x) {
template <bool is_sinh> static inline double exp_pm_eval (float x) {
double xd = static_cast <double >(x);
// round(x * log2(e) * 2^4 )
double kd = fputil::nearest_integer (LOG2_E_4 * xd);
// round(x * log2(e) * 2^5 )
double kd = fputil::nearest_integer (ExpBase::LOG2_B * xd);
// k_p = round(x * log2(e) * 2^4 )
// k_p = round(x * log2(e) * 2^5 )
int k_p = static_cast <int >(kd);
// k_m = round(-x * log2(e) * 2^4 )
// k_m = round(-x * log2(e) * 2^5 )
int k_m = -k_p;
// hi = floor(kf * 2^(-4 ))
// hi = floor(kf * 2^(-5 ))
// exp_hi = shift hi to the exponent field of double precision.
int64_t exp_hi_p = static_cast <int64_t >((k_p >> 4 ))
int64_t exp_hi_p = static_cast <int64_t >((k_p >> ExpBase::MID_BITS ))
<< fputil::FloatProperties<double >::MANTISSA_WIDTH;
int64_t exp_hi_m = static_cast <int64_t >((k_m >> 4 ))
int64_t exp_hi_m = static_cast <int64_t >((k_m >> ExpBase::MID_BITS ))
<< fputil::FloatProperties<double >::MANTISSA_WIDTH;
// mh = 2^hi * 2^mid
// mh_bits = bit field of mh
int64_t mh_bits_p = EXP_2_M[k_p & 15 ] + exp_hi_p;
int64_t mh_bits_m = EXP_2_M[k_m & 15 ] + exp_hi_m;
// mh_p = 2^(hi + mid)
// mh_m = 2^(-(hi + mid))
// mh_bits_* = bit field of mh_*
int64_t mh_bits_p = ExpBase::EXP_2_MID[k_p & ExpBase::MID_MASK] + exp_hi_p;
int64_t mh_bits_m = ExpBase::EXP_2_MID[k_m & ExpBase::MID_MASK] + exp_hi_m;
double mh_p = fputil::FPBits<double >(uint64_t (mh_bits_p)).get_val ();
double mh_m = fputil::FPBits<double >(uint64_t (mh_bits_m)).get_val ();
// mh_sum = 2^(hi + mid) + 2^(-(hi + mid))
Expand All
@@ -182,31 +197,18 @@ template <bool is_sinh> static inline double exp_pm_eval(float x) {
double mh_diff = mh_p - mh_m;
// dx = lo = x - (hi + mid) * log(2)
double dx = fputil::multiply_add (kd, M_LN2_4_LO,
fputil::multiply_add (kd, M_LN2_4_HI, xd));
double dx =
fputil::multiply_add (kd, ExpBase::M_LOGB_2_LO,
fputil::multiply_add (kd, ExpBase::M_LOGB_2_HI, xd));
double dx2 = dx * dx;
// Polynomials generated by Sollya with:
// Q = fpminimax(expm1(x)/x, 5, [|1, D...|], [-1/32*log(2), 1/32*log(2)]);
// Then:
// e^lo ~ P(dx) = 1 + dx + COEFFS[0] * dx^2 + ... + COEFFS[4] * dx^6.
constexpr double COEFFS[5 ] = {0x1 .fffffffffffep -2 , 0x1 .55555554ad3f3p-3 ,
0x1 .55555557179cap-5 , 0x1 .111228f3478c9p-7 ,
0x1 .6c161beccc69dp-10 };
// c0 = 1 + COEFFS[0] * lo^2
double c0 = fputil::multiply_add (dx2, COEFFS[0 ], 1.0 );
// c1 = 1 + COEFFS[0] * lo^2
double c1 = fputil::multiply_add (dx2, COEFFS[1 ], 1.0 );
// c2 = COEFFS[2] + COEFFS[4] * lo^2
double c2 = fputil::multiply_add (dx2, COEFFS[4 ], COEFFS[2 ]);
double dx4 = dx2 * dx2;
// P_even = c0 + c2 * lo^4
// = (1 + COEFFS[0] * lo^2) + lo^4 * (COEFFS[2] + COEFFS[4] * lo^2)
// = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4 + COEFFS[4] * lo^6
double p_even = fputil::multiply_add (dx4, c2, c0);
// P_odd = c1 + COEFFS[3] * lo^4
// = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4
double p_odd = fputil::multiply_add (dx4, COEFFS[3 ], c1);
// P_even = 1 + COEFFS[0] * lo^2 + COEFFS[2] * lo^4
double p_even =
fputil::polyeval (dx2, 1.0 , ExpBase::COEFFS[0 ], ExpBase::COEFFS[2 ]);
// P_odd = 1 + COEFFS[1] * lo^2 + COEFFS[3] * lo^4
double p_odd =
fputil::polyeval (dx2, 1.0 , ExpBase::COEFFS[1 ], ExpBase::COEFFS[3 ]);
double r;
if constexpr (is_sinh)
Expand Down