177 changes: 177 additions & 0 deletions libc/AOR_v20.02/math/exp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
/*
* Double-precision e^x function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"

/* Shorthand names for the shared __exp_data constants used by exp_inline.  */
/* Number of table slots; the table index is taken as ki % N.  */
#define N (1 << EXP_TABLE_BITS)
/* Argument-reduction constants.  NOTE(review): from their use in exp_inline
   these are presumably N/ln2 and the high/low parts of -ln2/N -- confirm
   against exp_data.c.  */
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
/* Added to z and subtracted again to obtain the integer k when
   TOINT_INTRINSICS is not set.  */
#define Shift __exp_data.shift
/* Table read in pairs: T[2*i] holds the tail bits, T[2*i+1] the scale bits.  */
#define T __exp_data.tab
/* Polynomial coefficients.  The index is offset by EXP_POLY_ORDER, e.g. C2 is
   poly[0] when EXP_POLY_ORDER == 5.  With EXP_POLY_ORDER == 6 the C2 index
   would be -1; that configuration uses the literal 0.5 instead of C2.  */
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]

/* Handle cases that may overflow or underflow when computing the result that
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflowed into the sign bit so that needs to be adjusted before using it as
   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.

   Parameters:
   tmp   - the (1+TMP) correction term computed by the caller.
   sbits - bit pattern of scale, possibly with an out-of-range exponent.
   ki    - integer k from the argument reduction; only bit 31 is inspected.

   Returns the rescaled result passed through check_oflow or check_uflow.
   NOTE(review): those helpers are declared outside this file; presumably they
   perform the errno/exception handling for overflow and underflow.  */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;

/* Bit 31 of ki is the sign bit of (int32_t)ki: clear means k >= 0.  */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
/* Lower the exponent field by 1009 so scale is a valid double, then undo
   that with the multiplication by 0x1p1009 below.  */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Raise the exponent field by 1022; the final multiplication by 0x1p-1022
   scales the result back down.  */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
/* lo collects what was lost when y = scale + scale * tmp was rounded.  */
lo = scale - y + scale * tmp;
/* Add 1.0 so the sum is rounded at the precision the final result will have
   after scaling by 0x1p-1022; lo is updated with the new rounding error.  */
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
/* -0.0 compares equal to 0.0, so this replaces it with +0.0.  */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}

/* Return bits 63..52 of the representation of X (the sign bit followed by the
   11 exponent bits) in the low 12 bits of the result.  */
static inline uint32_t
top12 (double x)
{
  const uint64_t repr = asuint64 (x);

  return repr >> 52;
}

/* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
   If hastail is 0 then xtail is assumed to be 0 too.

   Parameters:
   x       - main argument.
   xtail   - low-order part of the argument, added to the reduced value r;
             only read when hastail is nonzero.
   hastail - nonzero when xtail must be taken into account.

   Returns the result as a double.  Tiny, huge, infinite and NaN inputs are
   handled by the early-exit block at the top; inputs with 512 <= |x| < 1024
   are finished by specialcase because the scale exponent may leave the normal
   range.  */
static inline double
exp_inline (double x, double xtail, int hastail)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;

/* Exponent field of x with the sign bit masked off.  */
abstop = top12 (x) & 0x7ff;
/* One unsigned comparison covers both ends: the subtraction wraps around for
   |x| < 2^-54, so this is true when |x| < 2^-54 or |x| >= 512 (inf and nan
   included).  */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
/* The difference only reaches 0x80000000 when it wrapped, i.e. |x| < 2^-54.  */
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
/* |x| >= 1024, or x is inf or nan.  */
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
/* Only +inf and nan reach this test; 1.0 + x yields +inf or a nan.  */
if (abstop >= top12 (INFINITY))
return 1.0 + x;
/* Finite |x| >= 1024: the sign bit selects underflow or overflow.
   NOTE(review): the 0 argument presumably selects a positive result --
   confirm against __math_uflow / __math_oflow.  */
if (asuint64 (x) >> 63)
return __math_uflow (0);
else
return __math_oflow (0);
}
/* Large x is special cased below. */
/* abstop == 0 is the flag tested before the final scaling.  */
abstop = 0;
}

/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
/* Three ways to obtain k both as a floating-point value (kd) and as an
   integer bit pattern (ki), selected at compile time.  */
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
/* Adding Shift leaves k in the low bits of the representation, which are
   read as an integer before Shift is subtracted again.  */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
/* Subtract k*ln2/N in two parts (high and low) to limit the rounding error.  */
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
if (hastail)
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
/* Table entries come in pairs, hence the factor 2 in the index.  */
idx = 2 * (ki % N);
/* Shift the bits of k above the table index up to the exponent field.  */
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
/* NOTE(review): tmp is only assigned for EXP_POLY_ORDER 4, 5 or 6; any other
   value leaves it unset.  */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
/* abstop was zeroed above for 512 <= |x| < 1024.  */
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}

/* Double-precision e^x: runs exp_inline with the tail term disabled
   (xtail = 0, hastail = 0).  */
double
exp (double x)
{
  double result;

  result = exp_inline (x, 0, 0);
  return result;
}

/* Computes e^(x + xtail) by running exp_inline with the tail term enabled.
   May be useful for implementing pow, where more than double precision
   input is needed.  */
double
__exp_dd (double x, double xtail)
{
  double result;

  result = exp_inline (x, xtail, 1);
  return result;
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
/* When long double has a 53-bit mantissa, expl forwards to exp.  */
# if LDBL_MANT_DIG == 53
long double expl (long double x) { return exp (x); }
# endif
#endif
144 changes: 144 additions & 0 deletions libc/AOR_v20.02/math/exp2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
/*
* Double-precision 2^x function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"

/* Shorthand names for the shared __exp_data constants used by exp2.  */
/* Number of table slots; the table index is taken as ki % N.  */
#define N (1 << EXP_TABLE_BITS)
/* Added to x and subtracted again to split x into k/N and the remainder r.  */
#define Shift __exp_data.exp2_shift
/* Table read in pairs: T[2*i] holds the tail bits, T[2*i+1] the scale bits.  */
#define T __exp_data.tab
/* Polynomial coefficients; C1 multiplies r, C2 multiplies r^2, and so on.  */
#define C1 __exp_data.exp2_poly[0]
#define C2 __exp_data.exp2_poly[1]
#define C3 __exp_data.exp2_poly[2]
#define C4 __exp_data.exp2_poly[3]
#define C5 __exp_data.exp2_poly[4]
#define C6 __exp_data.exp2_poly[5]

/* Handle cases that may overflow or underflow when computing the result that
   is scale*(1+TMP) without intermediate rounding.  The bit representation of
   scale is in SBITS, however it has a computed exponent that may have
   overflowed into the sign bit so that needs to be adjusted before using it as
   a double.  (int32_t)KI is the k used in the argument reduction and exponent
   adjustment of scale, positive k here means the result may overflow and
   negative k means the result may underflow.

   Parameters:
   tmp   - the (1+TMP) correction term computed by the caller.
   sbits - bit pattern of scale, possibly with an out-of-range exponent.
   ki    - integer k from the argument reduction; only bit 31 is inspected.

   Returns the rescaled result passed through check_oflow or check_uflow.
   NOTE(review): those helpers are declared outside this file; presumably they
   perform the errno/exception handling for overflow and underflow.  */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;

/* Bit 31 of ki is the sign bit of (int32_t)ki: clear means k >= 0.  */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by 1. */
/* Lower the exponent field by one and compensate with the factor 2 below.  */
sbits -= 1ull << 52;
scale = asdouble (sbits);
y = 2 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Raise the exponent field by 1022; the final multiplication by 0x1p-1022
   scales the result back down.  */
sbits += 1022ull << 52;
scale = asdouble (sbits);
y = scale + scale * tmp;
if (y < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo;
/* lo collects what was lost when y = scale + scale * tmp was rounded.  */
lo = scale - y + scale * tmp;
/* Add 1.0 so the sum is rounded at the precision the final result will have
   after scaling by 0x1p-1022; lo is updated with the new rounding error.  */
hi = 1.0 + y;
lo = 1.0 - hi + y + lo;
y = eval_as_double (hi + lo) - 1.0;
/* Avoid -0.0 with downward rounding. */
/* -0.0 compares equal to 0.0, so this replaces it with +0.0.  */
if (WANT_ROUNDING && y == 0.0)
y = 0.0;
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}

/* Return bits 63..52 of the representation of X (the sign bit followed by the
   11 exponent bits) in the low 12 bits of the result.  */
static inline uint32_t
top12 (double x)
{
  const uint64_t repr = asuint64 (x);

  return repr >> 52;
}

/* Double-precision 2^x.

   x is split as k/N + r with integer k; 2^(k/N) comes from the table T and
   2^r from a polynomial in r.  Tiny, huge, infinite and NaN inputs are handled
   by the early-exit block at the top; inputs with |x| > 928 that are not
   rejected there are finished by specialcase.  */
double
exp2 (double x)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, r, r2, scale, tail, tmp;

/* Exponent field of x with the sign bit masked off.  */
abstop = top12 (x) & 0x7ff;
/* One unsigned comparison covers both ends: the subtraction wraps around for
   |x| < 2^-54, so this is true when |x| < 2^-54 or |x| >= 512 (inf and nan
   included).  */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
/* The difference only reaches 0x80000000 when it wrapped, i.e. |x| < 2^-54.  */
if (abstop - top12 (0x1p-54) >= 0x80000000)
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
return WANT_ROUNDING ? 1.0 + x : 1.0;
/* |x| >= 1024, or x is inf or nan.  */
if (abstop >= top12 (1024.0))
{
if (asuint64 (x) == asuint64 (-INFINITY))
return 0.0;
/* Only +inf and nan reach this test; 1.0 + x yields +inf or a nan.  */
if (abstop >= top12 (INFINITY))
return 1.0 + x;
/* Sign bit clear: x >= 1024, so the result overflows.  */
if (!(asuint64 (x) >> 63))
return __math_oflow (0);
/* For negative x a larger bit pattern means a more negative value, so this
   is x <= -1075.  Values in (-1075, -1024] fall through to the code below.  */
else if (asuint64 (x) >= asuint64 (-1075.0))
return __math_uflow (0);
}
/* Doubling the bit pattern shifts out the sign bit: this tests |x| > 928.  */
if (2 * asuint64 (x) > 2 * asuint64 (928.0))
/* Large x is special cased below. */
abstop = 0;
}

/* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */
/* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */
kd = eval_as_double (x + Shift);
ki = asuint64 (kd); /* k. */
kd -= Shift; /* k/N for int k. */
r = x - kd;
/* 2^(k/N) ~= scale * (1 + tail). */
/* Table entries come in pairs, hence the factor 2 in the index.  */
idx = 2 * (ki % N);
/* Shift the bits of k above the table index up to the exponent field.  */
top = ki << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.5/N ulp larger. */
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
/* NOTE(review): tmp is only assigned for EXP2_POLY_ORDER 4, 5 or 6; any other
   value leaves it unset.  */
#if EXP2_POLY_ORDER == 4
tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP2_POLY_ORDER == 5
tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP2_POLY_ORDER == 6
tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
/* abstop was zeroed above for |x| > 928.  */
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
/* When long double has a 53-bit mantissa, exp2l forwards to exp2.  */
# if LDBL_MANT_DIG == 53
long double exp2l (long double x) { return exp2 (x); }
# endif
#endif
81 changes: 81 additions & 0 deletions libc/AOR_v20.02/math/exp2f.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Single-precision 2^x function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.)
Wrong count: 168353 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/

/* Shorthand names for the shared __exp2f_data constants used by exp2f.  */
/* Number of table entries; the table index is taken as ki % N.  */
#define N (1 << EXP2F_TABLE_BITS)
/* Table of bit patterns from which 2^(k/N) is built.  */
#define T __exp2f_data.tab
/* Polynomial coefficients C[0] (r^3), C[1] (r^2) and C[2] (r).  */
#define C __exp2f_data.poly
/* Added to x and subtracted again to split x into k/N and the remainder r.  */
#define SHIFT __exp2f_data.shift_scaled

/* Return the top 12 bits (bits 31..20) of the representation of X: the sign
   bit, the 8 exponent bits and the 3 highest mantissa bits.  */
static inline uint32_t
top12 (float x)
{
  const uint32_t repr = asuint (x);

  return repr >> 20;
}

/* Single-precision 2^x.

   The computation is carried out in double_t: x is split as k/N + r with
   integer k, 2^(k/N) is built from the table T and 2^r comes from a cubic
   polynomial in r.  The product is converted to float at the end.  Inputs
   with |x| >= 128, infinities and NaNs are screened first.  */
float
exp2f (float x)
{
uint32_t abstop;
uint64_t ki, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;

xd = (double_t) x;
/* Top bits of x with the sign bit masked off.  */
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop >= top12 (128.0f)))
{
/* |x| >= 128 or x is nan. */
if (asuint (x) == asuint (-INFINITY))
return 0.0f;
/* Only +inf and nan reach this test; x + x yields +inf or a nan.  */
if (abstop >= top12 (INFINITY))
return x + x;
/* Any finite x >= 128 overflows the float range.  */
if (x > 0.0f)
return __math_oflowf (0);
if (x <= -150.0f)
return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -149.0f)
return __math_may_uflowf (0);
#endif
/* The remaining negative inputs fall through to the computation below.  */
}

/* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */
/* Adding SHIFT leaves k in the low bits of the representation, which are
   read as an integer before SHIFT is subtracted again.  */
kd = eval_as_double (xd + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT; /* k/N for int k. */
r = xd - kd;

/* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
/* Add the bits of k above the table index into the exponent field.  */
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
/* The polynomial is evaluated as (C0*r + C1)*r^2 + (C2*r + 1).  */
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (exp2f, __exp2f_finite)
hidden_alias (exp2f, __ieee754_exp2f)
#endif
79 changes: 79 additions & 0 deletions libc/AOR_v20.02/math/exp2f_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/*
* Shared data between expf, exp2f and powf.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Number of table entries; selects which of the variants below is compiled.  */
#define N (1 << EXP2F_TABLE_BITS)

/* Constant data for the single-precision exponential routines.  Every
   N-dependent member has one variant for each of N = 8, 16, 32 and 64;
   NOTE(review): no other value of N is provided for, which would leave those
   initializers empty.  */
const struct exp2f_data __exp2f_data = {
/* tab[i] = uint(2^(i/N)) - (i << 52-BITS)
used for computing 2^(k/N) for an int |k| < 150 N as
double(tab[k%N] + (k << 52-BITS)) */
.tab = {
#if N == 8
0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27,
0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487,
#elif N == 16
0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238,
0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429,
0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090,
0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da,
#elif N == 32
0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
#elif N == 64
0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8,
0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0,
0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6,
0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b,
0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7,
0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0,
0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da,
0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225,
0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9,
0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed,
0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50,
0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf,
0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2,
0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c,
0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6,
0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8,
#endif
},
/* Rounding shift divided by N; exp2f adds it to x to obtain k with
   x = k/N + r.  */
.shift_scaled = 0x1.8p+52 / N,
/* Cubic polynomial coefficients used by exp2f, highest order first
   (r^3, r^2, r).  */
.poly = {
#if N == 8
0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1,
#elif N == 16
0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1,
#elif N == 32
0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
#elif N == 64
0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1,
#endif
},
/* Rounding shift used by expf when TOINT_INTRINSICS is not set.  */
.shift = 0x1.8p+52,
/* N times 0x1.71547652b82fep+0 (about 1.4427, i.e. 1/ln2); expf multiplies
   x by this.  */
.invln2_scaled = 0x1.71547652b82fep+0 * N,
/* The .poly coefficients divided by N^3, N^2 and N respectively; used by
   expf, whose reduced argument r is N times larger.  */
.poly_scaled = {
#if N == 8
0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N,
#elif N == 16
0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N,
#elif N == 32
0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N,
#elif N == 64
0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N,
#endif
},
};
1,121 changes: 1,121 additions & 0 deletions libc/AOR_v20.02/math/exp_data.c

Large diffs are not rendered by default.

92 changes: 92 additions & 0 deletions libc/AOR_v20.02/math/expf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Single-precision e^x function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
EXP2F_TABLE_BITS = 5
EXP2F_POLY_ORDER = 3
ULP error: 0.502 (nearest rounding.)
Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.)
Wrong count: 170635 (all nearest rounding wrong results with fma.)
Non-nearest ULP error: 1 (rounded ULP error)
*/

/* Shorthand names for the shared __exp2f_data constants used by expf.  */
/* Number of table entries; the table index is taken as ki % N.  */
#define N (1 << EXP2F_TABLE_BITS)
/* Multiplier that turns x into z = x*N/ln2.  */
#define InvLn2N __exp2f_data.invln2_scaled
/* Table of bit patterns from which 2^(k/N) is built.  */
#define T __exp2f_data.tab
/* Polynomial coefficients C[0] (r^3), C[1] (r^2) and C[2] (r).  */
#define C __exp2f_data.poly_scaled

/* Return the top 12 bits (bits 31..20) of the representation of X: the sign
   bit, the 8 exponent bits and the 3 highest mantissa bits.  */
static inline uint32_t
top12 (float x)
{
  const uint32_t repr = asuint (x);

  return repr >> 20;
}

/* Single-precision e^x.

   The computation is carried out in double_t: z = x*N/ln2 is split as k + r
   with integer k, 2^(k/N) is built from the table T and 2^(r/N) comes from a
   cubic polynomial in r.  The product is converted to float at the end.
   Inputs with |x| >= 88, infinities and NaNs are screened first.  */
float
expf (float x)
{
uint32_t abstop;
uint64_t ki, t;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, xd, z, r, r2, y, s;

xd = (double_t) x;
/* Top bits of x with the sign bit masked off.  */
abstop = top12 (x) & 0x7ff;
if (unlikely (abstop >= top12 (88.0f)))
{
/* |x| >= 88 or x is nan. */
if (asuint (x) == asuint (-INFINITY))
return 0.0f;
/* Only +inf and nan reach this test; x + x yields +inf or a nan.  */
if (abstop >= top12 (INFINITY))
return x + x;
if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */
return __math_oflowf (0);
if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */
return __math_uflowf (0);
#if WANT_ERRNO_UFLOW
if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */
return __math_may_uflowf (0);
#endif
/* The remaining inputs fall through to the computation below.  */
}

/* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */
z = InvLn2N * xd;

/* Round and convert z to int, the result is in [-150*N, 128*N] and
ideally nearest int is used, otherwise the magnitude of r can be
bigger which gives larger approximation error. */
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#else
/* Adding SHIFT leaves k in the low bits of the representation, which are
   read as an integer before SHIFT is subtracted again.  */
# define SHIFT __exp2f_data.shift
kd = eval_as_double (z + SHIFT);
ki = asuint64 (kd);
kd -= SHIFT;
#endif
r = z - kd;

/* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */
t = T[ki % N];
/* Add the bits of k above the table index into the exponent field.  */
t += ki << (52 - EXP2F_TABLE_BITS);
s = asdouble (t);
/* z is reused here as a polynomial temporary: the polynomial is evaluated
   as (C0*r + C1)*r^2 + (C2*r + 1).  */
z = C[0] * r + C[1];
r2 = r * r;
y = C[2] * r + 1;
y = z * r2 + y;
y = y * s;
return eval_as_float (y);
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (expf, __expf_finite)
hidden_alias (expf, __ieee754_expf)
#endif
101 changes: 101 additions & 0 deletions libc/AOR_v20.02/math/include/mathlib.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Public API.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

/* Include guard.  */
#ifndef _MATHLIB_H
#define _MATHLIB_H

/* Scalar single-precision functions.  */
float expf (float);
float exp2f (float);
float logf (float);
float log2f (float);
float powf (float, float);
float sinf (float);
float cosf (float);
/* NOTE(review): presumably stores the sine and cosine of the first argument
   through the two pointers -- confirm against the implementation.  */
void sincosf (float, float*, float*);

/* Scalar double-precision functions.  */
double exp (double);
double exp2 (double);
double log (double);
double log2 (double);
double pow (double, double);

/* Scalar functions using the vector algorithm with identical result. */
float __s_sinf (float);
float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
float __s_exp2f (float);
float __s_exp2f_1u (float);
float __s_logf (float);
float __s_powf (float, float);
double __s_sin (double);
double __s_cos (double);
double __s_exp (double);
double __s_log (double);
double __s_pow (double, double);

/* The vector interfaces below are only declared for AArch64 targets.  */
#if __aarch64__
/* Vector type names: 4 x float and 2 x double.  GCC 5 or later uses the
   __Float32x4_t / __Float64x2_t types, clang 3.5 or later the
   neon_vector_type attribute; anything else is rejected.  */
#if __GNUC__ >= 5
typedef __Float32x4_t __f32x4_t;
typedef __Float64x2_t __f64x2_t;
#elif __clang_major__*100+__clang_minor__ >= 305
typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
#else
#error Unsupported compiler
#endif

/* Vector functions following the base PCS. */
__f32x4_t __v_sinf (__f32x4_t);
__f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
__f32x4_t __v_exp2f (__f32x4_t);
__f32x4_t __v_exp2f_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_sin (__f64x2_t);
__f64x2_t __v_cos (__f64x2_t);
__f64x2_t __v_exp (__f64x2_t);
__f64x2_t __v_log (__f64x2_t);
__f64x2_t __v_pow (__f64x2_t, __f64x2_t);

/* The vector-PCS variants are only declared for GCC 9 or later and clang 8
   or later.  */
#if __GNUC__ >= 9 || __clang_major__ >= 8
/* Marks a declaration as using the AArch64 vector procedure call standard.  */
#define __vpcs __attribute__((__aarch64_vector_pcs__))

/* Vector functions following the vector PCS. */
__vpcs __f32x4_t __vn_sinf (__f32x4_t);
__vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_sin (__f64x2_t);
__vpcs __f64x2_t __vn_cos (__f64x2_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
__vpcs __f64x2_t __vn_log (__f64x2_t);
__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);

/* Vector functions following the vector PCS using ABI names. */
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
/* End of the vector-PCS declarations.  */
#endif
/* End of the AArch64-only declarations.  */
#endif

/* End of the include guard.  */
#endif
163 changes: 163 additions & 0 deletions libc/AOR_v20.02/math/log.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
* Double-precision log(x) function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"

/* Shorthand names for the __log_data tables and constants used by log.  */
/* Per-subinterval entries with members invc and logc.  */
#define T __log_data.tab
/* Per-subinterval entries with members chi and clo; only read when
   HAVE_FAST_FMA is not set.  */
#define T2 __log_data.tab2
/* Polynomial coefficients for inputs close to 1.0.  */
#define B __log_data.poly1
/* Polynomial coefficients for the general path.  */
#define A __log_data.poly
/* Two-part constant multiplied by the exponent k (k*Ln2).  */
#define Ln2hi __log_data.ln2hi
#define Ln2lo __log_data.ln2lo
/* Number of subintervals, i.e. of table entries.  */
#define N (1 << LOG_TABLE_BITS)
/* Bit pattern of the double 0x1.6p-1; the reduced value z lies in
   [OFF, 2*OFF).  */
#define OFF 0x3fe6000000000000

/* Return bits 63..48 of the representation of X in the low 16 bits of the
   result.  */
static inline uint32_t
top16 (double x)
{
  const uint64_t repr = asuint64 (x);

  return repr >> 48;
}

/* Double-precision natural logarithm.

   Three paths:
   - x close to 1.0: a polynomial in r = x - 1 is evaluated directly.
   - zero, negative, subnormal, infinite or NaN x: handled as special cases
     (subnormals are rescaled and continue on the general path).
   - otherwise: x = 2^k * z, and log(x) = log1p(z/c - 1) + log(c) + k*Ln2
     with c taken from a lookup table.  */
double
log (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;

ix = asuint64 (x);
top = top16 (x);

/* LO and HI are the bit patterns bounding the interval around 1.0 that is
   handled by the dedicated polynomial; the interval depends on
   LOG_POLY1_ORDER.  */
#if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1p-5)
# define HI asuint64 (1.0 + 0x1.1p-5)
#elif LOG_POLY1_ORDER == 12
# define LO asuint64 (1.0 - 0x1p-4)
# define HI asuint64 (1.0 + 0x1.09p-4)
#endif
/* Unsigned range check: true exactly when LO <= ix < HI.  */
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
r2 = r * r;
r3 = r * r2;
#if LOG_POLY1_ORDER == 10
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8])));
w = B[0] * r2; /* B[0] == -0.5. */
/* hi = r + w is rounded; the rounding error r - hi + w is added to y before
   hi itself.  */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 11
/* Worst-case error is around 0.516 ULP. */
y = r3 * (B[1] + r * B[2]
+ r2 * (B[3] + r * B[4] + r2 * B[5]
+ r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9])));
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
#elif LOG_POLY1_ORDER == 12
y = r3 * (B[1] + r * B[2] + r2 * B[3]
+ r3 * (B[4] + r * B[5] + r2 * B[6]
+ r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10])));
# if N <= 64
/* Worst-case error is around 0.532 ULP. */
w = B[0] * r2; /* B[0] == -0.5. */
hi = r + w;
y += r - hi + w;
y += hi;
# else
/* Worst-case error is around 0.507 ULP. */
/* Split r into rhi + rlo; adding and subtracting r * 0x1p27 rounds away the
   low bits of r.  */
w = r * 0x1p27;
double_t rhi = r + w - w;
double_t rlo = r - rhi;
w = rhi * rhi * B[0]; /* B[0] == -0.5. */
hi = r + w;
lo = r - hi + w;
/* Adds B[0] * (r*r - rhi*rhi), the part of -0.5*r^2 not contained in w.  */
lo += B[0] * rlo * (rhi + r);
y += lo;
y += hi;
# endif
#endif
return eval_as_double (y);
}
/* True when top < 0x0010 (the subtraction wraps) or top >= 0x7ff0; the latter
   includes every negative x because the sign bit is 0x8000.  */
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
/* Doubling shifts out the sign bit, so this matches +0 and -0.  */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
/* Sign bit set (negative x) or exponent field all ones (nan).  */
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
/* Scale by 2^52, then take 52 back out of the exponent field.  */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}

/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
/* The top LOG_TABLE_BITS mantissa bits of tmp select the subinterval.  */
i = (tmp >> (52 - LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Remove k from the exponent field of ix to obtain the bits of z.  */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);

/* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
#else
/* rounding error: 0x1p-55/N + 0x1p-66. */
r = (z - T2[i].chi - T2[i].clo) * invc;
#endif
kd = (double_t) k;

/* hi + lo = r + log(c) + k*Ln2. */
w = kd * Ln2hi + logc;
hi = w + r;
lo = w - hi + r + kd * Ln2lo;

/* log(x) = lo + (log1p(r) - r) + hi. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
/* Worst case error if |y| > 0x1p-5:
0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma)
Worst case error if |y| > 0x1p-4:
0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */
/* NOTE(review): y is only assigned for LOG_POLY_ORDER 6 or 7; any other value
   leaves it unset on this path.  */
#if LOG_POLY_ORDER == 6
y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi;
#elif LOG_POLY_ORDER == 7
y = lo
+ r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3])
+ r2 * r2 * (A[4] + r * A[5]))
+ hi;
#endif
return eval_as_double (y);
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
/* When long double has a 53-bit mantissa, logl forwards to log.  */
# if LDBL_MANT_DIG == 53
long double logl (long double x) { return log (x); }
# endif
#endif
142 changes: 142 additions & 0 deletions libc/AOR_v20.02/math/log2.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
/*
* Double-precision log2(x) function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"

/* Shorthand names for the __log2_data tables and constants used by log2.  */
/* Per-subinterval entries with members invc and logc.  */
#define T __log2_data.tab
/* Per-subinterval entries with members chi and clo; only read when
   HAVE_FAST_FMA is not set.  */
#define T2 __log2_data.tab2
/* Polynomial coefficients for inputs close to 1.0.  */
#define B __log2_data.poly1
/* Polynomial coefficients for the general path.  */
#define A __log2_data.poly
/* Two-part constant multiplied by r to form r/ln2.  */
#define InvLn2hi __log2_data.invln2hi
#define InvLn2lo __log2_data.invln2lo
/* Number of subintervals, i.e. of table entries.  */
#define N (1 << LOG2_TABLE_BITS)
/* Bit pattern of the double 0x1.6p-1; the reduced value z lies in
   [OFF, 2*OFF).  */
#define OFF 0x3fe6000000000000

/* Return bits 63..48 of the representation of X in the low 16 bits of the
   result.  */
static inline uint32_t
top16 (double x)
{
  const uint64_t repr = asuint64 (x);

  return repr >> 48;
}

/* Double-precision base-2 logarithm.

   Three paths:
   - x close to 1.0: r = x - 1 is multiplied by the two-part 1/ln2 constant
     and corrected with a polynomial in r.
   - zero, negative, subnormal, infinite or NaN x: handled as special cases
     (subnormals are rescaled and continue on the general path).
   - otherwise: x = 2^k * z, and log2(x) = log2(z/c) + log2(c) + k with c
     taken from a lookup table.  */
double
log2 (double x)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p;
uint64_t ix, iz, tmp;
uint32_t top;
int k, i;

ix = asuint64 (x);
top = top16 (x);

/* LO and HI are the bit patterns bounding the interval around 1.0 that is
   handled by the dedicated polynomial.  They are only defined for
   LOG2_POLY1_ORDER == 11.  */
#if LOG2_POLY1_ORDER == 11
# define LO asuint64 (1.0 - 0x1.5b51p-5)
# define HI asuint64 (1.0 + 0x1.6ab2p-5)
#endif
/* Unsigned range check: true exactly when LO <= ix < HI.  */
if (unlikely (ix - LO < HI - LO))
{
/* Handle close to 1.0 inputs separately. */
/* Fix sign of zero with downward rounding when x==1. */
if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0)))
return 0;
r = x - 1.0;
/* hi + lo ~= r/ln2, computed either with fma or by splitting r.  */
#if HAVE_FAST_FMA
hi = r * InvLn2hi;
lo = r * InvLn2lo + fma (r, InvLn2hi, -hi);
#else
double_t rhi, rlo;
/* rhi keeps the upper 32 bits of the representation of r, rlo the rest.
   NOTE(review): presumably chosen so that rhi * InvLn2hi is exact.  */
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
hi = rhi * InvLn2hi;
lo = rlo * InvLn2hi + r * InvLn2lo;
#endif
r2 = r * r; /* rounding error: 0x1p-62. */
r4 = r2 * r2;
#if LOG2_POLY1_ORDER == 11
/* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */
p = r2 * (B[0] + r * B[1]);
/* y = hi + p is rounded; the rounding error hi - y + p is folded into lo.  */
y = hi + p;
lo += hi - y + p;
lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5])
+ r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9])));
y += lo;
#endif
return eval_as_double (y);
}
/* True when top < 0x0010 (the subtraction wraps) or top >= 0x7ff0; the latter
   includes every negative x because the sign bit is 0x8000.  */
if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010))
{
/* x < 0x1p-1022 or inf or nan. */
/* Doubling shifts out the sign bit, so this matches +0 and -0.  */
if (ix * 2 == 0)
return __math_divzero (1);
if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */
return x;
/* Sign bit set (negative x) or exponent field all ones (nan).  */
if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0)
return __math_invalid (x);
/* x is subnormal, normalize it. */
/* Scale by 2^52, then take 52 back out of the exponent field.  */
ix = asuint64 (x * 0x1p52);
ix -= 52ULL << 52;
}

/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
/* The top LOG2_TABLE_BITS mantissa bits of tmp select the subinterval.  */
i = (tmp >> (52 - LOG2_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Remove k from the exponent field of ix to obtain the bits of z.  */
iz = ix - (tmp & 0xfffULL << 52);
invc = T[i].invc;
logc = T[i].logc;
z = asdouble (iz);
kd = (double_t) k;

/* log2(x) = log2(z/c) + log2(c) + k. */
/* r ~= z/c - 1, |r| < 1/(2*N). */
#if HAVE_FAST_FMA
/* rounding error: 0x1p-55/N. */
r = fma (z, invc, -1.0);
t1 = r * InvLn2hi;
t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1);
#else
double_t rhi, rlo;
/* rounding error: 0x1p-55/N + 0x1p-65. */
r = (z - T2[i].chi - T2[i].clo) * invc;
/* rhi keeps the upper 32 bits of the representation of r, rlo the rest.  */
rhi = asdouble (asuint64 (r) & -1ULL << 32);
rlo = r - rhi;
t1 = rhi * InvLn2hi;
t2 = rlo * InvLn2hi + r * InvLn2lo;
#endif

/* hi + lo = r/ln2 + log2(c) + k. */
t3 = kd + logc;
hi = t3 + t1;
lo = t3 - hi + t1 + t2;

/* log2(r+1) = r/ln2 + r^2*poly(r). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r; /* rounding error: 0x1p-54/N^2. */
r4 = r2 * r2;
/* NOTE(review): y is only assigned for LOG2_POLY_ORDER == 7; any other value
   leaves it unset on this path.  */
#if LOG2_POLY_ORDER == 7
/* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma).
~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */
p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]);
y = lo + r2 * p + hi;
#endif
return eval_as_double (y);
}
/* Extra symbol names, only emitted when USE_GLIBC_ABI is set.
   NOTE(review): strong_alias and hidden_alias are defined outside this file;
   presumably they create the second name as an alias of the first.  */
#if USE_GLIBC_ABI
strong_alias (log2, __log2_finite)
hidden_alias (log2, __ieee754_log2)
/* When long double has a 53-bit mantissa, log2l forwards to log2.  */
# if LDBL_MANT_DIG == 53
long double log2l (long double x) { return log2 (x); }
# endif
#endif
210 changes: 210 additions & 0 deletions libc/AOR_v20.02/math/log2_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,210 @@
/*
* Data for log2.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Number of subintervals of the reduced range (see the algorithm note before
.tab); the table and polynomial initializers below are only provided for
N == 64. */
#define N (1 << LOG2_TABLE_BITS)

/* Constant data (coefficients and lookup tables) for a table-driven log2.
The field layout comes from struct log2_data, which is declared outside this
file. */
const struct log2_data __log2_data = {
// First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0
// It is stored split in two: invln2hi + invln2lo reproduces the value above.
.invln2hi = 0x1.7154765200000p+0,
.invln2lo = 0x1.705fc2eefa200p-33,
// poly1 is only populated when LOG2_POLY1_ORDER == 11.
// NOTE(review): presumably the polynomial for the special-cased region near
// x == 1 mentioned at the end of the algorithm note below -- confirm in log2.c.
.poly1 = {
#if LOG2_POLY1_ORDER == 11
// relative error: 0x1.2fad8188p-63
// in -0x1.5b51p-5 0x1.6ab2p-5
-0x1.71547652b82fep-1,
0x1.ec709dc3a03f7p-2,
-0x1.71547652b7c3fp-2,
0x1.2776c50f05be4p-2,
-0x1.ec709dd768fe5p-3,
0x1.a61761ec4e736p-3,
-0x1.7153fbc64a79bp-3,
0x1.484d154f01b4ap-3,
-0x1.289e4a72c383cp-3,
0x1.0b32f285aee66p-3,
#endif
},
// poly is only populated when N == 64 and LOG2_POLY_ORDER == 7; it is the
// poly(z/c - 1) of the algorithm note below.
.poly = {
#if N == 64 && LOG2_POLY_ORDER == 7
// relative error: 0x1.a72c2bf8p-58
// abs error: 0x1.67a552c8p-66
// in -0x1.f45p-8 0x1.f45p-8
-0x1.71547652b8339p-1,
0x1.ec709dc3a04bep-2,
-0x1.7154764702ffbp-2,
0x1.2776c50034c48p-2,
-0x1.ec7b328ea92bcp-3,
0x1.a6225e117f92ep-3,
#endif
},
/* Algorithm:
x = 2^k z
log2(x) = k + log2(c) + log2(z/c)
log2(z/c) = poly(z/c - 1)
where z is in [1.6p-1; 1.6p0] which is split into N subintervals and z falls
into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = (double)log2(c)
tab2[i].chi = (double)c
tab2[i].clo = (double)(c - (double)c)
where c is near the center of the subinterval and is chosen by trying +-2^29
floating point invc candidates around 1/center and selecting one for which
1) the rounding error in 0x1.8p10 + logc is 0,
2) the rounding error in z - chi - clo is < 0x1p-64 and
3) the rounding error in (double)log2(c) is minimized (< 0x1p-68).
Note: 1) ensures that k + logc can be computed without rounding error, 2)
ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a
single rounding error when there is no fast fma for z*invc - 1, 3) ensures
that logc + poly(z/c - 1) has small error, however near x == 1 when
|log2(x)| < 0x1p-4, this is not enough so that is special cased. */
// Each entry below is a pair whose values are consistent with {invc, logc}
// as defined in the note above (first value ~1/c, second ~log2(c)).
.tab = {
#if N == 64
{0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1},
{0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1},
{0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1},
{0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2},
{0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2},
{0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2},
{0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2},
{0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2},
{0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2},
{0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2},
{0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2},
{0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2},
{0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2},
{0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2},
{0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2},
{0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2},
{0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2},
{0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2},
{0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2},
{0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2},
{0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3},
{0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3},
{0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3},
{0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3},
{0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3},
{0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3},
{0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3},
{0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3},
{0x1.19453847f2200p+0, -0x1.162595afdc000p-3},
{0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4},
{0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4},
{0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4},
{0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4},
{0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4},
{0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4},
{0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5},
{0x1.07325cac53b83p+0, -0x1.47a954f770000p-5},
{0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6},
{0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6},
{0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8},
{0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7},
{0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5},
{0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5},
{0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4},
{0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4},
{0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4},
{0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3},
{0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3},
{0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3},
{0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3},
{0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3},
{0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3},
{0x1.ac57026295039p-1, 0x1.0790ab4678000p-2},
{0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2},
{0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2},
{0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2},
{0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2},
{0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2},
{0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2},
{0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2},
{0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2},
{0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2},
{0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2},
{0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2},
#endif
},
// tab2 is only needed (and only compiled in) without a fast fma; each entry
// is consistent with the {chi, clo} split of c described in the note above.
#if !HAVE_FAST_FMA
.tab2 = {
# if N == 64
{0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55},
{0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57},
{0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55},
{0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55},
{0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55},
{0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56},
{0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56},
{0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57},
{0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55},
{0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57},
{0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55},
{0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55},
{0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56},
{0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56},
{0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56},
{0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55},
{0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57},
{0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55},
{0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55},
{0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58},
{0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55},
{0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58},
{0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56},
{0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56},
{0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57},
{0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56},
{0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56},
{0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55},
{0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58},
{0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56},
{0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55},
{0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56},
{0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55},
{0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56},
{0x1.ea00027edc00cp-1, -0x1.c848309459811p-55},
{0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55},
{0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55},
{0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59},
{0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58},
{0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55},
{0x1.0200004292367p+0, 0x1.b7ff365324681p-54},
{0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55},
{0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58},
{0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54},
{0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55},
{0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54},
{0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54},
{0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54},
{0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55},
{0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55},
{0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56},
{0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54},
{0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56},
{0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54},
{0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56},
{0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54},
{0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56},
{0x1.460000d387cb1p+0, 0x1.20837856599a6p-55},
{0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55},
{0x1.4e000043543f3p+0, -0x1.81125ed175329p-56},
{0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54},
{0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55},
{0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55},
{0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54},
# endif
},
#endif /* !HAVE_FAST_FMA */
};
81 changes: 81 additions & 0 deletions libc/AOR_v20.02/math/log2f.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Single-precision log2 function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
LOG2F_TABLE_BITS = 4
LOG2F_POLY_ORDER = 4
ULP error: 0.752 (nearest rounding.)
Relative error: 1.9 * 2^-26 (before rounding.)
*/

/* Number of subintervals the reduced range is split into; used as the modulus
for the table index in log2f. */
#define N (1 << LOG2F_TABLE_BITS)
/* Lookup table; log2f reads the invc and logc fields of each entry. */
#define T __log2f_data.tab
/* Polynomial coefficients A[0]..A[3] used by log2f. */
#define A __log2f_data.poly
/* Float bit pattern subtracted from the input bits before the exponent and
table index are extracted; the reduced argument z lies in [OFF,2*OFF]. */
#define OFF 0x3f330000

/* Single-precision base-2 logarithm.

   Special inputs are dispatched first: zero goes to __math_divzerof (1),
   +inf is returned as is, negative values and NaN go to __math_invalidf,
   and subnormals are rescaled by 2^23 before continuing.  The remaining
   path writes x = 2^k * z, looks up a table entry c near z and evaluates
   log2(x) = log1p(z/c - 1)/ln2 + log2(c) + k.  */
float
log2f (float x)
{
  uint32_t ix = asuint (x);

#if WANT_ROUNDING
  /* Fix sign of zero with downward rounding when x==1.  */
  if (unlikely (ix == 0x3f800000))
    return 0;
#endif
  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
    {
      /* Reached for x < 0x1p-126, inf or nan.  */
      if (ix * 2 == 0)
	return __math_divzerof (1);
      /* log2(inf) == inf.  */
      if (ix == 0x7f800000)
	return x;
      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
	return __math_invalidf (x);
      /* Subnormal input: scale it into the normal range and compensate
	 in the exponent field.  */
      ix = asuint (x * 0x1p23f);
      ix -= 23 << 23;
    }

  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
     The range is split into N subintervals.
     The ith subinterval contains z and c is near its center.  */
  uint32_t rel = ix - OFF;
  int idx = (rel >> (23 - LOG2F_TABLE_BITS)) % N;
  uint32_t expo_bits = rel & 0xff800000;
  uint32_t z_bits = ix - expo_bits;
  int expo = (int32_t) rel >> 23; /* relies on arithmetic shift */

  /* double_t is used for better performance on targets with
     FLT_EVAL_METHOD==2.  */
  double_t inv_c = T[idx].invc;
  double_t log2_c = T[idx].logc;
  double_t z = (double_t) asfloat (z_bits);

  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
  double_t r = z * inv_c - 1;
  double_t base = log2_c + (double_t) expo;

  /* Pipelined polynomial evaluation to approximate log1p(r)/ln2.  */
  double_t rsq = r * r;
  double_t acc = A[1] * r + A[2];
  acc = A[0] * rsq + acc;
  double_t lin = A[3] * r + base;
  acc = acc * rsq + lin;
  return eval_as_float (acc);
}
#if USE_GLIBC_ABI
strong_alias (log2f, __log2f_finite)
hidden_alias (log2f, __ieee754_log2f)
#endif
34 changes: 34 additions & 0 deletions libc/AOR_v20.02/math/log2f_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Data definition for log2f.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Constant data for log2f: a 16-entry lookup table and the polynomial
coefficients.  The layout comes from struct log2f_data, declared outside this
file. */
const struct log2f_data __log2f_data = {
/* One entry per subinterval; log2f reads the two members as invc and logc
(the values are consistent with {1/c, log2(c)}). */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 },
},
/* Coefficients in the order log2f consumes them: poly[0..3] multiply
r^4, r^3, r^2 and r respectively. */
.poly = {
-0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1,
0x1.715475f35c8b8p0,
}
};
512 changes: 512 additions & 0 deletions libc/AOR_v20.02/math/log_data.c

Large diffs are not rendered by default.

80 changes: 80 additions & 0 deletions libc/AOR_v20.02/math/logf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Single-precision log function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
LOGF_TABLE_BITS = 4
LOGF_POLY_ORDER = 4
ULP error: 0.818 (nearest rounding.)
Relative error: 1.957 * 2^-26 (before rounding.)
*/

/* Lookup table; logf reads the invc and logc fields of each entry. */
#define T __logf_data.tab
/* Polynomial coefficients A[0]..A[2] used by logf. */
#define A __logf_data.poly
/* ln(2), multiplied by the exponent k in logf. */
#define Ln2 __logf_data.ln2
/* Number of subintervals the reduced range is split into; used as the modulus
for the table index in logf. */
#define N (1 << LOGF_TABLE_BITS)
/* Float bit pattern subtracted from the input bits before the exponent and
table index are extracted; the reduced argument z lies in [OFF,2*OFF]. */
#define OFF 0x3f330000

/* Single-precision natural logarithm.

   Special inputs are dispatched first: zero goes to __math_divzerof (1),
   +inf is returned as is, negative values and NaN go to __math_invalidf,
   and subnormals are rescaled by 2^23 before continuing.  The remaining
   path writes x = 2^k * z, looks up a table entry c near z and evaluates
   log(x) = log1p(z/c - 1) + log(c) + k*Ln2 with a short polynomial.  */
float
logf (float x)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t z, r, r2, y, y0, invc, logc;
  uint32_t ix, iz, tmp;
  int k, i;

  ix = asuint (x);
#if WANT_ROUNDING
  /* Fix sign of zero with downward rounding when x==1.  */
  if (unlikely (ix == 0x3f800000))
    return 0;
#endif
  if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000))
    {
      /* x < 0x1p-126 or inf or nan.  */
      if (ix * 2 == 0)
	return __math_divzerof (1);
      if (ix == 0x7f800000) /* log(inf) == inf.  */
	return x;
      if ((ix & 0x80000000) || ix * 2 >= 0xff000000)
	return __math_invalidf (x);
      /* x is subnormal, normalize it.  */
      ix = asuint (x * 0x1p23f);
      ix -= 23 << 23;
    }

  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
     The range is split into N subintervals.
     The ith subinterval contains z and c is near its center.  */
  tmp = ix - OFF;
  i = (tmp >> (23 - LOGF_TABLE_BITS)) % N;
  k = (int32_t) tmp >> 23; /* arithmetic shift */
  /* Sign+exponent mask.  It is spelled as an unsigned constant because
     0x1ff << 23 is evaluated in int and its value does not fit in a 32-bit
     int, which is undefined behaviour; log2f uses the same constant.  */
  iz = ix - (tmp & 0xff800000);
  invc = T[i].invc;
  logc = T[i].logc;
  z = (double_t) asfloat (iz);

  /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */
  r = z * invc - 1;
  y0 = logc + (double_t) k * Ln2;

  /* Pipelined polynomial evaluation to approximate log1p(r).  */
  r2 = r * r;
  y = A[1] * r + A[2];
  y = A[0] * r2 + y;
  y = y * r2 + (y0 + r);
  return eval_as_float (y);
}
#if USE_GLIBC_ABI
strong_alias (logf, __logf_finite)
hidden_alias (logf, __ieee754_logf)
#endif
34 changes: 34 additions & 0 deletions libc/AOR_v20.02/math/logf_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/*
* Data definition for logf.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Constant data for logf: a 16-entry lookup table, ln(2) and the polynomial
coefficients.  The layout comes from struct logf_data, declared outside this
file. */
const struct logf_data __logf_data = {
/* One entry per subinterval; logf reads the two members as invc and logc
(the values are consistent with {1/c, log(c)}). */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 },
{ 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 },
{ 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 },
{ 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 },
{ 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 },
{ 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 },
{ 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 },
{ 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 },
{ 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 },
{ 0x1p+0, 0x0p+0 },
{ 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 },
{ 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 },
{ 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 },
{ 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 },
{ 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 },
{ 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 },
},
/* Multiplied by the exponent k in logf. */
.ln2 = 0x1.62e42fefa39efp-1,
/* Coefficients in the order logf consumes them: poly[0..2] multiply
r^4, r^3 and r^2; logf adds the linear term r itself. */
.poly = {
-0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2,
}
};
420 changes: 420 additions & 0 deletions libc/AOR_v20.02/math/math_config.h

Large diffs are not rendered by default.

81 changes: 81 additions & 0 deletions libc/AOR_v20.02/math/math_err.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Double-precision math error handling.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

#if WANT_ERRNO
#include <errno.h>
/* Record CODE in errno and hand RESULT back unchanged.  Kept out of line
   (NOINLINE) to reduce code size and so that inlined error handling does
   not make the math functions non-leaf.  */
NOINLINE static double
with_errno (double result, int code)
{
  errno = code;
  return result;
}
#else
/* errno reporting is compiled out: yield the value and drop the code.  */
#define with_errno(val, code) (val)
#endif

/* Shared overflow/underflow helper: evaluates (SIGN ? -Y : Y) * Y at run
   time (the barrier hides one operand from the optimizer) and reports
   ERANGE through with_errno.  NOINLINE reduces code size.  */
NOINLINE static double
xflow (uint32_t sign, double y)
{
  double signed_y = sign ? -y : y;

  return with_errno (eval_as_double (opt_barrier_double (signed_y) * y),
		     ERANGE);
}

/* Underflow result with the requested sign: xflow multiplies +-2^-767 by
   2^-767, a product far below the double subnormal range.  */
HIDDEN double
__math_uflow (uint32_t sign)
{
  const double tiny = 0x1p-767;

  return xflow (sign, tiny);
}

#if WANT_ERRNO_UFLOW
/* Result that underflows to zero only in some non-nearest rounding modes.
   Setting errno is valid even when the result is non-zero, as long as it
   is in the subnormal range.  */
HIDDEN double
__math_may_uflow (uint32_t sign)
{
  const double borderline = 0x1.8p-538;

  return xflow (sign, borderline);
}
#endif

/* Overflow result with the requested sign: xflow multiplies +-2^769 by
   2^769, a product beyond the largest finite double.  */
HIDDEN double
__math_oflow (uint32_t sign)
{
  const double huge = 0x1p769;

  return xflow (sign, huge);
}

/* Divide-by-zero result: computes +-1.0 / 0.0 at run time (negative when
   SIGN is non-zero) and reports ERANGE through with_errno.  */
HIDDEN double
__math_divzero (uint32_t sign)
{
  double numerator = sign ? -1.0 : 1.0;
  double quotient = opt_barrier_double (numerator) / 0.0;

  return with_errno (quotient, ERANGE);
}

/* Invalid-operation result: evaluates (x - x) / (x - x).  When X is already
   a NaN the value is returned without touching errno; otherwise EDOM is
   reported through with_errno.  */
HIDDEN double
__math_invalid (double x)
{
  double y = (x - x) / (x - x);

  if (isnan (x))
    return y;
  return with_errno (y, EDOM);
}

/* Check result and set errno if necessary. */

/* Pass Y through, reporting ERANGE via with_errno when it is zero.  */
HIDDEN double
__math_check_uflow (double y)
{
  if (y == 0.0)
    return with_errno (y, ERANGE);
  return y;
}

/* Pass Y through, reporting ERANGE via with_errno when it is infinite.  */
HIDDEN double
__math_check_oflow (double y)
{
  if (isinf (y))
    return with_errno (y, ERANGE);
  return y;
}
67 changes: 67 additions & 0 deletions libc/AOR_v20.02/math/math_errf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/*
* Single-precision math error handling.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

#if WANT_ERRNO
#include <errno.h>
/* Record CODE in errno and hand RESULT back unchanged.  Kept out of line
   (NOINLINE) to reduce code size and so that inlined error handling does
   not make the math functions non-leaf.  */
NOINLINE static float
with_errnof (float result, int code)
{
  errno = code;
  return result;
}
#else
/* errno reporting is compiled out: yield the value and drop the code.  */
#define with_errnof(val, code) (val)
#endif

/* Shared overflow/underflow helper: evaluates (SIGN ? -Y : Y) * Y at run
   time (the barrier hides one operand from the optimizer) and reports
   ERANGE through with_errnof.  NOINLINE reduces code size.  */
NOINLINE static float
xflowf (uint32_t sign, float y)
{
  float signed_y = sign ? -y : y;

  return with_errnof (eval_as_float (opt_barrier_float (signed_y) * y),
		      ERANGE);
}

/* Underflow result with the requested sign: xflowf multiplies +-2^-95 by
   2^-95, a product far below the float subnormal range.  */
HIDDEN float
__math_uflowf (uint32_t sign)
{
  const float tiny = 0x1p-95f;

  return xflowf (sign, tiny);
}

#if WANT_ERRNO_UFLOW
/* Result that underflows to zero only in some non-nearest rounding modes.
   Setting errno is valid even when the result is non-zero, as long as it
   is in the subnormal range.  */
HIDDEN float
__math_may_uflowf (uint32_t sign)
{
  const float borderline = 0x1.4p-75f;

  return xflowf (sign, borderline);
}
#endif

/* Overflow result with the requested sign: xflowf multiplies +-2^97 by
   2^97, a product beyond the largest finite float.  */
HIDDEN float
__math_oflowf (uint32_t sign)
{
  const float huge = 0x1p97f;

  return xflowf (sign, huge);
}

/* Divide-by-zero result: computes +-1.0f / 0.0f at run time (negative when
   SIGN is non-zero) and reports ERANGE through with_errnof.  */
HIDDEN float
__math_divzerof (uint32_t sign)
{
  float numerator = sign ? -1.0f : 1.0f;
  float quotient = opt_barrier_float (numerator) / 0.0f;

  return with_errnof (quotient, ERANGE);
}

/* Invalid-operation result: evaluates (x - x) / (x - x).  When X is already
   a NaN the value is returned without touching errno; otherwise EDOM is
   reported through with_errnof.  */
HIDDEN float
__math_invalidf (float x)
{
  float y = (x - x) / (x - x);

  if (isnan (x))
    return y;
  return with_errnof (y, EDOM);
}
381 changes: 381 additions & 0 deletions libc/AOR_v20.02/math/pow.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,381 @@
/*
* Double-precision x^y function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <float.h>
#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53)
relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma)
ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma)
*/

/* Shorthands for the log part of pow (used by log_inline). */
/* Lookup table; log_inline reads invc, logc and logctail from each entry. */
#define T __pow_log_data.tab
/* Polynomial coefficients; A[0] is -0.5 according to log_inline. */
#define A __pow_log_data.poly
/* High and low parts of ln(2), each multiplied by the exponent k. */
#define Ln2hi __pow_log_data.ln2hi
#define Ln2lo __pow_log_data.ln2lo
/* Number of subintervals; used as the modulus for the table index. */
#define N (1 << POW_LOG_TABLE_BITS)
/* Bit pattern subtracted from ix before the exponent and table index are
extracted; the reduced argument z lies in [OFF,2*OFF). */
#define OFF 0x3fe6955500000000

/* Return the 12 most significant bits of X's bit representation, i.e. the
   sign bit followed by the 11 exponent bits.  */
static inline uint32_t
top12 (double x)
{
  uint64_t bits = asuint64 (x);

  return bits >> 52;
}

/* Compute y+TAIL = log(x): the rounded result y is returned and TAIL, which
carries about 15 additional bits of precision, is stored through *tail.
IX is the bit representation of x, but normalized in the subnormal range
using the sign bit for the exponent. */
static inline double_t
log_inline (uint64_t ix, double_t *tail)
{
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p;
uint64_t iz, tmp;
int k, i;

/* x = 2^k z; where z is in range [OFF,2*OFF) and exact.
The range is split into N subintervals.
The ith subinterval contains z and c is near its center. */
tmp = ix - OFF;
i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N;
k = (int64_t) tmp >> 52; /* arithmetic shift */
/* Remove the sign/exponent part of tmp from ix, leaving the bits of z. */
iz = ix - (tmp & 0xfffULL << 52);
z = asdouble (iz);
kd = (double_t) k;

/* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */
invc = T[i].invc;
logc = T[i].logc;
logctail = T[i].logctail;

/* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and
|z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */
#if HAVE_FAST_FMA
r = fma (z, invc, -1.0);
#else
/* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */
/* zhi is z rounded to its upper 32 bits: add half of the dropped range, then
clear the low 32 bits. */
double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32));
double_t zlo = z - zhi;
double_t rhi = zhi * invc - 1.0;
double_t rlo = zlo * invc;
r = rhi + rlo;
#endif

/* k*Ln2 + log(c) + r. */
/* t2 is the running high part; lo1 and lo2 collect the low-order terms and
the rounding error of t1 + r. */
t1 = kd * Ln2hi + logc;
t2 = t1 + r;
lo1 = kd * Ln2lo + logctail;
lo2 = t1 - t2 + r;

/* Evaluation is optimized assuming superscalar pipelined execution. */
double_t ar, ar2, ar3, lo3, lo4;
ar = A[0] * r; /* A[0] = -0.5. */
ar2 = r * ar;
ar3 = r * ar2;
/* k*Ln2 + log(c) + r + A[0]*r*r. */
/* lo3 and lo4 are the correction terms of the product and of the sum. */
#if HAVE_FAST_FMA
hi = t2 + ar2;
lo3 = fma (ar, r, -ar2);
lo4 = t2 - hi + ar2;
#else
double_t arhi = A[0] * rhi;
double_t arhi2 = rhi * arhi;
hi = t2 + arhi2;
lo3 = rlo * (ar + arhi);
lo4 = t2 - hi + arhi2;
#endif
/* p = log1p(r) - r - A[0]*r*r. */
#if POW_LOG_POLY_ORDER == 8
p = (ar3
* (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6]))));
#endif
/* Sum all low-order parts, then split hi + lo into result and tail. */
lo = lo1 + lo2 + lo3 + lo4 + p;
y = hi + lo;
*tail = hi - y + lo;
return y;
}

/* From here on N and T refer to the exp tables instead of the pow-log
tables, so the earlier definitions are dropped first. */
#undef N
#undef T
/* Shorthands for the exp part of pow (used by exp_inline). */
#define N (1 << EXP_TABLE_BITS)
/* Argument-reduction constants: z = InvLn2N * x and
r = x + kd*NegLn2hiN + kd*NegLn2loN. */
#define InvLn2N __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
/* Added to z and then subtracted again to round it to an integer. */
#define Shift __exp_data.shift
/* Table read in pairs: T[idx] holds the bits of tail, T[idx + 1] is added to
the exponent bits to form sbits. */
#define T __exp_data.tab
/* Polynomial coefficients; the index offset depends on EXP_POLY_ORDER. */
#define C2 __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3 __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4 __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5 __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6 __exp_data.poly[9 - EXP_POLY_ORDER]

/* Handle cases that may overflow or underflow when computing the result that
is scale*(1+TMP) without intermediate rounding. The bit representation of
scale is in SBITS, however it has a computed exponent that may have
overflowed into the sign bit so that needs to be adjusted before using it as
a double. (int32_t)KI is the k used in the argument reduction and exponent
adjustment of scale, positive k here means the result may overflow and
negative k means the result may underflow.
The result goes through check_oflow or check_uflow before being returned. */
static inline double
specialcase (double_t tmp, uint64_t sbits, uint64_t ki)
{
double_t scale, y;

/* Bit 31 of ki is the sign of (int32_t) ki. */
if ((ki & 0x80000000) == 0)
{
/* k > 0, the exponent of scale might have overflowed by <= 460. */
/* Lower the exponent by 1009, evaluate, then scale back by 2^1009. */
sbits -= 1009ull << 52;
scale = asdouble (sbits);
y = 0x1p1009 * (scale + scale * tmp);
return check_oflow (eval_as_double (y));
}
/* k < 0, need special care in the subnormal range. */
/* Raise the exponent by 1022 here; the final multiply by 2^-1022 undoes it. */
sbits += 1022ull << 52;
/* Note: sbits is signed scale. */
scale = asdouble (sbits);
y = scale + scale * tmp;
if (fabs (y) < 1.0)
{
/* Round y to the right precision before scaling it into the subnormal
range to avoid double rounding that can cause 0.5+E/2 ulp error where
E is the worst-case ulp error outside the subnormal range. So this
is only useful if the goal is better than 1 ulp worst-case error. */
double_t hi, lo, one = 1.0;
/* Use a unit with the same sign as y. */
if (y < 0.0)
one = -1.0;
lo = scale - y + scale * tmp;
hi = one + y;
lo = one - hi + y + lo;
y = eval_as_double (hi + lo) - one;
/* Fix the sign of 0. */
if (y == 0.0)
y = asdouble (sbits & 0x8000000000000000);
/* The underflow exception needs to be signaled explicitly. */
force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022);
}
y = 0x1p-1022 * y;
return check_uflow (eval_as_double (y));
}

/* Value added to ki in exp_inline before it is shifted left by
52 - EXP_TABLE_BITS: the 0x800 factor then lands on bit 63, the sign bit of
the scale. */
#define SIGN_BIAS (0x800 << EXP_TABLE_BITS)

/* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|.
The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1.
Tiny, huge and potentially over/underflowing arguments are handled
separately (see the abstop checks and specialcase). */
static inline double
exp_inline (double_t x, double_t xtail, uint32_t sign_bias)
{
uint32_t abstop;
uint64_t ki, idx, top, sbits;
/* double_t for better performance on targets with FLT_EVAL_METHOD==2. */
double_t kd, z, r, r2, scale, tail, tmp;

/* Exponent field of x with the sign bit masked off. */
abstop = top12 (x) & 0x7ff;
/* Single unsigned comparison: true when |x| < 2^-54 (the subtraction wraps)
or |x| >= 512. */
if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54)))
{
/* The subtraction wrapped around, i.e. |x| < 2^-54. */
if (abstop - top12 (0x1p-54) >= 0x80000000)
{
/* Avoid spurious underflow for tiny x. */
/* Note: 0 is common input. */
double_t one = WANT_ROUNDING ? 1.0 + x : 1.0;
return sign_bias ? -one : one;
}
if (abstop >= top12 (1024.0))
{
/* Note: inf and nan are already handled. */
if (asuint64 (x) >> 63)
return __math_uflow (sign_bias);
else
return __math_oflow (sign_bias);
}
/* Large x is special cased below. */
/* abstop == 0 is the marker that routes the result through specialcase. */
abstop = 0;
}

/* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */
/* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */
z = InvLn2N * x;
#if TOINT_INTRINSICS
kd = roundtoint (z);
ki = converttoint (z);
#elif EXP_USE_TOINT_NARROW
/* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd) >> 16;
kd = (double_t) (int32_t) ki;
#else
/* z - kd is in [-1, 1] in non-nearest rounding modes. */
kd = eval_as_double (z + Shift);
ki = asuint64 (kd);
kd -= Shift;
#endif
r = x + kd * NegLn2hiN + kd * NegLn2loN;
/* The code assumes 2^-200 < |xtail| < 2^-8/N. */
r += xtail;
/* 2^(k/N) ~= scale * (1 + tail). */
/* The table stores two words per entry, hence the factor 2. */
idx = 2 * (ki % N);
top = (ki + sign_bias) << (52 - EXP_TABLE_BITS);
tail = asdouble (T[idx]);
/* This is only a valid scale when -1023*N < k < 1024*N. */
sbits = T[idx + 1] + top;
/* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */
/* Evaluation is optimized assuming superscalar pipelined execution. */
r2 = r * r;
/* Without fma the worst case error is 0.25/N ulp larger. */
/* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */
#if EXP_POLY_ORDER == 4
tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4);
#elif EXP_POLY_ORDER == 5
tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
#elif EXP_POLY_ORDER == 6
tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6);
#endif
if (unlikely (abstop == 0))
return specialcase (tmp, sbits, ki);
scale = asdouble (sbits);
/* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there
is no spurious underflow here even without fma. */
return eval_as_double (scale + scale * tmp);
}

/* Classify the double whose bit representation is IY: 0 means it is not an
   integer, 1 an odd integer, 2 an even integer.  IY must be the bit
   representation of a non-zero finite floating-point value.  */
static inline int
checkint (uint64_t iy)
{
  int e = iy >> 52 & 0x7ff;

  /* Magnitude below 1: cannot be an integer.  */
  if (e < 0x3ff)
    return 0;
  /* Exponent above 52: no fraction bits and no units bit left, so even.  */
  if (e > 0x3ff + 52)
    return 2;

  /* Mantissa bit that has weight 1 for this exponent.  */
  uint64_t unit = 1ULL << (0x3ff + 52 - e);

  /* Any bit below the units bit is a fractional part.  */
  if (iy & (unit - 1))
    return 0;
  return (iy & unit) ? 1 : 2;
}

/* Return 1 when I is the bit representation of a zero, an infinity or a
   NaN, and 0 otherwise.  Doubling drops the sign bit; subtracting 1 makes
   zero wrap to the maximum value so one unsigned comparison covers all
   three cases.  */
static inline int
zeroinfnan (uint64_t i)
{
  uint64_t twice_inf = 2 * asuint64 (INFINITY);

  return 2 * i - 1 >= twice_inf - 1;
}

/* Double-precision x^y.
Special operands (zero, infinity, NaN, negative x, subnormal x, and y that
is tiny or huge in magnitude) are handled in the first, unlikely branch.
Otherwise log(x) is computed by log_inline as hi + lo, multiplied by y in
two parts (ehi, elo), and passed to exp_inline together with the sign of the
result. */
double
pow (double x, double y)
{
uint32_t sign_bias = 0;
uint64_t ix, iy;
uint32_t topx, topy;

ix = asuint64 (x);
iy = asuint64 (y);
topx = top12 (x);
topy = top12 (y);
/* topx includes the sign bit, so negative x also takes this branch. */
if (unlikely (topx - 0x001 >= 0x7ff - 0x001
|| (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be))
{
/* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0
and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */
/* Special cases: (x < 0x1p-1022 or inf or nan) or
(|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */
if (unlikely (zeroinfnan (iy)))
{
/* y is +-0. */
if (2 * iy == 0)
return issignaling_inline (x) ? x + y : 1.0;
/* x is exactly +1. */
if (ix == asuint64 (1.0))
return issignaling_inline (y) ? x + y : 1.0;
/* x or y is a NaN. */
if (2 * ix > 2 * asuint64 (INFINITY)
|| 2 * iy > 2 * asuint64 (INFINITY))
return x + y;
/* |x| == 1 (here: x == -1) with infinite y. */
if (2 * ix == 2 * asuint64 (1.0))
return 1.0;
if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63))
return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
/* Remaining infinite-y cases give +inf. */
return y * y;
}
if (unlikely (zeroinfnan (ix)))
{
double_t x2 = x * x;
/* Negative x with odd integer y: the result is negative. */
if (ix >> 63 && checkint (iy) == 1)
{
x2 = -x2;
/* Here sign_bias is only used as a sign flag for __math_divzero. */
sign_bias = 1;
}
/* Zero raised to a negative power. */
if (WANT_ERRNO && 2 * ix == 0 && iy >> 63)
return __math_divzero (sign_bias);
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy >> 63 ? opt_barrier_double (1 / x2) : x2;
}
/* Here x and y are non-zero finite. */
if (ix >> 63)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalid (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
/* Continue with |x|. */
ix &= 0x7fffffffffffffff;
topx &= 0x7ff;
}
if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)
{
/* Note: sign_bias == 0 here because y is not odd. */
if (ix == asuint64 (1.0))
return 1.0;
if ((topy & 0x7ff) < 0x3be)
{
/* |y| < 2^-65, x^y ~= 1 + y*log(x). */
if (WANT_ROUNDING)
return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y;
else
return 1.0;
}
/* |y| >= 2^63: overflow when |x| > 1 and y > 0, or |x| < 1 and y < 0;
underflow otherwise (topy < 0x800 means the sign bit of y is clear). */
return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0)
: __math_uflow (0);
}
if (topx == 0)
{
/* Normalize subnormal x so exponent becomes negative. */
/* Without the barrier some versions of clang evaluate the mul
unconditionally causing spurious overflow exceptions. */
ix = asuint64 (opt_barrier_double (x) * 0x1p52);
ix &= 0x7fffffffffffffff;
ix -= 52ULL << 52;
}
}

/* Main path: x^y = exp (y * log (x)), with log(x) = hi + lo. */
double_t lo;
double_t hi = log_inline (ix, &lo);
double_t ehi, elo;
#if HAVE_FAST_FMA
ehi = y * hi;
elo = y * lo + fma (y, hi, -ehi);
#else
/* Clear the low 27 bits of y and of hi so that the product of the two
high parts is formed from short operands. */
double_t yhi = asdouble (iy & -1ULL << 27);
double_t ylo = y - yhi;
double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27);
double_t llo = hi - lhi + lo;
ehi = yhi * lhi;
elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */
#endif
return exp_inline (ehi, elo, sign_bias);
}
/* Extra symbol names for glibc ABI compatibility; powl is provided when long
double has the same 53-bit mantissa as double. */
#if USE_GLIBC_ABI
strong_alias (pow, __pow_finite)
hidden_alias (pow, __ieee754_pow)
# if LDBL_MANT_DIG == 53
long double powl (long double x, long double y) { return pow (x, y); }
# endif
#endif
185 changes: 185 additions & 0 deletions libc/AOR_v20.02/math/pow_log_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,185 @@
/*
* Data for the log part of pow.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Number of subintervals of the reduced range (see the algorithm note in the
initializer below); the data is only provided for N == 128. */
#define N (1 << POW_LOG_TABLE_BITS)

const struct pow_log_data __pow_log_data = {
.ln2hi = 0x1.62e42fefa3800p-1,
.ln2lo = 0x1.ef35793c76730p-45,
.poly = {
#if N == 128 && POW_LOG_POLY_ORDER == 8
// relative error: 0x1.11922ap-70
// in -0x1.6bp-8 0x1.6bp-8
// Coefficients are scaled to match the scaling during evaluation.
-0x1p-1,
0x1.555555555556p-2 * -2,
-0x1.0000000000006p-2 * -2,
0x1.999999959554ep-3 * 4,
-0x1.555555529a47ap-3 * 4,
0x1.2495b9b4845e9p-3 * -8,
-0x1.0002b8b263fc3p-3 * -8,
#endif
},
/* Algorithm:
x = 2^k z
log(x) = k ln2 + log(c) + log(z/c)
log(z/c) = poly(z/c - 1)
where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals
and z falls into the ith one, then table entries are computed as
tab[i].invc = 1/c
tab[i].logc = round(0x1p43*log(c))/0x1p43
tab[i].logctail = (double)(log(c) - logc)
where c is chosen near the center of the subinterval such that 1/c has only a
few precision bits so z/c - 1 is exactly representible as double:
1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2
Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97,
the last few bits of logc are rounded away so k*ln2hi + logc has no rounding
error and the interval for z is selected such that near x == 1, where log(x)
is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */
.tab = {
#if N == 128
#define A(a, b, c) {a, 0, b, c},
A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48)
A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46)
A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45)
A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49)
A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47)
A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46)
A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50)
A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45)
A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45)
A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46)
A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46)
A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46)
A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46)
A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45)
A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48)
A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47)
A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45)
A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46)
A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45)
A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46)
A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45)
A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45)
A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45)
A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46)
A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46)
A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48)
A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47)
A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45)
A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46)
A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45)
A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45)
A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0)
A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46)
A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45)
A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45)
A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47)
A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45)
A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46)
A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46)
A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47)
A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45)
A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45)
A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45)
A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49)
A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45)
A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46)
A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45)
A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45)
A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45)
A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45)
A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45)
A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47)
A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51)
A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45)
A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45)
A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46)
A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45)
A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46)
A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47)
A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47)
A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45)
A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47)
A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45)
A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48)
A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45)
A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51)
A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51)
A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46)
A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48)
A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45)
A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45)
A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45)
A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45)
A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47)
A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45)
A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45)
A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46)
A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46)
A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47)
A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45)
A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45)
A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45)
A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46)
A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47)
#endif
},
};
222 changes: 222 additions & 0 deletions libc/AOR_v20.02/math/powf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
/*
* Single-precision pow function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include <stdint.h>
#include "math_config.h"

/*
POWF_LOG2_POLY_ORDER = 5
EXP2F_TABLE_BITS = 5
ULP error: 0.82 (~ 0.5 + relerr*2^24)
relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2)
relerr_log2: 1.83 * 2^-33 (Relative error of logx.)
relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).)
*/

#define N (1 << POWF_LOG2_TABLE_BITS)
#define T __powf_log2_data.tab
#define A __powf_log2_data.poly
#define OFF 0x3f330000

/* Return log2 of the float whose bit pattern is IX, evaluated in double_t.
   Subnormal input must already be normalized by the caller, which leaves IX
   with a negative biased exponent.  The output is multiplied by N
   (POWF_SCALE) if TOINT_INTRINSICS is set.  */
static inline double_t
log2_inline (uint32_t ix)
{
  /* x = 2^k z; where z is in range [OFF,2*OFF] and exact.
     The range is split into N subintervals; IDX picks the one that
     contains z, and the table's c is near the center of that subinterval.  */
  uint32_t rel = ix - OFF;
  int idx = (rel >> (23 - POWF_LOG2_TABLE_BITS)) % N;
  uint32_t expbits = rel & 0xff800000;
  uint32_t zbits = ix - expbits;
  /* Signed shift on purpose: it must be an arithmetic shift so that a
     negative exponent stays negative.  */
  int k = (int32_t) expbits >> (23 - POWF_SCALE_BITS);

  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
  double_t inv_c = T[idx].invc;
  double_t log2_c = T[idx].logc;
  double_t z = (double_t) asfloat (zbits);

  /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */
  double_t r = z * inv_c - 1;
  double_t base = log2_c + (double_t) k;

  /* Approximate log1p(r)/ln2 with three independent partial sums that are
     folded together at the end.  */
  double_t r2 = r * r;
  double_t r4 = r2 * r2;
  double_t hi = A[0] * r + A[1];
  double_t mid = A[2] * r + A[3];
  double_t lo = A[4] * r + base;
  lo = mid * r2 + lo;
  hi = hi * r4 + lo;
  return hi;
}

#undef N
#undef T
#define N (1 << EXP2F_TABLE_BITS)
#define T __exp2f_data.tab
#define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11))

/* Evaluate exp2 of XD and round the result to float.  XD is the output of
   log2_inline times y, so it is either scaled by N (in case of fast toint
   intrinsics) or not.  The unscaled xd must be in [-1021,1023].  SIGN_BIAS
   is added to the integer part before it is shifted into the top bits of
   the scale factor, which is how it sets the sign of the result.  */
static inline float
exp2_inline (double_t xd, uint32_t sign_bias)
{
  /* double_t for better performance on targets with FLT_EVAL_METHOD==2.  */
#if TOINT_INTRINSICS
# define C __exp2f_data.poly_scaled
  /* N*x = k + r with r in [-1/2, 1/2] */
  double_t kd = roundtoint (xd); /* k */
  uint64_t ki = converttoint (xd);
#else
# define C __exp2f_data.poly
# define SHIFT __exp2f_data.shift_scaled
  /* x = k/N + r with r in [-1/(2N), 1/(2N)] */
  double_t kd = eval_as_double (xd + SHIFT);
  uint64_t ki = asuint64 (kd);
  kd -= SHIFT; /* k/N */
#endif
  double_t r = xd - kd;

  /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1).
     The low bits of ki select the table entry; ki plus the sign bias is
     then shifted up and added into the entry's bit pattern to form s.  */
  uint64_t sbits = T[ki % N];
  uint64_t kbiased = ki + sign_bias;
  sbits += kbiased << (52 - EXP2F_TABLE_BITS);
  double_t scale = asdouble (sbits);

  double_t r2 = r * r;
  double_t upper = C[0] * r + C[1];
  double_t y = C[2] * r + 1;
  y = upper * r2 + y;
  y = y * scale;
  return eval_as_float (y);
}

/* Classify the floating-point value whose bit representation is IY, which
   must be non-zero and finite: the result is 0 if the value is not an
   integer, 1 if it is an odd integer and 2 if it is an even integer.  */
static inline int
checkint (uint32_t iy)
{
  int exponent = (iy >> 23) & 0xff;

  /* Magnitude below 1: cannot be an integer.  */
  if (exponent < 0x7f)
    return 0;
  /* Magnitude of at least 2^24: every such float is an even integer.  */
  if (exponent > 0x7f + 23)
    return 2;

  /* Bit that carries weight 2^0 for this exponent.  For exponent 0x7f this
     is bit 23, the lowest exponent bit, which is set and so stands in for
     the implicit leading one.  */
  uint32_t unit = 1 << (0x7f + 23 - exponent);

  /* Anything below the unit bit is a fractional part.  */
  if (iy & (unit - 1))
    return 0;
  return (iy & unit) ? 1 : 2;
}

/* Return non-zero if the float with bit pattern IX is +-0, +-inf or a NaN.
   Doubling IX discards the sign bit; subtracting 1 then wraps zero around
   to the largest unsigned value, so one unsigned comparison covers both
   zero and everything from infinity upwards.  */
static inline int
zeroinfnan (uint32_t ix)
{
  uint32_t twice = 2 * ix;
  uint32_t twice_inf = 2u * 0x7f800000;
  return twice - 1 >= twice_inf - 1;
}

/* Single-precision pow: returns x raised to the power y.
   Special operands (x zero, subnormal, negative, inf or nan; y zero, inf or
   nan) are dealt with first.  All remaining inputs are computed as
   exp2_inline (y * log2_inline (|x|)) in double_t, with sign_bias carrying
   the sign of the result when x is negative and y is an odd integer.  */
float
powf (float x, float y)
{
uint32_t sign_bias = 0;
uint32_t ix, iy;

ix = asuint (x);
iy = asuint (y);
/* The unsigned subtraction wraps for ix < 0x00800000, so this one compare
   is true for zero and subnormal x as well as for inf, nan and every x with
   the sign bit set.  */
if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy)))
{
/* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */
if (unlikely (zeroinfnan (iy)))
{
/* y is +-0: the result is 1 unless x is a signaling NaN.  */
if (2 * iy == 0)
return issignalingf_inline (x) ? x + y : 1.0f;
/* x is exactly +1: the result is 1 unless y is a signaling NaN.  */
if (ix == 0x3f800000)
return issignalingf_inline (y) ? x + y : 1.0f;
/* Either operand is a NaN.  */
if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
return x + y;
/* From here on y is +-inf.  |x| == 1 gives 1.  */
if (2 * ix == 2 * 0x3f800000)
return 1.0f;
if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */
/* Remaining combinations: y * y is +inf.  */
return y * y;
}
/* y is finite and non-zero below.  This branch takes x that is +-0, +-inf
   or nan.  */
if (unlikely (zeroinfnan (ix)))
{
float_t x2 = x * x;
if (ix & 0x80000000 && checkint (iy) == 1)
{
/* Negative x with odd integer y: the result is negative.  In this branch
   sign_bias is only used as the argument of __math_divzerof.  */
x2 = -x2;
sign_bias = 1;
}
#if WANT_ERRNO
/* x is +-0 and y is negative.  */
if (2 * ix == 0 && iy & 0x80000000)
return __math_divzerof (sign_bias);
#endif
/* Without the barrier some versions of clang hoist the 1/x2 and
thus division by zero exception can be signaled spuriously. */
return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2;
}
/* x and y are non-zero finite. */
if (ix & 0x80000000)
{
/* Finite x < 0. */
int yint = checkint (iy);
if (yint == 0)
return __math_invalidf (x);
if (yint == 1)
sign_bias = SIGN_BIAS;
/* Continue with |x|.  */
ix &= 0x7fffffff;
}
if (ix < 0x00800000)
{
/* Normalize subnormal x so exponent becomes negative. */
ix = asuint (x * 0x1p23f);
ix &= 0x7fffffff;
ix -= 23 << 23;
}
}
double_t logx = log2_inline (ix);
double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */
/* Compare bits 47..62 of ylogx (the sign bit is masked off) against the
   same bits of 126.0 * POWF_SCALE.  */
if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff)
>= asuint64 (126.0 * POWF_SCALE) >> 47))
{
/* |y*log(x)| >= 126. */
if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE)
/* |x^y| > 0x1.ffffffp127. */
return __math_oflowf (sign_bias);
if (WANT_ROUNDING && WANT_ERRNO
&& ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE)
/* |x^y| > 0x1.fffffep127, check if we round away from 0. */
if ((!sign_bias
&& eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f)
|| (sign_bias
&& eval_as_float (-1.0f - opt_barrier_float (0x1p-25f))
!= -1.0f))
return __math_oflowf (sign_bias);
if (ylogx <= -150.0 * POWF_SCALE)
return __math_uflowf (sign_bias);
#if WANT_ERRNO_UFLOW
if (ylogx < -149.0 * POWF_SCALE)
return __math_may_uflowf (sign_bias);
#endif
}
return exp2_inline (ylogx, sign_bias);
}
/* Extra symbols emitted only when USE_GLIBC_ABI is set.  strong_alias and
   hidden_alias are project macros defined outside this file; presumably they
   make powf reachable under the additional names given -- confirm against
   math_config.h.  */
#if USE_GLIBC_ABI
strong_alias (powf, __powf_finite)
hidden_alias (powf, __ieee754_powf)
#endif
35 changes: 35 additions & 0 deletions libc/AOR_v20.02/math/powf_log2_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Data definition for powf.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include "math_config.h"

/* Lookup data for the log2 step of powf.  Every constant is multiplied by
   POWF_SCALE.  The struct layout is declared in math_config.h.  */
const struct powf_log2_data __powf_log2_data = {
/* 16 table entries.  powf reads them as .invc and .logc; numerically the
   first value of each pair is 1/c and the second is log2(c), scaled.  */
.tab = {
{ 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE },
{ 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE },
{ 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE },
{ 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE },
{ 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE },
{ 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE },
{ 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE },
{ 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE },
{ 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE },
{ 0x1p+0, 0x0p+0 * POWF_SCALE },
{ 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE },
{ 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE },
{ 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE },
{ 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE },
{ 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE },
{ 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE },
},
/* Five polynomial coefficients, highest-degree term first: powf uses them
   as poly[0] .. poly[4] to approximate log1p(r)/ln2.  */
.poly = {
0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE,
0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE,
0x1.71547652ab82bp0 * POWF_SCALE,
}
};
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_cos.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_cos.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_cosf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_cosf.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_exp.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_exp2f.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp2f.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_exp2f_1u.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_exp2f_1u.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_expf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_expf.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_expf_1u.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_expf_1u.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_log.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_log.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_logf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_logf.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_pow.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_pow.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_powf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_powf.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_sin.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_sin.c"
7 changes: 7 additions & 0 deletions libc/AOR_v20.02/math/s_sinf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
/*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/
#define SCALAR 1
#include "v_sinf.c"
80 changes: 80 additions & 0 deletions libc/AOR_v20.02/math/sincosf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Single-precision sin/cos function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <stdint.h>
#include <math.h>
#include "math_config.h"
#include "sincosf.h"

/* Fast sincosf implementation.  Worst-case ULP is 0.5607, maximum relative
   error is 0.5303 * 2^-23.  The sine of Y is stored in *SINP and the cosine
   in *COSP.  Arguments below PI/4 go straight to the polynomials, arguments
   below 120 use a single-step range reduction, and larger finite arguments
   have their range reduced using fast integer arithmetic.  Inf and NaN
   produce NaN in both outputs.  */
void
sincosf (float y, float *sinp, float *cosp)
{
  const sincos_t *p = &__sincosf_table[0];
  const uint32_t ytop = abstop12 (y);
  double x = y;
  int n;

  if (ytop < abstop12 (pio4))
    {
      double x2 = x * x;

      if (unlikely (ytop < abstop12 (0x1p-12f)))
        {
          /* Force underflow for tiny y.  */
          if (unlikely (ytop < abstop12 (0x1p-126f)))
            force_eval_float (x2);
          *sinp = y;
          *cosp = 1.0f;
          return;
        }

      sincosf_poly (x, x2, p, 0, sinp, cosp);
      return;
    }

  if (ytop < abstop12 (120.0f))
    {
      x = reduce_fast (x, p, &n);

      /* Pick the sign first, then switch to the negated-cosine table for
         the quadrants that need it.  */
      double sgn = p->sign[n & 3];
      if (n & 2)
        p = &__sincosf_table[1];

      sincosf_poly (x * sgn, x * x, p, n, sinp, cosp);
      return;
    }

  if (likely (ytop < abstop12 (INFINITY)))
    {
      uint32_t xi = asuint (y);
      int sign = xi >> 31;

      x = reduce_large (xi, &n);

      /* Same as above, but the sign of the original argument is folded
         into the quadrant used for the lookup.  */
      double sgn = p->sign[(n + sign) & 3];
      if ((n + sign) & 2)
        p = &__sincosf_table[1];

      sincosf_poly (x * sgn, x * x, p, n, sinp, cosp);
      return;
    }

  /* Return NaN if Inf or NaN for both sin and cos.  */
  *sinp = *cosp = y - y;
#if WANT_ERRNO
  /* Needed to set errno for +-Inf, the add is a hack to work
     around a gcc register allocation issue: just passing y
     affects code generation in the fast path.  */
  __math_invalidf (y + y);
#endif
}
154 changes: 154 additions & 0 deletions libc/AOR_v20.02/math/sincosf.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,154 @@
/*
* Header for sinf, cosf and sincosf.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <stdint.h>
#include <math.h>
#include "math_config.h"

/* 2PI * 2^-64, i.e. PI * 2^-63.  reduce_large multiplies its signed 64-bit
   fixed-point remainder by this constant to produce its return value.  */
static const double pi63 = 0x1.921FB54442D18p-62;
/* PI / 4.  sinf and sincosf use it as the bound below which no range
   reduction is performed.  */
static const double pio4 = 0x1.921FB54442D18p-1;

/* The constants and polynomials for sine and cosine.  One object of this
   type bundles everything reduce_fast, sinf_poly and sincosf_poly read
   through their P argument.  */
typedef struct
{
double sign[4]; /* Sign of sine in quadrants 0..3. */
double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */
double hpi; /* PI / 2. */
double c0, c1, c2, c3, c4; /* Cosine polynomial, lowest order first. */
double s1, s2, s3; /* Sine polynomial, lowest order first. */
} sincos_t;

/* Polynomial data (the cosine polynomial is negated in the 2nd entry). */
extern const sincos_t __sincosf_table[2] HIDDEN;

/* Table with 4/PI to 192 bit precision. */
extern const uint32_t __inv_pio4[] HIDDEN;

/* Top 12 bits of the float representation with the sign bit cleared, i.e.
   bits 20..30 of X moved down to bits 0..10.  */
static inline uint32_t
abstop12 (float x)
{
  const uint32_t bits = asuint (x);
  return (bits >> 20) & 0x7ff;
}

/* Evaluate the sine and cosine polynomials of P at X, where X2 must be X
   squared, and store the results through SINP and COSP.  N is the quadrant:
   when it is odd the two results are stored the other way round, i.e. the
   cosine polynomial goes to *SINP and the sine polynomial to *COSP.  */
static inline void
sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp,
              float *cosp)
{
  /* Swap sin/cos result based on quadrant.  */
  float *sin_out = sinp;
  float *cos_out = cosp;
  if (n & 1)
    {
      sin_out = cosp;
      cos_out = sinp;
    }

  /* Powers of x shared by both polynomials.  */
  double x3 = x2 * x;
  double x4 = x2 * x2;
  double x5 = x3 * x2;
  double x6 = x4 * x2;

  /* Sine: (x + s1*x^3) + x^5 * (s2 + s3*x^2).  */
  double sin_tail = p->s2 + x2 * p->s3;
  double sin_head = x + x3 * p->s1;

  /* Cosine: ((c0 + c1*x^2) + c2*x^4) + x^6 * (c3 + c4*x^2).  */
  double cos_tail = p->c3 + x2 * p->c4;
  double cos_head = p->c0 + x2 * p->c1;
  cos_head = cos_head + x4 * p->c2;

  *sin_out = sin_head + x5 * sin_tail;
  *cos_out = cos_head + x6 * cos_tail;
}

/* Return the sine of X using the polynomial data in P; X2 must be X
   squared.  N is the quadrant: for even N the sine polynomial is evaluated,
   for odd N the cosine polynomial is used instead.  */
static inline float
sinf_poly (double x, double x2, const sincos_t *p, int n)
{
  if (n & 1)
    {
      /* Cosine: ((c0 + c1*x^2) + c2*x^4) + x^6 * (c3 + c4*x^2).  */
      double x4 = x2 * x2;
      double tail = p->c3 + x2 * p->c4;
      double head = p->c0 + x2 * p->c1;

      double x6 = x4 * x2;
      head = head + x4 * p->c2;

      return head + x6 * tail;
    }

  /* Sine: (x + s1*x^3) + x^5 * (s2 + s3*x^2).  */
  double x3 = x * x2;
  double tail = p->s2 + x2 * p->s3;

  double x5 = x3 * x2;
  double head = x + x3 * p->s1;

  return head + x5 * tail;
}

/* Fast range reduction using single multiply-subtract.  Return the modulo of
   X as a value between -PI/4 and PI/4 and store the quadrant in NP.
   The values for PI/2 and 2/PI are accessed via P.  Since PI/2 as a double
   is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4,
   the result is accurate for |X| <= 120.0.  */
static inline double
reduce_fast (double x, const sincos_t *p, int *np)
{
  /* X in units of PI/2 (times 2^24 when !TOINT_INTRINSICS).  */
  double scaled = x * p->hpi_inv;
#if TOINT_INTRINSICS
  /* Use fast round and lround instructions when available.  */
  *np = converttoint (scaled);
  return x - roundtoint (scaled) * p->hpi;
#else
  /* Use scaled float to int conversion with explicit rounding.
     hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31.
     This avoids inaccuracies introduced by truncating negative values.  */
  int quadrant = ((int32_t) scaled + 0x800000) >> 24;
  *np = quadrant;
  return x - quadrant * p->hpi;
#endif
}

/* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic.
XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored).
Return the modulo between -PI/4 and PI/4 and store the quadrant in NP.
Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit
multiply computes the exact 2.62-bit fixed-point modulo. Since the result
can have at most 29 leading zeros after the binary point, the double
precision result is accurate to 33 bits. */
static inline double
reduce_large (uint32_t xi, int *np)
{
/* Bits 26..29 of XI choose the starting word in the 4/PI table; bits
   23..25 (the low three exponent bits) become a left shift of the
   significand.  */
const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15];
int shift = (xi >> 23) & 7;
uint64_t n, res0, res1, res2;

/* Keep the low 24 bits of XI, force bit 23 (the implicit leading one) on,
   and apply the shift.  */
xi = (xi & 0xffffff) | 0x800000;
xi <<= shift;

/* The first product is deliberately left at 32 bits: it is shifted up by
   32 below, so only its low half can contribute.  */
res0 = xi * arr[0];
res1 = (uint64_t)xi * arr[4];
res2 = (uint64_t)xi * arr[8];
res0 = (res2 >> 32) | (res0 << 32);
res0 += res1;

/* Round to the nearest multiple of 2^62: n is the quadrant, and what is
   left in res0 is reinterpreted as a signed value.  */
n = (res0 + (1ULL << 61)) >> 62;
res0 -= n << 62;
double x = (int64_t)res0;
*np = n;
return x * pi63;
}
64 changes: 64 additions & 0 deletions libc/AOR_v20.02/math/sincosf_data.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Data definition for sinf, cosf and sincosf.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <stdint.h>
#include <math.h>
#include "math_config.h"
#include "sincosf.h"

/* The constants and polynomials for sine and cosine. The 2nd entry
computes -cos (x) rather than cos (x) to get negation for free.
Initializers are positional and follow the member order of sincos_t:
sign[4], hpi_inv, hpi, c0..c4, s1..s3. */
const sincos_t __sincosf_table[2] =
{
/* Entry 0: cos (x) and sin (x). */
{
/* sign[0..3] */
{ 1.0, -1.0, -1.0, 1.0 },
/* hpi_inv: 2 / PI, prescaled by 2^24 without toint intrinsics. */
#if TOINT_INTRINSICS
0x1.45F306DC9C883p-1,
#else
0x1.45F306DC9C883p+23,
#endif
/* hpi: PI / 2. */
0x1.921FB54442D18p0,
/* c0 .. c4 */
0x1p0,
-0x1.ffffffd0c621cp-2,
0x1.55553e1068f19p-5,
-0x1.6c087e89a359dp-10,
0x1.99343027bf8c3p-16,
/* s1 .. s3 */
-0x1.555545995a603p-3,
0x1.1107605230bc4p-7,
-0x1.994eb3774cf24p-13
},
/* Entry 1: same as entry 0 except that c0 .. c4 are negated. */
{
/* sign[0..3] */
{ 1.0, -1.0, -1.0, 1.0 },
/* hpi_inv */
#if TOINT_INTRINSICS
0x1.45F306DC9C883p-1,
#else
0x1.45F306DC9C883p+23,
#endif
/* hpi */
0x1.921FB54442D18p0,
/* c0 .. c4, negated */
-0x1p0,
0x1.ffffffd0c621cp-2,
-0x1.55553e1068f19p-5,
0x1.6c087e89a359dp-10,
-0x1.99343027bf8c3p-16,
/* s1 .. s3, unchanged */
-0x1.555545995a603p-3,
0x1.1107605230bc4p-7,
-0x1.994eb3774cf24p-13
}
};

/* Table with 4/PI to 192 bit precision. To avoid unaligned accesses
only 8 new bits are added per entry, making the table 4 times larger.
Each entry is the previous one shifted left by 8 bits with the next 8 bits
of 4/PI appended. reduce_large starts at an index in 0..15 and reads the
words at offsets 0, 4 and 8 from there, so the highest index it touches
is 23. */
const uint32_t __inv_pio4[24] =
{
0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e,
0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529,
0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1,
0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0,
0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599,
0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041
};
68 changes: 68 additions & 0 deletions libc/AOR_v20.02/math/sinf.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* Single-precision sin function.
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
* SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
*/

#include <math.h>
#include "math_config.h"
#include "sincosf.h"

/* Fast sinf implementation.  Worst-case ULP is 0.5607, maximum relative
   error is 0.5303 * 2^-23.  Arguments below PI/4 go straight to the
   polynomial, arguments below 120 use a single-step range reduction, and
   larger finite arguments have their range reduced using fast integer
   arithmetic.  Inf and NaN are handed to __math_invalidf.  */
float
sinf (float y)
{
  const sincos_t *p = &__sincosf_table[0];
  const uint32_t ytop = abstop12 (y);
  double x = y;
  int n;

  if (ytop < abstop12 (pio4))
    {
      double x2 = x * x;

      if (unlikely (ytop < abstop12 (0x1p-12f)))
        {
          /* Force underflow for tiny y.  */
          if (unlikely (ytop < abstop12 (0x1p-126f)))
            force_eval_float (x2);
          return y;
        }

      return sinf_poly (x, x2, p, 0);
    }

  if (likely (ytop < abstop12 (120.0f)))
    {
      x = reduce_fast (x, p, &n);

      /* Pick the sign first, then switch to the negated-cosine table for
         the quadrants that need it.  */
      double sgn = p->sign[n & 3];
      if (n & 2)
        p = &__sincosf_table[1];

      return sinf_poly (x * sgn, x * x, p, n);
    }

  if (ytop < abstop12 (INFINITY))
    {
      uint32_t xi = asuint (y);
      int sign = xi >> 31;

      x = reduce_large (xi, &n);

      /* Same as above, but the sign of the original argument is folded
         into the quadrant used for the lookup.  */
      double sgn = p->sign[(n + sign) & 3];
      if ((n + sign) & 2)
        p = &__sincosf_table[1];

      return sinf_poly (x * sgn, x * x, p, n);
    }

  return __math_invalidf (y);
}
Loading