| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,177 @@ | ||
| /* | ||
| * Double-precision e^x function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <float.h> | ||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
/* Shorthand names for the members of the shared __exp_data object.  */
#define N         (1 << EXP_TABLE_BITS)
#define InvLn2N   __exp_data.invln2N
#define NegLn2hiN __exp_data.negln2hiN
#define NegLn2loN __exp_data.negln2loN
#define Shift     __exp_data.shift
#define T         __exp_data.tab
/* Polynomial coefficients; the poly[] index is offset by EXP_POLY_ORDER.  */
#define C2        __exp_data.poly[5 - EXP_POLY_ORDER]
#define C3        __exp_data.poly[6 - EXP_POLY_ORDER]
#define C4        __exp_data.poly[7 - EXP_POLY_ORDER]
#define C5        __exp_data.poly[8 - EXP_POLY_ORDER]
#define C6        __exp_data.poly[9 - EXP_POLY_ORDER]
|
|
||
| /* Handle cases that may overflow or underflow when computing the result that | ||
| is scale*(1+TMP) without intermediate rounding. The bit representation of | ||
| scale is in SBITS, however it has a computed exponent that may have | ||
| overflown into the sign bit so that needs to be adjusted before using it as | ||
| a double. (int32_t)KI is the k used in the argument reduction and exponent | ||
| adjustment of scale, positive k here means the result may overflow and | ||
| negative k means the result may underflow. */ | ||
| static inline double | ||
| specialcase (double_t tmp, uint64_t sbits, uint64_t ki) | ||
| { | ||
| double_t scale, y; | ||
|
|
||
| if ((ki & 0x80000000) == 0) | ||
| { | ||
| /* k > 0, the exponent of scale might have overflowed by <= 460. */ | ||
| sbits -= 1009ull << 52; | ||
| scale = asdouble (sbits); | ||
| y = 0x1p1009 * (scale + scale * tmp); | ||
| return check_oflow (eval_as_double (y)); | ||
| } | ||
| /* k < 0, need special care in the subnormal range. */ | ||
| sbits += 1022ull << 52; | ||
| scale = asdouble (sbits); | ||
| y = scale + scale * tmp; | ||
| if (y < 1.0) | ||
| { | ||
| /* Round y to the right precision before scaling it into the subnormal | ||
| range to avoid double rounding that can cause 0.5+E/2 ulp error where | ||
| E is the worst-case ulp error outside the subnormal range. So this | ||
| is only useful if the goal is better than 1 ulp worst-case error. */ | ||
| double_t hi, lo; | ||
| lo = scale - y + scale * tmp; | ||
| hi = 1.0 + y; | ||
| lo = 1.0 - hi + y + lo; | ||
| y = eval_as_double (hi + lo) - 1.0; | ||
| /* Avoid -0.0 with downward rounding. */ | ||
| if (WANT_ROUNDING && y == 0.0) | ||
| y = 0.0; | ||
| /* The underflow exception needs to be signaled explicitly. */ | ||
| force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); | ||
| } | ||
| y = 0x1p-1022 * y; | ||
| return check_uflow (eval_as_double (y)); | ||
| } | ||
|
|
||
/* Sign and exponent bits of X: the top 12 bits of the 64-bit value that
   asuint64 returns for it, moved down to the low bits of the result.  */
static inline uint32_t
top12 (double x)
{
  const uint64_t bits = asuint64 (x);
  return bits >> 52;
}
|
|
||
| /* Computes exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. | ||
| If hastail is 0 then xtail is assumed to be 0 too. */ | ||
| static inline double | ||
| exp_inline (double x, double xtail, int hastail) | ||
| { | ||
| uint32_t abstop; | ||
| uint64_t ki, idx, top, sbits; | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t kd, z, r, r2, scale, tail, tmp; | ||
|
|
||
| abstop = top12 (x) & 0x7ff; | ||
| if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) | ||
| { | ||
| if (abstop - top12 (0x1p-54) >= 0x80000000) | ||
| /* Avoid spurious underflow for tiny x. */ | ||
| /* Note: 0 is common input. */ | ||
| return WANT_ROUNDING ? 1.0 + x : 1.0; | ||
| if (abstop >= top12 (1024.0)) | ||
| { | ||
| if (asuint64 (x) == asuint64 (-INFINITY)) | ||
| return 0.0; | ||
| if (abstop >= top12 (INFINITY)) | ||
| return 1.0 + x; | ||
| if (asuint64 (x) >> 63) | ||
| return __math_uflow (0); | ||
| else | ||
| return __math_oflow (0); | ||
| } | ||
| /* Large x is special cased below. */ | ||
| abstop = 0; | ||
| } | ||
|
|
||
| /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ | ||
| /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ | ||
| z = InvLn2N * x; | ||
| #if TOINT_INTRINSICS | ||
| kd = roundtoint (z); | ||
| ki = converttoint (z); | ||
| #elif EXP_USE_TOINT_NARROW | ||
| /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ | ||
| kd = eval_as_double (z + Shift); | ||
| ki = asuint64 (kd) >> 16; | ||
| kd = (double_t) (int32_t) ki; | ||
| #else | ||
| /* z - kd is in [-1, 1] in non-nearest rounding modes. */ | ||
| kd = eval_as_double (z + Shift); | ||
| ki = asuint64 (kd); | ||
| kd -= Shift; | ||
| #endif | ||
| r = x + kd * NegLn2hiN + kd * NegLn2loN; | ||
| /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ | ||
| if (hastail) | ||
| r += xtail; | ||
| /* 2^(k/N) ~= scale * (1 + tail). */ | ||
| idx = 2 * (ki % N); | ||
| top = ki << (52 - EXP_TABLE_BITS); | ||
| tail = asdouble (T[idx]); | ||
| /* This is only a valid scale when -1023*N < k < 1024*N. */ | ||
| sbits = T[idx + 1] + top; | ||
| /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ | ||
| /* Evaluation is optimized assuming superscalar pipelined execution. */ | ||
| r2 = r * r; | ||
| /* Without fma the worst case error is 0.25/N ulp larger. */ | ||
| /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ | ||
| #if EXP_POLY_ORDER == 4 | ||
| tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); | ||
| #elif EXP_POLY_ORDER == 5 | ||
| tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); | ||
| #elif EXP_POLY_ORDER == 6 | ||
| tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); | ||
| #endif | ||
| if (unlikely (abstop == 0)) | ||
| return specialcase (tmp, sbits, ki); | ||
| scale = asdouble (sbits); | ||
| /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there | ||
| is no spurious underflow here even without fma. */ | ||
| return eval_as_double (scale + scale * tmp); | ||
| } | ||
|
|
||
/* Double-precision e^x: the shared core called with a zero tail and
   hastail == 0.  */
double
exp (double x)
{
  const double no_tail = 0;
  return exp_inline (x, no_tail, 0);
}
|
|
||
/* exp(x+xtail) for an argument given as the pair X, XTAIL.
   May be useful for implementing pow where more than double
   precision input is needed.  */
double
__exp_dd (double x, double xtail)
{
  const int has_tail = 1;
  return exp_inline (x, xtail, has_tail);
}
#if USE_GLIBC_ABI
/* Additional symbol names, only emitted when USE_GLIBC_ABI is set.  */
strong_alias (exp, __exp_finite)
hidden_alias (exp, __ieee754_exp)
hidden_alias (__exp_dd, __exp1)
# if LDBL_MANT_DIG == 53
/* long double has a 53-bit mantissa here, so expl forwards to exp.  */
long double
expl (long double x)
{
  return exp (x);
}
# endif
#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,144 @@ | ||
| /* | ||
| * Double-precision 2^x function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <float.h> | ||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
/* Shorthand names for the members of the shared __exp_data object that
   exp2 uses.  */
#define N     (1 << EXP_TABLE_BITS)
#define Shift __exp_data.exp2_shift
#define T     __exp_data.tab
/* Polynomial coefficients, in increasing degree.  */
#define C1    __exp_data.exp2_poly[0]
#define C2    __exp_data.exp2_poly[1]
#define C3    __exp_data.exp2_poly[2]
#define C4    __exp_data.exp2_poly[3]
#define C5    __exp_data.exp2_poly[4]
#define C6    __exp_data.exp2_poly[5]
|
|
||
| /* Handle cases that may overflow or underflow when computing the result that | ||
| is scale*(1+TMP) without intermediate rounding. The bit representation of | ||
| scale is in SBITS, however it has a computed exponent that may have | ||
| overflown into the sign bit so that needs to be adjusted before using it as | ||
| a double. (int32_t)KI is the k used in the argument reduction and exponent | ||
| adjustment of scale, positive k here means the result may overflow and | ||
| negative k means the result may underflow. */ | ||
| static inline double | ||
| specialcase (double_t tmp, uint64_t sbits, uint64_t ki) | ||
| { | ||
| double_t scale, y; | ||
|
|
||
| if ((ki & 0x80000000) == 0) | ||
| { | ||
| /* k > 0, the exponent of scale might have overflowed by 1. */ | ||
| sbits -= 1ull << 52; | ||
| scale = asdouble (sbits); | ||
| y = 2 * (scale + scale * tmp); | ||
| return check_oflow (eval_as_double (y)); | ||
| } | ||
| /* k < 0, need special care in the subnormal range. */ | ||
| sbits += 1022ull << 52; | ||
| scale = asdouble (sbits); | ||
| y = scale + scale * tmp; | ||
| if (y < 1.0) | ||
| { | ||
| /* Round y to the right precision before scaling it into the subnormal | ||
| range to avoid double rounding that can cause 0.5+E/2 ulp error where | ||
| E is the worst-case ulp error outside the subnormal range. So this | ||
| is only useful if the goal is better than 1 ulp worst-case error. */ | ||
| double_t hi, lo; | ||
| lo = scale - y + scale * tmp; | ||
| hi = 1.0 + y; | ||
| lo = 1.0 - hi + y + lo; | ||
| y = eval_as_double (hi + lo) - 1.0; | ||
| /* Avoid -0.0 with downward rounding. */ | ||
| if (WANT_ROUNDING && y == 0.0) | ||
| y = 0.0; | ||
| /* The underflow exception needs to be signaled explicitly. */ | ||
| force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); | ||
| } | ||
| y = 0x1p-1022 * y; | ||
| return check_uflow (eval_as_double (y)); | ||
| } | ||
|
|
||
/* Sign and exponent bits of X: the top 12 bits of the 64-bit value that
   asuint64 returns for it, moved down to the low bits of the result.  */
static inline uint32_t
top12 (double x)
{
  const uint64_t bits = asuint64 (x);
  return bits >> 52;
}
|
|
||
| double | ||
| exp2 (double x) | ||
| { | ||
| uint32_t abstop; | ||
| uint64_t ki, idx, top, sbits; | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t kd, r, r2, scale, tail, tmp; | ||
|
|
||
| abstop = top12 (x) & 0x7ff; | ||
| if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) | ||
| { | ||
| if (abstop - top12 (0x1p-54) >= 0x80000000) | ||
| /* Avoid spurious underflow for tiny x. */ | ||
| /* Note: 0 is common input. */ | ||
| return WANT_ROUNDING ? 1.0 + x : 1.0; | ||
| if (abstop >= top12 (1024.0)) | ||
| { | ||
| if (asuint64 (x) == asuint64 (-INFINITY)) | ||
| return 0.0; | ||
| if (abstop >= top12 (INFINITY)) | ||
| return 1.0 + x; | ||
| if (!(asuint64 (x) >> 63)) | ||
| return __math_oflow (0); | ||
| else if (asuint64 (x) >= asuint64 (-1075.0)) | ||
| return __math_uflow (0); | ||
| } | ||
| if (2 * asuint64 (x) > 2 * asuint64 (928.0)) | ||
| /* Large x is special cased below. */ | ||
| abstop = 0; | ||
| } | ||
|
|
||
| /* exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ | ||
| /* x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ | ||
| kd = eval_as_double (x + Shift); | ||
| ki = asuint64 (kd); /* k. */ | ||
| kd -= Shift; /* k/N for int k. */ | ||
| r = x - kd; | ||
| /* 2^(k/N) ~= scale * (1 + tail). */ | ||
| idx = 2 * (ki % N); | ||
| top = ki << (52 - EXP_TABLE_BITS); | ||
| tail = asdouble (T[idx]); | ||
| /* This is only a valid scale when -1023*N < k < 1024*N. */ | ||
| sbits = T[idx + 1] + top; | ||
| /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ | ||
| /* Evaluation is optimized assuming superscalar pipelined execution. */ | ||
| r2 = r * r; | ||
| /* Without fma the worst case error is 0.5/N ulp larger. */ | ||
| /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ | ||
| #if EXP2_POLY_ORDER == 4 | ||
| tmp = tail + r * C1 + r2 * C2 + r * r2 * (C3 + r * C4); | ||
| #elif EXP2_POLY_ORDER == 5 | ||
| tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); | ||
| #elif EXP2_POLY_ORDER == 6 | ||
| tmp = tail + r * C1 + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); | ||
| #endif | ||
| if (unlikely (abstop == 0)) | ||
| return specialcase (tmp, sbits, ki); | ||
| scale = asdouble (sbits); | ||
| /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there | ||
| is no spurious underflow here even without fma. */ | ||
| return eval_as_double (scale + scale * tmp); | ||
| } | ||
#if USE_GLIBC_ABI
/* Additional symbol names, only emitted when USE_GLIBC_ABI is set.  */
strong_alias (exp2, __exp2_finite)
hidden_alias (exp2, __ieee754_exp2)
# if LDBL_MANT_DIG == 53
/* long double has a 53-bit mantissa here, so exp2l forwards to exp2.  */
long double
exp2l (long double x)
{
  return exp2 (x);
}
# endif
#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| /* | ||
| * Single-precision 2^x function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| EXP2F_TABLE_BITS = 5 | ||
| EXP2F_POLY_ORDER = 3 | ||
| ULP error: 0.502 (nearest rounding.) | ||
| Relative error: 1.69 * 2^-34 in [-1/64, 1/64] (before rounding.) | ||
| Wrong count: 168353 (all nearest rounding wrong results with fma.) | ||
| Non-nearest ULP error: 1 (rounded ULP error) | ||
| */ | ||
|
|
||
/* Shorthand names for the members of the shared __exp2f_data object that
   exp2f uses.  */
#define N     (1 << EXP2F_TABLE_BITS)
#define T     __exp2f_data.tab
#define C     __exp2f_data.poly
#define SHIFT __exp2f_data.shift_scaled
|
|
||
/* Top 12 bits of the value asuint returns for X (presumably the 32-bit
   encoding of the float: sign, exponent and leading mantissa bits).  */
static inline uint32_t
top12 (float x)
{
  const uint32_t head = asuint (x) >> 20;
  return head;
}
|
|
||
| float | ||
| exp2f (float x) | ||
| { | ||
| uint32_t abstop; | ||
| uint64_t ki, t; | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t kd, xd, z, r, r2, y, s; | ||
|
|
||
| xd = (double_t) x; | ||
| abstop = top12 (x) & 0x7ff; | ||
| if (unlikely (abstop >= top12 (128.0f))) | ||
| { | ||
| /* |x| >= 128 or x is nan. */ | ||
| if (asuint (x) == asuint (-INFINITY)) | ||
| return 0.0f; | ||
| if (abstop >= top12 (INFINITY)) | ||
| return x + x; | ||
| if (x > 0.0f) | ||
| return __math_oflowf (0); | ||
| if (x <= -150.0f) | ||
| return __math_uflowf (0); | ||
| #if WANT_ERRNO_UFLOW | ||
| if (x < -149.0f) | ||
| return __math_may_uflowf (0); | ||
| #endif | ||
| } | ||
|
|
||
| /* x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. */ | ||
| kd = eval_as_double (xd + SHIFT); | ||
| ki = asuint64 (kd); | ||
| kd -= SHIFT; /* k/N for int k. */ | ||
| r = xd - kd; | ||
|
|
||
| /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ | ||
| t = T[ki % N]; | ||
| t += ki << (52 - EXP2F_TABLE_BITS); | ||
| s = asdouble (t); | ||
| z = C[0] * r + C[1]; | ||
| r2 = r * r; | ||
| y = C[2] * r + 1; | ||
| y = z * r2 + y; | ||
| y = y * s; | ||
| return eval_as_float (y); | ||
| } | ||
#if USE_GLIBC_ABI
/* Additional symbol names, only emitted when USE_GLIBC_ABI is set.  */
strong_alias (exp2f, __exp2f_finite)
hidden_alias (exp2f, __ieee754_exp2f)
#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,79 @@ | ||
| /* | ||
| * Shared data between expf, exp2f and powf. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| #define N (1 << EXP2F_TABLE_BITS) | ||
|
|
||
| const struct exp2f_data __exp2f_data = { | ||
| /* tab[i] = uint(2^(i/N)) - (i << 52-BITS) | ||
| used for computing 2^(k/N) for an int |k| < 150 N as | ||
| double(tab[k%N] + (k << 52-BITS)) */ | ||
| .tab = { | ||
| #if N == 8 | ||
| 0x3ff0000000000000, 0x3fef72b83c7d517b, 0x3fef06fe0a31b715, 0x3feebfdad5362a27, | ||
| 0x3feea09e667f3bcd, 0x3feeace5422aa0db, 0x3feee89f995ad3ad, 0x3fef5818dcfba487, | ||
| #elif N == 16 | ||
| 0x3ff0000000000000, 0x3fefb5586cf9890f, 0x3fef72b83c7d517b, 0x3fef387a6e756238, | ||
| 0x3fef06fe0a31b715, 0x3feedea64c123422, 0x3feebfdad5362a27, 0x3feeab07dd485429, | ||
| 0x3feea09e667f3bcd, 0x3feea11473eb0187, 0x3feeace5422aa0db, 0x3feec49182a3f090, | ||
| 0x3feee89f995ad3ad, 0x3fef199bdd85529c, 0x3fef5818dcfba487, 0x3fefa4afa2a490da, | ||
| #elif N == 32 | ||
| 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, | ||
| 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, | ||
| 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, | ||
| 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, | ||
| 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, | ||
| 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, | ||
| 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, | ||
| 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, | ||
| #elif N == 64 | ||
| 0x3ff0000000000000, 0x3fefec9a3e778061, 0x3fefd9b0d3158574, 0x3fefc74518759bc8, | ||
| 0x3fefb5586cf9890f, 0x3fefa3ec32d3d1a2, 0x3fef9301d0125b51, 0x3fef829aaea92de0, | ||
| 0x3fef72b83c7d517b, 0x3fef635beb6fcb75, 0x3fef54873168b9aa, 0x3fef463b88628cd6, | ||
| 0x3fef387a6e756238, 0x3fef2b4565e27cdd, 0x3fef1e9df51fdee1, 0x3fef1285a6e4030b, | ||
| 0x3fef06fe0a31b715, 0x3feefc08b26416ff, 0x3feef1a7373aa9cb, 0x3feee7db34e59ff7, | ||
| 0x3feedea64c123422, 0x3feed60a21f72e2a, 0x3feece086061892d, 0x3feec6a2b5c13cd0, | ||
| 0x3feebfdad5362a27, 0x3feeb9b2769d2ca7, 0x3feeb42b569d4f82, 0x3feeaf4736b527da, | ||
| 0x3feeab07dd485429, 0x3feea76f15ad2148, 0x3feea47eb03a5585, 0x3feea23882552225, | ||
| 0x3feea09e667f3bcd, 0x3fee9fb23c651a2f, 0x3fee9f75e8ec5f74, 0x3fee9feb564267c9, | ||
| 0x3feea11473eb0187, 0x3feea2f336cf4e62, 0x3feea589994cce13, 0x3feea8d99b4492ed, | ||
| 0x3feeace5422aa0db, 0x3feeb1ae99157736, 0x3feeb737b0cdc5e5, 0x3feebd829fde4e50, | ||
| 0x3feec49182a3f090, 0x3feecc667b5de565, 0x3feed503b23e255d, 0x3feede6b5579fdbf, | ||
| 0x3feee89f995ad3ad, 0x3feef3a2b84f15fb, 0x3feeff76f2fb5e47, 0x3fef0c1e904bc1d2, | ||
| 0x3fef199bdd85529c, 0x3fef27f12e57d14b, 0x3fef3720dcef9069, 0x3fef472d4a07897c, | ||
| 0x3fef5818dcfba487, 0x3fef69e603db3285, 0x3fef7c97337b9b5f, 0x3fef902ee78b3ff6, | ||
| 0x3fefa4afa2a490da, 0x3fefba1bee615a27, 0x3fefd0765b6e4540, 0x3fefe7c1819e90d8, | ||
| #endif | ||
| }, | ||
| .shift_scaled = 0x1.8p+52 / N, | ||
| .poly = { | ||
| #if N == 8 | ||
| 0x1.c6a00335106e2p-5, 0x1.ec0c313449f55p-3, 0x1.62e431111f69fp-1, | ||
| #elif N == 16 | ||
| 0x1.c6ac6aa313963p-5, 0x1.ebfff4532d9bap-3, 0x1.62e43001bc49fp-1, | ||
| #elif N == 32 | ||
| 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, | ||
| #elif N == 64 | ||
| 0x1.c6b04b4221b2ap-5, 0x1.ebfc213e184d7p-3, 0x1.62e42fefb5b7fp-1, | ||
| #endif | ||
| }, | ||
| .shift = 0x1.8p+52, | ||
| .invln2_scaled = 0x1.71547652b82fep+0 * N, | ||
| .poly_scaled = { | ||
| #if N == 8 | ||
| 0x1.c6a00335106e2p-5/N/N/N, 0x1.ec0c313449f55p-3/N/N, 0x1.62e431111f69fp-1/N, | ||
| #elif N == 16 | ||
| 0x1.c6ac6aa313963p-5/N/N/N, 0x1.ebfff4532d9bap-3/N/N, 0x1.62e43001bc49fp-1/N, | ||
| #elif N == 32 | ||
| 0x1.c6af84b912394p-5/N/N/N, 0x1.ebfce50fac4f3p-3/N/N, 0x1.62e42ff0c52d6p-1/N, | ||
| #elif N == 64 | ||
| 0x1.c6b04b4221b2ap-5/N/N/N, 0x1.ebfc213e184d7p-3/N/N, 0x1.62e42fefb5b7fp-1/N, | ||
| #endif | ||
| }, | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,92 @@ | ||
| /* | ||
| * Single-precision e^x function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| EXP2F_TABLE_BITS = 5 | ||
| EXP2F_POLY_ORDER = 3 | ||
| ULP error: 0.502 (nearest rounding.) | ||
| Relative error: 1.69 * 2^-34 in [-ln2/64, ln2/64] (before rounding.) | ||
| Wrong count: 170635 (all nearest rounding wrong results with fma.) | ||
| Non-nearest ULP error: 1 (rounded ULP error) | ||
| */ | ||
|
|
||
/* Shorthand names for the members of the shared __exp2f_data object that
   expf uses.  */
#define N       (1 << EXP2F_TABLE_BITS)
#define InvLn2N __exp2f_data.invln2_scaled
#define T       __exp2f_data.tab
#define C       __exp2f_data.poly_scaled
|
|
||
/* Top 12 bits of the value asuint returns for X (presumably the 32-bit
   encoding of the float: sign, exponent and leading mantissa bits).  */
static inline uint32_t
top12 (float x)
{
  const uint32_t head = asuint (x) >> 20;
  return head;
}
|
|
||
| float | ||
| expf (float x) | ||
| { | ||
| uint32_t abstop; | ||
| uint64_t ki, t; | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t kd, xd, z, r, r2, y, s; | ||
|
|
||
| xd = (double_t) x; | ||
| abstop = top12 (x) & 0x7ff; | ||
| if (unlikely (abstop >= top12 (88.0f))) | ||
| { | ||
| /* |x| >= 88 or x is nan. */ | ||
| if (asuint (x) == asuint (-INFINITY)) | ||
| return 0.0f; | ||
| if (abstop >= top12 (INFINITY)) | ||
| return x + x; | ||
| if (x > 0x1.62e42ep6f) /* x > log(0x1p128) ~= 88.72 */ | ||
| return __math_oflowf (0); | ||
| if (x < -0x1.9fe368p6f) /* x < log(0x1p-150) ~= -103.97 */ | ||
| return __math_uflowf (0); | ||
| #if WANT_ERRNO_UFLOW | ||
| if (x < -0x1.9d1d9ep6f) /* x < log(0x1p-149) ~= -103.28 */ | ||
| return __math_may_uflowf (0); | ||
| #endif | ||
| } | ||
|
|
||
| /* x*N/Ln2 = k + r with r in [-1/2, 1/2] and int k. */ | ||
| z = InvLn2N * xd; | ||
|
|
||
| /* Round and convert z to int, the result is in [-150*N, 128*N] and | ||
| ideally nearest int is used, otherwise the magnitude of r can be | ||
| bigger which gives larger approximation error. */ | ||
| #if TOINT_INTRINSICS | ||
| kd = roundtoint (z); | ||
| ki = converttoint (z); | ||
| #else | ||
| # define SHIFT __exp2f_data.shift | ||
| kd = eval_as_double (z + SHIFT); | ||
| ki = asuint64 (kd); | ||
| kd -= SHIFT; | ||
| #endif | ||
| r = z - kd; | ||
|
|
||
| /* exp(x) = 2^(k/N) * 2^(r/N) ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ | ||
| t = T[ki % N]; | ||
| t += ki << (52 - EXP2F_TABLE_BITS); | ||
| s = asdouble (t); | ||
| z = C[0] * r + C[1]; | ||
| r2 = r * r; | ||
| y = C[2] * r + 1; | ||
| y = z * r2 + y; | ||
| y = y * s; | ||
| return eval_as_float (y); | ||
| } | ||
#if USE_GLIBC_ABI
/* Additional symbol names, only emitted when USE_GLIBC_ABI is set.  */
strong_alias (expf, __expf_finite)
hidden_alias (expf, __ieee754_expf)
#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,101 @@ | ||
| /* | ||
| * Public API. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
#ifndef _MATHLIB_H
#define _MATHLIB_H

/* Single-precision scalar functions.  */
float expf (float);
float exp2f (float);
float logf (float);
float log2f (float);
float powf (float, float);
float sinf (float);
float cosf (float);
void sincosf (float, float *, float *);

/* Double-precision scalar functions.  */
double exp (double);
double exp2 (double);
double log (double);
double log2 (double);
double pow (double, double);

/* Scalar functions using the vector algorithm with identical result.  */
float __s_sinf (float);
float __s_cosf (float);
float __s_expf (float);
float __s_expf_1u (float);
float __s_exp2f (float);
float __s_exp2f_1u (float);
float __s_logf (float);
float __s_powf (float, float);
double __s_sin (double);
double __s_cos (double);
double __s_exp (double);
double __s_log (double);
double __s_pow (double, double);

#if __aarch64__
/* Vector types of four floats and of two doubles; how they are spelled
   depends on the compiler.  */
# if __GNUC__ >= 5
typedef __Float32x4_t __f32x4_t;
typedef __Float64x2_t __f64x2_t;
# elif __clang_major__*100+__clang_minor__ >= 305
typedef __attribute__((__neon_vector_type__(4))) float __f32x4_t;
typedef __attribute__((__neon_vector_type__(2))) double __f64x2_t;
# else
#  error Unsupported compiler
# endif

/* Vector functions following the base PCS.  */
__f32x4_t __v_sinf (__f32x4_t);
__f32x4_t __v_cosf (__f32x4_t);
__f32x4_t __v_expf (__f32x4_t);
__f32x4_t __v_expf_1u (__f32x4_t);
__f32x4_t __v_exp2f (__f32x4_t);
__f32x4_t __v_exp2f_1u (__f32x4_t);
__f32x4_t __v_logf (__f32x4_t);
__f32x4_t __v_powf (__f32x4_t, __f32x4_t);
__f64x2_t __v_sin (__f64x2_t);
__f64x2_t __v_cos (__f64x2_t);
__f64x2_t __v_exp (__f64x2_t);
__f64x2_t __v_log (__f64x2_t);
__f64x2_t __v_pow (__f64x2_t, __f64x2_t);

# if __GNUC__ >= 9 || __clang_major__ >= 8
/* Only declared for compilers recent enough to accept the attribute.  */
#  define __vpcs __attribute__((__aarch64_vector_pcs__))

/* Vector functions following the vector PCS.  */
__vpcs __f32x4_t __vn_sinf (__f32x4_t);
__vpcs __f32x4_t __vn_cosf (__f32x4_t);
__vpcs __f32x4_t __vn_expf (__f32x4_t);
__vpcs __f32x4_t __vn_expf_1u (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f (__f32x4_t);
__vpcs __f32x4_t __vn_exp2f_1u (__f32x4_t);
__vpcs __f32x4_t __vn_logf (__f32x4_t);
__vpcs __f32x4_t __vn_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t __vn_sin (__f64x2_t);
__vpcs __f64x2_t __vn_cos (__f64x2_t);
__vpcs __f64x2_t __vn_exp (__f64x2_t);
__vpcs __f64x2_t __vn_log (__f64x2_t);
__vpcs __f64x2_t __vn_pow (__f64x2_t, __f64x2_t);

/* Vector functions following the vector PCS using ABI names.  */
__vpcs __f32x4_t _ZGVnN4v_sinf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_exp2f (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4v_logf (__f32x4_t);
__vpcs __f32x4_t _ZGVnN4vv_powf (__f32x4_t, __f32x4_t);
__vpcs __f64x2_t _ZGVnN2v_sin (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2v_log (__f64x2_t);
__vpcs __f64x2_t _ZGVnN2vv_pow (__f64x2_t, __f64x2_t);
# endif
#endif

#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,163 @@ | ||
| /* | ||
| * Double-precision log(x) function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <float.h> | ||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
/* Shorthand names for the members of the shared __log_data object.  */
#define T     __log_data.tab
#define T2    __log_data.tab2
#define B     __log_data.poly1
#define A     __log_data.poly
#define Ln2hi __log_data.ln2hi
#define Ln2lo __log_data.ln2lo
#define N     (1 << LOG_TABLE_BITS)
/* Bit pattern subtracted from the input bits before they are split into
   exponent and table index.  */
#define OFF   0x3fe6000000000000
|
|
||
/* Top 16 bits of the 64-bit value that asuint64 returns for X, moved down
   to the low bits of the result.  */
static inline uint32_t
top16 (double x)
{
  const uint64_t bits = asuint64 (x);
  return bits >> 48;
}
|
|
||
| double | ||
| log (double x) | ||
| { | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t w, z, r, r2, r3, y, invc, logc, kd, hi, lo; | ||
| uint64_t ix, iz, tmp; | ||
| uint32_t top; | ||
| int k, i; | ||
|
|
||
| ix = asuint64 (x); | ||
| top = top16 (x); | ||
|
|
||
| #if LOG_POLY1_ORDER == 10 || LOG_POLY1_ORDER == 11 | ||
| # define LO asuint64 (1.0 - 0x1p-5) | ||
| # define HI asuint64 (1.0 + 0x1.1p-5) | ||
| #elif LOG_POLY1_ORDER == 12 | ||
| # define LO asuint64 (1.0 - 0x1p-4) | ||
| # define HI asuint64 (1.0 + 0x1.09p-4) | ||
| #endif | ||
| if (unlikely (ix - LO < HI - LO)) | ||
| { | ||
| /* Handle close to 1.0 inputs separately. */ | ||
| /* Fix sign of zero with downward rounding when x==1. */ | ||
| if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) | ||
| return 0; | ||
| r = x - 1.0; | ||
| r2 = r * r; | ||
| r3 = r * r2; | ||
| #if LOG_POLY1_ORDER == 10 | ||
| /* Worst-case error is around 0.516 ULP. */ | ||
| y = r3 * (B[1] + r * B[2] + r2 * B[3] | ||
| + r3 * (B[4] + r * B[5] + r2 * B[6] + r3 * (B[7] + r * B[8]))); | ||
| w = B[0] * r2; /* B[0] == -0.5. */ | ||
| hi = r + w; | ||
| y += r - hi + w; | ||
| y += hi; | ||
| #elif LOG_POLY1_ORDER == 11 | ||
| /* Worst-case error is around 0.516 ULP. */ | ||
| y = r3 * (B[1] + r * B[2] | ||
| + r2 * (B[3] + r * B[4] + r2 * B[5] | ||
| + r3 * (B[6] + r * B[7] + r2 * B[8] + r3 * B[9]))); | ||
| w = B[0] * r2; /* B[0] == -0.5. */ | ||
| hi = r + w; | ||
| y += r - hi + w; | ||
| y += hi; | ||
| #elif LOG_POLY1_ORDER == 12 | ||
| y = r3 * (B[1] + r * B[2] + r2 * B[3] | ||
| + r3 * (B[4] + r * B[5] + r2 * B[6] | ||
| + r3 * (B[7] + r * B[8] + r2 * B[9] + r3 * B[10]))); | ||
| # if N <= 64 | ||
| /* Worst-case error is around 0.532 ULP. */ | ||
| w = B[0] * r2; /* B[0] == -0.5. */ | ||
| hi = r + w; | ||
| y += r - hi + w; | ||
| y += hi; | ||
| # else | ||
| /* Worst-case error is around 0.507 ULP. */ | ||
| w = r * 0x1p27; | ||
| double_t rhi = r + w - w; | ||
| double_t rlo = r - rhi; | ||
| w = rhi * rhi * B[0]; /* B[0] == -0.5. */ | ||
| hi = r + w; | ||
| lo = r - hi + w; | ||
| lo += B[0] * rlo * (rhi + r); | ||
| y += lo; | ||
| y += hi; | ||
| # endif | ||
| #endif | ||
| return eval_as_double (y); | ||
| } | ||
| if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) | ||
| { | ||
| /* x < 0x1p-1022 or inf or nan. */ | ||
| if (ix * 2 == 0) | ||
| return __math_divzero (1); | ||
| if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ | ||
| return x; | ||
| if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) | ||
| return __math_invalid (x); | ||
| /* x is subnormal, normalize it. */ | ||
| ix = asuint64 (x * 0x1p52); | ||
| ix -= 52ULL << 52; | ||
| } | ||
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. | ||
| The range is split into N subintervals. | ||
| The ith subinterval contains z and c is near its center. */ | ||
| tmp = ix - OFF; | ||
| i = (tmp >> (52 - LOG_TABLE_BITS)) % N; | ||
| k = (int64_t) tmp >> 52; /* arithmetic shift */ | ||
| iz = ix - (tmp & 0xfffULL << 52); | ||
| invc = T[i].invc; | ||
| logc = T[i].logc; | ||
| z = asdouble (iz); | ||
|
|
||
| /* log(x) = log1p(z/c-1) + log(c) + k*Ln2. */ | ||
| /* r ~= z/c - 1, |r| < 1/(2*N). */ | ||
| #if HAVE_FAST_FMA | ||
| /* rounding error: 0x1p-55/N. */ | ||
| r = fma (z, invc, -1.0); | ||
| #else | ||
| /* rounding error: 0x1p-55/N + 0x1p-66. */ | ||
| r = (z - T2[i].chi - T2[i].clo) * invc; | ||
| #endif | ||
| kd = (double_t) k; | ||
|
|
||
| /* hi + lo = r + log(c) + k*Ln2. */ | ||
| w = kd * Ln2hi + logc; | ||
| hi = w + r; | ||
| lo = w - hi + r + kd * Ln2lo; | ||
|
|
||
| /* log(x) = lo + (log1p(r) - r) + hi. */ | ||
| r2 = r * r; /* rounding error: 0x1p-54/N^2. */ | ||
| /* Worst case error if |y| > 0x1p-5: | ||
| 0.5 + 4.13/N + abs-poly-error*2^57 ULP (+ 0.002 ULP without fma) | ||
| Worst case error if |y| > 0x1p-4: | ||
| 0.5 + 2.06/N + abs-poly-error*2^56 ULP (+ 0.001 ULP without fma). */ | ||
| #if LOG_POLY_ORDER == 6 | ||
| y = lo + r2 * A[0] + r * r2 * (A[1] + r * A[2] + r2 * (A[3] + r * A[4])) + hi; | ||
| #elif LOG_POLY_ORDER == 7 | ||
| y = lo | ||
| + r2 * (A[0] + r * A[1] + r2 * (A[2] + r * A[3]) | ||
| + r2 * r2 * (A[4] + r * A[5])) | ||
| + hi; | ||
| #endif | ||
| return eval_as_double (y); | ||
| } | ||
#if USE_GLIBC_ABI
/* Additional symbol names, only emitted when USE_GLIBC_ABI is set.  */
strong_alias (log, __log_finite)
hidden_alias (log, __ieee754_log)
# if LDBL_MANT_DIG == 53
/* long double has a 53-bit mantissa here, so logl forwards to log.  */
long double
logl (long double x)
{
  return log (x);
}
# endif
#endif
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,142 @@ | ||
| /* | ||
| * Double-precision log2(x) function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <float.h> | ||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| #define T __log2_data.tab | ||
| #define T2 __log2_data.tab2 | ||
| #define B __log2_data.poly1 | ||
| #define A __log2_data.poly | ||
| #define InvLn2hi __log2_data.invln2hi | ||
| #define InvLn2lo __log2_data.invln2lo | ||
| #define N (1 << LOG2_TABLE_BITS) | ||
| #define OFF 0x3fe6000000000000 | ||
|
|
||
| /* Extract the uppermost 16 bits of the bit pattern of X: the sign bit, the | ||
| 11 exponent bits and the 4 leading mantissa bits. */ | ||
| static inline uint32_t | ||
| top16 (double x) | ||
| { | ||
| uint64_t bits = asuint64 (x); | ||
| return bits >> 48; | ||
| } | ||
|
|
||
| double | ||
| log2 (double x) | ||
| { | ||
| /* Base-2 logarithm of X. | ||
| Inputs whose bit pattern lies in [LO, HI), a small interval around 1.0, are | ||
| evaluated with the poly1 polynomial (B) in r = x - 1. Zero, negative, | ||
| infinite and NaN inputs return through the __math_* helpers (or return X | ||
| itself for +inf), and subnormal inputs are rescaled first. Every other | ||
| input uses the table-driven reduction x = 2^k z described further down. | ||
| double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t z, r, r2, r4, y, invc, logc, kd, hi, lo, t1, t2, t3, p; | ||
| uint64_t ix, iz, tmp; | ||
| uint32_t top; | ||
| int k, i; | ||
|
|
||
| ix = asuint64 (x); | ||
| top = top16 (x); | ||
|
|
||
| #if LOG2_POLY1_ORDER == 11 | ||
| # define LO asuint64 (1.0 - 0x1.5b51p-5) | ||
| # define HI asuint64 (1.0 + 0x1.6ab2p-5) | ||
| #endif | ||
| if (unlikely (ix - LO < HI - LO)) /* unsigned wrap-around: one compare tests LO <= ix < HI */ | ||
| { | ||
| /* Handle close to 1.0 inputs separately. */ | ||
| /* Fix sign of zero with downward rounding when x==1. */ | ||
| if (WANT_ROUNDING && unlikely (ix == asuint64 (1.0))) | ||
| return 0; | ||
| r = x - 1.0; | ||
| #if HAVE_FAST_FMA | ||
| hi = r * InvLn2hi; | ||
| lo = r * InvLn2lo + fma (r, InvLn2hi, -hi); | ||
| #else | ||
| double_t rhi, rlo; | ||
| rhi = asdouble (asuint64 (r) & -1ULL << 32); /* rhi: r with the low 32 bits of its representation cleared */ | ||
| rlo = r - rhi; | ||
| hi = rhi * InvLn2hi; | ||
| lo = rlo * InvLn2hi + r * InvLn2lo; | ||
| #endif | ||
| r2 = r * r; /* rounding error: 0x1p-62. */ | ||
| r4 = r2 * r2; | ||
| #if LOG2_POLY1_ORDER == 11 | ||
| /* Worst-case error is less than 0.54 ULP (0.55 ULP without fma). */ | ||
| p = r2 * (B[0] + r * B[1]); | ||
| y = hi + p; | ||
| lo += hi - y + p; | ||
| lo += r4 * (B[2] + r * B[3] + r2 * (B[4] + r * B[5]) | ||
| + r4 * (B[6] + r * B[7] + r2 * (B[8] + r * B[9]))); | ||
| y += lo; | ||
| #endif | ||
| return eval_as_double (y); | ||
| } | ||
| if (unlikely (top - 0x0010 >= 0x7ff0 - 0x0010)) | ||
| { | ||
| /* x < 0x1p-1022 (this includes zero and every negative value) or inf or nan. */ | ||
| if (ix * 2 == 0) /* x is +0 or -0: doubling shifts the sign bit out */ | ||
| return __math_divzero (1); | ||
| if (ix == asuint64 (INFINITY)) /* log(inf) == inf. */ | ||
| return x; | ||
| if ((top & 0x8000) || (top & 0x7ff0) == 0x7ff0) /* sign bit set, or exponent all ones (NaN; +inf was handled above) */ | ||
| return __math_invalid (x); | ||
| /* x is subnormal, normalize it. */ | ||
| ix = asuint64 (x * 0x1p52); | ||
| ix -= 52ULL << 52; /* undo the 2^52 scaling in the exponent field */ | ||
| } | ||
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. | ||
| The range is split into N subintervals. | ||
| The ith subinterval contains z and c is near its center. */ | ||
| tmp = ix - OFF; | ||
| i = (tmp >> (52 - LOG2_TABLE_BITS)) % N; /* table index: the LOG2_TABLE_BITS bits just below the exponent of tmp */ | ||
| k = (int64_t) tmp >> 52; /* arithmetic shift */ | ||
| iz = ix - (tmp & 0xfffULL << 52); /* remove k from the exponent field, leaving the bits of z */ | ||
| invc = T[i].invc; | ||
| logc = T[i].logc; | ||
| z = asdouble (iz); | ||
| kd = (double_t) k; | ||
|
|
||
| /* log2(x) = log2(z/c) + log2(c) + k. */ | ||
| /* r ~= z/c - 1, |r| < 1/(2*N). */ | ||
| #if HAVE_FAST_FMA | ||
| /* rounding error: 0x1p-55/N. */ | ||
| r = fma (z, invc, -1.0); | ||
| t1 = r * InvLn2hi; | ||
| t2 = r * InvLn2lo + fma (r, InvLn2hi, -t1); | ||
| #else | ||
| double_t rhi, rlo; | ||
| /* rounding error: 0x1p-55/N + 0x1p-65. */ | ||
| r = (z - T2[i].chi - T2[i].clo) * invc; | ||
| rhi = asdouble (asuint64 (r) & -1ULL << 32); | ||
| rlo = r - rhi; | ||
| t1 = rhi * InvLn2hi; | ||
| t2 = rlo * InvLn2hi + r * InvLn2lo; | ||
| #endif | ||
|
|
||
| /* hi + lo = r/ln2 + log2(c) + k. */ | ||
| t3 = kd + logc; | ||
| hi = t3 + t1; | ||
| lo = t3 - hi + t1 + t2; | ||
|
|
||
| /* log2(r+1) = r/ln2 + r^2*poly(r). */ | ||
| /* Evaluation is optimized assuming superscalar pipelined execution. */ | ||
| r2 = r * r; /* rounding error: 0x1p-54/N^2. */ | ||
| r4 = r2 * r2; | ||
| #if LOG2_POLY_ORDER == 7 | ||
| /* Worst-case error if |y| > 0x1p-4: 0.547 ULP (0.550 ULP without fma). | ||
| ~ 0.5 + 2/N/ln2 + abs-poly-error*0x1p56 ULP (+ 0.003 ULP without fma). */ | ||
| p = A[0] + r * A[1] + r2 * (A[2] + r * A[3]) + r4 * (A[4] + r * A[5]); | ||
| y = lo + r2 * p + hi; | ||
| #endif | ||
| return eval_as_double (y); | ||
| } | ||
| #if USE_GLIBC_ABI /* NOTE(review): presumably exports log2 under glibc-internal symbol names -- confirm the alias macros in math_config.h. */ | ||
| strong_alias (log2, __log2_finite) | ||
| hidden_alias (log2, __ieee754_log2) | ||
| # if LDBL_MANT_DIG == 53 /* long double has a 53-bit significand, so log2l simply forwards to log2. */ | ||
| long double log2l (long double x) { return log2 (x); } | ||
| # endif | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,210 @@ | ||
| /* | ||
| * Data for log2. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| #define N (1 << LOG2_TABLE_BITS) | ||
|
|
||
| const struct log2_data __log2_data = { | ||
| // First coefficient: 0x1.71547652b82fe1777d0ffda0d24p0 | ||
| .invln2hi = 0x1.7154765200000p+0, | ||
| .invln2lo = 0x1.705fc2eefa200p-33, | ||
| .poly1 = { | ||
| #if LOG2_POLY1_ORDER == 11 | ||
| // relative error: 0x1.2fad8188p-63 | ||
| // in -0x1.5b51p-5 0x1.6ab2p-5 | ||
| -0x1.71547652b82fep-1, | ||
| 0x1.ec709dc3a03f7p-2, | ||
| -0x1.71547652b7c3fp-2, | ||
| 0x1.2776c50f05be4p-2, | ||
| -0x1.ec709dd768fe5p-3, | ||
| 0x1.a61761ec4e736p-3, | ||
| -0x1.7153fbc64a79bp-3, | ||
| 0x1.484d154f01b4ap-3, | ||
| -0x1.289e4a72c383cp-3, | ||
| 0x1.0b32f285aee66p-3, | ||
| #endif | ||
| }, | ||
| .poly = { | ||
| #if N == 64 && LOG2_POLY_ORDER == 7 | ||
| // relative error: 0x1.a72c2bf8p-58 | ||
| // abs error: 0x1.67a552c8p-66 | ||
| // in -0x1.f45p-8 0x1.f45p-8 | ||
| -0x1.71547652b8339p-1, | ||
| 0x1.ec709dc3a04bep-2, | ||
| -0x1.7154764702ffbp-2, | ||
| 0x1.2776c50034c48p-2, | ||
| -0x1.ec7b328ea92bcp-3, | ||
| 0x1.a6225e117f92ep-3, | ||
| #endif | ||
| }, | ||
| /* Algorithm: | ||
| x = 2^k z | ||
| log2(x) = k + log2(c) + log2(z/c) | ||
| log2(z/c) = poly(z/c - 1) | ||
| where z is in [0x1.6p-1; 0x1.6p0) which is split into N subintervals and z falls | ||
| into the ith one; the table entries are then computed as | ||
| tab[i].invc = 1/c | ||
| tab[i].logc = (double)log2(c) | ||
| tab2[i].chi = (double)c | ||
| tab2[i].clo = (double)(c - (double)c) | ||
| where c is near the center of the subinterval and is chosen by trying +-2^29 | ||
| floating point invc candidates around 1/center and selecting one for which | ||
| 1) the rounding error in 0x1.8p10 + logc is 0, | ||
| 2) the rounding error in z - chi - clo is < 0x1p-64 and | ||
| 3) the rounding error in (double)log2(c) is minimized (< 0x1p-68). | ||
| Note: 1) ensures that k + logc can be computed without rounding error, 2) | ||
| ensures that z/c - 1 can be computed as (z - chi - clo)*invc with close to a | ||
| single rounding error when there is no fast fma for z*invc - 1, 3) ensures | ||
| that logc + poly(z/c - 1) has small error, however near x == 1 when | ||
| |log2(x)| < 0x1p-4, this is not enough so that is special cased. */ | ||
| .tab = { | ||
| #if N == 64 | ||
| {0x1.724286bb1acf8p+0, -0x1.1095feecdb000p-1}, | ||
| {0x1.6e1f766d2cca1p+0, -0x1.08494bd76d000p-1}, | ||
| {0x1.6a13d0e30d48ap+0, -0x1.00143aee8f800p-1}, | ||
| {0x1.661ec32d06c85p+0, -0x1.efec5360b4000p-2}, | ||
| {0x1.623fa951198f8p+0, -0x1.dfdd91ab7e000p-2}, | ||
| {0x1.5e75ba4cf026cp+0, -0x1.cffae0cc79000p-2}, | ||
| {0x1.5ac055a214fb8p+0, -0x1.c043811fda000p-2}, | ||
| {0x1.571ed0f166e1ep+0, -0x1.b0b67323ae000p-2}, | ||
| {0x1.53909590bf835p+0, -0x1.a152f5a2db000p-2}, | ||
| {0x1.5014fed61adddp+0, -0x1.9217f5af86000p-2}, | ||
| {0x1.4cab88e487bd0p+0, -0x1.8304db0719000p-2}, | ||
| {0x1.49539b4334feep+0, -0x1.74189f9a9e000p-2}, | ||
| {0x1.460cbdfafd569p+0, -0x1.6552bb5199000p-2}, | ||
| {0x1.42d664ee4b953p+0, -0x1.56b23a29b1000p-2}, | ||
| {0x1.3fb01111dd8a6p+0, -0x1.483650f5fa000p-2}, | ||
| {0x1.3c995b70c5836p+0, -0x1.39de937f6a000p-2}, | ||
| {0x1.3991c4ab6fd4ap+0, -0x1.2baa1538d6000p-2}, | ||
| {0x1.3698e0ce099b5p+0, -0x1.1d98340ca4000p-2}, | ||
| {0x1.33ae48213e7b2p+0, -0x1.0fa853a40e000p-2}, | ||
| {0x1.30d191985bdb1p+0, -0x1.01d9c32e73000p-2}, | ||
| {0x1.2e025cab271d7p+0, -0x1.e857da2fa6000p-3}, | ||
| {0x1.2b404cf13cd82p+0, -0x1.cd3c8633d8000p-3}, | ||
| {0x1.288b02c7ccb50p+0, -0x1.b26034c14a000p-3}, | ||
| {0x1.25e2263944de5p+0, -0x1.97c1c2f4fe000p-3}, | ||
| {0x1.234563d8615b1p+0, -0x1.7d6023f800000p-3}, | ||
| {0x1.20b46e33eaf38p+0, -0x1.633a71a05e000p-3}, | ||
| {0x1.1e2eefdcda3ddp+0, -0x1.494f5e9570000p-3}, | ||
| {0x1.1bb4a580b3930p+0, -0x1.2f9e424e0a000p-3}, | ||
| {0x1.19453847f2200p+0, -0x1.162595afdc000p-3}, | ||
| {0x1.16e06c0d5d73cp+0, -0x1.f9c9a75bd8000p-4}, | ||
| {0x1.1485f47b7e4c2p+0, -0x1.c7b575bf9c000p-4}, | ||
| {0x1.12358ad0085d1p+0, -0x1.960c60ff48000p-4}, | ||
| {0x1.0fef00f532227p+0, -0x1.64ce247b60000p-4}, | ||
| {0x1.0db2077d03a8fp+0, -0x1.33f78b2014000p-4}, | ||
| {0x1.0b7e6d65980d9p+0, -0x1.0387d1a42c000p-4}, | ||
| {0x1.0953efe7b408dp+0, -0x1.a6f9208b50000p-5}, | ||
| {0x1.07325cac53b83p+0, -0x1.47a954f770000p-5}, | ||
| {0x1.05197e40d1b5cp+0, -0x1.d23a8c50c0000p-6}, | ||
| {0x1.03091c1208ea2p+0, -0x1.16a2629780000p-6}, | ||
| {0x1.0101025b37e21p+0, -0x1.720f8d8e80000p-8}, | ||
| {0x1.fc07ef9caa76bp-1, 0x1.6fe53b1500000p-7}, | ||
| {0x1.f4465d3f6f184p-1, 0x1.11ccce10f8000p-5}, | ||
| {0x1.ecc079f84107fp-1, 0x1.c4dfc8c8b8000p-5}, | ||
| {0x1.e573a99975ae8p-1, 0x1.3aa321e574000p-4}, | ||
| {0x1.de5d6f0bd3de6p-1, 0x1.918a0d08b8000p-4}, | ||
| {0x1.d77b681ff38b3p-1, 0x1.e72e9da044000p-4}, | ||
| {0x1.d0cb5724de943p-1, 0x1.1dcd2507f6000p-3}, | ||
| {0x1.ca4b2dc0e7563p-1, 0x1.476ab03dea000p-3}, | ||
| {0x1.c3f8ee8d6cb51p-1, 0x1.7074377e22000p-3}, | ||
| {0x1.bdd2b4f020c4cp-1, 0x1.98ede8ba94000p-3}, | ||
| {0x1.b7d6c006015cap-1, 0x1.c0db86ad2e000p-3}, | ||
| {0x1.b20366e2e338fp-1, 0x1.e840aafcee000p-3}, | ||
| {0x1.ac57026295039p-1, 0x1.0790ab4678000p-2}, | ||
| {0x1.a6d01bc2731ddp-1, 0x1.1ac056801c000p-2}, | ||
| {0x1.a16d3bc3ff18bp-1, 0x1.2db11d4fee000p-2}, | ||
| {0x1.9c2d14967feadp-1, 0x1.406464ec58000p-2}, | ||
| {0x1.970e4f47c9902p-1, 0x1.52dbe093af000p-2}, | ||
| {0x1.920fb3982bcf2p-1, 0x1.651902050d000p-2}, | ||
| {0x1.8d30187f759f1p-1, 0x1.771d2cdeaf000p-2}, | ||
| {0x1.886e5ebb9f66dp-1, 0x1.88e9c857d9000p-2}, | ||
| {0x1.83c97b658b994p-1, 0x1.9a80155e16000p-2}, | ||
| {0x1.7f405ffc61022p-1, 0x1.abe186ed3d000p-2}, | ||
| {0x1.7ad22181415cap-1, 0x1.bd0f2aea0e000p-2}, | ||
| {0x1.767dcf99eff8cp-1, 0x1.ce0a43dbf4000p-2}, | ||
| #endif | ||
| }, | ||
| #if !HAVE_FAST_FMA | ||
| .tab2 = { | ||
| # if N == 64 | ||
| {0x1.6200012b90a8ep-1, 0x1.904ab0644b605p-55}, | ||
| {0x1.66000045734a6p-1, 0x1.1ff9bea62f7a9p-57}, | ||
| {0x1.69fffc325f2c5p-1, 0x1.27ecfcb3c90bap-55}, | ||
| {0x1.6e00038b95a04p-1, 0x1.8ff8856739326p-55}, | ||
| {0x1.71fffe09994e3p-1, 0x1.afd40275f82b1p-55}, | ||
| {0x1.7600015590e1p-1, -0x1.2fd75b4238341p-56}, | ||
| {0x1.7a00012655bd5p-1, 0x1.808e67c242b76p-56}, | ||
| {0x1.7e0003259e9a6p-1, -0x1.208e426f622b7p-57}, | ||
| {0x1.81fffedb4b2d2p-1, -0x1.402461ea5c92fp-55}, | ||
| {0x1.860002dfafcc3p-1, 0x1.df7f4a2f29a1fp-57}, | ||
| {0x1.89ffff78c6b5p-1, -0x1.e0453094995fdp-55}, | ||
| {0x1.8e00039671566p-1, -0x1.a04f3bec77b45p-55}, | ||
| {0x1.91fffe2bf1745p-1, -0x1.7fa34400e203cp-56}, | ||
| {0x1.95fffcc5c9fd1p-1, -0x1.6ff8005a0695dp-56}, | ||
| {0x1.9a0003bba4767p-1, 0x1.0f8c4c4ec7e03p-56}, | ||
| {0x1.9dfffe7b92da5p-1, 0x1.e7fd9478c4602p-55}, | ||
| {0x1.a1fffd72efdafp-1, -0x1.a0c554dcdae7ep-57}, | ||
| {0x1.a5fffde04ff95p-1, 0x1.67da98ce9b26bp-55}, | ||
| {0x1.a9fffca5e8d2bp-1, -0x1.284c9b54c13dep-55}, | ||
| {0x1.adfffddad03eap-1, 0x1.812c8ea602e3cp-58}, | ||
| {0x1.b1ffff10d3d4dp-1, -0x1.efaddad27789cp-55}, | ||
| {0x1.b5fffce21165ap-1, 0x1.3cb1719c61237p-58}, | ||
| {0x1.b9fffd950e674p-1, 0x1.3f7d94194cep-56}, | ||
| {0x1.be000139ca8afp-1, 0x1.50ac4215d9bcp-56}, | ||
| {0x1.c20005b46df99p-1, 0x1.beea653e9c1c9p-57}, | ||
| {0x1.c600040b9f7aep-1, -0x1.c079f274a70d6p-56}, | ||
| {0x1.ca0006255fd8ap-1, -0x1.a0b4076e84c1fp-56}, | ||
| {0x1.cdfffd94c095dp-1, 0x1.8f933f99ab5d7p-55}, | ||
| {0x1.d1ffff975d6cfp-1, -0x1.82c08665fe1bep-58}, | ||
| {0x1.d5fffa2561c93p-1, -0x1.b04289bd295f3p-56}, | ||
| {0x1.d9fff9d228b0cp-1, 0x1.70251340fa236p-55}, | ||
| {0x1.de00065bc7e16p-1, -0x1.5011e16a4d80cp-56}, | ||
| {0x1.e200002f64791p-1, 0x1.9802f09ef62ep-55}, | ||
| {0x1.e600057d7a6d8p-1, -0x1.e0b75580cf7fap-56}, | ||
| {0x1.ea00027edc00cp-1, -0x1.c848309459811p-55}, | ||
| {0x1.ee0006cf5cb7cp-1, -0x1.f8027951576f4p-55}, | ||
| {0x1.f2000782b7dccp-1, -0x1.f81d97274538fp-55}, | ||
| {0x1.f6000260c450ap-1, -0x1.071002727ffdcp-59}, | ||
| {0x1.f9fffe88cd533p-1, -0x1.81bdce1fda8bp-58}, | ||
| {0x1.fdfffd50f8689p-1, 0x1.7f91acb918e6ep-55}, | ||
| {0x1.0200004292367p+0, 0x1.b7ff365324681p-54}, | ||
| {0x1.05fffe3e3d668p+0, 0x1.6fa08ddae957bp-55}, | ||
| {0x1.0a0000a85a757p+0, -0x1.7e2de80d3fb91p-58}, | ||
| {0x1.0e0001a5f3fccp+0, -0x1.1823305c5f014p-54}, | ||
| {0x1.11ffff8afbaf5p+0, -0x1.bfabb6680bac2p-55}, | ||
| {0x1.15fffe54d91adp+0, -0x1.d7f121737e7efp-54}, | ||
| {0x1.1a00011ac36e1p+0, 0x1.c000a0516f5ffp-54}, | ||
| {0x1.1e00019c84248p+0, -0x1.082fbe4da5dap-54}, | ||
| {0x1.220000ffe5e6ep+0, -0x1.8fdd04c9cfb43p-55}, | ||
| {0x1.26000269fd891p+0, 0x1.cfe2a7994d182p-55}, | ||
| {0x1.2a00029a6e6dap+0, -0x1.00273715e8bc5p-56}, | ||
| {0x1.2dfffe0293e39p+0, 0x1.b7c39dab2a6f9p-54}, | ||
| {0x1.31ffff7dcf082p+0, 0x1.df1336edc5254p-56}, | ||
| {0x1.35ffff05a8b6p+0, -0x1.e03564ccd31ebp-54}, | ||
| {0x1.3a0002e0eaeccp+0, 0x1.5f0e74bd3a477p-56}, | ||
| {0x1.3e000043bb236p+0, 0x1.c7dcb149d8833p-54}, | ||
| {0x1.4200002d187ffp+0, 0x1.e08afcf2d3d28p-56}, | ||
| {0x1.460000d387cb1p+0, 0x1.20837856599a6p-55}, | ||
| {0x1.4a00004569f89p+0, -0x1.9fa5c904fbcd2p-55}, | ||
| {0x1.4e000043543f3p+0, -0x1.81125ed175329p-56}, | ||
| {0x1.51fffcc027f0fp+0, 0x1.883d8847754dcp-54}, | ||
| {0x1.55ffffd87b36fp+0, -0x1.709e731d02807p-55}, | ||
| {0x1.59ffff21df7bap+0, 0x1.7f79f68727b02p-55}, | ||
| {0x1.5dfffebfc3481p+0, -0x1.180902e30e93ep-54}, | ||
| # endif | ||
| }, | ||
| #endif /* !HAVE_FAST_FMA */ | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| /* | ||
| * Single-precision log2 function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| LOG2F_TABLE_BITS = 4 | ||
| LOG2F_POLY_ORDER = 4 | ||
| ULP error: 0.752 (nearest rounding.) | ||
| Relative error: 1.9 * 2^-26 (before rounding.) | ||
| */ | ||
|
|
||
| #define N (1 << LOG2F_TABLE_BITS) | ||
| #define T __log2f_data.tab | ||
| #define A __log2f_data.poly | ||
| #define OFF 0x3f330000 | ||
|
|
||
| float | ||
| log2f (float x) | ||
| { | ||
| /* Base-2 logarithm of X in single precision; the arithmetic is carried out | ||
| in double_t and converted back with eval_as_float at the end. | ||
| Zero, negative, infinite and NaN inputs return through the __math_*f | ||
| helpers (or return X itself for +inf); subnormal inputs are rescaled. | ||
| double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t z, r, r2, p, y, y0, invc, logc; | ||
| uint32_t ix, iz, top, tmp; | ||
| int k, i; | ||
|
|
||
| ix = asuint (x); | ||
| #if WANT_ROUNDING | ||
| /* Fix sign of zero with downward rounding when x==1. */ | ||
| if (unlikely (ix == 0x3f800000)) | ||
| return 0; | ||
| #endif | ||
| if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) | ||
| { | ||
| /* x < 0x1p-126 (this includes zero and every negative value) or inf or nan. */ | ||
| if (ix * 2 == 0) /* x is +0 or -0: doubling shifts the sign bit out */ | ||
| return __math_divzerof (1); | ||
| if (ix == 0x7f800000) /* log2(inf) == inf. */ | ||
| return x; | ||
| if ((ix & 0x80000000) || ix * 2 >= 0xff000000) /* sign bit set, or exponent all ones (NaN; +inf was handled above) */ | ||
| return __math_invalidf (x); | ||
| /* x is subnormal, normalize it. */ | ||
| ix = asuint (x * 0x1p23f); | ||
| ix -= 23 << 23; /* undo the 2^23 scaling in the exponent field */ | ||
| } | ||
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. | ||
| The range is split into N subintervals. | ||
| The ith subinterval contains z and c is near its center. */ | ||
| tmp = ix - OFF; | ||
| i = (tmp >> (23 - LOG2F_TABLE_BITS)) % N; /* table index: the LOG2F_TABLE_BITS bits just below the exponent of tmp */ | ||
| top = tmp & 0xff800000; /* sign and exponent bits of tmp, i.e. k << 23 */ | ||
| iz = ix - top; | ||
| k = (int32_t) tmp >> 23; /* arithmetic shift */ | ||
| invc = T[i].invc; | ||
| logc = T[i].logc; | ||
| z = (double_t) asfloat (iz); | ||
|
|
||
| /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ | ||
| r = z * invc - 1; | ||
| y0 = logc + (double_t) k; | ||
|
|
||
| /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. | ||
| The result is y0 + A[3]*r + A[2]*r^2 + A[1]*r^3 + A[0]*r^4. */ | ||
| r2 = r * r; | ||
| y = A[1] * r + A[2]; | ||
| y = A[0] * r2 + y; | ||
| p = A[3] * r + y0; | ||
| y = y * r2 + p; | ||
| return eval_as_float (y); | ||
| } | ||
| #if USE_GLIBC_ABI /* NOTE(review): presumably exports log2f under glibc-internal symbol names -- confirm the alias macros in math_config.h. */ | ||
| strong_alias (log2f, __log2f_finite) | ||
| hidden_alias (log2f, __ieee754_log2f) | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| /* | ||
| * Data definition for log2f. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| const struct log2f_data __log2f_data = { /* Lookup table and polynomial coefficients read by log2f. */ | ||
| .tab = { /* 16 pairs, indexed by the subinterval number i computed in log2f and read there as T[i].invc and T[i].logc. | ||
| NOTE(review): the initializer order is presumably {invc, logc} (each second value equals log2 of the reciprocal of the first) -- confirm against struct log2f_data. */ | ||
| { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 }, | ||
| { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 }, | ||
| { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 }, | ||
| { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 }, | ||
| { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 }, | ||
| { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 }, | ||
| { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 }, | ||
| { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 }, | ||
| { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 }, | ||
| { 0x1p+0, 0x0p+0 }, | ||
| { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 }, | ||
| { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 }, | ||
| { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 }, | ||
| { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 }, | ||
| { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 }, | ||
| { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 }, | ||
| }, | ||
| .poly = { /* Highest order first: log2f applies poly[0] to r^4, poly[1] to r^3, poly[2] to r^2 and poly[3] to r. */ | ||
| -0x1.712b6f70a7e4dp-2, 0x1.ecabf496832ep-2, -0x1.715479ffae3dep-1, | ||
| 0x1.715475f35c8b8p0, | ||
| } | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Single-precision log function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| LOGF_TABLE_BITS = 4 | ||
| LOGF_POLY_ORDER = 4 | ||
| ULP error: 0.818 (nearest rounding.) | ||
| Relative error: 1.957 * 2^-26 (before rounding.) | ||
| */ | ||
|
|
||
| #define T __logf_data.tab | ||
| #define A __logf_data.poly | ||
| #define Ln2 __logf_data.ln2 | ||
| #define N (1 << LOGF_TABLE_BITS) | ||
| #define OFF 0x3f330000 | ||
|
|
||
| float | ||
| logf (float x) | ||
| { | ||
| /* Natural logarithm of X in single precision; the arithmetic is carried out | ||
| in double_t and converted back with eval_as_float at the end. | ||
| Zero, negative, infinite and NaN inputs return through the __math_*f | ||
| helpers (or return X itself for +inf); subnormal inputs are rescaled. | ||
| double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t z, r, r2, y, y0, invc, logc; | ||
| uint32_t ix, iz, tmp; | ||
| int k, i; | ||
|
|
||
| ix = asuint (x); | ||
| #if WANT_ROUNDING | ||
| /* Fix sign of zero with downward rounding when x==1. */ | ||
| if (unlikely (ix == 0x3f800000)) | ||
| return 0; | ||
| #endif | ||
| if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000)) | ||
| { | ||
| /* x < 0x1p-126 (this includes zero and every negative value) or inf or nan. */ | ||
| if (ix * 2 == 0) /* x is +0 or -0: doubling shifts the sign bit out */ | ||
| return __math_divzerof (1); | ||
| if (ix == 0x7f800000) /* log(inf) == inf. */ | ||
| return x; | ||
| if ((ix & 0x80000000) || ix * 2 >= 0xff000000) /* sign bit set, or exponent all ones (NaN; +inf was handled above) */ | ||
| return __math_invalidf (x); | ||
| /* x is subnormal, normalize it. */ | ||
| ix = asuint (x * 0x1p23f); | ||
| ix -= 23 << 23; /* undo the 2^23 scaling in the exponent field */ | ||
| } | ||
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. | ||
| The range is split into N subintervals. | ||
| The ith subinterval contains z and c is near its center. */ | ||
| tmp = ix - OFF; | ||
| i = (tmp >> (23 - LOGF_TABLE_BITS)) % N; /* table index: the LOGF_TABLE_BITS bits just below the exponent of tmp */ | ||
| k = (int32_t) tmp >> 23; /* arithmetic shift */ | ||
| /* Mask the sign and exponent bits of tmp with an unsigned constant. | ||
| Building the mask by left-shifting the int constant 0x1ff by 23 would | ||
| overflow a signed int, which is undefined behaviour. */ | ||
| iz = ix - (tmp & 0xff800000); | ||
| invc = T[i].invc; | ||
| logc = T[i].logc; | ||
| z = (double_t) asfloat (iz); | ||
|
|
||
| /* log(x) = log1p(z/c-1) + log(c) + k*Ln2 */ | ||
| r = z * invc - 1; | ||
| y0 = logc + (double_t) k * Ln2; | ||
|
|
||
| /* Pipelined polynomial evaluation to approximate log1p(r). | ||
| The result is y0 + r + A[2]*r^2 + A[1]*r^3 + A[0]*r^4. */ | ||
| r2 = r * r; | ||
| y = A[1] * r + A[2]; | ||
| y = A[0] * r2 + y; | ||
| y = y * r2 + (y0 + r); | ||
| return eval_as_float (y); | ||
| } | ||
| #if USE_GLIBC_ABI /* NOTE(review): presumably exports logf under glibc-internal symbol names -- confirm the alias macros in math_config.h. */ | ||
| strong_alias (logf, __logf_finite) | ||
| hidden_alias (logf, __ieee754_logf) | ||
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,34 @@ | ||
| /* | ||
| * Data definition for logf. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| const struct logf_data __logf_data = { /* Lookup table, ln(2) and polynomial coefficients read by logf. */ | ||
| .tab = { /* 16 pairs, indexed by the subinterval number i computed in logf and read there as T[i].invc and T[i].logc. | ||
| NOTE(review): the initializer order is presumably {invc, logc} (each second value equals the natural log of the reciprocal of the first) -- confirm against struct logf_data. */ | ||
| { 0x1.661ec79f8f3bep+0, -0x1.57bf7808caadep-2 }, | ||
| { 0x1.571ed4aaf883dp+0, -0x1.2bef0a7c06ddbp-2 }, | ||
| { 0x1.49539f0f010bp+0, -0x1.01eae7f513a67p-2 }, | ||
| { 0x1.3c995b0b80385p+0, -0x1.b31d8a68224e9p-3 }, | ||
| { 0x1.30d190c8864a5p+0, -0x1.6574f0ac07758p-3 }, | ||
| { 0x1.25e227b0b8eap+0, -0x1.1aa2bc79c81p-3 }, | ||
| { 0x1.1bb4a4a1a343fp+0, -0x1.a4e76ce8c0e5ep-4 }, | ||
| { 0x1.12358f08ae5bap+0, -0x1.1973c5a611cccp-4 }, | ||
| { 0x1.0953f419900a7p+0, -0x1.252f438e10c1ep-5 }, | ||
| { 0x1p+0, 0x0p+0 }, | ||
| { 0x1.e608cfd9a47acp-1, 0x1.aa5aa5df25984p-5 }, | ||
| { 0x1.ca4b31f026aap-1, 0x1.c5e53aa362eb4p-4 }, | ||
| { 0x1.b2036576afce6p-1, 0x1.526e57720db08p-3 }, | ||
| { 0x1.9c2d163a1aa2dp-1, 0x1.bc2860d22477p-3 }, | ||
| { 0x1.886e6037841edp-1, 0x1.1058bc8a07ee1p-2 }, | ||
| { 0x1.767dcf5534862p-1, 0x1.4043057b6ee09p-2 }, | ||
| }, | ||
| .ln2 = 0x1.62e42fefa39efp-1, /* multiplied by the exponent k in logf */ | ||
| .poly = { /* Highest order first: logf applies poly[0] to r^4, poly[1] to r^3 and poly[2] to r^2. */ | ||
| -0x1.00ea348b88334p-2, 0x1.5575b0be00b6ap-2, -0x1.ffffef20a4123p-2, | ||
| } | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,81 @@ | ||
| /* | ||
| * Double-precision math error handling. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| #if WANT_ERRNO | ||
| #include <errno.h> | ||
| /* Store E in errno and hand Y back unchanged, so an error path can be | ||
| written as a single return expression. | ||
| NOINLINE reduces code size and avoids making math functions non-leaf | ||
| when the error handling is inlined. */ | ||
| NOINLINE static double | ||
| with_errno (double y, int e) | ||
| { | ||
| errno = e; | ||
| return y; | ||
| } | ||
| #else | ||
| #define with_errno(x, e) (x) /* errno reporting disabled: yield the value only, E is never evaluated */ | ||
| #endif | ||
|
|
||
| /* Shared overflow/underflow helper. Multiplies Y by itself, negating one | ||
| factor first when SIGN is non-zero, and passes the product through | ||
| with_errno (y, ERANGE). Callers in this file pass a Y whose square lies | ||
| outside the double range. | ||
| NOTE(review): opt_barrier_double presumably stops the compiler from folding | ||
| the product at compile time so the exception is raised at run time -- confirm. | ||
| NOINLINE reduces code size. */ | ||
| NOINLINE static double | ||
| xflow (uint32_t sign, double y) | ||
| { | ||
| y = eval_as_double (opt_barrier_double (sign ? -y : y) * y); | ||
| return with_errno (y, ERANGE); | ||
| } | ||
|
|
||
| /* Underflow result with the sign selected by SIGN: xflow squares 0x1p-767, | ||
| and 2^-1534 is far below the smallest subnormal double. */ | ||
| HIDDEN double | ||
| __math_uflow (uint32_t sign) | ||
| { | ||
| const double tiny = 0x1p-767; | ||
| return xflow (sign, tiny); | ||
| } | ||
|
|
||
| #if WANT_ERRNO_UFLOW | ||
| /* Result for cases that underflow to zero only in some non-nearest rounding | ||
| modes. Setting errno remains valid when the result is non-zero but lies | ||
| in the subnormal range. */ | ||
| HIDDEN double | ||
| __math_may_uflow (uint32_t sign) | ||
| { | ||
| const double borderline = 0x1.8p-538; | ||
| return xflow (sign, borderline); | ||
| } | ||
| #endif | ||
|
|
||
| /* Overflow result with the sign selected by SIGN: xflow squares 0x1p769, | ||
| and 2^1538 is beyond the largest finite double. */ | ||
| HIDDEN double | ||
| __math_oflow (uint32_t sign) | ||
| { | ||
| const double huge = 0x1p769; | ||
| return xflow (sign, huge); | ||
| } | ||
|
|
||
| HIDDEN double | ||
| __math_divzero (uint32_t sign) | ||
| { /* Divide +1.0 (SIGN == 0) or -1.0 (SIGN != 0) by 0.0 to produce an infinity | ||
| of that sign, then pass it through with_errno (y, ERANGE). | ||
| NOTE(review): opt_barrier_double presumably keeps the division from being | ||
| folded at compile time so divide-by-zero is raised at run time -- confirm. */ | ||
| double y = opt_barrier_double (sign ? -1.0 : 1.0) / 0.0; | ||
| return with_errno (y, ERANGE); | ||
| } | ||
|
|
||
| HIDDEN double | ||
| __math_invalid (double x) | ||
| { /* (x - x) / (x - x) is 0/0 for finite X and NaN arithmetic for infinite or | ||
| NaN X, so Y is always a NaN. EDOM is reported through with_errno only | ||
| when X itself is not a NaN. */ | ||
| double y = (x - x) / (x - x); | ||
| return isnan (x) ? y : with_errno (y, EDOM); | ||
| } | ||
|
|
||
| /* Check result and set errno if necessary. */ | ||
|
|
||
| /* Return Y unchanged; a zero result is additionally reported as a range | ||
| error through with_errno. */ | ||
| HIDDEN double | ||
| __math_check_uflow (double y) | ||
| { | ||
| if (y == 0.0) | ||
| return with_errno (y, ERANGE); | ||
| return y; | ||
| } | ||
|
|
||
| /* Return Y unchanged; an infinite result is additionally reported as a range | ||
| error through with_errno. */ | ||
| HIDDEN double | ||
| __math_check_oflow (double y) | ||
| { | ||
| if (isinf (y)) | ||
| return with_errno (y, ERANGE); | ||
| return y; | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| /* | ||
| * Single-precision math error handling. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| #if WANT_ERRNO | ||
| #include <errno.h> | ||
| /* Store E in errno and hand Y back unchanged, so an error path can be | ||
| written as a single return expression. | ||
| NOINLINE reduces code size and avoids making math functions non-leaf | ||
| when the error handling is inlined. */ | ||
| NOINLINE static float | ||
| with_errnof (float y, int e) | ||
| { | ||
| errno = e; | ||
| return y; | ||
| } | ||
| #else | ||
| #define with_errnof(x, e) (x) /* errno reporting disabled: yield the value only, E is never evaluated */ | ||
| #endif | ||
|
|
||
| /* Shared overflow/underflow helper. Multiplies Y by itself, negating one | ||
| factor first when SIGN is non-zero, and passes the product through | ||
| with_errnof (y, ERANGE). Callers in this file pass a Y whose square lies | ||
| outside the float range. | ||
| NOTE(review): opt_barrier_float presumably stops the compiler from folding | ||
| the product at compile time so the exception is raised at run time -- confirm. | ||
| NOINLINE reduces code size. */ | ||
| NOINLINE static float | ||
| xflowf (uint32_t sign, float y) | ||
| { | ||
| y = eval_as_float (opt_barrier_float (sign ? -y : y) * y); | ||
| return with_errnof (y, ERANGE); | ||
| } | ||
|
|
||
| /* Underflow result with the sign selected by SIGN: xflowf squares 0x1p-95f, | ||
| and 2^-190 is far below the smallest subnormal float. */ | ||
| HIDDEN float | ||
| __math_uflowf (uint32_t sign) | ||
| { | ||
| const float tiny = 0x1p-95f; | ||
| return xflowf (sign, tiny); | ||
| } | ||
|
|
||
| #if WANT_ERRNO_UFLOW | ||
| /* Result for cases that underflow to zero only in some non-nearest rounding | ||
| modes. Setting errno remains valid when the result is non-zero but lies | ||
| in the subnormal range. */ | ||
| HIDDEN float | ||
| __math_may_uflowf (uint32_t sign) | ||
| { | ||
| const float borderline = 0x1.4p-75f; | ||
| return xflowf (sign, borderline); | ||
| } | ||
| #endif | ||
|
|
||
| /* Overflow result with the sign selected by SIGN: xflowf squares 0x1p97f, | ||
| and 2^194 is beyond the largest finite float. */ | ||
| HIDDEN float | ||
| __math_oflowf (uint32_t sign) | ||
| { | ||
| const float huge = 0x1p97f; | ||
| return xflowf (sign, huge); | ||
| } | ||
|
|
||
| HIDDEN float | ||
| __math_divzerof (uint32_t sign) | ||
| { /* Divide +1.0f (SIGN == 0) or -1.0f (SIGN != 0) by 0.0f to produce an | ||
| infinity of that sign, then pass it through with_errnof (y, ERANGE). | ||
| NOTE(review): opt_barrier_float presumably keeps the division from being | ||
| folded at compile time so divide-by-zero is raised at run time -- confirm. */ | ||
| float y = opt_barrier_float (sign ? -1.0f : 1.0f) / 0.0f; | ||
| return with_errnof (y, ERANGE); | ||
| } | ||
|
|
||
| HIDDEN float | ||
| __math_invalidf (float x) | ||
| { /* (x - x) / (x - x) is 0/0 for finite X and NaN arithmetic for infinite or | ||
| NaN X, so Y is always a NaN. EDOM is reported through with_errnof only | ||
| when X itself is not a NaN. */ | ||
| float y = (x - x) / (x - x); | ||
| return isnan (x) ? y : with_errnof (y, EDOM); | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,381 @@ | ||
| /* | ||
| * Double-precision x^y function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <float.h> | ||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| Worst-case error: 0.54 ULP (~= ulperr_exp + 1024*Ln2*relerr_log*2^53) | ||
| relerr_log: 1.3 * 2^-68 (Relative error of log, 1.5 * 2^-68 without fma) | ||
| ulperr_exp: 0.509 ULP (ULP error of exp, 0.511 ULP without fma) | ||
| */ | ||
|
|
||
| #define T __pow_log_data.tab | ||
| #define A __pow_log_data.poly | ||
| #define Ln2hi __pow_log_data.ln2hi | ||
| #define Ln2lo __pow_log_data.ln2lo | ||
| #define N (1 << POW_LOG_TABLE_BITS) | ||
| #define OFF 0x3fe6955500000000 | ||
|
|
||
| /* Extract the uppermost 12 bits of the bit pattern of X, i.e. its sign bit | ||
| followed by the 11 exponent bits. */ | ||
| static inline uint32_t | ||
| top12 (double x) | ||
| { | ||
| uint64_t bits = asuint64 (x); | ||
| return bits >> 52; | ||
| } | ||
|
|
||
| /* Compute y+TAIL = log(x) where the rounded result is y and TAIL has about | ||
| additional 15 bits precision. IX is the bit representation of x, but | ||
| normalized in the subnormal range using the sign bit for the exponent. | ||
| Returns y and stores the correction term through the TAIL pointer. */ | ||
| static inline double_t | ||
| log_inline (uint64_t ix, double_t *tail) | ||
| { | ||
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | ||
| double_t z, r, y, invc, logc, logctail, kd, hi, t1, t2, lo, lo1, lo2, p; | ||
| uint64_t iz, tmp; | ||
| int k, i; | ||
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF) and exact. | ||
| The range is split into N subintervals. | ||
| The ith subinterval contains z and c is near its center. */ | ||
| tmp = ix - OFF; | ||
| i = (tmp >> (52 - POW_LOG_TABLE_BITS)) % N; /* table index: the POW_LOG_TABLE_BITS bits just below the exponent of tmp */ | ||
| k = (int64_t) tmp >> 52; /* arithmetic shift */ | ||
| iz = ix - (tmp & 0xfffULL << 52); /* remove k from the exponent field, leaving the bits of z */ | ||
| z = asdouble (iz); | ||
| kd = (double_t) k; | ||
|
|
||
| /* log(x) = k*Ln2 + log(c) + log1p(z/c-1). */ | ||
| invc = T[i].invc; | ||
| logc = T[i].logc; | ||
| logctail = T[i].logctail; | ||
|
|
||
| /* Note: 1/c is j/N or j/N/2 where j is an integer in [N,2N) and | ||
| |z/c - 1| < 1/N, so r = z/c - 1 is exactly representable. */ | ||
| #if HAVE_FAST_FMA | ||
| r = fma (z, invc, -1.0); | ||
| #else | ||
| /* Split z such that rhi, rlo and rhi*rhi are exact and |rlo| <= |r|. */ | ||
| double_t zhi = asdouble ((iz + (1ULL << 31)) & (-1ULL << 32)); | ||
| double_t zlo = z - zhi; | ||
| double_t rhi = zhi * invc - 1.0; | ||
| double_t rlo = zlo * invc; | ||
| r = rhi + rlo; | ||
| #endif | ||
|
|
||
| /* k*Ln2 + log(c) + r. | ||
| t2 is the rounded sum; lo1 and lo2 collect the low-order parts. */ | ||
| t1 = kd * Ln2hi + logc; | ||
| t2 = t1 + r; | ||
| lo1 = kd * Ln2lo + logctail; | ||
| lo2 = t1 - t2 + r; | ||
|
|
||
| /* Evaluation is optimized assuming superscalar pipelined execution. */ | ||
| double_t ar, ar2, ar3, lo3, lo4; | ||
| ar = A[0] * r; /* A[0] = -0.5. */ | ||
| ar2 = r * ar; | ||
| ar3 = r * ar2; | ||
| /* k*Ln2 + log(c) + r + A[0]*r*r. */ | ||
| #if HAVE_FAST_FMA | ||
| hi = t2 + ar2; | ||
| lo3 = fma (ar, r, -ar2); | ||
| lo4 = t2 - hi + ar2; | ||
| #else | ||
| double_t arhi = A[0] * rhi; | ||
| double_t arhi2 = rhi * arhi; | ||
| hi = t2 + arhi2; | ||
| lo3 = rlo * (ar + arhi); | ||
| lo4 = t2 - hi + arhi2; | ||
| #endif | ||
| /* p = log1p(r) - r - A[0]*r*r. */ | ||
| #if POW_LOG_POLY_ORDER == 8 | ||
| p = (ar3 | ||
| * (A[1] + r * A[2] + ar2 * (A[3] + r * A[4] + ar2 * (A[5] + r * A[6])))); | ||
| #endif | ||
| lo = lo1 + lo2 + lo3 + lo4 + p; | ||
| y = hi + lo; | ||
| *tail = hi - y + lo; /* what was lost when hi + lo was rounded to y */ | ||
| return y; | ||
| } | ||
|
|
||
| #undef N | ||
| #undef T | ||
| #define N (1 << EXP_TABLE_BITS) | ||
| #define InvLn2N __exp_data.invln2N | ||
| #define NegLn2hiN __exp_data.negln2hiN | ||
| #define NegLn2loN __exp_data.negln2loN | ||
| #define Shift __exp_data.shift | ||
| #define T __exp_data.tab | ||
| #define C2 __exp_data.poly[5 - EXP_POLY_ORDER] | ||
| #define C3 __exp_data.poly[6 - EXP_POLY_ORDER] | ||
| #define C4 __exp_data.poly[7 - EXP_POLY_ORDER] | ||
| #define C5 __exp_data.poly[8 - EXP_POLY_ORDER] | ||
| #define C6 __exp_data.poly[9 - EXP_POLY_ORDER] | ||
|
|
||
| /* Handle cases that may overflow or underflow when computing the result that | ||
| is scale*(1+TMP) without intermediate rounding. The bit representation of | ||
| scale is in SBITS, however it has a computed exponent that may have | ||
| overflowed into the sign bit so that needs to be adjusted before using it as | ||
| a double. (int32_t)KI is the k used in the argument reduction and exponent | ||
| adjustment of scale, positive k here means the result may overflow and | ||
| negative k means the result may underflow. | ||
| The result is returned through check_oflow on the overflow side and through | ||
| check_uflow on the underflow side. */ | ||
| static inline double | ||
| specialcase (double_t tmp, uint64_t sbits, uint64_t ki) | ||
| { | ||
| double_t scale, y; | ||
|
|
||
| if ((ki & 0x80000000) == 0) /* bit 31 of ki clear: (int32_t) ki is not negative */ | ||
| { | ||
| /* k > 0, the exponent of scale might have overflowed by <= 460. */ | ||
| sbits -= 1009ull << 52; /* lower the exponent by 1009; compensated by the 0x1p1009 factor below */ | ||
| scale = asdouble (sbits); | ||
| y = 0x1p1009 * (scale + scale * tmp); | ||
| return check_oflow (eval_as_double (y)); | ||
| } | ||
| /* k < 0, need special care in the subnormal range. | ||
| The exponent is raised by 1022 here and the final multiplication by | ||
| 0x1p-1022 compensates for it. */ | ||
| sbits += 1022ull << 52; | ||
| /* Note: sbits is signed scale. */ | ||
| scale = asdouble (sbits); | ||
| y = scale + scale * tmp; | ||
| if (fabs (y) < 1.0) | ||
| { | ||
| /* Round y to the right precision before scaling it into the subnormal | ||
| range to avoid double rounding that can cause 0.5+E/2 ulp error where | ||
| E is the worst-case ulp error outside the subnormal range. So this | ||
| is only useful if the goal is better than 1 ulp worst-case error. */ | ||
| double_t hi, lo, one = 1.0; | ||
| if (y < 0.0) | ||
| one = -1.0; | ||
| lo = scale - y + scale * tmp; | ||
| hi = one + y; | ||
| lo = one - hi + y + lo; | ||
| y = eval_as_double (hi + lo) - one; | ||
| /* Fix the sign of 0. */ | ||
| if (y == 0.0) | ||
| y = asdouble (sbits & 0x8000000000000000); | ||
| /* The underflow exception needs to be signaled explicitly. */ | ||
| force_eval_double (opt_barrier_double (0x1p-1022) * 0x1p-1022); | ||
| } | ||
| y = 0x1p-1022 * y; | ||
| return check_uflow (eval_as_double (y)); | ||
| } | ||
|
|
||
| /* Value added to k in exp_inline to negate the result: after the | |
| (52 - EXP_TABLE_BITS) left shift applied there, this bit lands on bit 63, | |
| the sign bit of the scale. */ | |
| #define SIGN_BIAS (0x800 << EXP_TABLE_BITS) | |
|
|
||
| /* Computes sign*exp(x+xtail) where |xtail| < 2^-8/N and |xtail| <= |x|. | |
| The sign_bias argument is SIGN_BIAS or 0 and sets the sign to -1 or 1. | |
| Outline: tiny |x| and |x| >= 1024 are answered directly; otherwise x is | |
| reduced to r = x - k*ln2/N, 2^(k/N) is read from the table T and exp(r) | |
| is approximated by a polynomial. Inputs with 512 <= |x| < 1024 take the | |
| same path but are finished by specialcase. */ | |
| static inline double | |
| exp_inline (double_t x, double_t xtail, uint32_t sign_bias) | |
| { | |
| uint32_t abstop; | |
| uint64_t ki, idx, top, sbits; | |
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | |
| double_t kd, z, r, r2, scale, tail, tmp; | |
|
|
||
| abstop = top12 (x) & 0x7ff; | |
| if (unlikely (abstop - top12 (0x1p-54) >= top12 (512.0) - top12 (0x1p-54))) | |
| { | |
| if (abstop - top12 (0x1p-54) >= 0x80000000) | |
| { | |
| /* Avoid spurious underflow for tiny x. | |
| The unsigned subtraction wrapped around, i.e. |x| < 2^-54. */ | |
| /* Note: 0 is common input. */ | |
| double_t one = WANT_ROUNDING ? 1.0 + x : 1.0; | |
| return sign_bias ? -one : one; | |
| } | |
| if (abstop >= top12 (1024.0)) | |
| { | |
| /* Note: inf and nan are already handled. */ | |
| if (asuint64 (x) >> 63) | |
| return __math_uflow (sign_bias); | |
| else | |
| return __math_oflow (sign_bias); | |
| } | |
| /* Large x is special cased below. | |
| abstop == 0 is the marker tested before the final scaling. */ | |
| abstop = 0; | |
| } | |
|
|
||
| /* exp(x) = 2^(k/N) * exp(r), with exp(r) in [2^(-1/2N),2^(1/2N)]. */ | |
| /* x = ln2/N*k + r, with int k and r in [-ln2/2N, ln2/2N]. */ | |
| z = InvLn2N * x; | |
| #if TOINT_INTRINSICS | |
| kd = roundtoint (z); | |
| ki = converttoint (z); | |
| #elif EXP_USE_TOINT_NARROW | |
| /* z - kd is in [-0.5-2^-16, 0.5] in all rounding modes. */ | |
| kd = eval_as_double (z + Shift); | |
| ki = asuint64 (kd) >> 16; | |
| kd = (double_t) (int32_t) ki; | |
| #else | |
| /* z - kd is in [-1, 1] in non-nearest rounding modes. */ | |
| kd = eval_as_double (z + Shift); | |
| ki = asuint64 (kd); | |
| kd -= Shift; | |
| #endif | |
| r = x + kd * NegLn2hiN + kd * NegLn2loN; | |
| /* The code assumes 2^-200 < |xtail| < 2^-8/N. */ | |
| r += xtail; | |
| /* 2^(k/N) ~= scale * (1 + tail). | |
| T holds two words per entry: the tail at T[idx] and the bits of the | |
| scale at T[idx + 1]; k (plus sign_bias) is shifted so that it is added | |
| into the upper bits of the scale. */ | |
| idx = 2 * (ki % N); | |
| top = (ki + sign_bias) << (52 - EXP_TABLE_BITS); | |
| tail = asdouble (T[idx]); | |
| /* This is only a valid scale when -1023*N < k < 1024*N. */ | |
| sbits = T[idx + 1] + top; | |
| /* exp(x) = 2^(k/N) * exp(r) ~= scale + scale * (tail + exp(r) - 1). */ | |
| /* Evaluation is optimized assuming superscalar pipelined execution. */ | |
| r2 = r * r; | |
| /* Without fma the worst case error is 0.25/N ulp larger. */ | |
| /* Worst case error is less than 0.5+1.11/N+(abs poly error * 2^53) ulp. */ | |
| #if EXP_POLY_ORDER == 4 | |
| tmp = tail + r + r2 * C2 + r * r2 * (C3 + r * C4); | |
| #elif EXP_POLY_ORDER == 5 | |
| tmp = tail + r + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); | |
| #elif EXP_POLY_ORDER == 6 | |
| tmp = tail + r + r2 * (0.5 + r * C3) + r2 * r2 * (C4 + r * C5 + r2 * C6); | |
| #endif | |
| if (unlikely (abstop == 0)) | |
| return specialcase (tmp, sbits, ki); | |
| scale = asdouble (sbits); | |
| /* Note: tmp == 0 or |tmp| > 2^-200 and scale > 2^-739, so there | |
| is no spurious underflow here even without fma. */ | |
| return eval_as_double (scale + scale * tmp); | |
| } | |
|
|
||
| /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is | |
| the bit representation of a non-zero finite floating-point value. | |
| With biased exponent e: e < 0x3ff means |value| < 1, so it is not an | |
| integer; e > 0x3ff + 52 means no fraction bits and no units bit are | |
| left in the mantissa, so the value is an even integer; otherwise the low | |
| (0x3ff + 52 - e) bits are the fraction and the bit just above them is | |
| the units bit that decides odd or even. */ | |
| static inline int | |
| checkint (uint64_t iy) | |
| { | |
| int e = iy >> 52 & 0x7ff; | |
| if (e < 0x3ff) | |
| return 0; | |
| if (e > 0x3ff + 52) | |
| return 2; | |
| if (iy & ((1ULL << (0x3ff + 52 - e)) - 1)) | |
| return 0; | |
| if (iy & (1ULL << (0x3ff + 52 - e))) | |
| return 1; | |
| return 2; | |
| } | |
|
|
||
| /* Returns 1 if input is the bit representation of 0, infinity or nan. | |
| 2 * i discards the sign bit; subtracting 1 wraps zero around to the | |
| largest uint64_t, so a single unsigned comparison catches zero together | |
| with every pattern at or above infinity. */ | |
| static inline int | |
| zeroinfnan (uint64_t i) | |
| { | |
| return 2 * i - 1 >= 2 * asuint64 (INFINITY) - 1; | |
| } | |
|
|
||
| /* Double-precision x^y. | |
| Special inputs (x zero, subnormal, negative, inf or nan; y zero, tiny, | |
| huge, inf or nan) are resolved inside the unlikely branch. The main path | |
| calls log_inline (its definition starts above this view; presumably it | |
| yields log(x) as hi + lo), forms y*log(x) as ehi + elo and returns | |
| exp_inline (ehi, elo, sign_bias). sign_bias becomes SIGN_BIAS when x < 0 | |
| and y is an odd integer, which makes the result negative. */ | |
| double | |
| pow (double x, double y) | |
| { | |
| uint32_t sign_bias = 0; | |
| uint64_t ix, iy; | |
| uint32_t topx, topy; | |
|
|
||
| ix = asuint64 (x); | |
| iy = asuint64 (y); | |
| topx = top12 (x); | |
| topy = top12 (y); | |
| if (unlikely (topx - 0x001 >= 0x7ff - 0x001 | |
| || (topy & 0x7ff) - 0x3be >= 0x43e - 0x3be)) | |
| { | |
| /* Note: if |y| > 1075 * ln2 * 2^53 ~= 0x1.749p62 then pow(x,y) = inf/0 | |
| and if |y| < 2^-54 / 1075 ~= 0x1.e7b6p-65 then pow(x,y) = +-1. */ | |
| /* Special cases: (x < 0x1p-1022 or inf or nan) or | |
| (|y| < 0x1p-65 or |y| >= 0x1p63 or nan). */ | |
| if (unlikely (zeroinfnan (iy))) | |
| { | |
| if (2 * iy == 0) | |
| return issignaling_inline (x) ? x + y : 1.0; | |
| if (ix == asuint64 (1.0)) | |
| return issignaling_inline (y) ? x + y : 1.0; | |
| if (2 * ix > 2 * asuint64 (INFINITY) | |
| || 2 * iy > 2 * asuint64 (INFINITY)) | |
| return x + y; | |
| if (2 * ix == 2 * asuint64 (1.0)) | |
| return 1.0; | |
| if ((2 * ix < 2 * asuint64 (1.0)) == !(iy >> 63)) | |
| return 0.0; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ | |
| return y * y; | |
| } | |
| if (unlikely (zeroinfnan (ix))) | |
| { | |
| double_t x2 = x * x; | |
| if (ix >> 63 && checkint (iy) == 1) | |
| { | |
| x2 = -x2; | |
| /* In this branch sign_bias is only read by the __math_divzero | |
| call below, so a plain 1 is used instead of SIGN_BIAS. */ | |
| sign_bias = 1; | |
| } | |
| if (WANT_ERRNO && 2 * ix == 0 && iy >> 63) | |
| return __math_divzero (sign_bias); | |
| /* Without the barrier some versions of clang hoist the 1/x2 and | |
| thus division by zero exception can be signaled spuriously. */ | |
| return iy >> 63 ? opt_barrier_double (1 / x2) : x2; | |
| } | |
| /* Here x and y are non-zero finite. */ | |
| if (ix >> 63) | |
| { | |
| /* Finite x < 0. */ | |
| int yint = checkint (iy); | |
| if (yint == 0) | |
| return __math_invalid (x); | |
| if (yint == 1) | |
| sign_bias = SIGN_BIAS; | |
| ix &= 0x7fffffffffffffff; | |
| topx &= 0x7ff; | |
| } | |
| if ((topy & 0x7ff) - 0x3be >= 0x43e - 0x3be) | |
| { | |
| /* Note: sign_bias == 0 here because y is not odd. */ | |
| if (ix == asuint64 (1.0)) | |
| return 1.0; | |
| if ((topy & 0x7ff) < 0x3be) | |
| { | |
| /* |y| < 2^-65, x^y ~= 1 + y*log(x). */ | |
| if (WANT_ROUNDING) | |
| return ix > asuint64 (1.0) ? 1.0 + y : 1.0 - y; | |
| else | |
| return 1.0; | |
| } | |
| return (ix > asuint64 (1.0)) == (topy < 0x800) ? __math_oflow (0) | |
| : __math_uflow (0); | |
| } | |
| if (topx == 0) | |
| { | |
| /* Normalize subnormal x so exponent becomes negative. */ | |
| /* Without the barrier some versions of clang evaluate the mul | |
| unconditionally causing spurious overflow exceptions. */ | |
| ix = asuint64 (opt_barrier_double (x) * 0x1p52); | |
| ix &= 0x7fffffffffffffff; | |
| ix -= 52ULL << 52; | |
| } | |
| } | |
|
|
||
| double_t lo; | |
| double_t hi = log_inline (ix, &lo); | |
| double_t ehi, elo; | |
| #if HAVE_FAST_FMA | |
| ehi = y * hi; | |
| elo = y * lo + fma (y, hi, -ehi); | |
| #else | |
| /* Split y and hi into a high part with the low 27 bits cleared and a | |
| low remainder, so the product yhi * lhi needs at most 52 significant | |
| bits. */ | |
| double_t yhi = asdouble (iy & -1ULL << 27); | |
| double_t ylo = y - yhi; | |
| double_t lhi = asdouble (asuint64 (hi) & -1ULL << 27); | |
| double_t llo = hi - lhi + lo; | |
| ehi = yhi * lhi; | |
| elo = ylo * lhi + y * llo; /* |elo| < |ehi| * 2^-25. */ | |
| #endif | |
| return exp_inline (ehi, elo, sign_bias); | |
| } | |
| /* glibc ABI compatibility: extra symbol names for pow (presumably created | |
| by the strong_alias/hidden_alias macros, which are defined elsewhere), and | |
| powl forwarding to pow when long double has a 53-bit mantissa. */ | |
| #if USE_GLIBC_ABI | |
| strong_alias (pow, __pow_finite) | |
| hidden_alias (pow, __ieee754_pow) | |
| # if LDBL_MANT_DIG == 53 | |
| long double powl (long double x, long double y) { return pow (x, y); } | |
| # endif | |
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,185 @@ | ||
| /* | ||
| * Data for the log part of pow. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| #define N (1 << POW_LOG_TABLE_BITS) | ||
|
|
||
| const struct pow_log_data __pow_log_data = { | ||
| .ln2hi = 0x1.62e42fefa3800p-1, | ||
| .ln2lo = 0x1.ef35793c76730p-45, | ||
| .poly = { | ||
| #if N == 128 && POW_LOG_POLY_ORDER == 8 | ||
| // relative error: 0x1.11922ap-70 | ||
| // in -0x1.6bp-8 0x1.6bp-8 | ||
| // Coefficients are scaled to match the scaling during evaluation. | ||
| -0x1p-1, | ||
| 0x1.555555555556p-2 * -2, | ||
| -0x1.0000000000006p-2 * -2, | ||
| 0x1.999999959554ep-3 * 4, | ||
| -0x1.555555529a47ap-3 * 4, | ||
| 0x1.2495b9b4845e9p-3 * -8, | ||
| -0x1.0002b8b263fc3p-3 * -8, | ||
| #endif | ||
| }, | ||
| /* Algorithm: | ||
| x = 2^k z | ||
| log(x) = k ln2 + log(c) + log(z/c) | ||
| log(z/c) = poly(z/c - 1) | ||
| where z is in [0x1.69555p-1; 0x1.69555p0] which is split into N subintervals | ||
| and z falls into the ith one, then table entries are computed as | ||
| tab[i].invc = 1/c | ||
| tab[i].logc = round(0x1p43*log(c))/0x1p43 | ||
| tab[i].logctail = (double)(log(c) - logc) | ||
| where c is chosen near the center of the subinterval such that 1/c has only a | ||
| few precision bits so z/c - 1 is exactly representable as double: | |
| 1/c = center < 1 ? round(N/center)/N : round(2*N/center)/N/2 | ||
| Note: |z/c - 1| < 1/N for the chosen c, |log(c) - logc - logctail| < 0x1p-97, | ||
| the last few bits of logc are rounded away so k*ln2hi + logc has no rounding | ||
| error and the interval for z is selected such that near x == 1, where log(x) | ||
| is tiny, large cancellation error is avoided in logc + poly(z/c - 1). */ | ||
| .tab = { | ||
| #if N == 128 | ||
| #define A(a, b, c) {a, 0, b, c}, | ||
| A(0x1.6a00000000000p+0, -0x1.62c82f2b9c800p-2, 0x1.ab42428375680p-48) | ||
| A(0x1.6800000000000p+0, -0x1.5d1bdbf580800p-2, -0x1.ca508d8e0f720p-46) | ||
| A(0x1.6600000000000p+0, -0x1.5767717455800p-2, -0x1.362a4d5b6506dp-45) | ||
| A(0x1.6400000000000p+0, -0x1.51aad872df800p-2, -0x1.684e49eb067d5p-49) | ||
| A(0x1.6200000000000p+0, -0x1.4be5f95777800p-2, -0x1.41b6993293ee0p-47) | ||
| A(0x1.6000000000000p+0, -0x1.4618bc21c6000p-2, 0x1.3d82f484c84ccp-46) | ||
| A(0x1.5e00000000000p+0, -0x1.404308686a800p-2, 0x1.c42f3ed820b3ap-50) | ||
| A(0x1.5c00000000000p+0, -0x1.3a64c55694800p-2, 0x1.0b1c686519460p-45) | ||
| A(0x1.5a00000000000p+0, -0x1.347dd9a988000p-2, 0x1.5594dd4c58092p-45) | ||
| A(0x1.5800000000000p+0, -0x1.2e8e2bae12000p-2, 0x1.67b1e99b72bd8p-45) | ||
| A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46) | ||
| A(0x1.5600000000000p+0, -0x1.2895a13de8800p-2, 0x1.5ca14b6cfb03fp-46) | ||
| A(0x1.5400000000000p+0, -0x1.22941fbcf7800p-2, -0x1.65a242853da76p-46) | ||
| A(0x1.5200000000000p+0, -0x1.1c898c1699800p-2, -0x1.fafbc68e75404p-46) | ||
| A(0x1.5000000000000p+0, -0x1.1675cababa800p-2, 0x1.f1fc63382a8f0p-46) | ||
| A(0x1.4e00000000000p+0, -0x1.1058bf9ae4800p-2, -0x1.6a8c4fd055a66p-45) | ||
| A(0x1.4c00000000000p+0, -0x1.0a324e2739000p-2, -0x1.c6bee7ef4030ep-47) | ||
| A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48) | ||
| A(0x1.4a00000000000p+0, -0x1.0402594b4d000p-2, -0x1.036b89ef42d7fp-48) | ||
| A(0x1.4800000000000p+0, -0x1.fb9186d5e4000p-3, 0x1.d572aab993c87p-47) | ||
| A(0x1.4600000000000p+0, -0x1.ef0adcbdc6000p-3, 0x1.b26b79c86af24p-45) | ||
| A(0x1.4400000000000p+0, -0x1.e27076e2af000p-3, -0x1.72f4f543fff10p-46) | ||
| A(0x1.4200000000000p+0, -0x1.d5c216b4fc000p-3, 0x1.1ba91bbca681bp-45) | ||
| A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45) | ||
| A(0x1.4000000000000p+0, -0x1.c8ff7c79aa000p-3, 0x1.7794f689f8434p-45) | ||
| A(0x1.3e00000000000p+0, -0x1.bc286742d9000p-3, 0x1.94eb0318bb78fp-46) | ||
| A(0x1.3c00000000000p+0, -0x1.af3c94e80c000p-3, 0x1.a4e633fcd9066p-52) | ||
| A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45) | ||
| A(0x1.3a00000000000p+0, -0x1.a23bc1fe2b000p-3, -0x1.58c64dc46c1eap-45) | ||
| A(0x1.3800000000000p+0, -0x1.9525a9cf45000p-3, -0x1.ad1d904c1d4e3p-45) | ||
| A(0x1.3600000000000p+0, -0x1.87fa06520d000p-3, 0x1.bbdbf7fdbfa09p-45) | ||
| A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45) | ||
| A(0x1.3400000000000p+0, -0x1.7ab890210e000p-3, 0x1.bdb9072534a58p-45) | ||
| A(0x1.3200000000000p+0, -0x1.6d60fe719d000p-3, -0x1.0e46aa3b2e266p-46) | ||
| A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46) | ||
| A(0x1.3000000000000p+0, -0x1.5ff3070a79000p-3, -0x1.e9e439f105039p-46) | ||
| A(0x1.2e00000000000p+0, -0x1.526e5e3a1b000p-3, -0x1.0de8b90075b8fp-45) | ||
| A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46) | ||
| A(0x1.2c00000000000p+0, -0x1.44d2b6ccb8000p-3, 0x1.70cc16135783cp-46) | ||
| A(0x1.2a00000000000p+0, -0x1.371fc201e9000p-3, 0x1.178864d27543ap-48) | ||
| A(0x1.2800000000000p+0, -0x1.29552f81ff000p-3, -0x1.48d301771c408p-45) | ||
| A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45) | ||
| A(0x1.2600000000000p+0, -0x1.1b72ad52f6000p-3, -0x1.e80a41811a396p-45) | ||
| A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47) | ||
| A(0x1.2400000000000p+0, -0x1.0d77e7cd09000p-3, 0x1.a699688e85bf4p-47) | ||
| A(0x1.2200000000000p+0, -0x1.fec9131dbe000p-4, -0x1.575545ca333f2p-45) | ||
| A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45) | ||
| A(0x1.2000000000000p+0, -0x1.e27076e2b0000p-4, 0x1.a342c2af0003cp-45) | ||
| A(0x1.1e00000000000p+0, -0x1.c5e548f5bc000p-4, -0x1.d0c57585fbe06p-46) | ||
| A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45) | ||
| A(0x1.1c00000000000p+0, -0x1.a926d3a4ae000p-4, 0x1.53935e85baac8p-45) | ||
| A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46) | ||
| A(0x1.1a00000000000p+0, -0x1.8c345d631a000p-4, 0x1.37c294d2f5668p-46) | ||
| A(0x1.1800000000000p+0, -0x1.6f0d28ae56000p-4, -0x1.69737c93373dap-45) | ||
| A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46) | ||
| A(0x1.1600000000000p+0, -0x1.51b073f062000p-4, 0x1.f025b61c65e57p-46) | ||
| A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45) | ||
| A(0x1.1400000000000p+0, -0x1.341d7961be000p-4, 0x1.c5edaccf913dfp-45) | ||
| A(0x1.1200000000000p+0, -0x1.16536eea38000p-4, 0x1.47c5e768fa309p-46) | ||
| A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45) | ||
| A(0x1.1000000000000p+0, -0x1.f0a30c0118000p-5, 0x1.d599e83368e91p-45) | ||
| A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46) | ||
| A(0x1.0e00000000000p+0, -0x1.b42dd71198000p-5, 0x1.c827ae5d6704cp-46) | ||
| A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45) | ||
| A(0x1.0c00000000000p+0, -0x1.77458f632c000p-5, -0x1.cfc4634f2a1eep-45) | ||
| A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48) | ||
| A(0x1.0a00000000000p+0, -0x1.39e87b9fec000p-5, 0x1.502b7f526feaap-48) | ||
| A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45) | ||
| A(0x1.0800000000000p+0, -0x1.f829b0e780000p-6, -0x1.980267c7e09e4p-45) | ||
| A(0x1.0600000000000p+0, -0x1.7b91b07d58000p-6, -0x1.88d5493faa639p-45) | ||
| A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50) | ||
| A(0x1.0400000000000p+0, -0x1.fc0a8b0fc0000p-7, -0x1.f1e7cf6d3a69cp-50) | ||
| A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46) | ||
| A(0x1.0200000000000p+0, -0x1.fe02a6b100000p-8, -0x1.9e23f0dda40e4p-46) | ||
| A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0) | ||
| A(0x1.0000000000000p+0, 0x0.0000000000000p+0, 0x0.0000000000000p+0) | ||
| A(0x1.fc00000000000p-1, 0x1.0101575890000p-7, -0x1.0c76b999d2be8p-46) | ||
| A(0x1.f800000000000p-1, 0x1.0205658938000p-6, -0x1.3dc5b06e2f7d2p-45) | ||
| A(0x1.f400000000000p-1, 0x1.8492528c90000p-6, -0x1.aa0ba325a0c34p-45) | ||
| A(0x1.f000000000000p-1, 0x1.0415d89e74000p-5, 0x1.111c05cf1d753p-47) | ||
| A(0x1.ec00000000000p-1, 0x1.466aed42e0000p-5, -0x1.c167375bdfd28p-45) | ||
| A(0x1.e800000000000p-1, 0x1.894aa149fc000p-5, -0x1.97995d05a267dp-46) | ||
| A(0x1.e400000000000p-1, 0x1.ccb73cdddc000p-5, -0x1.a68f247d82807p-46) | ||
| A(0x1.e200000000000p-1, 0x1.eea31c006c000p-5, -0x1.e113e4fc93b7bp-47) | ||
| A(0x1.de00000000000p-1, 0x1.1973bd1466000p-4, -0x1.5325d560d9e9bp-45) | ||
| A(0x1.da00000000000p-1, 0x1.3bdf5a7d1e000p-4, 0x1.cc85ea5db4ed7p-45) | ||
| A(0x1.d600000000000p-1, 0x1.5e95a4d97a000p-4, -0x1.c69063c5d1d1ep-45) | ||
| A(0x1.d400000000000p-1, 0x1.700d30aeac000p-4, 0x1.c1e8da99ded32p-49) | ||
| A(0x1.d000000000000p-1, 0x1.9335e5d594000p-4, 0x1.3115c3abd47dap-45) | ||
| A(0x1.cc00000000000p-1, 0x1.b6ac88dad6000p-4, -0x1.390802bf768e5p-46) | ||
| A(0x1.ca00000000000p-1, 0x1.c885801bc4000p-4, 0x1.646d1c65aacd3p-45) | ||
| A(0x1.c600000000000p-1, 0x1.ec739830a2000p-4, -0x1.dc068afe645e0p-45) | ||
| A(0x1.c400000000000p-1, 0x1.fe89139dbe000p-4, -0x1.534d64fa10afdp-45) | ||
| A(0x1.c000000000000p-1, 0x1.1178e8227e000p-3, 0x1.1ef78ce2d07f2p-45) | ||
| A(0x1.be00000000000p-1, 0x1.1aa2b7e23f000p-3, 0x1.ca78e44389934p-45) | ||
| A(0x1.ba00000000000p-1, 0x1.2d1610c868000p-3, 0x1.39d6ccb81b4a1p-47) | ||
| A(0x1.b800000000000p-1, 0x1.365fcb0159000p-3, 0x1.62fa8234b7289p-51) | ||
| A(0x1.b400000000000p-1, 0x1.4913d8333b000p-3, 0x1.5837954fdb678p-45) | ||
| A(0x1.b200000000000p-1, 0x1.527e5e4a1b000p-3, 0x1.633e8e5697dc7p-45) | ||
| A(0x1.ae00000000000p-1, 0x1.6574ebe8c1000p-3, 0x1.9cf8b2c3c2e78p-46) | ||
| A(0x1.ac00000000000p-1, 0x1.6f0128b757000p-3, -0x1.5118de59c21e1p-45) | ||
| A(0x1.aa00000000000p-1, 0x1.7898d85445000p-3, -0x1.c661070914305p-46) | ||
| A(0x1.a600000000000p-1, 0x1.8beafeb390000p-3, -0x1.73d54aae92cd1p-47) | ||
| A(0x1.a400000000000p-1, 0x1.95a5adcf70000p-3, 0x1.7f22858a0ff6fp-47) | ||
| A(0x1.a000000000000p-1, 0x1.a93ed3c8ae000p-3, -0x1.8724350562169p-45) | ||
| A(0x1.9e00000000000p-1, 0x1.b31d8575bd000p-3, -0x1.c358d4eace1aap-47) | ||
| A(0x1.9c00000000000p-1, 0x1.bd087383be000p-3, -0x1.d4bc4595412b6p-45) | ||
| A(0x1.9a00000000000p-1, 0x1.c6ffbc6f01000p-3, -0x1.1ec72c5962bd2p-48) | ||
| A(0x1.9600000000000p-1, 0x1.db13db0d49000p-3, -0x1.aff2af715b035p-45) | ||
| A(0x1.9400000000000p-1, 0x1.e530effe71000p-3, 0x1.212276041f430p-51) | ||
| A(0x1.9200000000000p-1, 0x1.ef5ade4dd0000p-3, -0x1.a211565bb8e11p-51) | ||
| A(0x1.9000000000000p-1, 0x1.f991c6cb3b000p-3, 0x1.bcbecca0cdf30p-46) | ||
| A(0x1.8c00000000000p-1, 0x1.07138604d5800p-2, 0x1.89cdb16ed4e91p-48) | ||
| A(0x1.8a00000000000p-1, 0x1.0c42d67616000p-2, 0x1.7188b163ceae9p-45) | ||
| A(0x1.8800000000000p-1, 0x1.1178e8227e800p-2, -0x1.c210e63a5f01cp-45) | ||
| A(0x1.8600000000000p-1, 0x1.16b5ccbacf800p-2, 0x1.b9acdf7a51681p-45) | ||
| A(0x1.8400000000000p-1, 0x1.1bf99635a6800p-2, 0x1.ca6ed5147bdb7p-45) | ||
| A(0x1.8200000000000p-1, 0x1.214456d0eb800p-2, 0x1.a87deba46baeap-47) | ||
| A(0x1.7e00000000000p-1, 0x1.2bef07cdc9000p-2, 0x1.a9cfa4a5004f4p-45) | ||
| A(0x1.7c00000000000p-1, 0x1.314f1e1d36000p-2, -0x1.8e27ad3213cb8p-45) | ||
| A(0x1.7a00000000000p-1, 0x1.36b6776be1000p-2, 0x1.16ecdb0f177c8p-46) | ||
| A(0x1.7800000000000p-1, 0x1.3c25277333000p-2, 0x1.83b54b606bd5cp-46) | ||
| A(0x1.7600000000000p-1, 0x1.419b423d5e800p-2, 0x1.8e436ec90e09dp-47) | ||
| A(0x1.7400000000000p-1, 0x1.4718dc271c800p-2, -0x1.f27ce0967d675p-45) | ||
| A(0x1.7200000000000p-1, 0x1.4c9e09e173000p-2, -0x1.e20891b0ad8a4p-45) | ||
| A(0x1.7000000000000p-1, 0x1.522ae0738a000p-2, 0x1.ebe708164c759p-45) | ||
| A(0x1.6e00000000000p-1, 0x1.57bf753c8d000p-2, 0x1.fadedee5d40efp-46) | ||
| A(0x1.6c00000000000p-1, 0x1.5d5bddf596000p-2, -0x1.a0b2a08a465dcp-47) | ||
| #endif | ||
| }, | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,222 @@ | ||
| /* | ||
| * Single-precision pow function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include <stdint.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* | ||
| POWF_LOG2_POLY_ORDER = 5 | ||
| EXP2F_TABLE_BITS = 5 | ||
| ULP error: 0.82 (~ 0.5 + relerr*2^24) | ||
| relerr: 1.27 * 2^-26 (Relative error ~= 128*Ln2*relerr_log2 + relerr_exp2) | ||
| relerr_log2: 1.83 * 2^-33 (Relative error of logx.) | ||
| relerr_exp2: 1.69 * 2^-34 (Relative error of exp2(ylogx).) | ||
| */ | ||
|
|
||
| /* Shorthands for the log2 part: N is the table size, T the lookup table | |
| and A the polynomial coefficients. OFF is the float bit pattern of | |
| 0x1.66p-1, the lower end of the range z is reduced to in log2_inline. */ | |
| #define N (1 << POWF_LOG2_TABLE_BITS) | |
| #define T __powf_log2_data.tab | |
| #define A __powf_log2_data.poly | |
| #define OFF 0x3f330000 | |
|
|
||
| /* Subnormal input is normalized so ix has negative biased exponent. | |
| Output is multiplied by N (POWF_SCALE) if TOINT_INTRINSICS is set. | |
| IX is the bit pattern of the float argument. The value returned is | |
| k + logc + poly(r), where k comes from the exponent bits of ix - OFF, | |
| logc and invc are read from table entry i, and r = z*invc - 1. */ | |
| static inline double_t | |
| log2_inline (uint32_t ix) | |
| { | |
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | |
| double_t z, r, r2, r4, p, q, y, y0, invc, logc; | |
| uint32_t iz, top, tmp; | |
| int k, i; | |
|
|
||
| /* x = 2^k z; where z is in range [OFF,2*OFF] and exact. | |
| The range is split into N subintervals. | |
| The ith subinterval contains z and c is near its center. | |
| top keeps the sign and exponent bits of ix - OFF; removing it from ix | |
| leaves the bit pattern of z. */ | |
| tmp = ix - OFF; | |
| i = (tmp >> (23 - POWF_LOG2_TABLE_BITS)) % N; | |
| top = tmp & 0xff800000; | |
| iz = ix - top; | |
| k = (int32_t) top >> (23 - POWF_SCALE_BITS); /* arithmetic shift */ | |
| invc = T[i].invc; | |
| logc = T[i].logc; | |
| z = (double_t) asfloat (iz); | |
|
|
||
| /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k */ | |
| r = z * invc - 1; | |
| y0 = logc + (double_t) k; | |
|
|
||
| /* Pipelined polynomial evaluation to approximate log1p(r)/ln2. | |
| Expanded, the result is | |
| A[0]*r^5 + A[1]*r^4 + A[2]*r^3 + A[3]*r^2 + A[4]*r + y0. */ | |
| r2 = r * r; | |
| y = A[0] * r + A[1]; | |
| p = A[2] * r + A[3]; | |
| r4 = r2 * r2; | |
| q = A[4] * r + y0; | |
| q = p * r2 + q; | |
| y = y * r4 + q; | |
| return y; | |
| } | |
|
|
||
| /* Rebind N and T from the log2 table to the exp2 table. SIGN_BIAS is | |
| added to k in exp2_inline; after the (52 - EXP2F_TABLE_BITS) left shift | |
| applied there it ends up on bit 63, the sign bit of the double scale. */ | |
| #undef N | |
| #undef T | |
| #define N (1 << EXP2F_TABLE_BITS) | |
| #define T __exp2f_data.tab | |
| #define SIGN_BIAS (1 << (EXP2F_TABLE_BITS + 11)) | |
|
|
||
| /* The output of log2 and thus the input of exp2 is either scaled by N | |
| (in case of fast toint intrinsics) or not. The unscaled xd must be | |
| in [-1021,1023], sign_bias sets the sign of the result. | |
| Returns s * poly(r) rounded to float, where s is built from the table | |
| entry T[k % N] with k (plus sign_bias) added into its upper bits. | |
| Note that the helper macros C and SHIFT are #defined inside the body and | |
| stay defined after the function. */ | |
| static inline float | |
| exp2_inline (double_t xd, uint32_t sign_bias) | |
| { | |
| uint64_t ki, ski, t; | |
| /* double_t for better performance on targets with FLT_EVAL_METHOD==2. */ | |
| double_t kd, z, r, r2, y, s; | |
|
|
||
| #if TOINT_INTRINSICS | |
| # define C __exp2f_data.poly_scaled | |
| /* N*x = k + r with r in [-1/2, 1/2] */ | |
| kd = roundtoint (xd); /* k */ | |
| ki = converttoint (xd); | |
| #else | |
| # define C __exp2f_data.poly | |
| # define SHIFT __exp2f_data.shift_scaled | |
| /* x = k/N + r with r in [-1/(2N), 1/(2N)] */ | |
| kd = eval_as_double (xd + SHIFT); | |
| ki = asuint64 (kd); | |
| kd -= SHIFT; /* k/N */ | |
| #endif | |
| r = xd - kd; | |
|
|
||
| /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) */ | |
| t = T[ki % N]; | |
| ski = ki + sign_bias; | |
| t += ski << (52 - EXP2F_TABLE_BITS); | |
| s = asdouble (t); | |
| z = C[0] * r + C[1]; | |
| r2 = r * r; | |
| y = C[2] * r + 1; | |
| y = z * r2 + y; | |
| y = y * s; | |
| return eval_as_float (y); | |
| } | |
|
|
||
| /* Returns 0 if not int, 1 if odd int, 2 if even int. The argument is | |
| the bit representation of a non-zero finite floating-point value. | |
| With biased exponent e: e < 0x7f means |value| < 1, so it is not an | |
| integer; e > 0x7f + 23 means no fraction bits and no units bit are left | |
| in the mantissa, so the value is an even integer; otherwise the low | |
| (0x7f + 23 - e) bits are the fraction and the bit just above them is the | |
| units bit that decides odd or even. */ | |
| static inline int | |
| checkint (uint32_t iy) | |
| { | |
| int e = iy >> 23 & 0xff; | |
| if (e < 0x7f) | |
| return 0; | |
| if (e > 0x7f + 23) | |
| return 2; | |
| if (iy & ((1 << (0x7f + 23 - e)) - 1)) | |
| return 0; | |
| if (iy & (1 << (0x7f + 23 - e))) | |
| return 1; | |
| return 2; | |
| } | |
|
|
||
| /* Returns 1 if ix is the bit representation of 0, infinity or nan | |
| (0x7f800000 is the bit pattern of +infinity). 2 * ix discards the sign | |
| bit; subtracting 1 wraps zero around to the largest uint32_t, so a single | |
| unsigned comparison catches zero together with every pattern at or above | |
| infinity. */ | |
| static inline int | |
| zeroinfnan (uint32_t ix) | |
| { | |
| return 2 * ix - 1 >= 2u * 0x7f800000 - 1; | |
| } | |
|
|
||
| /* Single-precision x^y. | |
| Special inputs (x zero, subnormal, negative, inf or nan; y zero, inf or | |
| nan) are resolved inside the first unlikely branch. Otherwise the result | |
| is exp2_inline (y * log2_inline (ix), sign_bias), with explicit overflow | |
| and underflow returns once |y*log2(x)| reaches 126 (times POWF_SCALE). | |
| sign_bias becomes SIGN_BIAS when x < 0 and y is an odd integer, which | |
| makes the result negative. */ | |
| float | |
| powf (float x, float y) | |
| { | |
| uint32_t sign_bias = 0; | |
| uint32_t ix, iy; | |
|
|
||
| ix = asuint (x); | |
| iy = asuint (y); | |
| if (unlikely (ix - 0x00800000 >= 0x7f800000 - 0x00800000 || zeroinfnan (iy))) | |
| { | |
| /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan). */ | |
| if (unlikely (zeroinfnan (iy))) | |
| { | |
| if (2 * iy == 0) | |
| return issignalingf_inline (x) ? x + y : 1.0f; | |
| if (ix == 0x3f800000) | |
| return issignalingf_inline (y) ? x + y : 1.0f; | |
| if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000) | |
| return x + y; | |
| if (2 * ix == 2 * 0x3f800000) | |
| return 1.0f; | |
| if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000)) | |
| return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf. */ | |
| return y * y; | |
| } | |
| if (unlikely (zeroinfnan (ix))) | |
| { | |
| float_t x2 = x * x; | |
| if (ix & 0x80000000 && checkint (iy) == 1) | |
| { | |
| x2 = -x2; | |
| /* In this branch sign_bias is only read by the __math_divzerof | |
| call below, so a plain 1 is used instead of SIGN_BIAS. */ | |
| sign_bias = 1; | |
| } | |
| #if WANT_ERRNO | |
| if (2 * ix == 0 && iy & 0x80000000) | |
| return __math_divzerof (sign_bias); | |
| #endif | |
| /* Without the barrier some versions of clang hoist the 1/x2 and | |
| thus division by zero exception can be signaled spuriously. */ | |
| return iy & 0x80000000 ? opt_barrier_float (1 / x2) : x2; | |
| } | |
| /* x and y are non-zero finite. */ | |
| if (ix & 0x80000000) | |
| { | |
| /* Finite x < 0. */ | |
| int yint = checkint (iy); | |
| if (yint == 0) | |
| return __math_invalidf (x); | |
| if (yint == 1) | |
| sign_bias = SIGN_BIAS; | |
| ix &= 0x7fffffff; | |
| } | |
| if (ix < 0x00800000) | |
| { | |
| /* Normalize subnormal x so exponent becomes negative. | |
| x is scaled by 2^23, the sign bit is dropped and 23 is taken off | |
| the exponent field again. */ | |
| ix = asuint (x * 0x1p23f); | |
| ix &= 0x7fffffff; | |
| ix -= 23 << 23; | |
| } | |
| } | |
| double_t logx = log2_inline (ix); | |
| double_t ylogx = y * logx; /* Note: cannot overflow, y is single prec. */ | |
| if (unlikely ((asuint64 (ylogx) >> 47 & 0xffff) | |
| >= asuint64 (126.0 * POWF_SCALE) >> 47)) | |
| { | |
| /* |y*log(x)| >= 126. */ | |
| if (ylogx > 0x1.fffffffd1d571p+6 * POWF_SCALE) | |
| /* |x^y| > 0x1.ffffffp127. */ | |
| return __math_oflowf (sign_bias); | |
| if (WANT_ROUNDING && WANT_ERRNO | |
| && ylogx > 0x1.fffffffa3aae2p+6 * POWF_SCALE) | |
| /* |x^y| > 0x1.fffffep127, check if we round away from 0. */ | |
| if ((!sign_bias | |
| && eval_as_float (1.0f + opt_barrier_float (0x1p-25f)) != 1.0f) | |
| || (sign_bias | |
| && eval_as_float (-1.0f - opt_barrier_float (0x1p-25f)) | |
| != -1.0f)) | |
| return __math_oflowf (sign_bias); | |
| if (ylogx <= -150.0 * POWF_SCALE) | |
| return __math_uflowf (sign_bias); | |
| #if WANT_ERRNO_UFLOW | |
| if (ylogx < -149.0 * POWF_SCALE) | |
| return __math_may_uflowf (sign_bias); | |
| #endif | |
| } | |
| return exp2_inline (ylogx, sign_bias); | |
| } | |
| /* glibc ABI compatibility: extra symbol names for powf (presumably created | |
| by the strong_alias/hidden_alias macros, which are defined elsewhere). */ | |
| #if USE_GLIBC_ABI | |
| strong_alias (powf, __powf_finite) | |
| hidden_alias (powf, __ieee754_powf) | |
| #endif |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,35 @@ | ||
| /* | ||
| * Data definition for powf. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include "math_config.h" | ||
|
|
||
| /* Lookup table and polynomial for the log2 step of powf. Every value is | |
| multiplied by POWF_SCALE. */ | |
| const struct powf_log2_data __powf_log2_data = { | |
| /* One entry per subinterval of the reduced argument. The entries look | |
| like { invc, logc } pairs with invc = 1/c and logc = log2(c), e.g. the | |
| middle entry is { 1, 0 } (field order not visible here, confirm against | |
| the struct declaration in math_config.h). */ | |
| .tab = { | |
| { 0x1.661ec79f8f3bep+0, -0x1.efec65b963019p-2 * POWF_SCALE }, | |
| { 0x1.571ed4aaf883dp+0, -0x1.b0b6832d4fca4p-2 * POWF_SCALE }, | |
| { 0x1.49539f0f010bp+0, -0x1.7418b0a1fb77bp-2 * POWF_SCALE }, | |
| { 0x1.3c995b0b80385p+0, -0x1.39de91a6dcf7bp-2 * POWF_SCALE }, | |
| { 0x1.30d190c8864a5p+0, -0x1.01d9bf3f2b631p-2 * POWF_SCALE }, | |
| { 0x1.25e227b0b8eap+0, -0x1.97c1d1b3b7afp-3 * POWF_SCALE }, | |
| { 0x1.1bb4a4a1a343fp+0, -0x1.2f9e393af3c9fp-3 * POWF_SCALE }, | |
| { 0x1.12358f08ae5bap+0, -0x1.960cbbf788d5cp-4 * POWF_SCALE }, | |
| { 0x1.0953f419900a7p+0, -0x1.a6f9db6475fcep-5 * POWF_SCALE }, | |
| { 0x1p+0, 0x0p+0 * POWF_SCALE }, | |
| { 0x1.e608cfd9a47acp-1, 0x1.338ca9f24f53dp-4 * POWF_SCALE }, | |
| { 0x1.ca4b31f026aap-1, 0x1.476a9543891bap-3 * POWF_SCALE }, | |
| { 0x1.b2036576afce6p-1, 0x1.e840b4ac4e4d2p-3 * POWF_SCALE }, | |
| { 0x1.9c2d163a1aa2dp-1, 0x1.40645f0c6651cp-2 * POWF_SCALE }, | |
| { 0x1.886e6037841edp-1, 0x1.88e9c2c1b9ff8p-2 * POWF_SCALE }, | |
| { 0x1.767dcf5534862p-1, 0x1.ce0a44eb17bccp-2 * POWF_SCALE }, | |
| }, | |
| /* Five coefficients; the last one is close to 1/ln2 (presumably the | |
| factor of r, with the higher powers of r stored before it; confirm | |
| against the evaluation order in powf). */ | |
| .poly = { | |
| 0x1.27616c9496e0bp-2 * POWF_SCALE, -0x1.71969a075c67ap-2 * POWF_SCALE, | |
| 0x1.ec70a6ca7baddp-2 * POWF_SCALE, -0x1.7154748bef6c8p-1 * POWF_SCALE, | |
| 0x1.71547652ab82bp0 * POWF_SCALE, | |
| } | |
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_cos.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_cosf.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_exp.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_exp2f.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_exp2f_1u.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_expf.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_expf_1u.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_log.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_logf.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_pow.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_powf.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_sin.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| /* | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
| #define SCALAR 1 | ||
| #include "v_sinf.c" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,80 @@ | ||
| /* | ||
| * Single-precision sin/cos function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <stdint.h> | ||
| #include <math.h> | ||
| #include "math_config.h" | ||
| #include "sincosf.h" | ||
|
|
||
| /* Fast sincosf implementation. Worst-case ULP is 0.5607, maximum relative | ||
| error is 0.5303 * 2^-23. A single-step range reduction is used for | ||
| small values. Large inputs have their range reduced using fast integer | ||
| arithmetic. | ||
| Y is the input; its sine is stored in *SINP and its cosine in *COSP. | ||
| The range tests compare abstop12 values, i.e. only the top bits of the | ||
| float representation, so each threshold is effectively rounded down to | ||
| that granularity. Four cases are handled in order: | ||
| - magnitude below PI/4: polynomial evaluated directly, no reduction; | ||
| - magnitude below 120: reduce_fast; | ||
| - other finite values: reduce_large on the raw bits of Y; | ||
| - Inf or NaN: both outputs are set to NaN. */ | ||
| void | ||
| sincosf (float y, float *sinp, float *cosp) | ||
| { | ||
| double x = y; /* The computation is carried out in double. */ | ||
| double s; /* Sign factor taken from p->sign[]. */ | ||
| int n; /* Quadrant produced by the range reduction. */ | ||
| const sincos_t *p = &__sincosf_table[0]; | ||
|
|
||
| if (abstop12 (y) < abstop12 (pio4)) | ||
| { | ||
| double x2 = x * x; | ||
|
|
||
| if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) | ||
| { | ||
| if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) | ||
| /* Force underflow for tiny y. */ | ||
| force_eval_float (x2); | ||
| *sinp = y; /* For inputs this small y itself is returned as the sine... */ | ||
| *cosp = 1.0f; /* ...and 1 as the cosine; the polynomial is skipped. */ | ||
| return; | ||
| } | ||
|
|
||
| sincosf_poly (x, x2, p, 0, sinp, cosp); | ||
| } | ||
| else if (abstop12 (y) < abstop12 (120.0f)) | ||
| { | ||
| x = reduce_fast (x, p, &n); | ||
|
|
||
| /* Setup the signs for sin and cos. */ | ||
| s = p->sign[n & 3]; | ||
|
|
||
| if (n & 2) | ||
| p = &__sincosf_table[1]; /* Entry whose cosine polynomial is negated. */ | ||
|
|
||
| sincosf_poly (x * s, x * x, p, n, sinp, cosp); | ||
| } | ||
| else if (likely (abstop12 (y) < abstop12 (INFINITY))) | ||
| { | ||
| uint32_t xi = asuint (y); | ||
| int sign = xi >> 31; /* Top bit of the representation: 1 for negative y. */ | ||
|
|
||
| x = reduce_large (xi, &n); | ||
|
|
||
| /* Setup signs for sin and cos - include original sign. */ | ||
| s = p->sign[(n + sign) & 3]; | ||
|
|
||
| if ((n + sign) & 2) | ||
| p = &__sincosf_table[1]; /* Entry whose cosine polynomial is negated. */ | ||
|
|
||
| sincosf_poly (x * s, x * x, p, n, sinp, cosp); | ||
| } | ||
| else | ||
| { | ||
| /* Return NaN if Inf or NaN for both sin and cos. */ | ||
| *sinp = *cosp = y - y; | ||
| #if WANT_ERRNO | ||
| /* Needed to set errno for +-Inf, the add is a hack to work | ||
| around a gcc register allocation issue: just passing y | ||
| affects code generation in the fast path. */ | ||
| __math_invalidf (y + y); | ||
| #endif | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| /* | ||
| * Header for sinf, cosf and sincosf. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <stdint.h> | ||
| #include <math.h> | ||
| #include "math_config.h" | ||
|
|
||
| /* 2PI * 2^-64. reduce_large multiplies its signed 64-bit fixed-point | ||
| remainder by this constant to produce the reduced argument it returns. */ | ||
| static const double pi63 = 0x1.921FB54442D18p-62; | ||
| /* PI / 4. Used by sinf and sincosf (through abstop12) as the bound below | ||
| which the polynomial is evaluated without range reduction. */ | ||
| static const double pio4 = 0x1.921FB54442D18p-1; | ||
|
|
||
| /* The constants and polynomials for sine and cosine. | ||
| As evaluated by sincosf_poly and sinf_poly, with X2 = X * X: | ||
| cosine = c0 + c1*X2 + c2*X2^2 + c3*X2^3 + c4*X2^4 | ||
| sine = X + s1*X^3 + s2*X^5 + s3*X^7 | ||
| hpi_inv and hpi are the multiplier and subtrahend used by reduce_fast. */ | ||
| typedef struct | ||
| { | ||
| double sign[4]; /* Sign of sine in quadrants 0..3. */ | ||
| double hpi_inv; /* 2 / PI ( * 2^24 if !TOINT_INTRINSICS). */ | ||
| double hpi; /* PI / 2. */ | ||
| double c0, c1, c2, c3, c4; /* Cosine polynomial. */ | ||
| double s1, s2, s3; /* Sine polynomial. */ | ||
| } sincos_t; | ||
|
|
||
| /* Polynomial data (the cosine polynomial is negated in the 2nd entry). | ||
| sinf and sincosf start with entry 0 and switch to entry 1 when bit 1 of | ||
| the quadrant is set. */ | ||
| extern const sincos_t __sincosf_table[2] HIDDEN; | ||
|
|
||
| /* Table with 4/PI to 192 bit precision. reduce_large picks a starting | ||
| entry from the exponent bits of its argument and reads three words at | ||
| offsets 0, 4 and 8 from it. */ | ||
| extern const uint32_t __inv_pio4[] HIDDEN; | ||
|
|
||
| /* Top 12 bits of the float representation with the sign bit cleared. | ||
| The shift by 20 keeps bits 20..31 and the 0x7ff mask then drops bit 31, | ||
| so 11 significant bits remain. Callers compare the results of two | ||
| abstop12 calls as a cheap magnitude test; thresholds are therefore only | ||
| as fine as those retained bits. | ||
| NOTE(review): asuint comes from math_config.h and presumably returns the | ||
| raw bit pattern of X; confirm there. */ | ||
| static inline uint32_t | ||
| abstop12 (float x) | ||
| { | ||
| return (asuint (x) >> 20) & 0x7ff; | ||
| } | ||
|
|
||
| /* Compute the sine and cosine of inputs X and X2 (X squared), using the | ||
| polynomial P and store the results in SINP and COSP. N is the quadrant, | ||
| if odd the cosine and sine polynomials are swapped. | ||
| The swap is done by exchanging the two destination pointers, so both | ||
| polynomials are always evaluated and only the store targets change. | ||
| The sums are formed in double and converted to float by the final | ||
| stores. */ | ||
| static inline void | ||
| sincosf_poly (double x, double x2, const sincos_t *p, int n, float *sinp, | ||
| float *cosp) | ||
| { | ||
| double x3, x4, x5, x6, s, c, c1, c2, s1; | ||
|
|
||
| x4 = x2 * x2; /* X^4 */ | ||
| x3 = x2 * x; /* X^3 */ | ||
| c2 = p->c3 + x2 * p->c4; /* High cosine terms, scaled by X^6 below. */ | ||
| s1 = p->s2 + x2 * p->s3; /* High sine terms, scaled by X^5 below. */ | ||
|
|
||
| /* Swap sin/cos result based on quadrant. */ | ||
| float *tmp = (n & 1 ? cosp : sinp); | ||
| cosp = (n & 1 ? sinp : cosp); | ||
| sinp = tmp; | ||
|
|
||
| c1 = p->c0 + x2 * p->c1; /* Low cosine terms. */ | ||
| x5 = x3 * x2; /* X^5 */ | ||
| x6 = x4 * x2; /* X^6 */ | ||
|
|
||
| s = x + x3 * p->s1; /* Low sine terms. */ | ||
| c = c1 + x4 * p->c2; | ||
|
|
||
| *sinp = s + x5 * s1; /* Sine polynomial, written to the (possibly swapped) target. */ | ||
| *cosp = c + x6 * c2; /* Cosine polynomial, written to the other target. */ | ||
| } | ||
|
|
||
| /* Return the sine of inputs X and X2 (X squared) using the polynomial P. | ||
| N is the quadrant, and if odd the cosine polynomial is used. | ||
| Only the selected polynomial is evaluated. The sum is formed in double | ||
| and converted to float by the return. */ | ||
| static inline float | ||
| sinf_poly (double x, double x2, const sincos_t *p, int n) | ||
| { | ||
| double x3, x4, x6, x7, s, c, c1, c2, s1; | ||
|
|
||
| if ((n & 1) == 0) | ||
| { | ||
| x3 = x * x2; /* X^3 */ | ||
| s1 = p->s2 + x2 * p->s3; | ||
|
|
||
| x7 = x3 * x2; /* Despite the name this holds X^5 (X^3 * X^2). */ | ||
| s = x + x3 * p->s1; | ||
|
|
||
| return s + x7 * s1; /* X + s1*X^3 + s2*X^5 + s3*X^7 */ | ||
| } | ||
| else | ||
| { | ||
| x4 = x2 * x2; /* X^4 */ | ||
| c2 = p->c3 + x2 * p->c4; | ||
| c1 = p->c0 + x2 * p->c1; | ||
|
|
||
| x6 = x4 * x2; /* X^6 */ | ||
| c = c1 + x4 * p->c2; | ||
|
|
||
| return c + x6 * c2; /* c0 + c1*X^2 + c2*X^4 + c3*X^6 + c4*X^8 */ | ||
| } | ||
| } | ||
|
|
||
| /* Fast range reduction using single multiply-subtract. Return the modulo of | ||
| X as a value between -PI/4 and PI/4 and store the quadrant in NP. | ||
| The values for PI/2 and 2/PI are accessed via P. Since PI/2 as a double | ||
| is accurate to 55 bits and the worst-case cancellation happens at 6 * PI/4, | ||
| the result is accurate for |X| <= 120.0. | ||
| Two variants are selected at compile time by TOINT_INTRINSICS; they must | ||
| match the scaling of hpi_inv in the table that P points to. */ | ||
| static inline double | ||
| reduce_fast (double x, const sincos_t *p, int *np) | ||
| { | ||
| double r; /* X times hpi_inv: X in units of PI/2 (times 2^24 in the #else path). */ | ||
| #if TOINT_INTRINSICS | ||
| /* Use fast round and lround instructions when available. */ | ||
| r = x * p->hpi_inv; | ||
| *np = converttoint (r); | ||
| return x - roundtoint (r) * p->hpi; | ||
| #else | ||
| /* Use scaled float to int conversion with explicit rounding. | ||
| hpi_inv is prescaled by 2^24 so the quadrant ends up in bits 24..31. | ||
| This avoids inaccuracies introduced by truncating negative values. */ | ||
| r = x * p->hpi_inv; | ||
| int n = ((int32_t)r + 0x800000) >> 24; /* Adding 2^23 (half of 2^24) before the shift rounds to nearest. */ | ||
| *np = n; | ||
| return x - n * p->hpi; | ||
| #endif | ||
| } | ||
|
|
||
| /* Reduce the range of XI to a multiple of PI/2 using fast integer arithmetic. | ||
| XI is a reinterpreted float and must be >= 2.0f (the sign bit is ignored). | ||
| Return the modulo between -PI/4 and PI/4 and store the quadrant in NP. | ||
| Reduction uses a table of 4/PI with 192 bits of precision. A 32x96->128 bit | ||
| multiply computes the exact 2.62-bit fixed-point modulo. Since the result | ||
| can have at most 29 leading zeros after the binary point, the double | ||
| precision result is accurate to 33 bits. | ||
| The table start index comes from bits 26..29 of XI and the extra left | ||
| shift from bits 23..25, so together they are driven by the exponent | ||
| field. The highest table index read is 15 + 8 = 23. */ | ||
| static inline double | ||
| reduce_large (uint32_t xi, int *np) | ||
| { | ||
| const uint32_t *arr = &__inv_pio4[(xi >> 26) & 15]; | ||
| int shift = (xi >> 23) & 7; | ||
| uint64_t n, res0, res1, res2; | ||
|
|
||
| xi = (xi & 0xffffff) | 0x800000; /* Keep the low 24 bits and force bit 23 on. */ | ||
| xi <<= shift; | ||
|
|
||
| res0 = xi * arr[0]; /* No widening cast here, unlike the two products below; this term is shifted up by 32 afterwards. */ | ||
| res1 = (uint64_t)xi * arr[4]; | ||
| res2 = (uint64_t)xi * arr[8]; | ||
| res0 = (res2 >> 32) | (res0 << 32); /* Combine the top and bottom partial products into one 64-bit word. */ | ||
| res0 += res1; | ||
|
|
||
| n = (res0 + (1ULL << 61)) >> 62; /* Top two bits after adding half of 2^62: quadrant rounded to nearest. */ | ||
| res0 -= n << 62; /* Remove the quadrant; what is left is read as signed below. */ | ||
| double x = (int64_t)res0; | ||
| *np = n; | ||
| return x * pi63; | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| /* | ||
| * Data definition for sinf, cosf and sincosf. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <stdint.h> | ||
| #include <math.h> | ||
| #include "math_config.h" | ||
| #include "sincosf.h" | ||
|
|
||
| /* The constants and polynomials for sine and cosine. The 2nd entry | ||
| computes -cos (x) rather than cos (x) to get negation for free. | ||
| Initializers follow the field order of sincos_t: sign[4], hpi_inv, hpi, | ||
| c0..c4, s1..s3. The two entries differ only in the sign of c0..c4. */ | ||
| const sincos_t __sincosf_table[2] = | ||
| { | ||
| { | ||
| { 1.0, -1.0, -1.0, 1.0 }, /* sign[0..3] */ | ||
| #if TOINT_INTRINSICS | ||
| 0x1.45F306DC9C883p-1, /* hpi_inv: 2 / PI */ | ||
| #else | ||
| 0x1.45F306DC9C883p+23, /* hpi_inv: 2 / PI * 2^24 */ | ||
| #endif | ||
| 0x1.921FB54442D18p0, /* hpi: PI / 2 */ | ||
| 0x1p0, /* c0 */ | ||
| -0x1.ffffffd0c621cp-2, /* c1 */ | ||
| 0x1.55553e1068f19p-5, /* c2 */ | ||
| -0x1.6c087e89a359dp-10, /* c3 */ | ||
| 0x1.99343027bf8c3p-16, /* c4 */ | ||
| -0x1.555545995a603p-3, /* s1 */ | ||
| 0x1.1107605230bc4p-7, /* s2 */ | ||
| -0x1.994eb3774cf24p-13 /* s3 */ | ||
| }, | ||
| { | ||
| { 1.0, -1.0, -1.0, 1.0 }, /* sign[0..3] */ | ||
| #if TOINT_INTRINSICS | ||
| 0x1.45F306DC9C883p-1, /* hpi_inv: 2 / PI */ | ||
| #else | ||
| 0x1.45F306DC9C883p+23, /* hpi_inv: 2 / PI * 2^24 */ | ||
| #endif | ||
| 0x1.921FB54442D18p0, /* hpi: PI / 2 */ | ||
| -0x1p0, /* c0, negated */ | ||
| 0x1.ffffffd0c621cp-2, /* c1, negated */ | ||
| -0x1.55553e1068f19p-5, /* c2, negated */ | ||
| 0x1.6c087e89a359dp-10, /* c3, negated */ | ||
| -0x1.99343027bf8c3p-16, /* c4, negated */ | ||
| -0x1.555545995a603p-3, /* s1 */ | ||
| 0x1.1107605230bc4p-7, /* s2 */ | ||
| -0x1.994eb3774cf24p-13 /* s3 */ | ||
| } | ||
| }; | ||
|
|
||
| /* Table with 4/PI to 192 bit precision. To avoid unaligned accesses | ||
| only 8 new bits are added per entry, making the table 4 times larger. | ||
| Each entry is the previous one shifted left by 8 bits with the next 8 | ||
| bits appended, so entries 4 apart hold consecutive 32-bit words. | ||
| reduce_large reads offsets 0, 4 and 8 from a start index of 0..15, | ||
| which is why 24 entries are needed. */ | ||
| const uint32_t __inv_pio4[24] = | ||
| { | ||
| 0xa2, 0xa2f9, 0xa2f983, 0xa2f9836e, | ||
| 0xf9836e4e, 0x836e4e44, 0x6e4e4415, 0x4e441529, | ||
| 0x441529fc, 0x1529fc27, 0x29fc2757, 0xfc2757d1, | ||
| 0x2757d1f5, 0x57d1f534, 0xd1f534dd, 0xf534ddc0, | ||
| 0x34ddc0db, 0xddc0db62, 0xc0db6295, 0xdb629599, | ||
| 0x6295993c, 0x95993c43, 0x993c4390, 0x3c439041 | ||
| }; |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,68 @@ | ||
| /* | ||
| * Single-precision sin function. | ||
| * | ||
| * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| * See https://llvm.org/LICENSE.txt for license information. | ||
| * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| */ | ||
|
|
||
| #include <math.h> | ||
| #include "math_config.h" | ||
| #include "sincosf.h" | ||
|
|
||
| /* Fast sinf implementation. Worst-case ULP is 0.5607, maximum relative | ||
| error is 0.5303 * 2^-23. A single-step range reduction is used for | ||
| small values. Large inputs have their range reduced using fast integer | ||
| arithmetic. | ||
| Returns the sine of Y. The range tests compare abstop12 values, i.e. | ||
| only the top bits of the float representation, so each threshold is | ||
| effectively rounded down to that granularity. Four cases are handled: | ||
| - magnitude below PI/4: polynomial evaluated directly, no reduction; | ||
| - magnitude below 120: reduce_fast; | ||
| - other finite values: reduce_large on the raw bits of Y; | ||
| - Inf or NaN: the result of __math_invalidf (Y) is returned. */ | ||
| float | ||
| sinf (float y) | ||
| { | ||
| double x = y; /* The computation is carried out in double. */ | ||
| double s; /* Holds X squared in the first branch, the sign factor in the others. */ | ||
| int n; /* Quadrant produced by the range reduction. */ | ||
| const sincos_t *p = &__sincosf_table[0]; | ||
|
|
||
| if (abstop12 (y) < abstop12 (pio4)) | ||
| { | ||
| s = x * x; | ||
|
|
||
| if (unlikely (abstop12 (y) < abstop12 (0x1p-12f))) | ||
| { | ||
| if (unlikely (abstop12 (y) < abstop12 (0x1p-126f))) | ||
| /* Force underflow for tiny y. */ | ||
| force_eval_float (s); | ||
| return y; /* For inputs this small y itself is returned; the polynomial is skipped. */ | ||
| } | ||
|
|
||
| return sinf_poly (x, s, p, 0); | ||
| } | ||
| else if (likely (abstop12 (y) < abstop12 (120.0f))) | ||
| { | ||
| x = reduce_fast (x, p, &n); | ||
|
|
||
| /* Setup the signs for sin and cos. */ | ||
| s = p->sign[n & 3]; | ||
|
|
||
| if (n & 2) | ||
| p = &__sincosf_table[1]; /* Entry whose cosine polynomial is negated. */ | ||
|
|
||
| return sinf_poly (x * s, x * x, p, n); | ||
| } | ||
| else if (abstop12 (y) < abstop12 (INFINITY)) | ||
| { | ||
| uint32_t xi = asuint (y); | ||
| int sign = xi >> 31; /* Top bit of the representation: 1 for negative y. */ | ||
|
|
||
| x = reduce_large (xi, &n); | ||
|
|
||
| /* Setup signs for sin and cos - include original sign. */ | ||
| s = p->sign[(n + sign) & 3]; | ||
|
|
||
| if ((n + sign) & 2) | ||
| p = &__sincosf_table[1]; /* Entry whose cosine polynomial is negated. */ | ||
|
|
||
| return sinf_poly (x * s, x * x, p, n); | ||
| } | ||
| else | ||
| return __math_invalidf (y); | ||
| } |