| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,54 @@ | ||
| //===-- Common header for PolyEval implementations --------------*- C++ -*-===// | ||
| // | ||
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
| // See https://llvm.org/LICENSE.txt for license information. | ||
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| // | ||
| //===----------------------------------------------------------------------===// | ||
|
|
||
| #ifndef LLVM_LIBC_UTILS_FPUTIL_POLYEVAL_H | ||
| #define LLVM_LIBC_UTILS_FPUTIL_POLYEVAL_H | ||
|
|
||
| #include "utils/CPP/TypeTraits.h" | ||
|
|
||
| // Evaluate polynomial using Horner's Scheme: | ||
| // With polyeval(x, a_0, a_1, ..., a_n) = a_n * x^n + ... + a_1 * x + a_0, we | ||
| // evaluated it as: a_0 + x * (a_1 + x * ( ... (a_(n-1) + x * a_n) ... ) ) ). | ||
| // We will use fma instructions if available. | ||
| // Example: to evaluate x^3 + 2*x^2 + 3*x + 4, call | ||
| // polyeval( x, 4.0, 3.0, 2.0, 1.0 ) | ||
|
|
||
| #if defined(__x86_64__) || defined(__aarch64__) | ||
| #include "FMA.h" | ||
|
|
||
| namespace __llvm_libc { | ||
| namespace fputil { | ||
|
|
||
| template <typename T> static inline T polyeval(T x, T a0) { return a0; } | ||
|
|
||
| template <typename T, typename... Ts> | ||
| static inline T polyeval(T x, T a0, Ts... a) { | ||
| return fma(x, polyeval(x, a...), a0); | ||
| } | ||
|
|
||
| } // namespace fputil | ||
| } // namespace __llvm_libc | ||
|
|
||
| #else | ||
|
|
||
| namespace __llvm_libc { | ||
| namespace fputil { | ||
|
|
||
| template <typename T> static inline T polyeval(T x, T a0) { return a0; } | ||
|
|
||
| template <typename T, typename... Ts> | ||
| static inline T polyeval(T x, T a0, Ts... a) { | ||
| return x * polyeval(x, a...) + a0; | ||
| } | ||
|
|
||
| } // namespace fputil | ||
| } // namespace __llvm_libc | ||
|
|
||
| #endif | ||
|
|
||
| #endif // LLVM_LIBC_UTILS_FPUTIL_FMA_H |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,6 +28,7 @@ enum class Operation : int { | |
| Cos, | ||
| Exp, | ||
| Exp2, | ||
| Expm1, | ||
| Floor, | ||
| Round, | ||
| Sin, | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,41 @@ | ||
| # Scripts to generate polynomial approximations for expm1f function using Sollya. | ||
| # | ||
| # To compute expm1f(x), for |x| > Ln(2), using expf(x) - 1.0f is accurate enough, since catastrophic | ||
| # cancellation does not occur with the subtraction. | ||
| # | ||
| # For |x| <= Ln(2), we divide [-Ln2; Ln2] into 3 subintervals: [-Ln2; -1/8], [-1/8, 1/8], [1/8, Ln2], | ||
| # and use a degree-6 polynomial to approximate expm1f in each interval. | ||
|
|
||
| > f := expm1(x); | ||
|
|
||
| # Polynomial approximation for e^(x) - 1 on [-Ln2, -1/8]. | ||
| > P1 := fpminmax(f, [|0, ..., 6|], [|24...], [-log(2), -1/8], relative); | ||
|
|
||
| > log2(supnorm(P1, f, [-log(2), -1/8], relative, 2^(-50))); | ||
| [-29.718757839645220560605567049447893449270454705067;-29.7187578396452193192777968211678241631166415833034] | ||
|
|
||
| > P1; | ||
| -6.899231408397099585272371768951416015625e-8 + x * (0.999998271465301513671875 + x * (0.4999825656414031982421875 | ||
| + x * (0.16657467186450958251953125 + x * (4.1390590369701385498046875e-2 + x * (7.856394164264202117919921875e-3 | ||
| + x * 9.380675037391483783721923828125e-4))))) | ||
|
|
||
| # Polynomial approximation for e^(x) - 1 on [-1/8, 1/8]. | ||
| > P2 := fpminimax(f, [|1,...,6|], [|24...|], [-1/8, 1/8], relative); | ||
|
|
||
| > log2(supnorm(P2, f, [-1/8, 1/8], relative, 2^(-50))); | ||
| [-34.542864999883718873453825391741639571826398336605;-34.542864999883717632126055163461570285672585214842] | ||
|
|
||
| > P2; | ||
| x * (1 + x * (0.5 + x * (0.16666664183139801025390625 + x * (4.1666664183139801025390625e-2 | ||
| + x * (8.3379410207271575927734375e-3 + x * 1.3894210569560527801513671875e-3))))) | ||
|
|
||
| # Polynomial approximation for e^(x) - 1 on [1/8, Ln2]. | ||
| > P3 := fpminimax(f, [|0,...,6|], [|24...|], [1/8, log(2)], relative); | ||
|
|
||
| > log2(supnorm(P3, f, [1/8, log(2)], relative, 2^(-50))); | ||
| [-29.189438260653683379922869677995123967174571911561;-29.1894382606536821385950994497150546810207587897976] | ||
|
|
||
| > P3; | ||
| 1.23142086749794543720781803131103515625e-7 + x * (0.9999969005584716796875 + x * (0.500031292438507080078125 | ||
| + x * (0.16650259494781494140625 + x * (4.21491153538227081298828125e-2 + x * (7.53940828144550323486328125e-3 | ||
| + x * 2.05591344274580478668212890625e-3))))) |