|
| 1 | +// -*- mode:c++;indent-tabs-mode:nil;c-basic-offset:4;coding:utf-8 -*- |
| 2 | +// vi: set et ft=cpp ts=4 sts=4 sw=4 fenc=utf-8 :vi |
| 3 | +// |
| 4 | +// Copyright 2023 Arm Limited |
| 5 | +// |
| 6 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 7 | +// you may not use this file except in compliance with the License. |
| 8 | +// You may obtain a copy of the License at |
| 9 | +// |
| 10 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | +// |
| 12 | +// Unless required by applicable law or agreed to in writing, software |
| 13 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 14 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 15 | +// See the License for the specific language governing permissions and |
| 16 | +// limitations under the License. |
| 17 | + |
| 18 | +#pragma once |
| 19 | + |
| 20 | +/* Helper routine for calculating exp(x) - 1. |
| 21 | + Copied from expm1f_1u6.c, with several simplifications: |
| 22 | + - No special-case handling for tiny or special values, instead return early |
| 23 | + from the main routine. |
| 24 | + - No special handling for large values: |
| 25 | + - No early return for infinity. |
| 26 | + - Simpler combination of p and t in final stage of algorithm. |
| 27 | + - |i| < 27, so can calculate t by simpler shift-and-add, instead of ldexpf. |
| 28 | + From Optimized Routines by Arm Limited. */ |
| 29 | +static inline float Expm1f(float x) { |
| 30 | + /* Reduce argument: f in [-ln2/2, ln2/2], i is exact. */ |
| 31 | + float Shift = 0x1.8p23f; |
| 32 | + float j = fmaf(0x1.715476p+0f, x, Shift) - Shift; |
| 33 | + int i = j; |
| 34 | + float f = fmaf(j, -0x1.62e4p-1f, x); |
| 35 | + f = fmaf(j, -0x1.7f7d1cp-20f, f); |
| 36 | + |
| 37 | + /* Approximate expm1(f) with polynomial P, expm1(f) ~= f + f^2 * P(f). |
| 38 | + Uses Estrin scheme, where the main expm1f routine uses Horner. */ |
| 39 | + float f2 = f * f; |
| 40 | + float p_01 = fmaf(f, 0x1.5554aep-3, 0x1.fffffep-2); |
| 41 | + float p_23 = fmaf(f, 0x1.12287cp-7, 0x1.555736p-5); |
| 42 | + float p = fmaf(f2, p_23, p_01); |
| 43 | + p = fmaf(f2 * f2, 0x1.6b55a2p-10, p); |
| 44 | + p = fmaf(f2, p, f); |
| 45 | + |
| 46 | + /* t = 2^i. */ |
| 47 | + union { |
| 48 | + unsigned i; |
| 49 | + float f; |
| 50 | + } u = {(i + 127) << 23}; |
| 51 | + float t = u.f; |
| 52 | + |
| 53 | + /* expm1(x) ~= p * t + (t - 1). */ |
| 54 | + return fmaf(p, t, t - 1); |
| 55 | +} |
| 56 | + |
| 57 | +/* Single-precision tanh(x) approximation. |
| 58 | + The maximum error is 2.58 ULP. |
| 59 | + Designed by Arm Limited. */ |
| 60 | +static inline float Tanhf(float x) { |
| 61 | + union { |
| 62 | + float f; |
| 63 | + unsigned i; |
| 64 | + } u = {x}; |
| 65 | + unsigned iax = u.i & 0x7fffffff; |
| 66 | + unsigned sign = u.i & ~0x7fffffff; |
| 67 | + |
| 68 | + /* Above 0x1.205966p+3 tanhf rounds to 1 (or -1 for negative). */ |
| 69 | + if (iax > 0x41102cb3) { |
| 70 | + if (iax > 0x7f800000) |
| 71 | + return (x - x) / (x - x); |
| 72 | + u.i = 0x3f800000 | sign; |
| 73 | + return u.f; |
| 74 | + } |
| 75 | + if (iax < 0x34000000) |
| 76 | + return x; |
| 77 | + |
| 78 | + /* tanh(x) = (e^2x - 1) / (e^2x + 1). */ |
| 79 | + float q = Expm1f(2 * x); |
| 80 | + return q / (q + 2); |
| 81 | +} |
0 commit comments