Expand Up
@@ -8,6 +8,7 @@
#include " src/math/exp.h"
#include " common_constants.h" // Lookup tables EXP_M1 and EXP_M2.
#include " explogxf.h" // ziv_test_denorm.
#include " src/__support/CPP/bit.h"
#include " src/__support/CPP/optional.h"
#include " src/__support/FPUtil/FEnvImpl.h"
Expand All
@@ -18,6 +19,7 @@
#include " src/__support/FPUtil/multiply_add.h"
#include " src/__support/FPUtil/nearest_integer.h"
#include " src/__support/FPUtil/rounding_mode.h"
#include " src/__support/FPUtil/triple_double.h"
#include " src/__support/common.h"
#include " src/__support/macros/optimization.h" // LIBC_UNLIKELY
Expand All
@@ -26,9 +28,10 @@
namespace __llvm_libc {
using fputil::DoubleDouble;
using fputil::TripleDouble;
using Float128 = typename fputil::DyadicFloat<128 >;
// log2(e)
constexpr double LOG2_E = 0x1 .71547652b82fep+0 ;
// Error bounds:
Expand All
@@ -37,12 +40,6 @@ constexpr double ERR_D = 0x1.8p-63;
// Errors when using double-double precision.
constexpr double ERR_DD = 0x1 .0p-99 ;
// A value carried as three double-precision pieces.
// Code in this file reads the pieces as `.hi`, `.mid` and `.lo` and adds them
// together, so the represented number is hi + mid + lo.  All pieces default
// to zero, and the type stays an aggregate so tables can be brace-initialized
// as {hi, mid, lo}.
struct TripleDouble {
  double hi{0.0};  // leading piece
  double mid{0.0}; // middle piece
  double lo{0.0};  // trailing piece
};
// -2^-12 * log(2)
// > a = -2^-12 * log(2);
// > b = round(a, 30, RN);
Expand All
@@ -54,142 +51,6 @@ constexpr double MLOG_2_EXP2_M12_MID = 0x1.718432a1b0e26p-47;
constexpr double MLOG_2_EXP2_M12_MID_30 = 0x1 .718432ap-47 ;
constexpr double MLOG_2_EXP2_M12_LO = 0x1 .b0e2633fe0685p-79 ;
// Lookup table of 2^(k * 2^-6), for k = 0..63.
// Entry k is a TripleDouble written as {hi, mid, lo}; entry 0 is exactly
// {1, 0, 0}.  Code in this file indexes the table with `idx1` and either sums
// all three parts or takes only the hi and mid parts.
// NOTE(review): several literals below have stray spaces inside them (for
// example between `0x1` and the fractional digits, or before the exponent).
// That looks like extraction damage - verify against the upstream table
// before relying on these values.
constexpr TripleDouble EXP_MID1[64 ] = {
{0x1p0, 0 , 0 },
{0x1 .02c9a3e778061p0, -0x1 .19083535b085dp-56 , -0x1 .9085b0a3d74d5p-110 },
{0x1 .059b0d3158574p0, 0x1 .d73e2a475b465p -55 , 0x1 .05ff94f8d257ep-110 },
{0x1 .0874518759bc8p0, 0x1 .186be4bb284ffp-57 , 0x1 .15820d96b414fp-111 },
{0x1 .0b5586cf9890fp0, 0x1 .8a62e4adc610bp-54 , -0x1 .67c9bd6ebf74cp-108 },
{0x1 .0e3ec32d3d1a2p0, 0x1 .03a1727c57b53p-59 , -0x1 .5aa76994e9ddbp-113 },
{0x1 .11301d0125b51p0, -0x1 .6c51039449b3ap-54 , 0x1 .9d58b988f562dp-109 },
{0x1 .1429aaea92dep0, -0x1 .32fbf9af1369ep-54 , -0x1 .2fe7bb4c76416p-108 },
{0x1 .172b83c7d517bp0, -0x1 .19041b9d78a76p-55 , 0x1 .4f2406aa13ffp-109 },
{0x1 .1a35beb6fcb75p0, 0x1 .e5b4c7b4968e4p -55 , 0x1 .ad36183926ae8p -111 },
{0x1 .1d4873168b9aap0, 0x1 .e016e00a2643cp -54 , 0x1 .ea62d0881b918p -110 },
{0x1 .2063b88628cd6p0, 0x1 .dc775814a8495p -55 , -0x1 .781dbc16f1ea4p-111 },
{0x1 .2387a6e756238p0, 0x1 .9b07eb6c70573p-54 , -0x1 .4d89f9af532ep-109 },
{0x1 .26b4565e27cddp0, 0x1 .2bd339940e9d9p-55 , 0x1 .277393a461b77p-110 },
{0x1 .29e9df51fdee1p0, 0x1 .612e8afad1255p-55 , 0x1 .de5448560469p -111 },
{0x1 .2d285a6e4030bp0, 0x1 .0024754db41d5p-54 , -0x1 .ee9d8f8cb9307p -110 },
{0x1 .306fe0a31b715p0, 0x1 .6f46ad23182e4p-55 , 0x1 .7b7b2f09cd0d9p-110 },
{0x1 .33c08b26416ffp0, 0x1 .32721843659a6p-54 , -0x1 .406a2ea6cfc6bp-108 },
{0x1 .371a7373aa9cbp0, -0x1 .63aeabf42eae2p-54 , 0x1 .87e3e12516bfap-108 },
{0x1 .3a7db34e59ff7p0, -0x1 .5e436d661f5e3p-56 , 0x1 .9b0b1ff17c296p-111 },
{0x1 .3dea64c123422p0, 0x1 .ada0911f09ebcp -55 , -0x1 .808ba68fa8fb7p-109 },
{0x1 .4160a21f72e2ap0, -0x1 .ef3691c309278p -58 , -0x1 .32b43eafc6518p-114 },
{0x1 .44e086061892dp0, 0x1 .89b7a04ef80dp-59 , -0x1 .0ac312de3d922p-114 },
{0x1 .486a2b5c13cdp0, 0x1 .3c1a3b69062fp-56 , 0x1 .e1eebae743acp -111 },
{0x1 .4bfdad5362a27p0, 0x1 .d4397afec42e2p -56 , 0x1 .c06c7745c2b39p -113 },
{0x1 .4f9b2769d2ca7p0, -0x1 .4b309d25957e3p-54 , -0x1 .1aa1fd7b685cdp-112 },
{0x1 .5342b569d4f82p0, -0x1 .07abe1db13cadp-55 , 0x1 .fa733951f214cp -111 },
{0x1 .56f4736b527dap0, 0x1 .9bb2c011d93adp-54 , -0x1 .ff86852a613ffp -111 },
{0x1 .5ab07dd485429p0, 0x1 .6324c054647adp-54 , -0x1 .744ee506fdafep-109 },
{0x1 .5e76f15ad2148p0, 0x1 .ba6f93080e65ep -54 , -0x1 .95f9ab75fa7d6p-108 },
{0x1 .6247eb03a5585p0, -0x1 .383c17e40b497p-54 , 0x1 .5d8e757cfb991p-111 },
{0x1 .6623882552225p0, -0x1 .bb60987591c34p -54 , 0x1 .4a337f4dc0a3bp-108 },
{0x1 .6a09e667f3bcdp0, -0x1 .bdd3413b26456p -54 , 0x1 .57d3e3adec175p-108 },
{0x1 .6dfb23c651a2fp0, -0x1 .bbe3a683c88abp -57 , 0x1 .a59f88abbe778p -115 },
{0x1 .71f75e8ec5f74p0, -0x1 .16e4786887a99p-55 , -0x1 .269796953a4c3p-109 },
{0x1 .75feb564267c9p0, -0x1 .0245957316dd3p-54 , -0x1 .8f8e7fa19e5e8p-108 },
{0x1 .7a11473eb0187p0, -0x1 .41577ee04992fp-55 , -0x1 .4217a932d10d4p-113 },
{0x1 .7e2f336cf4e62p0, 0x1 .05d02ba15797ep-56 , 0x1 .70a1427f8fcdfp-112 },
{0x1 .82589994cce13p0, -0x1 .d4c1dd41532d8p -54 , 0x1 .0f6ad65cbbac1p-112 },
{0x1 .868d99b4492edp0, -0x1 .fc6f89bd4f6bap -54 , -0x1 .f16f65181d921p -109 },
{0x1 .8ace5422aa0dbp0, 0x1 .6e9f156864b27p-54 , -0x1 .30644a7836333p-110 },
{0x1 .8f1ae99157736p0, 0x1 .5cc13a2e3976cp-55 , 0x1 .3bf26d2b85163p-114 },
{0x1 .93737b0cdc5e5p0, -0x1 .75fc781b57ebcp-57 , 0x1 .697e257ac0db2p-111 },
{0x1 .97d829fde4e5p0, -0x1 .d185b7c1b85d1p -54 , 0x1 .7edb9d7144b6fp-108 },
{0x1 .9c49182a3f09p0, 0x1 .c7c46b071f2bep -56 , 0x1 .6376b7943085cp-110 },
{0x1 .a0c667b5de565p0 , -0x1 .359495d1cd533p-54 , 0x1 .354084551b4fbp-109 },
{0x1 .a5503b23e255dp0 , -0x1 .d2f6edb8d41e1p -54 , -0x1 .bfd7adfd63f48p -111 },
{0x1 .a9e6b5579fdbfp0 , 0x1 .0fac90ef7fd31p-54 , 0x1 .8b16ae39e8cb9p-109 },
{0x1 .ae89f995ad3adp0 , 0x1 .7a1cd345dcc81p-54 , 0x1 .a7fbc3ae675eap -108 },
{0x1 .b33a2b84f15fbp0 , -0x1 .2805e3084d708p-57 , 0x1 .2babc0edda4d9p-111 },
{0x1 .b7f76f2fb5e47p0 , -0x1 .5584f7e54ac3bp-56 , 0x1 .aa64481e1ab72p -111 },
{0x1 .bcc1e904bc1d2p0 , 0x1 .23dd07a2d9e84p-55 , 0x1 .9a164050e1258p-109 },
{0x1 .c199bdd85529cp0 , 0x1 .11065895048ddp-55 , 0x1 .99e51125928dap-110 },
{0x1 .c67f12e57d14bp0 , 0x1 .2884dff483cadp-54 , -0x1 .fc44c329d5cb2p -109 },
{0x1 .cb720dcef9069p0 , 0x1 .503cbd1e949dbp-56 , 0x1 .d8765566b032ep -110 },
{0x1 .d072d4a07897cp0 , -0x1 .cbc3743797a9cp -54 , -0x1 .e7044039da0f6p -108 },
{0x1 .d5818dcfba487p0 , 0x1 .2ed02d75b3707p-55 , -0x1 .ab053b05531fcp -111 },
{0x1 .da9e603db3285p0 , 0x1 .c2300696db532p -54 , 0x1 .7f6246f0ec615p-108 },
{0x1 .dfc97337b9b5fp0 , -0x1 .1a5cd4f184b5cp-54 , 0x1 .b7225a944efd6p -108 },
{0x1 .e502ee78b3ff6p0 , 0x1 .39e8980a9cc8fp-55 , 0x1 .1e92cb3c2d278p-109 },
{0x1 .ea4afa2a490dap0 , -0x1 .e9c23179c2893p -54 , -0x1 .fc0f242bbf3dep -109 },
{0x1 .efa1bee615a27p0 , 0x1 .dc7f486a4b6bp -54 , 0x1 .f6dd5d229ff69p -108 },
{0x1 .f50765b6e454p0 , 0x1 .9d3e12dd8a18bp-54 , -0x1 .4019bffc80ef3p-110 },
{0x1 .fa7c1819e90d8p0 , 0x1 .74853f3a5931ep-55 , 0x1 .dc060c36f7651p -112 },
};
// Lookup table of 2^(k * 2^-12), for k = 0..63.
// Entry k is a TripleDouble written as {hi, mid, lo}; entry 0 is exactly
// {1, 0, 0}.  Code in this file indexes the table with `idx2` and either sums
// all three parts or takes only the hi and mid parts.
// NOTE(review): several literals below have stray spaces inside them (for
// example between `0x1` and the fractional digits, or before the exponent).
// That looks like extraction damage - verify against the upstream table
// before relying on these values.
constexpr TripleDouble EXP_MID2[64 ] = {
{0x1p0, 0 , 0 },
{0x1 .000b175effdc7p0, 0x1 .ae8e38c59c72ap -54 , 0x1 .39726694630e3p-108 },
{0x1 .00162f3904052p0, -0x1 .7b5d0d58ea8f4p-58 , 0x1 .e5e06ddd31156p -112 },
{0x1 .0021478e11ce6p0, 0x1 .4115cb6b16a8ep-54 , 0x1 .5a0768b51f609p-111 },
{0x1 .002c605e2e8cfp0, -0x1 .d7c96f201bb2fp -55 , 0x1 .d008403605217p -111 },
{0x1 .003779a95f959p0, 0x1 .84711d4c35e9fp-54 , 0x1 .89bc16f765708p-109 },
{0x1 .0042936faa3d8p0, -0x1 .0484245243777p-55 , -0x1 .4535b7f8c1e2dp-109 },
{0x1 .004dadb113dap0, -0x1 .4b237da2025f9p-54 , -0x1 .8ba92f6b25456p-108 },
{0x1 .0058c86da1c0ap0, -0x1 .5e00e62d6b30dp-56 , -0x1 .30c72e81f4294p-113 },
{0x1 .0063e3a559473p0, 0x1 .a1d6cedbb9481p -54 , -0x1 .34a5384e6f0b9p-110 },
{0x1 .006eff583fc3dp0, -0x1 .4acf197a00142p-54 , 0x1 .f8d0580865d2ep -108 },
{0x1 .007a1b865a8cap0, -0x1 .eaf2ea42391a5p -57 , -0x1 .002bcb3ae9a99p-111 },
{0x1 .0085382faef83p0, 0x1 .da93f90835f75p -56 , 0x1 .c3c5aedee9851p -111 },
{0x1 .00905554425d4p0, -0x1 .6a79084ab093cp-55 , 0x1 .7217851d1ec6ep-109 },
{0x1 .009b72f41a12bp0, 0x1 .86364f8fbe8f8p-54 , -0x1 .80cbca335a7c3p-110 },
{0x1 .00a6910f3b6fdp0, -0x1 .82e8e14e3110ep-55 , -0x1 .706bd4eb22595p-110 },
{0x1 .00b1afa5abcbfp0, -0x1 .4f6b2a7609f71p-55 , -0x1 .b55dd523f3c08p -111 },
{0x1 .00bcceb7707ecp0, -0x1 .e1a258ea8f71bp -56 , 0x1 .90a1e207cced1p-110 },
{0x1 .00c7ee448ee02p0, 0x1 .4362ca5bc26f1p-56 , 0x1 .78d0472db37c5p-110 },
{0x1 .00d30e4d0c483p0, 0x1 .095a56c919d02p-54 , -0x1 .bcd4db3cb52fep -109 },
{0x1 .00de2ed0ee0f5p0, -0x1 .406ac4e81a645p-57 , -0x1 .cf1b131575ec2p -112 },
{0x1 .00e94fd0398ep0, 0x1 .b5a6902767e09p -54 , -0x1 .6aaa1fa7ff913p-112 },
{0x1 .00f4714af41d3p0, -0x1 .91b2060859321p-54 , 0x1 .68f236dff3218p-110 },
{0x1 .00ff93412315cp0, 0x1 .427068ab22306p-55 , -0x1 .e8bb58067e60ap -109 },
{0x1 .010ab5b2cbd11p0, 0x1 .c1d0660524e08p -54 , 0x1 .d4cd5e1d71fdfp -108 },
{0x1 .0115d89ff3a8bp0, -0x1 .e7bdfb3204be8p -54 , 0x1 .e4ecf350ebe88p -108 },
{0x1 .0120fc089ff63p0, 0x1 .843aa8b9cbbc6p-55 , 0x1 .6a2aa2c89c4f8p-109 },
{0x1 .012c1fecd613bp0, -0x1 .34104ee7edae9p-56 , 0x1 .1ca368a20ed05p-110 },
{0x1 .0137444c9b5b5p0, -0x1 .2b6aeb6176892p-56 , 0x1 .edb1095d925cfp -114 },
{0x1 .01426927f5278p0, 0x1 .a8cd33b8a1bb3p -56 , -0x1 .488c78eded75fp-111 },
{0x1 .014d8e7ee8d2fp0, 0x1 .2edc08e5da99ap-56 , -0x1 .7480f5ea1b3c9p-113 },
{0x1 .0158b4517bb88p0, 0x1 .57ba2dc7e0c73p-55 , -0x1 .ae45989a04dd5p -111 },
{0x1 .0163da9fb3335p0, 0x1 .b61299ab8cdb7p -54 , 0x1 .bf48007d80987p -109 },
{0x1 .016f0169949edp0, -0x1 .90565902c5f44p-54 , 0x1 .1aa91a059292cp-109 },
{0x1 .017a28af25567p0, 0x1 .70fc41c5c2d53p-55 , 0x1 .b6663292855f5p -110 },
{0x1 .018550706ab62p0, 0x1 .4b9a6e145d76cp-54 , 0x1 .e7fbca6793d94p -108 },
{0x1 .019078ad6a19fp0, -0x1 .008eff5142bf9p-56 , -0x1 .5b9f5c7de3b93p-110 },
{0x1 .019ba16628de2p0, -0x1 .77669f033c7dep-54 , 0x1 .4638bf2f6acabp-110 },
{0x1 .01a6ca9aac5f3p0, -0x1 .09bb78eeead0ap-54 , -0x1 .ab237b9a069c5p -109 },
{0x1 .01b1f44af9f9ep0, 0x1 .371231477ece5p-54 , 0x1 .3ab358be97cefp-108 },
{0x1 .01bd1e77170b4p0, 0x1 .5e7626621eb5bp-56 , -0x1 .4027b2294bb64p-110 },
{0x1 .01c8491f08f08p0, -0x1 .bc72b100828a5p -54 , 0x1 .656394426c99p-111 },
{0x1 .01d37442d507p0, -0x1 .ce39cbbab8bbep -57 , 0x1 .bf9785189bdd8p -111 },
{0x1 .01de9fe280ac8p0, 0x1 .16996709da2e2p-55 , 0x1 .7c12f86114fe3p-109 },
{0x1 .01e9cbfe113efp0, -0x1 .c11f5239bf535p -55 , -0x1 .653d5d24b5d28p-109 },
{0x1 .01f4f8958c1c6p0, 0x1 .e1d4eb5edc6b3p -55 , 0x1 .04a0cdc1d86d7p-109 },
{0x1 .020025a8f6a35p0, -0x1 .afb99946ee3fp -54 , 0x1 .c678c46149782p -109 },
{0x1 .020b533856324p0, -0x1 .8f06d8a148a32p-54 , 0x1 .48524e1e9df7p-108 },
{0x1 .02168143b0281p0, -0x1 .2bf310fc54eb6p-55 , 0x1 .9953ea727ff0bp-109 },
{0x1 .0221afcb09e3ep0, -0x1 .c95a035eb4175p -54 , -0x1 .ccfbbec22d28ep -108 },
{0x1 .022cdece68c4fp0, -0x1 .491793e46834dp-54 , 0x1 .9e2bb6e181de1p-108 },
{0x1 .02380e4dd22adp0, -0x1 .3e8d0d9c49091p-56 , 0x1 .f17609ae29308p -110 },
{0x1 .02433e494b755p0, -0x1 .314aa16278aa3p-54 , -0x1 .c7dc2c476bfb8p -110 },
{0x1 .024e6ec0da046p0, 0x1 .48daf888e9651p-55 , -0x1 .fab994971d4a3p -109 },
{0x1 .02599fb483385p0, 0x1 .56dc8046821f4p-55 , 0x1 .848b62cbdd0afp-109 },
{0x1 .0264d1244c719p0, 0x1 .45b42356b9d47p-54 , -0x1 .bf603ba715d0cp -109 },
{0x1 .027003103b10ep0, -0x1 .082ef51b61d7ep-56 , 0x1 .89434e751e1aap-110 },
{0x1 .027b357854772p0, 0x1 .2106ed0920a34p-56 , -0x1 .03b54fd64e8acp-110 },
{0x1 .0286685c9e059p0, -0x1 .fd4cf26ea5d0fp -54 , 0x1 .7785ea0acc486p-109 },
{0x1 .02919bbd1d1d8p0, -0x1 .09f8775e78084p-54 , -0x1 .ce447fdb35ff9p -109 },
{0x1 .029ccf99d720ap0, 0x1 .64cbba902ca27p-58 , 0x1 .5b884aab5642ap-112 },
{0x1 .02a803f2d170dp0, 0x1 .4383ef231d207p-54 , -0x1 .cfb3e46d7c1cp -108 },
{0x1 .02b338c811703p0, 0x1 .4a47a505b3a47p-54 , -0x1 .0d40cee4b81afp-112 },
{0x1 .02be6e199c811p0, 0x1 .e47120223467fp -54 , 0x1 .6ae7d36d7c1f7p-109 },
};
// Polynomial approximations with double precision:
// Return expm1(dx) / x ~ 1 + dx / 2 + dx^2 / 6 + dx^3 / 24.
// For |dx| < 2^-13 + 2^-30:
Expand Down
Expand Up
@@ -267,14 +128,14 @@ Float128 exp_f128(double x, double kd, int idx1, int idx2) {
// TODO: Skip recalculating exp_mid1 and exp_mid2.
Float128 exp_mid1 =
fputil::quick_add (Float128 (EXP_MID1 [idx1].hi ),
fputil::quick_add (Float128 (EXP_MID1 [idx1].mid ),
Float128 (EXP_MID1 [idx1].lo )));
fputil::quick_add (Float128 (EXP2_MID1 [idx1].hi ),
fputil::quick_add (Float128 (EXP2_MID1 [idx1].mid ),
Float128 (EXP2_MID1 [idx1].lo )));
Float128 exp_mid2 =
fputil::quick_add (Float128 (EXP_MID2 [idx2].hi ),
fputil::quick_add (Float128 (EXP_MID2 [idx2].mid ),
Float128 (EXP_MID2 [idx2].lo )));
fputil::quick_add (Float128 (EXP2_MID2 [idx2].hi ),
fputil::quick_add (Float128 (EXP2_MID2 [idx2].mid ),
Float128 (EXP2_MID2 [idx2].lo )));
Float128 exp_mid = fputil::quick_mul (exp_mid1, exp_mid2);
Expand Down
Expand Up
@@ -309,48 +170,8 @@ DoubleDouble exp_double_double(double x, double kd,
return r;
}
// Rounding test for the case where the output might be denormal.
//
// Parameters, as used by the body:
//   hi  - integer exponent; (hi + 1022) is added to the exponent field of
//         the other inputs, i.e. they are scaled by 2^(hi + 1022).
//   mid - leading part of the approximation.
//   lo  - trailing part of the approximation.
//   err - error bound on lo; the test brackets the result with lo +/- err.
//
// Returns the rounded result when the upper and lower bounds round to the
// same double, and cpp::nullopt when they disagree (the caller must then
// fall back to a more accurate computation).
cpp::optional<double> ziv_test_denorm(int hi, double mid, double lo,
                                      double err) {
  using FloatProp = typename fputil::FloatProperties<double>;

  // Scaling factor = 1/(min normal number) = 2^1022
  // (hi + 1022) is negative whenever hi < -1022, and left-shifting a negative
  // signed value is undefined behaviour before C++20.  Multiplying by
  // 2^MANTISSA_WIDTH yields the same bit pattern and is well defined.
  int64_t exp_hi = static_cast<int64_t>(hi + 1022) *
                   (int64_t(1) << FloatProp::MANTISSA_WIDTH);
  double mid_hi = cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(mid));

  // Extra errors from another rounding step.
  err += 0x1.0p-52;
  double lo_u = lo + err;
  double lo_l = lo - err;

  // Apply the same exponent adjustment to both error-bracketed low parts.
  double mid_lo_u =
      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo_u));
  double mid_lo_l =
      cpp::bit_cast<double>(exp_hi + cpp::bit_cast<int64_t>(lo_l));

  // Upper and lower bounds of the scaled result.
  double upper = (mid_hi + mid_lo_u);
  double lower = (mid_hi + mid_lo_l);

  // Bit pattern subtracted from the final result to undo the +1.0 below.
  uint64_t scale_down = 0;
  if (upper < 1.0) {
    // Upper bound is in denormal range, need extra rounding.
    // By adding 1.0, the results will have similar rounding points as
    // denormal outputs.
    upper += 1.0;
    lower += 1.0;
    scale_down = 0x3FF0'0000'0000'0000; // 1.0
  }

  // Both bounds rounded to the same value: the result is decided.
  if (LIBC_LIKELY(upper == lower)) {
    return cpp::bit_cast<double>(cpp::bit_cast<uint64_t>(upper) - scale_down);
  }

  return cpp::nullopt;
}
// Check for exceptional cases when
// |x| <= 2^-53 or x < log(2^-1075) or x >= 0x1.6232bdd7abcd3p+9
double set_exceptional (double x) {
using FPBits = typename fputil::FPBits<double >;
using FloatProp = typename fputil::FloatProperties<double >;
Expand All
@@ -359,7 +180,7 @@ double set_exceptional(double x) {
uint64_t x_u = xbits.uintval ();
uint64_t x_abs = x_u & FloatProp::EXP_MANT_MASK;
// |x| <= 2^-53
if (x_abs <= 0x3ca0'0000'0000'0000ULL ) {
// exp(x) ~ 1 + x
return 1 + x;
Expand Down
Expand Up
@@ -424,7 +245,7 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
return set_exceptional (x);
}
// Now log(2^-1075) <= x <= -2^-53 or 2^-53 <= x < log(2^1023 * (2 - 2^-52))
// Range reduction:
// Let x = log(2) * (hi + mid1 + mid2) + lo
Expand Down
Expand Up
@@ -514,8 +335,8 @@ LLVM_LIBC_FUNCTION(double, exp, (double x)) {
bool denorm = (hi <= -1022 );
DoubleDouble exp_mid1{EXP_MID1 [idx1].mid , EXP_MID1 [idx1].hi };
DoubleDouble exp_mid2{EXP_MID2 [idx2].mid , EXP_MID2 [idx2].hi };
DoubleDouble exp_mid1{EXP2_MID1 [idx1].mid , EXP2_MID1 [idx1].hi };
DoubleDouble exp_mid2{EXP2_MID2 [idx2].mid , EXP2_MID2 [idx2].hi };
DoubleDouble exp_mid = fputil::quick_mult (exp_mid1, exp_mid2);
Expand Down