-
Notifications
You must be signed in to change notification settings - Fork 11k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[libc] Implement correctly rounded log2f based on RLIBM library.
Implement log2f based on RLIBM library correctly rounded for all rounding modes. Reviewed By: sivachandra, michaelrj, santoshn, jpl169, zimmermann6 Differential Revision: https://reviews.llvm.org/D115828
- Loading branch information
Showing
21 changed files
with
528 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
//===-- Common constants for math functions ---------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "common_constants.h" | ||
|
||
namespace __llvm_libc { | ||
|
||
// Lookup table for (1/f) where f = 1 + n*2^(-7), n = 0..127. | ||
const double ONE_OVER_F[128] = { | ||
0x1.0000000000000p+0, 0x1.fc07f01fc07f0p-1, 0x1.f81f81f81f820p-1, | ||
0x1.f44659e4a4271p-1, 0x1.f07c1f07c1f08p-1, 0x1.ecc07b301ecc0p-1, | ||
0x1.e9131abf0b767p-1, 0x1.e573ac901e574p-1, 0x1.e1e1e1e1e1e1ep-1, | ||
0x1.de5d6e3f8868ap-1, 0x1.dae6076b981dbp-1, 0x1.d77b654b82c34p-1, | ||
0x1.d41d41d41d41dp-1, 0x1.d0cb58f6ec074p-1, 0x1.cd85689039b0bp-1, | ||
0x1.ca4b3055ee191p-1, 0x1.c71c71c71c71cp-1, 0x1.c3f8f01c3f8f0p-1, | ||
0x1.c0e070381c0e0p-1, 0x1.bdd2b899406f7p-1, 0x1.bacf914c1bad0p-1, | ||
0x1.b7d6c3dda338bp-1, 0x1.b4e81b4e81b4fp-1, 0x1.b2036406c80d9p-1, | ||
0x1.af286bca1af28p-1, 0x1.ac5701ac5701bp-1, 0x1.a98ef606a63bep-1, | ||
0x1.a6d01a6d01a6dp-1, 0x1.a41a41a41a41ap-1, 0x1.a16d3f97a4b02p-1, | ||
0x1.9ec8e951033d9p-1, 0x1.9c2d14ee4a102p-1, 0x1.999999999999ap-1, | ||
0x1.970e4f80cb872p-1, 0x1.948b0fcd6e9e0p-1, 0x1.920fb49d0e229p-1, | ||
0x1.8f9c18f9c18fap-1, 0x1.8d3018d3018d3p-1, 0x1.8acb90f6bf3aap-1, | ||
0x1.886e5f0abb04ap-1, 0x1.8618618618618p-1, 0x1.83c977ab2beddp-1, | ||
0x1.8181818181818p-1, 0x1.7f405fd017f40p-1, 0x1.7d05f417d05f4p-1, | ||
0x1.7ad2208e0ecc3p-1, 0x1.78a4c8178a4c8p-1, 0x1.767dce434a9b1p-1, | ||
0x1.745d1745d1746p-1, 0x1.724287f46debcp-1, 0x1.702e05c0b8170p-1, | ||
0x1.6e1f76b4337c7p-1, 0x1.6c16c16c16c17p-1, 0x1.6a13cd1537290p-1, | ||
0x1.6816816816817p-1, 0x1.661ec6a5122f9p-1, 0x1.642c8590b2164p-1, | ||
0x1.623fa77016240p-1, 0x1.6058160581606p-1, 0x1.5e75bb8d015e7p-1, | ||
0x1.5c9882b931057p-1, 0x1.5ac056b015ac0p-1, 0x1.58ed2308158edp-1, | ||
0x1.571ed3c506b3ap-1, 0x1.5555555555555p-1, 0x1.5390948f40febp-1, | ||
0x1.51d07eae2f815p-1, 0x1.5015015015015p-1, 0x1.4e5e0a72f0539p-1, | ||
0x1.4cab88725af6ep-1, 0x1.4afd6a052bf5bp-1, 0x1.49539e3b2d067p-1, | ||
0x1.47ae147ae147bp-1, 0x1.460cbc7f5cf9ap-1, 0x1.446f86562d9fbp-1, | ||
0x1.42d6625d51f87p-1, 0x1.4141414141414p-1, 0x1.3fb013fb013fbp-1, | ||
0x1.3e22cbce4a902p-1, 0x1.3c995a47babe7p-1, 0x1.3b13b13b13b14p-1, | ||
0x1.3991c2c187f63p-1, 0x1.3813813813814p-1, 0x1.3698df3de0748p-1, | ||
0x1.3521cfb2b78c1p-1, 0x1.33ae45b57bcb2p-1, 0x1.323e34a2b10bfp-1, | ||
0x1.30d190130d190p-1, 0x1.2f684bda12f68p-1, 0x1.2e025c04b8097p-1, | ||
0x1.2c9fb4d812ca0p-1, 0x1.2b404ad012b40p-1, 0x1.29e4129e4129ep-1, | ||
0x1.288b01288b013p-1, 0x1.27350b8812735p-1, 0x1.25e22708092f1p-1, | ||
0x1.2492492492492p-1, 0x1.23456789abcdfp-1, 0x1.21fb78121fb78p-1, | ||
0x1.20b470c67c0d9p-1, 0x1.1f7047dc11f70p-1, 0x1.1e2ef3b3fb874p-1, | ||
0x1.1cf06ada2811dp-1, 0x1.1bb4a4046ed29p-1, 0x1.1a7b9611a7b96p-1, | ||
0x1.19453808ca29cp-1, 0x1.1811811811812p-1, 0x1.16e0689427379p-1, | ||
0x1.15b1e5f75270dp-1, 0x1.1485f0e0acd3bp-1, 0x1.135c81135c811p-1, | ||
0x1.12358e75d3033p-1, 0x1.1111111111111p-1, 0x1.0fef010fef011p-1, | ||
0x1.0ecf56be69c90p-1, 0x1.0db20a88f4696p-1, 0x1.0c9714fbcda3bp-1, | ||
0x1.0b7e6ec259dc8p-1, 0x1.0a6810a6810a7p-1, 0x1.0953f39010954p-1, | ||
0x1.0842108421084p-1, 0x1.073260a47f7c6p-1, 0x1.0624dd2f1a9fcp-1, | ||
0x1.05197f7d73404p-1, 0x1.0410410410410p-1, 0x1.03091b51f5e1ap-1, | ||
0x1.0204081020408p-1, 0x1.0101010101010p-1}; | ||
|
||
} // namespace __llvm_libc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
//===-- Common constants for math functions ---------------------*- C++ -*-===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#ifndef LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H | ||
#define LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H | ||
|
||
namespace __llvm_libc { | ||
|
||
// Lookup table for (1/f) where f = 1 + n*2^(-7), n = 0..127. | ||
extern const double ONE_OVER_F[128]; | ||
|
||
} // namespace __llvm_libc | ||
|
||
#endif // LLVM_LIBC_SRC_MATH_GENERIC_COMMON_CONSTANTS_H |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,154 @@ | ||
//===-- Single-precision log2(x) function ---------------------------------===// | ||
// | ||
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. | ||
// See https://llvm.org/LICENSE.txt for license information. | ||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
|
||
#include "src/math/log2f.h" | ||
#include "common_constants.h" // Lookup table for (1/f) | ||
#include "src/__support/FPUtil/BasicOperations.h" | ||
#include "src/__support/FPUtil/FEnvUtils.h" | ||
#include "src/__support/FPUtil/FMA.h" | ||
#include "src/__support/FPUtil/FPBits.h" | ||
#include "src/__support/FPUtil/PolyEval.h" | ||
#include "src/__support/common.h" | ||
|
||
// This is a correctly-rounded algorithm for log2(x) in single precision with | ||
// round-to-nearest, tie-to-even mode from the RLIBM project at: | ||
// https://people.cs.rutgers.edu/~sn349/rlibm | ||
|
||
// Step 1 - Range reduction: | ||
// For x = 2^m * 1.mant, log2(x) = m + log2(1.m) | ||
// If x is denormal, we normalize it by multiplying x by 2^23 and subtracting | ||
// m by 23. | ||
|
||
// Step 2 - Another range reduction: | ||
// To compute log(1.mant), let f be the highest 8 bits including the hidden | ||
// bit, and d be the difference (1.mant - f), i.e. the remaining 16 bits of the | ||
// mantissa. Then we have the following approximation formula: | ||
// log2(1.mant) = log2(f) + log2(1.mant / f) | ||
// = log2(f) + log2(1 + d/f) | ||
// ~ log2(f) + P(d/f) | ||
// since d/f is sufficiently small. | ||
// log2(f) and 1/f are then stored in two 2^7 = 128 entries look-up tables. | ||
|
||
// Step 3 - Polynomial approximation: | ||
// To compute P(d/f), we use a single degree-5 polynomial in double precision | ||
// which provides correct rounding for all but few exception values. | ||
// For more detail about how this polynomial is obtained, please refer to the | ||
// papers: | ||
// Lim, J. and Nagarakatte, S., "One Polynomial Approximation to Produce | ||
// Correctly Rounded Results of an Elementary Function for Multiple | ||
// Representations and Rounding Modes", Proceedings of the 49th ACM SIGPLAN | ||
// Symposium on Principles of Programming Languages (POPL-2022), Philadelphia, | ||
// USA, Jan. 16-22, 2022. | ||
// https://people.cs.rutgers.edu/~sn349/papers/rlibmall-popl-2022.pdf | ||
// Aanjaneya, M., Lim, J., and Nagarakatte, S., "RLibm-Prog: Progressive | ||
// Polynomial Approximations for Fast Correctly Rounded Math Libraries", | ||
// Dept. of Comp. Sci., Rutgets U., Technical Report DCS-TR-758, Nov. 2021. | ||
// https://arxiv.org/pdf/2111.12852.pdf. | ||
|
||
namespace __llvm_libc { | ||
|
||
// Lookup table for log2(f) = log2(1 + n*2^(-7)) where n = 0..127. | ||
static constexpr double LOG2_F[128] = { | ||
0x0.0000000000000p+0, 0x1.6fe50b6ef0851p-7, 0x1.6e79685c2d22ap-6, | ||
0x1.11cd1d5133413p-5, 0x1.6bad3758efd87p-5, 0x1.c4dfab90aab5fp-5, | ||
0x1.0eb389fa29f9bp-4, 0x1.3aa2fdd27f1c3p-4, 0x1.663f6fac91316p-4, | ||
0x1.918a16e46335bp-4, 0x1.bc84240adabbap-4, 0x1.e72ec117fa5b2p-4, | ||
0x1.08c588cda79e4p-3, 0x1.1dcd197552b7bp-3, 0x1.32ae9e278ae1ap-3, | ||
0x1.476a9f983f74dp-3, 0x1.5c01a39fbd688p-3, 0x1.70742d4ef027fp-3, | ||
0x1.84c2bd02f03b3p-3, 0x1.98edd077e70dfp-3, 0x1.acf5e2db4ec94p-3, | ||
0x1.c0db6cdd94deep-3, 0x1.d49ee4c325970p-3, 0x1.e840be74e6a4dp-3, | ||
0x1.fbc16b902680ap-3, 0x1.0790adbb03009p-2, 0x1.11307dad30b76p-2, | ||
0x1.1ac05b291f070p-2, 0x1.24407ab0e073ap-2, 0x1.2db10fc4d9aafp-2, | ||
0x1.37124cea4cdedp-2, 0x1.406463b1b0449p-2, 0x1.49a784bcd1b8bp-2, | ||
0x1.52dbdfc4c96b3p-2, 0x1.5c01a39fbd688p-2, 0x1.6518fe4677ba7p-2, | ||
0x1.6e221cd9d0cdep-2, 0x1.771d2ba7efb3cp-2, 0x1.800a563161c54p-2, | ||
0x1.88e9c72e0b226p-2, 0x1.91bba891f1709p-2, 0x1.9a802391e232fp-2, | ||
0x1.a33760a7f6051p-2, 0x1.abe18797f1f49p-2, 0x1.b47ebf73882a1p-2, | ||
0x1.bd0f2e9e79031p-2, 0x1.c592fad295b56p-2, 0x1.ce0a4923a587dp-2, | ||
0x1.d6753e032ea0fp-2, 0x1.ded3fd442364cp-2, 0x1.e726aa1e754d2p-2, | ||
0x1.ef6d67328e220p-2, 0x1.f7a8568cb06cfp-2, 0x1.ffd799a83ff9bp-2, | ||
0x1.03fda8b97997fp-1, 0x1.0809cf27f703dp-1, 0x1.0c10500d63aa6p-1, | ||
0x1.10113b153c8eap-1, 0x1.140c9faa1e544p-1, 0x1.18028cf72976ap-1, | ||
0x1.1bf311e95d00ep-1, 0x1.1fde3d30e8126p-1, 0x1.23c41d42727c8p-1, | ||
0x1.27a4c0585cbf8p-1, 0x1.2b803473f7ad1p-1, 0x1.2f56875eb3f26p-1, | ||
0x1.3327c6ab49ca7p-1, 0x1.36f3ffb6d9162p-1, 0x1.3abb3faa02167p-1, | ||
0x1.3e7d9379f7016p-1, 0x1.423b07e986aa9p-1, 0x1.45f3a98a20739p-1, | ||
0x1.49a784bcd1b8bp-1, 0x1.4d56a5b33cec4p-1, 0x1.510118708a8f9p-1, | ||
0x1.54a6e8ca5438ep-1, 0x1.5848226989d34p-1, 0x1.5be4d0cb51435p-1, | ||
0x1.5f7cff41e09afp-1, 0x1.6310b8f553048p-1, 0x1.66a008e4788ccp-1, | ||
0x1.6a2af9e5a0f0ap-1, 0x1.6db196a76194ap-1, 0x1.7133e9b156c7cp-1, | ||
0x1.74b1fd64e0754p-1, 0x1.782bdbfdda657p-1, 0x1.7ba18f93502e4p-1, | ||
0x1.7f1322182cf16p-1, 0x1.82809d5be7073p-1, 0x1.85ea0b0b27b26p-1, | ||
0x1.894f74b06ef8bp-1, 0x1.8cb0e3b4b3bbep-1, 0x1.900e6160002cdp-1, | ||
0x1.9367f6da0ab2fp-1, 0x1.96bdad2acb5f6p-1, 0x1.9a0f8d3b0e050p-1, | ||
0x1.9d5d9fd5010b3p-1, 0x1.a0a7eda4c112dp-1, 0x1.a3ee7f38e181fp-1, | ||
0x1.a7315d02f20c8p-1, 0x1.aa708f58014d3p-1, 0x1.adac1e711c833p-1, | ||
0x1.b0e4126bcc86cp-1, 0x1.b418734a9008cp-1, 0x1.b74948f5532dap-1, | ||
0x1.ba769b39e4964p-1, 0x1.bda071cc67e6ep-1, 0x1.c0c6d447c5dd3p-1, | ||
0x1.c3e9ca2e1a055p-1, 0x1.c7095ae91e1c7p-1, 0x1.ca258dca93316p-1, | ||
0x1.cd3e6a0ca8907p-1, 0x1.d053f6d260896p-1, 0x1.d3663b27f31d5p-1, | ||
0x1.d6753e032ea0fp-1, 0x1.d9810643d6615p-1, 0x1.dc899ab3ff56cp-1, | ||
0x1.df8f02086af2cp-1, 0x1.e29142e0e0140p-1, 0x1.e59063c8822cep-1, | ||
0x1.e88c6b3626a73p-1, 0x1.eb855f8ca88fbp-1, 0x1.ee7b471b3a950p-1, | ||
0x1.f16e281db7630p-1, 0x1.f45e08bcf0655p-1, 0x1.f74aef0efafaep-1, | ||
0x1.fa34e1177c233p-1, 0x1.fd1be4c7f2af9p-1}; | ||
|
||
INLINE_FMA | ||
LLVM_LIBC_FUNCTION(float, log2f, (float x)) { | ||
using FPBits = typename fputil::FPBits<float>; | ||
FPBits xbits(x); | ||
int m = 0; | ||
|
||
// Hard to round value(s). | ||
if (FPBits(x).uintval() == 0x3f81d0b5U) { | ||
int rounding_mode = fputil::get_round(); | ||
if (rounding_mode == FE_DOWNWARD || rounding_mode == FE_TOWARDZERO) { | ||
return 0x1.4cdc4cp-6f; | ||
} | ||
} | ||
|
||
// Exceptional inputs. | ||
if (xbits.uintval() < FPBits::MIN_NORMAL || | ||
xbits.uintval() > FPBits::MAX_NORMAL) { | ||
if (xbits.is_zero()) { | ||
return static_cast<float>(FPBits::neg_inf()); | ||
} | ||
if (xbits.get_sign() && !xbits.is_nan()) { | ||
return FPBits::build_nan(1 << (fputil::MantissaWidth<float>::VALUE - 1)); | ||
} | ||
if (xbits.is_inf_or_nan()) { | ||
return x; | ||
} | ||
// Normalize denormal inputs. | ||
xbits.val *= 0x1.0p23f; | ||
m = -23; | ||
} | ||
|
||
m += xbits.get_exponent(); | ||
// Set bits to 1.m | ||
xbits.set_unbiased_exponent(0x7F); | ||
// Get the 8 highest bits, use 7 bits (excluding the implicit hidden bit) for | ||
// lookup tables. | ||
int f_index = xbits.get_mantissa() >> 16; | ||
|
||
FPBits f(xbits.val); | ||
// Clear the lowest 16 bits. | ||
f.bits &= ~0x0000'FFFF; | ||
|
||
double d = static_cast<float>(xbits) - static_cast<float>(f); | ||
d *= ONE_OVER_F[f_index]; | ||
|
||
double extra_factor = static_cast<double>(m) + LOG2_F[f_index]; | ||
double r = __llvm_libc::fputil::polyeval( | ||
d, extra_factor, 0x1.71547652bd4fp+0, -0x1.7154769b978c7p-1, | ||
0x1.ec71a99e349c8p-2, -0x1.720d90e6aac6cp-2, 0x1.5132da3583dap-2); | ||
|
||
return static_cast<float>(r); | ||
} | ||
|
||
} // namespace __llvm_libc |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.