117 changes: 59 additions & 58 deletions libc/src/__support/UInt.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ template <size_t Bits> struct UInt {

static_assert(Bits > 0 && Bits % 64 == 0,
"Number of bits in UInt should be a multiple of 64.");
// Number of 64-bit words backing the integer (Bits / 64), followed by the
// word storage itself. The rest of the struct treats val[0] as the least
// significant word: the uint64_t constructor stores its value in val[0], and
// the ordering comparisons start from the highest index.
static constexpr size_t WordCount = Bits / 64;
uint64_t val[WordCount];
static constexpr size_t WORDCOUNT = Bits / 64;
uint64_t val[WORDCOUNT];

static constexpr uint64_t MASK32 = 0xFFFFFFFFu;

Expand All @@ -38,45 +38,45 @@ template <size_t Bits> struct UInt {
// Default constructor. The body is empty, so no word of `val` is assigned
// here.
constexpr UInt() {}

// Copy constructor: copies every word of `other.val` into `val`, index by
// index.
constexpr UInt(const UInt<Bits> &other) {
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] = other.val[i];
}

// Converting constructor from a UInt of a different bit width.
// Words are read from `other` through its operator[].
template <size_t OtherBits> constexpr UInt(const UInt<OtherBits> &other) {
if (OtherBits >= Bits) {
// Source is at least as wide: keep only the lowest-indexed words that fit;
// any higher words of `other` are dropped.
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] = other[i];
} else {
// Source is narrower: copy all OtherBits / 64 of its words, then zero the
// remaining higher-indexed words of `val`.
size_t i = 0;
for (; i < OtherBits / 64; ++i)
val[i] = other[i];
for (; i < WordCount; ++i)
for (; i < WORDCOUNT; ++i)
val[i] = 0;
}
}

// Construct a UInt from a C array of N 64-bit words, where nums[i] becomes
// val[i]. The enable_if constraint only admits arrays with at most as many
// elements as the word count, so the min_wordcount computation below always
// yields N.
template <size_t N, enable_if_t<N <= WordCount, int> = 0>
template <size_t N, enable_if_t<N <= WORDCOUNT, int> = 0>
constexpr UInt(const uint64_t (&nums)[N]) {
size_t min_wordcount = N < WordCount ? N : WordCount;
size_t min_wordcount = N < WORDCOUNT ? N : WORDCOUNT;
size_t i = 0;
for (; i < min_wordcount; ++i)
val[i] = nums[i];

// If nums doesn't completely fill val, then fill the rest with zeroes.
for (; i < WordCount; ++i)
for (; i < WORDCOUNT; ++i)
val[i] = 0;
}

// Initialize the first word to |v| and the rest to 0.
// This constructor is not marked `explicit`, so a plain uint64_t converts
// implicitly to a UInt.
constexpr UInt(uint64_t v) {
val[0] = v;
for (size_t i = 1; i < WordCount; ++i) {
for (size_t i = 1; i < WORDCOUNT; ++i) {
val[i] = 0;
}
}
// Explicit constructor from a cpp::array holding exactly one element per
// word; words[i] is copied into val[i].
constexpr explicit UInt(const cpp::array<uint64_t, WordCount> &words) {
for (size_t i = 0; i < WordCount; ++i)
constexpr explicit UInt(const cpp::array<uint64_t, WORDCOUNT> &words) {
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] = words[i];
}

Expand All @@ -91,13 +91,13 @@ template <size_t Bits> struct UInt {
}

// Copy assignment: overwrites every word of `val` with the corresponding word
// of `other` and returns a reference to this object. Unlike the neighbouring
// members, this one is not declared constexpr.
UInt<Bits> &operator=(const UInt<Bits> &other) {
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] = other.val[i];
return *this;
}

constexpr bool is_zero() const {
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
if (val[i] != 0)
return false;
}
Expand All @@ -108,7 +108,7 @@ template <size_t Bits> struct UInt {
// Returns the carry value produced by the addition operation.
constexpr uint64_t add(const UInt<Bits> &x) {
SumCarry<uint64_t> s{0, 0};
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
s = add_with_carry(val[i], x.val[i], s.carry);
val[i] = s.sum;
}
Expand All @@ -118,7 +118,7 @@ template <size_t Bits> struct UInt {
constexpr UInt<Bits> operator+(const UInt<Bits> &other) const {
UInt<Bits> result;
SumCarry<uint64_t> s{0, 0};
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
s = add_with_carry(val[i], other.val[i], s.carry);
result.val[i] = s.sum;
}
Expand All @@ -134,7 +134,7 @@ template <size_t Bits> struct UInt {
// Returns the carry value produced by the subtraction operation.
constexpr uint64_t sub(const UInt<Bits> &x) {
DiffBorrow<uint64_t> d{0, 0};
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
d = sub_with_borrow(val[i], x.val[i], d.borrow);
val[i] = d.diff;
}
Expand All @@ -144,7 +144,7 @@ template <size_t Bits> struct UInt {
constexpr UInt<Bits> operator-(const UInt<Bits> &other) const {
UInt<Bits> result;
DiffBorrow<uint64_t> d{0, 0};
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
d = sub_with_borrow(val[i], other.val[i], d.borrow);
result.val[i] = d.diff;
}
Expand All @@ -166,7 +166,7 @@ template <size_t Bits> struct UInt {
constexpr uint64_t mul(uint64_t x) {
UInt<128> partial_sum(0);
uint64_t carry = 0;
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
NumberPair<uint64_t> prod = full_mul(val[i], x);
UInt<128> tmp({prod.lo, prod.hi});
carry += partial_sum.add(tmp);
Expand All @@ -179,13 +179,13 @@ template <size_t Bits> struct UInt {
}

constexpr UInt<Bits> operator*(const UInt<Bits> &other) const {
if constexpr (WordCount == 1) {
if constexpr (WORDCOUNT == 1) {
return {val[0] * other.val[0]};
} else {
UInt<Bits> result(0);
UInt<128> partial_sum(0);
uint64_t carry = 0;
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
for (size_t j = 0; j <= i; j++) {
NumberPair<uint64_t> prod = full_mul(val[j], other.val[i - j]);
UInt<128> tmp({prod.lo, prod.hi});
Expand All @@ -206,10 +206,11 @@ template <size_t Bits> struct UInt {
UInt<Bits + OtherBits> result(0);
UInt<128> partial_sum(0);
uint64_t carry = 0;
constexpr size_t OtherWordCount = UInt<OtherBits>::WordCount;
for (size_t i = 0; i <= WordCount + OtherWordCount - 2; ++i) {
const size_t lower_idx = i < OtherWordCount ? 0 : i - OtherWordCount + 1;
const size_t upper_idx = i < WordCount ? i : WordCount - 1;
constexpr size_t OTHER_WORDCOUNT = UInt<OtherBits>::WORDCOUNT;
for (size_t i = 0; i <= WORDCOUNT + OTHER_WORDCOUNT - 2; ++i) {
const size_t lower_idx =
i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1;
const size_t upper_idx = i < WORDCOUNT ? i : WORDCOUNT - 1;
for (size_t j = lower_idx; j <= upper_idx; ++j) {
NumberPair<uint64_t> prod = full_mul(val[j], other.val[i - j]);
UInt<128> tmp({prod.lo, prod.hi});
Expand All @@ -220,15 +221,15 @@ template <size_t Bits> struct UInt {
partial_sum.val[1] = carry;
carry = 0;
}
result.val[WordCount + OtherWordCount - 1] = partial_sum.val[0];
result.val[WORDCOUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0];
return result;
}

// Fast hi part of the full product. The normal product `operator*` returns
// `Bits` least significant bits of the full product, while this function will
// approximate `Bits` most significant bits of the full product with errors
// bounded by:
// 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b) <= WordCount - 1.
// 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b) <= WORDCOUNT - 1.
//
// An example usage of this is to quickly (but less accurately) compute the
// product of (normalized) mantissas of floating point numbers:
Expand All @@ -240,7 +241,7 @@ template <size_t Bits> struct UInt {
//
// Performance summary:
// Number of 64-bit x 64-bit -> 128-bit multiplications performed.
// Bits WordCount ful_mul quick_mul_hi Error bound
// Bits WORDCOUNT ful_mul quick_mul_hi Error bound
// 128 2 4 3 1
// 192 3 9 6 2
// 256 4 16 10 3
Expand All @@ -249,26 +250,26 @@ template <size_t Bits> struct UInt {
UInt<Bits> result(0);
UInt<128> partial_sum(0);
uint64_t carry = 0;
// First round of accumulation for those at WordCount - 1 in the full
// First round of accumulation for those at WORDCOUNT - 1 in the full
// product.
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
NumberPair<uint64_t> prod =
full_mul(val[i], other.val[WordCount - 1 - i]);
full_mul(val[i], other.val[WORDCOUNT - 1 - i]);
UInt<128> tmp({prod.lo, prod.hi});
carry += partial_sum.add(tmp);
}
for (size_t i = WordCount; i < 2 * WordCount - 1; ++i) {
for (size_t i = WORDCOUNT; i < 2 * WORDCOUNT - 1; ++i) {
partial_sum.val[0] = partial_sum.val[1];
partial_sum.val[1] = carry;
carry = 0;
for (size_t j = i - WordCount + 1; j < WordCount; ++j) {
for (size_t j = i - WORDCOUNT + 1; j < WORDCOUNT; ++j) {
NumberPair<uint64_t> prod = full_mul(val[j], other.val[i - j]);
UInt<128> tmp({prod.lo, prod.hi});
carry += partial_sum.add(tmp);
}
result.val[i - WordCount] = partial_sum.val[0];
result.val[i - WORDCOUNT] = partial_sum.val[0];
}
result.val[WordCount - 1] = partial_sum.val[1];
result.val[WORDCOUNT - 1] = partial_sum.val[1];
return result;
}

Expand Down Expand Up @@ -338,7 +339,7 @@ template <size_t Bits> struct UInt {

constexpr uint64_t clz() {
uint64_t leading_zeroes = 0;
for (size_t i = WordCount; i > 0; --i) {
for (size_t i = WORDCOUNT; i > 0; --i) {
if (val[i - 1] == 0) {
leading_zeroes += sizeof(uint64_t) * 8;
} else {
Expand Down Expand Up @@ -370,17 +371,17 @@ template <size_t Bits> struct UInt {

const size_t drop = s / 64; // Number of words to drop
const size_t shift = s % 64; // Bits to shift in the remaining words.
size_t i = WordCount;
size_t i = WORDCOUNT;

if (drop < WordCount) {
i = WordCount - 1;
if (drop < WORDCOUNT) {
i = WORDCOUNT - 1;
if (shift > 0) {
for (size_t j = WordCount - 1 - drop; j > 0; --i, --j) {
for (size_t j = WORDCOUNT - 1 - drop; j > 0; --i, --j) {
val[i] = (val[j] << shift) | (val[j - 1] >> (64 - shift));
}
val[i] = val[0] << shift;
} else {
for (size_t j = WordCount - 1 - drop; j > 0; --i, --j) {
for (size_t j = WORDCOUNT - 1 - drop; j > 0; --i, --j) {
val[i] = val[j];
}
val[i] = val[0];
Expand Down Expand Up @@ -427,21 +428,21 @@ template <size_t Bits> struct UInt {

size_t i = 0;

if (drop < WordCount) {
if (drop < WORDCOUNT) {
if (shift > 0) {
for (size_t j = drop; j < WordCount - 1; ++i, ++j) {
for (size_t j = drop; j < WORDCOUNT - 1; ++i, ++j) {
val[i] = (val[j] >> shift) | (val[j + 1] << (64 - shift));
}
val[i] = val[WordCount - 1] >> shift;
val[i] = val[WORDCOUNT - 1] >> shift;
++i;
} else {
for (size_t j = drop; j < WordCount; ++i, ++j) {
for (size_t j = drop; j < WORDCOUNT; ++i, ++j) {
val[i] = val[j];
}
}
}

for (; i < WordCount; ++i) {
for (; i < WORDCOUNT; ++i) {
val[i] = 0;
}
}
Expand All @@ -459,68 +460,68 @@ template <size_t Bits> struct UInt {

// Bitwise AND: returns a new UInt whose i-th word is val[i] & other.val[i].
// Neither operand is modified.
constexpr UInt<Bits> operator&(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
result.val[i] = val[i] & other.val[i];
return result;
}

// In-place bitwise AND: ANDs each word of `other` into the matching word of
// this object and returns a reference to it.
constexpr UInt<Bits> &operator&=(const UInt<Bits> &other) {
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] &= other.val[i];
return *this;
}

// Bitwise OR: returns a new UInt whose i-th word is val[i] | other.val[i].
// Neither operand is modified.
constexpr UInt<Bits> operator|(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
result.val[i] = val[i] | other.val[i];
return result;
}

// In-place bitwise OR: ORs each word of `other` into the matching word of
// this object and returns a reference to it.
constexpr UInt<Bits> &operator|=(const UInt<Bits> &other) {
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] |= other.val[i];
return *this;
}

// Bitwise XOR: returns a new UInt whose i-th word is val[i] ^ other.val[i].
// Neither operand is modified.
constexpr UInt<Bits> operator^(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
result.val[i] = val[i] ^ other.val[i];
return result;
}

// In-place bitwise XOR: XORs each word of `other` into the matching word of
// this object and returns a reference to it.
constexpr UInt<Bits> &operator^=(const UInt<Bits> &other) {
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
val[i] ^= other.val[i];
return *this;
}

// Bitwise NOT: returns a new UInt in which every word is the complement of
// the corresponding word of this object.
constexpr UInt<Bits> operator~() const {
UInt<Bits> result;
for (size_t i = 0; i < WordCount; ++i)
for (size_t i = 0; i < WORDCOUNT; ++i)
result.val[i] = ~val[i];
return result;
}

// Equality: true only if every word matches. Returns false as soon as the
// first differing word is found.
constexpr bool operator==(const UInt<Bits> &other) const {
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
if (val[i] != other.val[i])
return false;
}
return true;
}

// Inequality: true as soon as any pair of corresponding words differs, false
// if all words match.
constexpr bool operator!=(const UInt<Bits> &other) const {
for (size_t i = 0; i < WordCount; ++i) {
for (size_t i = 0; i < WORDCOUNT; ++i) {
if (val[i] != other.val[i])
return true;
}
return false;
}

constexpr bool operator>(const UInt<Bits> &other) const {
for (size_t i = WordCount; i > 0; --i) {
for (size_t i = WORDCOUNT; i > 0; --i) {
uint64_t word = val[i - 1];
uint64_t other_word = other.val[i - 1];
if (word > other_word)
Expand All @@ -533,7 +534,7 @@ template <size_t Bits> struct UInt {
}

constexpr bool operator>=(const UInt<Bits> &other) const {
for (size_t i = WordCount; i > 0; --i) {
for (size_t i = WORDCOUNT; i > 0; --i) {
uint64_t word = val[i - 1];
uint64_t other_word = other.val[i - 1];
if (word > other_word)
Expand All @@ -546,7 +547,7 @@ template <size_t Bits> struct UInt {
}

constexpr bool operator<(const UInt<Bits> &other) const {
for (size_t i = WordCount; i > 0; --i) {
for (size_t i = WORDCOUNT; i > 0; --i) {
uint64_t word = val[i - 1];
uint64_t other_word = other.val[i - 1];
if (word > other_word)
Expand All @@ -559,7 +560,7 @@ template <size_t Bits> struct UInt {
}

constexpr bool operator<=(const UInt<Bits> &other) const {
for (size_t i = WordCount; i > 0; --i) {
for (size_t i = WORDCOUNT; i > 0; --i) {
uint64_t word = val[i - 1];
uint64_t other_word = other.val[i - 1];
if (word > other_word)
Expand Down
12 changes: 6 additions & 6 deletions libc/src/math/generic/exp2f.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@

namespace __llvm_libc {

// 32-bit patterns that exp2f compares against x_u to detect its two
// exceptional inputs. The inline comments at the comparison sites give the
// corresponding values as x = 0x1.853a6ep-9f and x = -0x1.e7526ep-6f.
// The mask is the bitwise AND of the two patterns. exp2f first tests
// (x_u & mask) == mask, so an input lacking any bit common to both patterns
// skips the two exact comparisons.
constexpr uint32_t exval1 = 0x3b42'9d37U;
constexpr uint32_t exval2 = 0xbcf3'a937U;
constexpr uint32_t exval_mask = exval1 & exval2;
constexpr uint32_t EXVAL1 = 0x3b42'9d37U;
constexpr uint32_t EXVAL2 = 0xbcf3'a937U;
constexpr uint32_t EXVAL_MASK = EXVAL1 & EXVAL2;

LLVM_LIBC_FUNCTION(float, exp2f, (float x)) {
using FPBits = typename fputil::FPBits<float>;
Expand Down Expand Up @@ -73,11 +73,11 @@ LLVM_LIBC_FUNCTION(float, exp2f, (float x)) {
}

// Check exceptional values.
if (LIBC_UNLIKELY((x_u & exval_mask) == exval_mask)) {
if (LIBC_UNLIKELY(x_u == exval1)) { // x = 0x1.853a6ep-9f
if (LIBC_UNLIKELY((x_u & EXVAL_MASK) == EXVAL_MASK)) {
if (LIBC_UNLIKELY(x_u == EXVAL1)) { // x = 0x1.853a6ep-9f
if (fputil::get_round() == FE_TONEAREST)
return 0x1.00870ap+0f;
} else if (LIBC_UNLIKELY(x_u == exval2)) { // x = -0x1.e7526ep-6f
} else if (LIBC_UNLIKELY(x_u == EXVAL2)) { // x = -0x1.e7526ep-6f
if (fputil::get_round() == FE_TONEAREST)
return 0x1.f58d62p-1f;
}
Expand Down
1 change: 0 additions & 1 deletion libc/src/math/generic/log10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -917,7 +917,6 @@ double log10_accurate(int e_x, int index, double m_x) {

// Further range reductions.
double scale = 0x1.0p+7;
const fputil::DyadicFloat<128> NEG_ONE(-1.0);
for (size_t i = 0; i < R_STEPS; ++i) {
scale *= 0x1.0p+4;
int id = static_cast<int>(fputil::multiply_add(mx.hi, scale, 0x1.0p+4));
Expand Down
3 changes: 0 additions & 3 deletions libc/utils/HdrGen/PrototypeTestGen/.clang-tidy

This file was deleted.