Expand Up
@@ -27,8 +27,8 @@ template <size_t Bits> struct UInt {
static_assert (Bits > 0 && Bits % 64 == 0 ,
" Number of bits in UInt should be a multiple of 64." );
static constexpr size_t WordCount = Bits / 64 ;
uint64_t val[WordCount ];
static constexpr size_t WORDCOUNT = Bits / 64 ;
uint64_t val[WORDCOUNT ];
static constexpr uint64_t MASK32 = 0xFFFFFFFFu ;
Expand All
@@ -38,45 +38,45 @@ template <size_t Bits> struct UInt {
// Default constructor. The body is empty, so no word of `val` is assigned
// here.
constexpr UInt () {}
// Copy constructor: assigns every word of `other.val` to the word of `val`
// at the same index.
// NOTE(review): the two consecutive loop headers differ only in the spelling
// of the word-count constant (WordCount vs. WORDCOUNT); they look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt (const UInt<Bits> &other) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] = other.val [i];
}
// Converting constructor from a UInt with a different bit width.
//  - OtherBits >= Bits: only the first word-count words of `other` are read,
//    so any higher words of `other` are dropped.
//  - OtherBits <  Bits: the OtherBits / 64 words of `other` are copied and
//    the remaining words of `val` are set to 0.
// Words of `other` are read through `other[i]`.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
template <size_t OtherBits> constexpr UInt (const UInt<OtherBits> &other) {
if (OtherBits >= Bits) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] = other[i];
} else {
size_t i = 0 ;
for (; i < OtherBits / 64 ; ++i)
val[i] = other[i];
// Zero-fill the words that `other` does not cover.
for (; i < WordCount ; ++i)
for (; i < WORDCOUNT ; ++i)
val[i] = 0 ;
}
}
// Construct a UInt from a C array.
// `nums[i]` is stored into `val[i]`. The enable_if_t condition removes this
// overload when N is larger than the word count, so an array never holds
// more words than `val` can take.
// NOTE(review): paired WordCount / WORDCOUNT lines look like the before/after
// forms of a rename -- confirm which one is current.
template <size_t N, enable_if_t <N <= WordCount , int > = 0 >
template <size_t N, enable_if_t <N <= WORDCOUNT , int > = 0 >
constexpr UInt (const uint64_t (&nums)[N]) {
// Number of words to copy: the smaller of N and the word count.
size_t min_wordcount = N < WordCount ? N : WordCount ;
size_t min_wordcount = N < WORDCOUNT ? N : WORDCOUNT ;
size_t i = 0 ;
for (; i < min_wordcount; ++i)
val[i] = nums[i];
// If nums doesn't completely fill val, then fill the rest with zeroes.
for (; i < WordCount ; ++i)
for (; i < WORDCOUNT ; ++i)
val[i] = 0 ;
}
// Initialize the first word to |v| and the rest to 0.
// The constructor is not marked `explicit`, so a uint64_t converts to UInt
// implicitly.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt (uint64_t v) {
val[0 ] = v;
// Start at index 1: word 0 already holds |v|.
for (size_t i = 1 ; i < WordCount ; ++i) {
for (size_t i = 1 ; i < WORDCOUNT ; ++i) {
val[i] = 0 ;
}
}
// Explicit constructor from a cpp::array holding exactly one uint64_t per
// word: `words[i]` is copied into `val[i]`.
// NOTE(review): the signature and the loop header each appear twice, once
// with WordCount and once with WORDCOUNT; they look like the before/after
// forms of a rename -- confirm which one is current.
constexpr explicit UInt (const cpp::array<uint64_t , WordCount > &words) {
for (size_t i = 0 ; i < WordCount ; ++i)
constexpr explicit UInt (const cpp::array<uint64_t , WORDCOUNT > &words) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] = words[i];
}
Expand All
@@ -91,13 +91,13 @@ template <size_t Bits> struct UInt {
}
// Copy assignment: overwrites each word of `val` with the word of
// `other.val` at the same index and returns a reference to *this.
// Unlike the constructors above, this member is not declared constexpr.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
UInt<Bits> &operator =(const UInt<Bits> &other) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] = other.val [i];
return *this ;
}
constexpr bool is_zero () const {
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
if (val[i] != 0 )
return false ;
}
Expand All
@@ -108,7 +108,7 @@ template <size_t Bits> struct UInt {
// Returns the carry value produced by the addition operation.
constexpr uint64_t add (const UInt<Bits> &x) {
SumCarry<uint64_t > s{0 , 0 };
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
s = add_with_carry (val[i], x.val [i], s.carry );
val[i] = s.sum ;
}
Expand All
@@ -118,7 +118,7 @@ template <size_t Bits> struct UInt {
constexpr UInt<Bits> operator +(const UInt<Bits> &other) const {
UInt<Bits> result;
SumCarry<uint64_t > s{0 , 0 };
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
s = add_with_carry (val[i], other.val [i], s.carry );
result.val [i] = s.sum ;
}
Expand All
@@ -134,7 +134,7 @@ template <size_t Bits> struct UInt {
// Returns the carry value produced by the subtraction operation.
constexpr uint64_t sub (const UInt<Bits> &x) {
DiffBorrow<uint64_t > d{0 , 0 };
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
d = sub_with_borrow (val[i], x.val [i], d.borrow );
val[i] = d.diff ;
}
Expand All
@@ -144,7 +144,7 @@ template <size_t Bits> struct UInt {
constexpr UInt<Bits> operator -(const UInt<Bits> &other) const {
UInt<Bits> result;
DiffBorrow<uint64_t > d{0 , 0 };
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
d = sub_with_borrow (val[i], other.val [i], d.borrow );
result.val [i] = d.diff ;
}
Expand All
@@ -166,7 +166,7 @@ template <size_t Bits> struct UInt {
constexpr uint64_t mul (uint64_t x) {
UInt<128 > partial_sum (0 );
uint64_t carry = 0 ;
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
NumberPair<uint64_t > prod = full_mul (val[i], x);
UInt<128 > tmp ({prod.lo , prod.hi });
carry += partial_sum.add (tmp);
Expand All
@@ -179,13 +179,13 @@ template <size_t Bits> struct UInt {
}
constexpr UInt<Bits> operator *(const UInt<Bits> &other) const {
if constexpr (WordCount == 1 ) {
if constexpr (WORDCOUNT == 1 ) {
return {val[0 ] * other.val [0 ]};
} else {
UInt<Bits> result (0 );
UInt<128 > partial_sum (0 );
uint64_t carry = 0 ;
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
for (size_t j = 0 ; j <= i; j++) {
NumberPair<uint64_t > prod = full_mul (val[j], other.val [i - j]);
UInt<128 > tmp ({prod.lo , prod.hi });
Expand All
@@ -206,10 +206,11 @@ template <size_t Bits> struct UInt {
UInt<Bits + OtherBits> result (0 );
UInt<128 > partial_sum (0 );
uint64_t carry = 0 ;
constexpr size_t OtherWordCount = UInt<OtherBits>::WordCount;
for (size_t i = 0 ; i <= WordCount + OtherWordCount - 2 ; ++i) {
const size_t lower_idx = i < OtherWordCount ? 0 : i - OtherWordCount + 1 ;
const size_t upper_idx = i < WordCount ? i : WordCount - 1 ;
constexpr size_t OTHER_WORDCOUNT = UInt<OtherBits>::WORDCOUNT;
for (size_t i = 0 ; i <= WORDCOUNT + OTHER_WORDCOUNT - 2 ; ++i) {
const size_t lower_idx =
i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1 ;
const size_t upper_idx = i < WORDCOUNT ? i : WORDCOUNT - 1 ;
for (size_t j = lower_idx; j <= upper_idx; ++j) {
NumberPair<uint64_t > prod = full_mul (val[j], other.val [i - j]);
UInt<128 > tmp ({prod.lo , prod.hi });
Expand All
@@ -220,15 +221,15 @@ template <size_t Bits> struct UInt {
partial_sum.val [1 ] = carry;
carry = 0 ;
}
result.val [WordCount + OtherWordCount - 1 ] = partial_sum.val [0 ];
result.val [WORDCOUNT + OTHER_WORDCOUNT - 1 ] = partial_sum.val [0 ];
return result;
}
// Fast hi part of the full product. The normal product `operator*` returns
// `Bits` least significant bits of the full product, while this function will
// approximate `Bits` most significant bits of the full product with errors
// bounded by:
// 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b) <= WordCount - 1.
// 0 <= (a.full_mul(b) >> Bits) - a.quick_mul_hi(b) <= WORDCOUNT - 1.
//
// An example usage of this is to quickly (but less accurately) compute the
// product of (normalized) mantissas of floating point numbers:
Expand All
@@ -240,7 +241,7 @@ template <size_t Bits> struct UInt {
//
// Performance summary:
// Number of 64-bit x 64-bit -> 128-bit multiplications performed.
// Bits WordCount ful_mul quick_mul_hi Error bound
// Bits WORDCOUNT ful_mul quick_mul_hi Error bound
// 128 2 4 3 1
// 192 3 9 6 2
// 256 4 16 10 3
Expand All
@@ -249,26 +250,26 @@ template <size_t Bits> struct UInt {
UInt<Bits> result (0 );
UInt<128 > partial_sum (0 );
uint64_t carry = 0 ;
// First round of accumulation for those at WordCount - 1 in the full
// First round of accumulation for those at WORDCOUNT - 1 in the full
// product.
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
NumberPair<uint64_t > prod =
full_mul (val[i], other.val [WordCount - 1 - i]);
full_mul (val[i], other.val [WORDCOUNT - 1 - i]);
UInt<128 > tmp ({prod.lo , prod.hi });
carry += partial_sum.add (tmp);
}
for (size_t i = WordCount ; i < 2 * WordCount - 1 ; ++i) {
for (size_t i = WORDCOUNT ; i < 2 * WORDCOUNT - 1 ; ++i) {
partial_sum.val [0 ] = partial_sum.val [1 ];
partial_sum.val [1 ] = carry;
carry = 0 ;
for (size_t j = i - WordCount + 1 ; j < WordCount ; ++j) {
for (size_t j = i - WORDCOUNT + 1 ; j < WORDCOUNT ; ++j) {
NumberPair<uint64_t > prod = full_mul (val[j], other.val [i - j]);
UInt<128 > tmp ({prod.lo , prod.hi });
carry += partial_sum.add (tmp);
}
result.val [i - WordCount ] = partial_sum.val [0 ];
result.val [i - WORDCOUNT ] = partial_sum.val [0 ];
}
result.val [WordCount - 1 ] = partial_sum.val [1 ];
result.val [WORDCOUNT - 1 ] = partial_sum.val [1 ];
return result;
}
Expand Down
Expand Up
@@ -338,7 +339,7 @@ template <size_t Bits> struct UInt {
constexpr uint64_t clz () {
uint64_t leading_zeroes = 0 ;
for (size_t i = WordCount ; i > 0 ; --i) {
for (size_t i = WORDCOUNT ; i > 0 ; --i) {
if (val[i - 1 ] == 0 ) {
leading_zeroes += sizeof (uint64_t ) * 8 ;
} else {
Expand Down
Expand Up
@@ -370,17 +371,17 @@ template <size_t Bits> struct UInt {
const size_t drop = s / 64 ; // Number of words to drop
const size_t shift = s % 64 ; // Bits to shift in the remaining words.
size_t i = WordCount ;
size_t i = WORDCOUNT ;
if (drop < WordCount ) {
i = WordCount - 1 ;
if (drop < WORDCOUNT ) {
i = WORDCOUNT - 1 ;
if (shift > 0 ) {
for (size_t j = WordCount - 1 - drop; j > 0 ; --i, --j) {
for (size_t j = WORDCOUNT - 1 - drop; j > 0 ; --i, --j) {
val[i] = (val[j] << shift) | (val[j - 1 ] >> (64 - shift));
}
val[i] = val[0 ] << shift;
} else {
for (size_t j = WordCount - 1 - drop; j > 0 ; --i, --j) {
for (size_t j = WORDCOUNT - 1 - drop; j > 0 ; --i, --j) {
val[i] = val[j];
}
val[i] = val[0 ];
Expand Down
Expand Up
@@ -427,21 +428,21 @@ template <size_t Bits> struct UInt {
size_t i = 0 ;
if (drop < WordCount ) {
if (drop < WORDCOUNT ) {
if (shift > 0 ) {
for (size_t j = drop; j < WordCount - 1 ; ++i, ++j) {
for (size_t j = drop; j < WORDCOUNT - 1 ; ++i, ++j) {
val[i] = (val[j] >> shift) | (val[j + 1 ] << (64 - shift));
}
val[i] = val[WordCount - 1 ] >> shift;
val[i] = val[WORDCOUNT - 1 ] >> shift;
++i;
} else {
for (size_t j = drop; j < WordCount ; ++i, ++j) {
for (size_t j = drop; j < WORDCOUNT ; ++i, ++j) {
val[i] = val[j];
}
}
}
for (; i < WordCount ; ++i) {
for (; i < WORDCOUNT ; ++i) {
val[i] = 0 ;
}
}
Expand All
@@ -459,68 +460,68 @@ template <size_t Bits> struct UInt {
// Bitwise AND. Returns a new UInt in which each word is the AND of the
// words of *this and `other` at the same index; neither operand is modified.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> operator &(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
result.val [i] = val[i] & other.val [i];
return result;
}
// In-place bitwise AND. ANDs each word of `val` with the word of
// `other.val` at the same index and returns a reference to *this.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> &operator &=(const UInt<Bits> &other) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] &= other.val [i];
return *this ;
}
// Bitwise OR. Returns a new UInt in which each word is the OR of the words
// of *this and `other` at the same index; neither operand is modified.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> operator |(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
result.val [i] = val[i] | other.val [i];
return result;
}
// In-place bitwise OR. ORs each word of `val` with the word of `other.val`
// at the same index and returns a reference to *this.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> &operator |=(const UInt<Bits> &other) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] |= other.val [i];
return *this ;
}
// Bitwise XOR. Returns a new UInt in which each word is the XOR of the
// words of *this and `other` at the same index; neither operand is modified.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> operator ^(const UInt<Bits> &other) const {
UInt<Bits> result;
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
result.val [i] = val[i] ^ other.val [i];
return result;
}
// In-place bitwise XOR. XORs each word of `val` with the word of
// `other.val` at the same index and returns a reference to *this.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> &operator ^=(const UInt<Bits> &other) {
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
val[i] ^= other.val [i];
return *this ;
}
// Bitwise NOT. Returns a new UInt in which each word is the complement of
// the word of *this at the same index; *this is not modified.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr UInt<Bits> operator ~() const {
UInt<Bits> result;
for (size_t i = 0 ; i < WordCount ; ++i)
for (size_t i = 0 ; i < WORDCOUNT ; ++i)
result.val [i] = ~val[i];
return result;
}
// Equality. Compares the two values word by word and returns false at the
// first index where the words differ; returns true if no word differs.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr bool operator ==(const UInt<Bits> &other) const {
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
if (val[i] != other.val [i])
return false ;
}
return true ;
}
// Inequality. Compares the two values word by word and returns true at the
// first index where the words differ; returns false if no word differs.
// NOTE(review): paired WordCount / WORDCOUNT loop headers look like the
// before/after forms of a rename -- confirm which one is current.
constexpr bool operator !=(const UInt<Bits> &other) const {
for (size_t i = 0 ; i < WordCount ; ++i) {
for (size_t i = 0 ; i < WORDCOUNT ; ++i) {
if (val[i] != other.val [i])
return true ;
}
return false ;
}
constexpr bool operator >(const UInt<Bits> &other) const {
for (size_t i = WordCount ; i > 0 ; --i) {
for (size_t i = WORDCOUNT ; i > 0 ; --i) {
uint64_t word = val[i - 1 ];
uint64_t other_word = other.val [i - 1 ];
if (word > other_word)
Expand All
@@ -533,7 +534,7 @@ template <size_t Bits> struct UInt {
}
constexpr bool operator >=(const UInt<Bits> &other) const {
for (size_t i = WordCount ; i > 0 ; --i) {
for (size_t i = WORDCOUNT ; i > 0 ; --i) {
uint64_t word = val[i - 1 ];
uint64_t other_word = other.val [i - 1 ];
if (word > other_word)
Expand All
@@ -546,7 +547,7 @@ template <size_t Bits> struct UInt {
}
constexpr bool operator <(const UInt<Bits> &other) const {
for (size_t i = WordCount ; i > 0 ; --i) {
for (size_t i = WORDCOUNT ; i > 0 ; --i) {
uint64_t word = val[i - 1 ];
uint64_t other_word = other.val [i - 1 ];
if (word > other_word)
Expand All
@@ -559,7 +560,7 @@ template <size_t Bits> struct UInt {
}
constexpr bool operator <=(const UInt<Bits> &other) const {
for (size_t i = WordCount ; i > 0 ; --i) {
for (size_t i = WORDCOUNT ; i > 0 ; --i) {
uint64_t word = val[i - 1 ];
uint64_t other_word = other.val [i - 1 ];
if (word > other_word)
Expand Down