Skip to content
Permalink
Browse files

8235551: BitMap::count_one_bits should use population_count

Co-authored-by: Kim Barrett <kim.barrett@oracle.com>
Reviewed-by: kbarrett, neliasso
  • Loading branch information
cl4es and kimbarrett committed Dec 11, 2019
1 parent f51595b commit 91de8b6c912a3a600a204994f4730693186310c4
@@ -309,7 +309,7 @@ uint RegMask::Size() const {
uint sum = 0;
assert(valid_watermarks(), "sanity");
for (int i = _lwm; i <= _hwm; i++) {
sum += population_count(_A[i]);
sum += population_count((unsigned)_A[i]);
}
return sum;
}
@@ -29,6 +29,7 @@
#include "utilities/bitMap.inline.hpp"
#include "utilities/copy.hpp"
#include "utilities/debug.hpp"
#include "utilities/population_count.hpp"

STATIC_ASSERT(sizeof(BitMap::bm_word_t) == BytesPerWord); // "Implementation assumption."

@@ -646,50 +647,11 @@ bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) {
return true;
}

const BitMap::idx_t* BitMap::_pop_count_table = NULL;

void BitMap::init_pop_count_table() {
if (_pop_count_table == NULL) {
BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256, mtInternal);
for (uint i = 0; i < 256; i++) {
table[i] = num_set_bits(i);
}

if (!Atomic::replace_if_null(&_pop_count_table, table)) {
guarantee(_pop_count_table != NULL, "invariant");
FREE_C_HEAP_ARRAY(idx_t, table);
}
}
}

BitMap::idx_t BitMap::num_set_bits(bm_word_t w) {
idx_t bits = 0;

while (w != 0) {
while ((w & 1) == 0) {
w >>= 1;
}
bits++;
w >>= 1;
}
return bits;
}

BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) {
assert(_pop_count_table != NULL, "precondition");
return _pop_count_table[c];
}

BitMap::idx_t BitMap::count_one_bits() const {
init_pop_count_table(); // If necessary.
idx_t sum = 0;
typedef unsigned char uchar;
for (idx_t i = 0; i < size_in_words(); i++) {
bm_word_t w = map()[i];
for (size_t j = 0; j < sizeof(bm_word_t); j++) {
sum += num_set_bits_from_table(uchar(w & 255));
w >>= 8;
}
sum += population_count(w);
}
return sum;
}
@@ -169,12 +169,6 @@ class BitMap {
// Verify [beg,end) is a valid range, e.g. beg <= end <= size().
void verify_range(idx_t beg, idx_t end) const NOT_DEBUG_RETURN;

// Statistics.
static const idx_t* _pop_count_table;
static void init_pop_count_table();
static idx_t num_set_bits(bm_word_t w);
static idx_t num_set_bits_from_table(unsigned char c);

// Allocation Helpers.

// Allocates and clears the bitmap memory.
@@ -25,23 +25,47 @@
#ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP
#define SHARE_UTILITIES_POPULATION_COUNT_HPP

#include "metaprogramming/conditional.hpp"
#include "metaprogramming/enableIf.hpp"
#include "metaprogramming/isIntegral.hpp"
#include "metaprogramming/isSigned.hpp"
#include "utilities/debug.hpp"
#include "utilities/globalDefinitions.hpp"

// Returns the population count of x, i.e., the number of bits set in x.
//
// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2.
// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2 and the text that
// follows.
//
// Ideally this should be dispatched per platform to use optimized
// instructions when available, such as POPCNT on modern x86/AMD. Our builds
// still target and support older architectures that might lack support for
// these, however. For example, with current build configurations,
// __builtin_popcount(x) would generate a call to a similar but slower 64-bit
// version of this 32-bit implementation.
static uint32_t population_count(uint32_t x) {
x -= ((x >> 1) & 0x55555555);
x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
// these. For example, with current build configurations, __builtin_popcount(x)
// generate a call to a similar but slower 64-bit version when calling with
// a 32-bit integer type.
template <typename T>
inline unsigned population_count(T x) {
STATIC_ASSERT(BitsPerWord <= 128);
STATIC_ASSERT(BitsPerByte == 8);
STATIC_ASSERT(IsIntegral<T>::value);
STATIC_ASSERT(!IsSigned<T>::value);
// We need to take care with implicit integer promotion when dealing with
// integers < 32-bit. We chose to do this by explicitly widening constants
// to unsigned
typedef typename Conditional<(sizeof(T) < sizeof(unsigned)), unsigned, T>::type P;
const T all = ~T(0); // 0xFF..FF
const P fives = all/3; // 0x55..55
const P threes = (all/15) * 3; // 0x33..33
const P z_ones = all/255; // 0x0101..01
const P z_effs = z_ones * 15; // 0x0F0F..0F
P r = x;
r -= ((r >> 1) & fives);
r = (r & threes) + ((r >> 2) & threes);
r = ((r + (r >> 4)) & z_effs) * z_ones;
// The preceeding multiply by z_ones is the only place where the intermediate
// calculations can exceed the range of T. We need to discard any such excess
// before the right-shift, hence the conversion back to T.
return static_cast<T>(r) >> (((sizeof(T) - 1) * BitsPerByte));
}

#endif // SHARE_UTILITIES_POPULATION_COUNT_HPP
@@ -25,12 +25,13 @@
#include "precompiled.hpp"
#include "runtime/os.hpp"
#include "utilities/population_count.hpp"
#include "utilities/powerOfTwo.hpp"
#include "utilities/globalDefinitions.hpp"
#include "unittest.hpp"

#define BITS_IN_BYTE_ARRAY_SIZE 256

uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
const uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
@@ -49,30 +50,49 @@ uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = {
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};

TEST(population_count, sparse) {
template <typename T>
void sparse() {
const T max_val = max_value<T>();

// Step through the entire input range from a random starting point,
// verify population_count return values against the lookup table
// approach used historically
uint32_t step = 4711;
for (uint32_t value = os::random() % step; value < UINT_MAX - step; value += step) {
uint32_t lookup = test_popcnt_bitsInByte[(value >> 24) & 0xff] +
test_popcnt_bitsInByte[(value >> 16) & 0xff] +
test_popcnt_bitsInByte[(value >> 8) & 0xff] +
test_popcnt_bitsInByte[ value & 0xff];
T step = T(1) << ((sizeof(T) * 8) - 7);

for (T value = os::random() % step; value < max_val - step; value += step) {
uint64_t v = (uint64_t)value;
unsigned lookup = 0u;
for (unsigned i = 0u; i < sizeof(T); i++) {
lookup += test_popcnt_bitsInByte[v & 0xff];
v >>= 8u;
}
EXPECT_EQ(lookup, population_count(value))
<< "value = " << value;
}

// Test a few edge cases
EXPECT_EQ(0u, population_count(0u))
EXPECT_EQ(0u, population_count(T(0u)))
<< "value = " << 0;
EXPECT_EQ(1u, population_count(1u))
EXPECT_EQ(1u, population_count(T(1u)))
<< "value = " << 1;
EXPECT_EQ(1u, population_count(2u))
EXPECT_EQ(1u, population_count(T(2u)))
<< "value = " << 2;
EXPECT_EQ(32u, population_count(UINT_MAX))
<< "value = " << UINT_MAX;
EXPECT_EQ(31u, population_count(UINT_MAX - 1))
<< "value = " << (UINT_MAX - 1);
EXPECT_EQ(T(sizeof(T) * BitsPerByte), population_count(max_val))
<< "value = " << max_val;
EXPECT_EQ(T(sizeof(T) * BitsPerByte - 1u), population_count(T(max_val - 1u)))
<< "value = " << (max_val - 1u);
}


TEST(population_count, sparse8) {
sparse<uint8_t>();
}
TEST(population_count, sparse16) {
sparse<uint16_t>();
}
TEST(population_count, sparse32) {
sparse<uint32_t>();
}
TEST(population_count, sparse64) {
sparse<uint64_t>();
}

0 comments on commit 91de8b6

Please sign in to comment.
You can’t perform that action at this time.