diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp index fdd5d538533..94070721f58 100644 --- a/src/hotspot/share/opto/regmask.cpp +++ b/src/hotspot/share/opto/regmask.cpp @@ -309,7 +309,7 @@ uint RegMask::Size() const { uint sum = 0; assert(valid_watermarks(), "sanity"); for (int i = _lwm; i <= _hwm; i++) { - sum += population_count(_A[i]); + sum += population_count((unsigned)_A[i]); } return sum; } diff --git a/src/hotspot/share/utilities/bitMap.cpp b/src/hotspot/share/utilities/bitMap.cpp index 83bbd20958a..db8b47baae0 100644 --- a/src/hotspot/share/utilities/bitMap.cpp +++ b/src/hotspot/share/utilities/bitMap.cpp @@ -29,6 +29,7 @@ #include "utilities/bitMap.inline.hpp" #include "utilities/copy.hpp" #include "utilities/debug.hpp" +#include "utilities/population_count.hpp" STATIC_ASSERT(sizeof(BitMap::bm_word_t) == BytesPerWord); // "Implementation assumption." @@ -646,50 +647,11 @@ bool BitMap::iterate(BitMapClosure* blk, idx_t leftOffset, idx_t rightOffset) { return true; } -const BitMap::idx_t* BitMap::_pop_count_table = NULL; - -void BitMap::init_pop_count_table() { - if (_pop_count_table == NULL) { - BitMap::idx_t *table = NEW_C_HEAP_ARRAY(idx_t, 256, mtInternal); - for (uint i = 0; i < 256; i++) { - table[i] = num_set_bits(i); - } - - if (!Atomic::replace_if_null(&_pop_count_table, table)) { - guarantee(_pop_count_table != NULL, "invariant"); - FREE_C_HEAP_ARRAY(idx_t, table); - } - } -} - -BitMap::idx_t BitMap::num_set_bits(bm_word_t w) { - idx_t bits = 0; - - while (w != 0) { - while ((w & 1) == 0) { - w >>= 1; - } - bits++; - w >>= 1; - } - return bits; -} - -BitMap::idx_t BitMap::num_set_bits_from_table(unsigned char c) { - assert(_pop_count_table != NULL, "precondition"); - return _pop_count_table[c]; -} - BitMap::idx_t BitMap::count_one_bits() const { - init_pop_count_table(); // If necessary. idx_t sum = 0; - typedef unsigned char uchar; for (idx_t i = 0; i < size_in_words(); i++) { bm_word_t w = map()[i]; - for (size_t j = 0; j < sizeof(bm_word_t); j++) { - sum += num_set_bits_from_table(uchar(w & 255)); - w >>= 8; - } + sum += population_count(w); } return sum; } diff --git a/src/hotspot/share/utilities/bitMap.hpp b/src/hotspot/share/utilities/bitMap.hpp index d849389e337..85b80d30f2e 100644 --- a/src/hotspot/share/utilities/bitMap.hpp +++ b/src/hotspot/share/utilities/bitMap.hpp @@ -169,12 +169,6 @@ class BitMap { // Verify [beg,end) is a valid range, e.g. beg <= end <= size(). void verify_range(idx_t beg, idx_t end) const NOT_DEBUG_RETURN; - // Statistics. - static const idx_t* _pop_count_table; - static void init_pop_count_table(); - static idx_t num_set_bits(bm_word_t w); - static idx_t num_set_bits_from_table(unsigned char c); - // Allocation Helpers. // Allocates and clears the bitmap memory. diff --git a/src/hotspot/share/utilities/population_count.hpp b/src/hotspot/share/utilities/population_count.hpp index 16811b1aa53..a88a27246fc 100644 --- a/src/hotspot/share/utilities/population_count.hpp +++ b/src/hotspot/share/utilities/population_count.hpp @@ -25,23 +25,47 @@ #ifndef SHARE_UTILITIES_POPULATION_COUNT_HPP #define SHARE_UTILITIES_POPULATION_COUNT_HPP +#include "metaprogramming/conditional.hpp" +#include "metaprogramming/enableIf.hpp" +#include "metaprogramming/isIntegral.hpp" +#include "metaprogramming/isSigned.hpp" #include "utilities/debug.hpp" #include "utilities/globalDefinitions.hpp" // Returns the population count of x, i.e., the number of bits set in x. // -// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2. +// Adapted from Hacker's Delight, 2nd Edition, Figure 5-2 and the text that +// follows. // // Ideally this should be dispatched per platform to use optimized // instructions when available, such as POPCNT on modern x86/AMD. Our builds // still target and support older architectures that might lack support for -// these, however. For example, with current build configurations, -// __builtin_popcount(x) would generate a call to a similar but slower 64-bit -// version of this 32-bit implementation. -static uint32_t population_count(uint32_t x) { - x -= ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - return (((x + (x >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24; +// these. For example, with current build configurations, __builtin_popcount(x) +// generate a call to a similar but slower 64-bit version when calling with +// a 32-bit integer type. +template +inline unsigned population_count(T x) { + STATIC_ASSERT(BitsPerWord <= 128); + STATIC_ASSERT(BitsPerByte == 8); + STATIC_ASSERT(IsIntegral::value); + STATIC_ASSERT(!IsSigned::value); + // We need to take care with implicit integer promotion when dealing with + // integers < 32-bit. We chose to do this by explicitly widening constants + // to unsigned + typedef typename Conditional<(sizeof(T) < sizeof(unsigned)), unsigned, T>::type P; + const T all = ~T(0); // 0xFF..FF + const P fives = all/3; // 0x55..55 + const P threes = (all/15) * 3; // 0x33..33 + const P z_ones = all/255; // 0x0101..01 + const P z_effs = z_ones * 15; // 0x0F0F..0F + P r = x; + r -= ((r >> 1) & fives); + r = (r & threes) + ((r >> 2) & threes); + r = ((r + (r >> 4)) & z_effs) * z_ones; + // The preceeding multiply by z_ones is the only place where the intermediate + // calculations can exceed the range of T. We need to discard any such excess + // before the right-shift, hence the conversion back to T. + return static_cast(r) >> (((sizeof(T) - 1) * BitsPerByte)); } #endif // SHARE_UTILITIES_POPULATION_COUNT_HPP diff --git a/test/hotspot/gtest/utilities/test_population_count.cpp b/test/hotspot/gtest/utilities/test_population_count.cpp index 835d12d3e1f..a5320817ac2 100644 --- a/test/hotspot/gtest/utilities/test_population_count.cpp +++ b/test/hotspot/gtest/utilities/test_population_count.cpp @@ -25,12 +25,13 @@ #include "precompiled.hpp" #include "runtime/os.hpp" #include "utilities/population_count.hpp" +#include "utilities/powerOfTwo.hpp" #include "utilities/globalDefinitions.hpp" #include "unittest.hpp" #define BITS_IN_BYTE_ARRAY_SIZE 256 -uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = { +const uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = { 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, @@ -49,30 +50,49 @@ uint8_t test_popcnt_bitsInByte[BITS_IN_BYTE_ARRAY_SIZE] = { 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 }; -TEST(population_count, sparse) { +template +void sparse() { + const T max_val = max_value(); + // Step through the entire input range from a random starting point, // verify population_count return values against the lookup table // approach used historically - uint32_t step = 4711; - for (uint32_t value = os::random() % step; value < UINT_MAX - step; value += step) { - uint32_t lookup = test_popcnt_bitsInByte[(value >> 24) & 0xff] + - test_popcnt_bitsInByte[(value >> 16) & 0xff] + - test_popcnt_bitsInByte[(value >> 8) & 0xff] + - test_popcnt_bitsInByte[ value & 0xff]; + T step = T(1) << ((sizeof(T) * 8) - 7); + for (T value = os::random() % step; value < max_val - step; value += step) { + uint64_t v = (uint64_t)value; + unsigned lookup = 0u; + for (unsigned i = 0u; i < sizeof(T); i++) { + lookup += test_popcnt_bitsInByte[v & 0xff]; + v >>= 8u; + } EXPECT_EQ(lookup, population_count(value)) << "value = " << value; } // Test a few edge cases - EXPECT_EQ(0u, population_count(0u)) + EXPECT_EQ(0u, population_count(T(0u))) << "value = " << 0; - EXPECT_EQ(1u, population_count(1u)) + EXPECT_EQ(1u, population_count(T(1u))) << "value = " << 1; - EXPECT_EQ(1u, population_count(2u)) + EXPECT_EQ(1u, population_count(T(2u))) << "value = " << 2; - EXPECT_EQ(32u, population_count(UINT_MAX)) - << "value = " << UINT_MAX; - EXPECT_EQ(31u, population_count(UINT_MAX - 1)) - << "value = " << (UINT_MAX - 1); + EXPECT_EQ(T(sizeof(T) * BitsPerByte), population_count(max_val)) + << "value = " << max_val; + EXPECT_EQ(T(sizeof(T) * BitsPerByte - 1u), population_count(T(max_val - 1u))) + << "value = " << (max_val - 1u); +} + + +TEST(population_count, sparse8) { + sparse(); +} +TEST(population_count, sparse16) { + sparse(); +} +TEST(population_count, sparse32) { + sparse(); } +TEST(population_count, sparse64) { + sparse(); +} \ No newline at end of file