diff --git a/src/hyperloglog.c b/src/hyperloglog.c index cb0929f47e894..4b4492c537980 100644 --- a/src/hyperloglog.c +++ b/src/hyperloglog.c @@ -186,6 +186,7 @@ struct hllhdr { static char *invalid_hll_err = "-INVALIDOBJ Corrupted HLL object detected"; + /* =========================== Low level bit macros ========================= */ /* Macros to access the dense representation. @@ -425,11 +426,19 @@ uint64_t MurmurHash64A (const void * key, int len, unsigned int seed) { return h; } +static int hll_de_bruijn_64_lookup[64] = { + 0, 1, 56, 2, 57, 49, 28, 3, 61, 58, 42, 50, 38, 29, 17, 4, + 62, 47, 59, 36, 45, 43, 51, 22, 53, 39, 33, 30, 24, 18, 12, 5, + 63, 55, 48, 27, 60, 41, 37, 16, 46, 35, 44, 21, 52, 32, 23, 11, + 54, 26, 40, 15, 34, 20, 31, 10, 25, 14, 19, 9, 13, 8, 7, 6, +}; + + /* Given a string element to add to the HyperLogLog, returns the length * of the pattern 000..1 of the element hash. As a side effect 'regp' is * set to the register index this element hashes to. */ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) { - uint64_t hash, bit, index; + uint64_t hash, index; int count; /* Count the number of zeroes starting from bit HLL_REGISTERS @@ -448,12 +457,24 @@ int hllPatLen(unsigned char *ele, size_t elesize, long *regp) { hash >>= HLL_P; /* Remove bits used to address the register. */ hash |= ((uint64_t)1<>= 58; + /* Initialized to 1 since we count the "00000...1" pattern. */ + count = hll_de_bruijn_64_lookup[hash] + 1; + *regp = (int) index; return count; }