Skip to content
This repository has been archived by the owner on Jun 1, 2023. It is now read-only.

Commit

Permalink
HeArray: remove hek_hash and refcounted_he_hash
Browse files Browse the repository at this point in the history
Calculate hashes on demand instead of storing them in the HEK,
to make the HEK shorter and fit more entries into a cache line.
HEK_HASH(hek) is now invalid and gone.
Use the new HeHASH_calc(he), HEK_HASH_calc(hek), SvSHARED_HASH_calc(sv)
instead.
See http://www.ilikebigbits.com/blog/2016/8/28/designing-a-fast-hash-table
for benchmarks (HashCache).

Using 4 tests in the hot hash loop also does not make much sense
when checking the length and the string is enough to weed out
collisions.
This strategy, recomputing the hash when needed, is so far 1-7% slower,
but we hope to get back up to speed with the HeARRAY patch. See below.

The end goal is to get rid of linked lists and store the collisions
inline in consecutive memory, in a HekARRAY. (len,cmp-flags,char*,other-flags,val)
Measurements in "Cache-Conscious Collision Resolution in String Hash Tables"
by Nikolas Askitis and Justin Zobel, Melbourne 2005 show that this is the
fastest strategy for Open Hashing (chained) tables.
See GH #24 and GH #102

The next idea is to use MSB varint encoding of the str length in a HEK,
because our strings are usually short, len < 63, fits into one byte.
We can then merge it with the cmp-flags, the flags only needed for comparison.
See https://techoverflow.net/blog/2013/01/25/efficiently-encoding-variable-length-integers-in-cc/
or just <63 one byte, >63 MSB: I32 len.
Note that the 1st MSB bit is already taken for UTF8.
  • Loading branch information
Reini Urban authored and rurban committed Jun 26, 2019
1 parent b1a34c8 commit d5f527e
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 85 deletions.
10 changes: 6 additions & 4 deletions dump.c
Expand Up @@ -2707,7 +2707,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
}
count++;

hash = HeHASH(he);
/*hash = HeHASH(he);*/
keysv = hv_iterkeysv(he);
keypv = SvPV_const(keysv, len);
elt = HeVAL(he);
Expand All @@ -2720,7 +2720,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest,
UNI_DISPLAY_QQ));
if (HvEITER_get(hv) == he)
PerlIO_printf(file, "[CURRENT] ");
PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);
/*PerlIO_printf(file, "HASH = 0x%" UVxf "\n", (UV) hash);*/
do_sv_dump(level+1, file, elt, nest+1, maxnest, dumpops, pvlim);
}
}
Expand Down Expand Up @@ -3264,11 +3264,13 @@ Perl_deb_hek(pTHX_ HEK* hek, SV* val)
else if (HEK_IS_SVKEY(hek)) {
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
SV* sv = *(SV**)HEK_KEY(hek);
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)HEK_HASH(hek),
U32 hash = HEK_HASH_calc(hek);
PerlIO_printf(Perl_debug_log, " [0x%08x SV:\"%s\" ", (unsigned)hash,
pretty_pv_escape( tmp, SvPVX_const(sv), SvCUR(sv), SvUTF8(sv)));
} else {
SV * const tmp = newSVpvs_flags("", SVs_TEMP);
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)HEK_HASH(hek),
U32 hash = HEK_HASH_calc(hek);
PerlIO_printf(Perl_debug_log, " [0x%08x \"%s\" ", (unsigned)hash,
pretty_pv_escape( tmp, HEK_KEY(hek), HEK_LEN(hek), HEK_UTF8(hek)));
if (HEK_FLAGS(hek) > 1)
PerlIO_printf(Perl_debug_log, "0x%x ", HEK_FLAGS(hek));
Expand Down
4 changes: 4 additions & 0 deletions embed.fnc
Expand Up @@ -743,6 +743,7 @@ AbmdRp |bool |hv_exists |NULLOK HV *hv|NN const char *key|I32 klen
AbmdRp |bool |hv_exists_ent |NULLOK HV *hv|NN SV *keysv|U32 hash
Abmdp |SV** |hv_fetch |NULLOK HV *hv|NN const char *key|I32 klen \
|I32 lval
: hash ignored
Abmdp |HE* |hv_fetch_ent |NULLOK HV *hv|NN SV *keysv|I32 lval|U32 hash
#if defined(USE_CPERL)
Ap |void* |hv_common |NULLOK HV *hv|NULLOK SV *keysv \
Expand Down Expand Up @@ -2121,6 +2122,7 @@ Abpd |SSize_t|unpack_str |NN const char *pat|NN const char *patend|NN const char
|I32 ocnt|U32 flags
Apd |SSize_t|unpackstring |NN const char *pat|NN const char *patend|NN const char *s \
|NN const char *strend|U32 flags
: hash ignored
Ap |void |unsharepvn |NULLOK const char* sv|I32 len|U32 hash
: Used in gv.c, hv.c
#if defined(USE_CPERL)
Expand Down Expand Up @@ -2517,10 +2519,12 @@ s |void |hsplit |NN HV *hv|STRLEN const oldsize|STRLEN newsize
s |void |hv_free_entries|NN HV *hv
s |SV* |hv_free_ent_ret|NN HV *hv|NN HE *entry
sR |HE* |new_he
: hash ignored
sanR |HEK* |save_hek_flags |NN const char *str|I32 len|U32 hash|int flags
sn |void |hv_magic_check |NN HV *hv|NN bool *needs_copy|NN bool *needs_store
s |void |unshare_hek_or_pvn|NULLOK const HEK* hek|NULLOK const char* str|I32 len|U32 hash
# if defined(USE_CPERL)
: hash mandatory
sR |HEK* |share_hek_flags|NN const char *str|I32 len|U32 hash|int flags
# else
: a perl5 security risk
Expand Down
9 changes: 9 additions & 0 deletions ext/B/B.xs
Expand Up @@ -2362,6 +2362,15 @@ HeKEY(he)
U32
HeHASH(he)
B::HE he
PREINIT:
/* Receives the hash value computed from the key below. */
U32 hash = 0;
HEK *hek;
CODE:
/* Compute the hash on demand from the entry's key and return it. */
hek = HeKEY_hek(he);
/* NOTE(review): the key is hashed via HEK_KEY/HEK_LEN as-is; confirm this
 * is correct for entries whose key is stored as an SV (HEK_IS_SVKEY). */
PERL_HASH(hash, HEK_KEY(hek), HEK_LEN(hek));
RETVAL = hash;
OUTPUT:
RETVAL

I32
HeKLEN(he)
Expand Down

0 comments on commit d5f527e

Please sign in to comment.