Skip to content

Commit

Permalink
Plan C rough edge smoothing. Criteria for a hash split is now
Browse files Browse the repository at this point in the history
the earlier of "more keys than buckets" (the old test) or
linked list too long. Rehash is triggered after a split if the
longest linked list is too long.

p4raw-id: //depot/perl@21533
  • Loading branch information
nwc10 committed Oct 25, 2003
1 parent 6c0731c commit bc92a80
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 15 deletions.
50 changes: 35 additions & 15 deletions hv.c
Expand Up @@ -20,6 +20,8 @@
#define PERL_IN_HV_C
#include "perl.h"

/* Bucket chain length limit: the store routines compare the number of
   entries walked in a bucket's linked list against this value when
   deciding whether to call hsplit(), and hsplit() compares the longest
   chain against it to judge whether the split worked. */
#define HV_MAX_LENGTH_BEFORE_SPLIT 4

STATIC HE*
S_new_he(pTHX)
{
Expand Down Expand Up @@ -313,7 +315,7 @@ S_hv_fetch_flags(pTHX_ HV *hv, const char *key, I32 klen, I32 lval, int flags)
}
else
HeKFLAGS(entry) = flags;
if (flags)
if (flags & HVhek_ENABLEHVKFLAGS)
HvHASKFLAGS_on(hv);
}
if (flags & HVhek_FREEKEY)
Expand Down Expand Up @@ -487,7 +489,7 @@ Perl_hv_fetch_ent(pTHX_ HV *hv, SV *keysv, I32 lval, register U32 hash)
}
else
HeKFLAGS(entry) = flags;
if (flags)
if (flags & HVhek_ENABLEHVKFLAGS)
HvHASKFLAGS_on(hv);
}
if (key != keysave)
Expand Down Expand Up @@ -603,7 +605,7 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val,
register U32 hash, int flags)
{
register XPVHV* xhv;
register I32 i;
register U32 n_links;
register HE *entry;
register HE **oentry;

Expand Down Expand Up @@ -650,9 +652,10 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val,

/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
i = 1;

for (entry = *oentry; entry; i=0, entry = HeNEXT(entry)) {
n_links = 0;

for (entry = *oentry; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
Expand Down Expand Up @@ -719,9 +722,17 @@ Perl_hv_store_flags(pTHX_ HV *hv, const char *key, I32 klen, SV *val,
*oentry = entry;

xhv->xhv_keys++; /* HvKEYS(hv)++ */
if (i) { /* initial entry? */
if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
} else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
} else if ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT)
&& (!HvREHASH(hv)
|| (xhv->xhv_keys > (IV)xhv->xhv_max))) {
/* Use the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
splits on a rehashed hash, as we're not going to split it again,
and if someone is lucky (evil) enough to get all the keys in one
list they could exhaust our memory as we repeatedly double the
number of buckets on every entry. Linear search feels like the
lesser evil. */
hsplit(hv);
}

Expand Down Expand Up @@ -763,7 +774,7 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash)
XPVHV* xhv;
char *key;
STRLEN klen;
I32 i;
U32 n_links;
HE *entry;
HE **oentry;
bool is_utf8;
Expand Down Expand Up @@ -830,9 +841,9 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash)

/* oentry = &(HvARRAY(hv))[hash & (I32) HvMAX(hv)]; */
oentry = &((HE**)xhv->xhv_array)[hash & (I32) xhv->xhv_max];
i = 1;
n_links = 0;
entry = *oentry;
for (; entry; i=0, entry = HeNEXT(entry)) {
for (; entry; ++n_links, entry = HeNEXT(entry)) {
if (HeHASH(entry) != hash) /* strings can't be equal */
continue;
if (HeKLEN(entry) != (I32)klen)
Expand Down Expand Up @@ -886,10 +897,17 @@ Perl_hv_store_ent(pTHX_ HV *hv, SV *keysv, SV *val, U32 hash)
*oentry = entry;

xhv->xhv_keys++; /* HvKEYS(hv)++ */
if (i) { /* initial entry? */
if (!n_links) { /* initial entry? */
xhv->xhv_fill++; /* HvFILL(hv)++ */
} else if (xhv->xhv_keys > (IV)xhv->xhv_max /* HvKEYS(hv) > HvMAX(hv) */) {
hsplit(hv);
} else if ((xhv->xhv_keys > (IV)xhv->xhv_max)
|| ((n_links > HV_MAX_LENGTH_BEFORE_SPLIT) && !HvREHASH(hv))) {
/* Use only the old HvKEYS(hv) > HvMAX(hv) condition to limit bucket
splits on a rehashed hash, as we're not going to split it again,
and if someone is lucky (evil) enough to get all the keys in one
list they could exhaust our memory as we repeatedly double the
number of buckets on every entry. Linear search feels like the
lesser evil. */
hsplit(hv);
}

return entry;
Expand Down Expand Up @@ -1511,7 +1529,7 @@ S_hsplit(pTHX_ HV *hv)


/* Pick your policy for "hashing isn't working" here: */
if (longest_chain < 8 || longest_chain * 2 < HvTOTALKEYS(hv)
if (longest_chain <= HV_MAX_LENGTH_BEFORE_SPLIT /* split worked? */
|| HvREHASH(hv)) {
return;
}
Expand All @@ -1533,7 +1551,6 @@ S_hsplit(pTHX_ HV *hv)
xhv->xhv_fill = 0;
HvSHAREKEYS_off(hv);
HvREHASH_on(hv);
HvHASKFLAGS_on(hv);

aep = (HE **) xhv->xhv_array;

Expand Down Expand Up @@ -2385,6 +2402,9 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, register U32 hash, int flags)
if (!(Svp = hv_fetch(PL_strtab, str, len, FALSE)))
hv_store(PL_strtab, str, len, Nullsv, hash);
Can't rehash the shared string table, so not sure if it's worth
counting the number of entries in the linked list
*/
xhv = (XPVHV*)SvANY(PL_strtab);
/* assert(xhv_array != 0) */
Expand Down
11 changes: 11 additions & 0 deletions hv.h
Expand Up @@ -212,6 +212,7 @@ C<SV*>.
* is utf8 (including 8 bit keys that were entered as utf8, and need upgrading
* when retrieved during iteration). It may still be set when there are no longer
* any utf8 keys.
* See HVhek_ENABLEHVKFLAGS for the trigger.
*/
#define HvHASKFLAGS(hv) (SvFLAGS(hv) & SVphv_HASKFLAGS)
#define HvHASKFLAGS_on(hv) (SvFLAGS(hv) |= SVphv_HASKFLAGS)
Expand Down Expand Up @@ -283,6 +284,16 @@ C<SV*>.
* (may change, but Storable is a core module) */
#define HVhek_MASK 0xFF

/* Which flags enable HvHASKFLAGS? Somewhat a hack on a hack, as
HVhek_REHASH is only needed because the rehash flag has to be duplicated
into all keys as hv_iternext has no access to the hash flags. At this
point Storable's tests get upset, because sometimes hashes are "keyed"
and sometimes not, depending on the order of data insertion, and whether
it triggered rehashing. So currently HVhek_REHASH is exempt.
*/

#define HVhek_ENABLEHVKFLAGS (HVhek_MASK - HVhek_REHASH)

#define HEK_UTF8(hek) (HEK_FLAGS(hek) & HVhek_UTF8)
#define HEK_UTF8_on(hek) (HEK_FLAGS(hek) |= HVhek_UTF8)
#define HEK_UTF8_off(hek) (HEK_FLAGS(hek) &= ~HVhek_UTF8)
Expand Down

0 comments on commit bc92a80

Please sign in to comment.