From a2ab02be1ee963ccdf10cad90563a1bf6189ddb0 Mon Sep 17 00:00:00 2001 From: Reini Urban Date: Mon, 29 Aug 2016 21:09:50 +0200 Subject: [PATCH] open hash: use open addressing with linear probing (WIP) and a layout similar to array_he, inlined 1-2 word hash array. variants to try: simple linear, double, quadratic, robin-hood, hopscotch. khash tried double hashing and found it slower. robin-hood with backward deletion propagation would be an alternative, but first we need to re-architecture and abstract the various probing implementations and search loops scattered all over. See GH #24 --- dump.c | 13 +- gv.c | 2 +- hv.c | 113 +++-- hv.h | 178 +++++--- hv_func.h | 1171 ++-------------------------------------------------- mro_core.c | 4 +- perl.c | 2 +- sv.c | 4 +- 8 files changed, 208 insertions(+), 1279 deletions(-) diff --git a/dump.c b/dump.c index 098fd8e4106..dab1a5f7c57 100644 --- a/dump.c +++ b/dump.c @@ -804,7 +804,7 @@ Perl_dump_packsubs_perl(pTHX_ const HV *stash, bool justperl) return; for (i = 0; i <= HvMAX(stash); i++) { const HE *entry = AHe(HvARRAY(stash)[i]); - HE_EACH(hv, entry, { + HE_EACH(stash, i, entry, { GV * gv = (GV *)HeVAL(entry); if (SvROK(gv) && SvTYPE(SvRV(gv)) == SVt_PVCV) /* unfake a fake GV */ @@ -2486,7 +2486,7 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, for (i = 0; i <= HvMAX(sv); i++) { HE* h = AHe(HvARRAY(sv)[i]); U32 count = 0; - HE_EACH(sv, h, count++); + HE_EACH(sv, i, h, count++); if (count > FREQ_MAX) count = FREQ_MAX; freq[count]++; @@ -2682,14 +2682,15 @@ Perl_do_sv_dump(pTHX_ I32 level, PerlIO *file, SV *sv, I32 nest, I32 maxnest, } if (nest < maxnest) { HV * const hv = MUTABLE_HV(sv); + if (HvARRAY(hv)) { HE *he; U32 i; U32 count = 0; U32 maxcount = maxnest - nest; for (i=0; i <= HvMAX(hv); i++) { - he = AHe(HvARRAY(hv)[i]); - HE_EACH(hv, he, { + HE *he = AHe(HvARRAY(hv)[i]); + HE_EACH(hv, i, he, { U32 hash; SV * keysv; SV * elt; @@ -3313,7 +3314,7 @@ Perl_deb_hechain(pTHX_ HE* entry) assert(HeHASH(entry)); assert(entry != HeNEXT(entry)); assert(i <= PERL_ARENA_SIZE/sizeof(HE)); - }) + } PerlIO_printf(Perl_debug_log, " )\n"); } #endif @@ -3997,7 +3998,7 @@ S__hv_dump(pTHX_ SV* sv, bool with_values, int level) for (i = 0; i <= HvMAX(sv); i++) { HE* h = AHe(ents[i]); PerlIO_printf(file, "[%u]: ", (unsigned)i); - HE_EACH(sv, h, { + HE_EACH(sv, i, h, { #if defined(PERL_INLINE_HASH) && defined(DEBUGGING) if (!ents[i].hent_hash) continue; #endif diff --git a/gv.c b/gv.c index abba02c3e83..2e2a51d1f96 100644 --- a/gv.c +++ b/gv.c @@ -2638,7 +2638,7 @@ Perl_gv_check(pTHX_ HV *stash) /* mark stash is being scanned, to avoid recursing */ HvAUX(stash)->xhv_aux_flags |= HvAUXf_SCAN_STASH; - HE_EACH(stash, entry, { + HE_EACH(stash, i, entry, { GV *gv; HV *hv; STRLEN keylen = HeKLEN(entry); diff --git a/hv.c b/hv.c index 21f7cd508af..dadb266667d 100644 --- a/hv.c +++ b/hv.c @@ -2,28 +2,26 @@ * * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others + * Copyright (C) 2016 cPanel Inc. * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * */ -/* - * I sit beside the fire and think - * of all that I have seen. - * --Bilbo - * - * [p.278 of _The Lord of the Rings_, II/iii: "The Ring Goes South"] - */ - /* =head1 Hash Manipulation Functions -A HV structure represents a Perl hash. It consists mainly of an array -of pointers, each of which points to a linked list of HE structures. The -array is indexed by the hash function of the key, so each linked list -represents all the hash entries with the same hash value. Each HE contains -a pointer to the actual value, plus a pointer to a HEK structure which -holds the key and hash value. + +A HV structure represents a Perl hash. It consists mainly of an array +of hash entries (HE). The array is indexed by the hash function of +the key. + +It is now an open addressing hash table with linear probing and +each entry contains a HE* ptr and the HASH value for faster inline +comparison. The AUX struct is allocated seperately, at [HvMAX]. + +The fill rate (load factor) went from 100% in perl5 and 90% in cperl +to now 70%. =cut @@ -34,9 +32,9 @@ holds the key and hash value. #define PERL_HASH_INTERNAL_ACCESS #include "perl.h" -/* New 90% fill rate ("load factor"). Was 100 before */ +/* 70% fill rate ("load factor") with open addressing */ #ifndef HV_FILL_RATE -# define HV_FILL_RATE 90 +# define HV_FILL_RATE 70 #endif #ifndef HV_FILL_THRESHOLD # define HV_FILL_THRESHOLD 31 @@ -45,16 +43,9 @@ holds the key and hash value. #if HV_FILL_RATE == 100 # define DO_HSPLIT(xhv) ((xhv)->xhv_keys >= (xhv)->xhv_max) #else -# define DO_HSPLIT_slow(xhv) !(xhv)->xhv_max || \ - ((U32)(((xhv)->xhv_keys * 100) / (xhv)->xhv_max) >= HV_FILL_RATE) /* x/128 == x>>7, x>>ctz(n) */ -# define DO_HSPLIT_fast(xhv) !(xhv)->xhv_max || \ +# define DO_HSPLIT(xhv) !(xhv)->xhv_max || \ ((U32)(((xhv)->xhv_keys * 100) >> CTZ(1+((xhv)->xhv_max))) >= HV_FILL_RATE) -# if 0 -# define DO_HSPLIT(xhv) DO_HSPLIT_slow(xhv) -# else -# define DO_HSPLIT(xhv) DO_HSPLIT_fast(xhv) -# endif #endif static const char S_strtab_error[] @@ -266,20 +257,20 @@ negative the key is assumed to be in UTF-8-encoded Unicode. The C parameter is the precomputed hash value; if it is zero then Perl will compute it. -The return value will be -C if the operation failed or if the value did not need to be actually -stored within the hash (as in the case of tied hashes). Otherwise it can -be dereferenced to get the original C. Note that the caller is -responsible for suitably incrementing the reference count of C before -the call, and decrementing it if the function returned C. Effectively -a successful C takes ownership of one reference to C. This is -usually what you want; a newly created SV has a reference count of one, so -if all your code does is create SVs then store them in a hash, C -will own the only reference to the new SV, and your code doesn't need to do -anything further to tidy up. C is not implemented as a call to -C, and does not create a temporary SV for the key, so if your -key data is not already in SV form then use C in preference to -C. +The return value will be C if the operation failed or if the +value did not need to be actually stored within the hash (as in the +case of tied hashes). Otherwise it can be dereferenced to get the +original C. Note that the caller is responsible for suitably +incrementing the reference count of C before the call, and +decrementing it if the function returned C. Effectively a +successful C takes ownership of one reference to C. +This is usually what you want; a newly created SV has a reference +count of one, so if all your code does is create SVs then store them +in a hash, C will own the only reference to the new SV, and +your code doesn't need to do anything further to tidy up. C +is not implemented as a call to C, and does not create a +temporary SV for the key, so if your key data is not already in SV +form then use C in preference to C. See L for more information on how to use this function on tied hashes. @@ -406,9 +397,11 @@ static void S_assert_hechain(pTHX_ HE* entry) { if (!entry) return; - HE_EACH(hv, entry, +#ifndef HASH_OPEN_LINEAR + HE_EACH(hv, i, entry, assert(entry->hent_hek) ) +#endif } static void @@ -417,7 +410,9 @@ S_assert_ahe(pTHX_ AHE* ahe) #ifdef PERL_INLINE_HASH assert(ahe->hent_he ? ahe->hent_hash : !ahe->hent_hash); #endif +#ifndef HASH_OPEN_LINEAR S_assert_hechain(aTHX_ ahe->hent_he); +#endif } #endif @@ -649,7 +644,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, */ int keysv_flags = HEK_FLAGS(keysv_hek); HE *orig_entry = entry; - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { const HEK *hek = HeKEY_hek(entry); CHECK_HASH_FLOOD(collisions) if (hek == keysv_hek) @@ -665,7 +660,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, entry = orig_entry; } - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { CHECK_HASH_FLOOD(collisions) if (HeHASH(entry) != hash) /* strings can't be equal */ continue; @@ -754,7 +749,7 @@ Perl_hv_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, /* move found bucket to the front oe -> e -> A => e -> oe -> A oe -> A .. X -> e -> B => e -> oe -> A .. X -> B */ - if (entry != oentry->hent_he && !HvEITER_get(hv)) { + if (!HvEITER_get(hv) && entry != oentry->hent_he) { if (HeNEXT(oentry->hent_he) == entry) { DEBUG_H(PerlIO_printf(Perl_debug_log, "HASH move up 1\t%s{%s}\n", HvNAME_get(hv)?HvNAME_get(hv):"", key)); @@ -1551,7 +1546,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, goto not_found; #ifdef PERL_INLINE_HASH if (first_entry->hent_hash != hash) { - oentry = &entry->hent_next; + oentry = &HeNEXT(entry); entry = *oentry; if (!entry) goto not_found; @@ -1572,7 +1567,7 @@ S_hv_delete_common(pTHX_ HV *hv, SV *keysv, const char *key, I32 klen, */ int keysv_flags = HEK_FLAGS(keysv_hek); - HE_OEACH(hv, oentry, entry, { + HE_OEACH(hv, hindex, oentry, entry, { const HEK *hek = HeKEY_hek(entry); CHECK_HASH_FLOOD(collisions) if (hek == keysv_hek) @@ -1969,7 +1964,7 @@ S_hsplit(pTHX_ HV *hv, U32 const oldsize, U32 newsize) } #endif entry = *oentry; - } + } } if (UNLIKELY(newsize < oldsize)) { /* shrinked */ if (do_aux) /* move to left */ @@ -2101,16 +2096,17 @@ Perl_newHVhv(pTHX_ HV *ohv) ents = (AHE*)a; Copy(oents, ents, hv_max+1, AHE); - /* and descent into each bucket... */ - for (; oent <= last; oent++, ents++) { - HE *e = oent->hent_he; - HE *prev = NULL; + /* In each bucket... */ + for (i = 0; i <= hv_max; i++) { + HE *prev = NULL; + HE *oent = oents[i]; if (!oent->hent_he) continue; /* Copy the linked list of entries. */ - HE_EACH(hv, e, { - const HEK *hek = HeKEY_hek(e); + e = oent->hent_he; + HE_EACH(hv, i, oent, { + const HEK *hek = HeKEY_hek(oent); HE * const ent = new_HE(); SV * const val = HeVAL(e); @@ -2307,7 +2303,7 @@ Perl_hv_clear(pTHX_ HV *hv) U32 i; for (i = 0; i <= xhv->xhv_max; i++) { HE *entry = AHe(HvARRAY(hv)[i]); - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { /* not already placeholder */ if (!(He_IS_PLACEHOLDER(entry))) { if (HeVAL(entry)) { @@ -2509,10 +2505,7 @@ Perl_hfree_next_entry(pTHX_ HV *hv, U32 *indexp) #endif } AHe(array[*indexp]) = HeNEXT(entry); - /*AHeHASH_set(&array[*indexp], HeNEXT(entry) ? HeHASH(HeNEXT(entry)) : 0);*/ -#ifdef PERL_INLINE_HASH - array[*indexp].hent_hash = HeNEXT(entry) ? HeHASH(HeNEXT(entry)) : 0; -#endif + AHeHASH_set(&array[*indexp], HeNEXT(entry) ? HeHASH(HeNEXT(entry)) : 0); ((XPVHV*) SvANY(hv))->xhv_keys--; if ( PL_phase != PERL_PHASE_DESTRUCT && HvENAME(hv) @@ -3757,7 +3750,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) if (!entry) goto not_found; - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { const HEK *hek = HeKEY_hek(entry); CHECK_HASH_FLOOD(collisions) if (HEK_HASH(hek) != hash) /* strings can't be equal */ @@ -3769,7 +3762,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 hash, int flags) if (HEK_FLAGS(hek) != flags_masked) continue; break; - }) + }) if (!entry) not_found: { @@ -3988,7 +3981,7 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) SV *value; #ifdef USE_ITHREADS - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { if (HeHASH(entry) == hash) { /* We might have a duplicate key here. If so, entry is older than the key we've already put in the hash, so if they are @@ -4004,7 +3997,7 @@ Perl_refcounted_he_chain_2hv(pTHX_ const struct refcounted_he *chain, U32 flags) CHECK_HASH_FLOOD(collisions) }) #else - HE_EACH(hv, entry, { + HE_EACH(hv, hindex, entry, { if (HeHASH(entry) == hash) { /* We might have a duplicate key here. If so, entry is older than the key we've already put in the hash, so if they are diff --git a/hv.h b/hv.h index 026d2271ea4..009fc0a372f 100644 --- a/hv.h +++ b/hv.h @@ -1,43 +1,34 @@ /* hv.h * - * Copyright (C) 1991, 1992, 1993, 1996, 1997, 1998, 1999, - * 2000, 2001, 2002, 2003, 2005, 2006, 2007, 2008, by Larry Wall and others + * Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, + * 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others + * Copyright (C) 2016 cPanel Inc. + * Copyright (C) 2017 Reini Urban * * You may distribute under the terms of either the GNU General Public * License or the Artistic License, as specified in the README file. * + * Now a proper open addressing hash table, with linear probing and each entry + * contains a HE* ptr and the HASH value for faster inline comparison. + * The AUX struct is allocated seperately, at [HvMAX]. */ -/* These control hash traversal randomization and the environment variable PERL_PERTURB_KEYS. - * Currently disabling this functionality will break a few tests, but should otherwise work fine. - * See perlrun for more details. */ +#define PERL_HASH_ITER_BUCKET(iter) ((iter)->xhv_riter) -#if defined(USE_CPERL) -/* Performance. We have good enough security measures to fight DoS hash floods. */ -/*# define PERL_PERTURB_KEYS_DISABLED*/ -# define PERL_PERTURB_KEYS_TOP -/* array_he branch: inline hent_hash into AHE* */ -# define PERL_INLINE_HASH -#endif +/* open addressing with linear or robin-hood probing, or ... */ +#define HASH_OPEN_LINEAR +/* #define HASH_OPEN_QUADRATIC */ +/* #define HASH_OPEN_DOUBLE */ +/* #define HASH_OPEN_ROBINHOOD */ +/* #define HASH_OPEN_HOPSCOTCH */ +/* #define HASH_CHAINED_LIST */ -#if defined(PERL_PERTURB_KEYS_DISABLED) || defined(PERL_PERTURB_KEYS_TOP) -# undef PERL_HASH_RANDOMIZE_KEYS -# define PL_HASH_RAND_BITS_ENABLED 0 -# define PERL_HASH_ITER_BUCKET(iter) ((iter)->xhv_riter) -#else -# define PERL_HASH_RANDOMIZE_KEYS 1 -# if defined(PERL_PERTURB_KEYS_RANDOM) -# define PL_HASH_RAND_BITS_ENABLED 1 -# elif defined(PERL_PERTURB_KEYS_DETERMINISTIC) -# define PL_HASH_RAND_BITS_ENABLED 2 -# else -# define USE_PERL_PERTURB_KEYS 1 -# define PL_HASH_RAND_BITS_ENABLED PL_hash_rand_bits_enabled -# endif -# define PERL_HASH_ITER_BUCKET(iter) (((iter)->xhv_riter) ^ ((iter)->xhv_rand)) -#endif +#undef PERL_HASH_RANDOMIZE_KEYS +#define PL_HASH_RAND_BITS_ENABLED 0 -/* inlined entry in hash array, 2-3 words */ +#define PERL_INLINE_HASH + +/* inlined entry in hash array, 1-2 words */ struct array_he { HE *hent_he; /* ptr to full hash entry */ #ifdef PERL_INLINE_HASH @@ -45,20 +36,25 @@ struct array_he { #endif }; -/* entry in hash value linked list */ +/* hash entry with key + value */ struct he { - /* Keep hent_next first in this structure, because sv_free_arenas take - advantage of this to share code between the he arenas and the SV - body arenas */ - HE *hent_next; /* next entry in chain */ - HEK *hent_hek; /* hash key */ + /*SV *hent_val;*/ /* scalar value that was hashed */ union { SV *hent_val; /* scalar value that was hashed */ Size_t hent_refcount; /* references for this shared hash key */ } he_valu; + U32 hent_hash; /* hash of key */ + I32 hent_len; /* length of hash key, with utf8 bit as MSB */ +#ifdef PERL_GCC_BRACE_GROUPS_FORBIDDEN + char hent_key[1]; /* variable-length hash key + flag */ +#else + char hent_key[]; /* for easier debugging */ +#endif }; -/* hash key -- defined separately for use as shared pointer */ +/* hash key -- defined separately for use as shared pointer. + Not needed anymore. + */ struct hek { U32 hek_hash; /* hash of key */ I32 hek_len; /* length of hash key */ @@ -67,9 +63,9 @@ struct hek { #else char hek_key[]; /* for easier debugging */ #endif - /* the hash-key is \0-terminated */ + /* The hash-key is \0-terminated */ /* after the \0 there is a byte for flags, such as whether the key - is UTF-8 */ + is UTF-8 or WASUTF8 */ }; struct shared_he { @@ -91,13 +87,12 @@ struct xpvhv_aux { AV *xhv_backreferences; /* back references for weak references */ HE *xhv_eiter; /* current entry of iterator: todo: move to loop context */ U32 xhv_riter; /* current root of iterator: todo: move to loop context */ - -/* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer - * to an array of HEK pointers, this being the length. The first element is - * the name of the stash, which may be NULL. If xhv_name_count is positive, - * then *xhv_name is one of the effective names. If xhv_name_count is nega- - * tive, then xhv_name_u.xhvnameu_names[1] is the first effective name. - */ + /* Concerning xhv_name_count: When non-zero, xhv_name_u contains a pointer + * to an array of HEK pointers, this being the length. The first element is + * the name of the stash, which may be NULL. If xhv_name_count is positive, + * then *xhv_name is one of the effective names. If xhv_name_count is negative + * then xhv_name_u.xhvnameu_names[1] is the first effective name. + */ I32 xhv_name_count; struct mro_meta *xhv_mro_meta; #ifdef PERL_HASH_RANDOMIZE_KEYS @@ -126,6 +121,7 @@ struct xpvhv { union _xmgu xmg_u; U32 xhv_keys; /* total keys, including placeholders */ U32 xhv_max; /* subscript of last element of xhv_array */ + struct xpvhv_aux* xhv_aux; }; #define HV_NO_RITER (U32)U32_MAX @@ -344,7 +340,7 @@ C. #define HvHASH_INDEX(hash, max) (hash & (max)) #endif -/* these hash entry flags ride on hent_klen (for use only in magic/tied HVs) */ +/* These hash entry flags ride on hent_klen (for use only in magic/tied HVs) */ #define HEf_SVKEY -2 /* hent_key is an SV* */ #ifndef PERL_CORE @@ -353,9 +349,7 @@ C. #define HvARRAY(hv) ((hv)->sv_u.svu_hash) #define HvFILL(hv) Perl_hv_fill(aTHX_ MUTABLE_HV(hv)) #define HvMAX(hv) ((XPVHV*)SvANY(hv))->xhv_max -/* This quite intentionally does no flag checking first. That's your - responsibility. */ -#define HvAUX(hv) ((struct xpvhv_aux*)&(HvARRAY(hv)[HvMAX(hv)+1])) +#define HvAUX(hv) ((XPVHV*)SvANY(hv))->xhv_aux #define HvRITER(hv) (*Perl_hv_riter_p(aTHX_ MUTABLE_HV(hv))) #define HvEITER(hv) (*Perl_hv_eiter_p(aTHX_ MUTABLE_HV(hv))) #define HvRITER_set(hv,r) Perl_hv_riter_set(aTHX_ MUTABLE_HV(hv), r) @@ -485,17 +479,17 @@ C. #else # define AHeHASH_set(ahep, hash) #endif -#define HeNEXT(he) (he)->hent_next -#define HeKEY_hek(he) (he)->hent_hek -#define HeKEY(he) HEK_KEY(HeKEY_hek(he)) +#define HeNEXT(he) (he+1) +#define HeKEY_hek(he) (*(HEK**)&(he)->hent_hash) +#define HeKEY(he) (he)->hent_key #define HeKEY_sv(he) (*(SV**)HeKEY(he)) -#define HeKLEN(he) HEK_LEN(HeKEY_hek(he)) -#define HeKUTF8(he) HEK_UTF8(HeKEY_hek(he)) -#define HeKWASUTF8(he) HEK_WASUTF8(HeKEY_hek(he)) +#define HeKLEN(he) (he)->hent_len +#define HeKUTF8(he) (HeKFLAGS(he) & HVhek_UTF8) +#define HeKWASUTF8(he) (HeKFLAGS(he) & HVhek_WASUTF8) #define HeKLEN_UTF8(he) (HeKUTF8(he) ? -HeKLEN(he) : HeKLEN(he)) -#define HeKFLAGS(he) HEK_FLAGS(HeKEY_hek(he)) +#define HeKFLAGS(he) (*((unsigned char *)(HeKEY(he))+HeKLEN(he)+1)) #define HeVAL(he) (he)->he_valu.hent_val -#define HeHASH(he) HEK_HASH(HeKEY_hek(he)) +#define HeHASH(he) (he)->hent_hash /* Here we require a STRLEN lp */ #define HePV(he,lp) ((He_IS_SVKEY(he)) ? \ SvPV(HeKEY_sv(he),lp) : \ @@ -821,14 +815,36 @@ Creates a new HV. The reference count is set to 1. #define newHV() MUTABLE_HV(newSV_type(SVt_PVHV)) +/*#define HE_INC(entry) */ /* entry is the initial hash hit, check all collisions. an empty hash slot has entry==NULL. */ -#define HE_EACH(hv,_entry,block) \ +#ifdef HASH_OPEN_LINEAR +#define HE_EACH(hv,hindex,_entry,block) \ + { AHE* ahe; \ + for (; ahe; ahe = &HvARRAY(hv)[++hindex]) { \ + /*if (ahe->hent_hash != hash) continue;*/\ + _entry = ahe->hent_he; \ + block; \ + } \ + } +#else +#define HE_EACH(hv,hindex,_entry,block) \ for (; _entry; _entry = HeNEXT(_entry)) { \ block; \ } +#endif -#define HE_EACH_POST(hv,_entry,post,block) \ +#ifdef HASH_OPEN_LINEAR +#define HE_EACH_POST(hv,_entry,post,block) \ + for (; _entry; _entry = HvARRAY(hv)[++hindex], post) { \ + block; \ + } +#define HE_EACH_CMP(hv,_entry,cmp,block) \ + for (; cmp; _entry = HvARRAY(hv)[++hindex])) { \ + block; \ + } +#else +#define HE_EACH_POST(hv,_entry,post,block) \ for (; _entry; _entry = HeNEXT(_entry), post) { \ block; \ } @@ -836,17 +852,59 @@ Creates a new HV. The reference count is set to 1. for (; cmp; _entry = HeNEXT(_entry)) { \ block; \ } +#endif #ifdef PERL_CORE /* oentry is the changable entry ptr, entry the initial hash hit. check all collisions */ -#define HE_OEACH(hv,oentry,_entry,block) \ +#ifdef HASH_OPEN_LINEAR +#define HE_OEACH(hv,hindex,oentry,_entry,block) \ + { AHE* ahe; \ + for (; ahe; oentry = &HvARRAY(hv)[++hindex], ahe = *oentry) { \ + if (ahe->hent_hash != hash) continue; \ + entry = AHe(ahe); \ + block; \ + } \ + } +#else +#define HE_OEACH(hv,hindex,oentry,_entry,block) \ for (; _entry; oentry = &HeNEXT(_entry), _entry = *oentry) { \ block; \ } #endif +#endif #include "hv_func.h" +#define hv_begin(h) (U32)0 +#define hv_end(h) HvMAX(h) +#define hv_empty(h,i) !HvARRAY(h)[i].hent_he +#define hv_key(h, i) HvARRAY(h)[i].hent_he +#define hv_val(h, i) HvARRAY(h)[i].hent_he->hent_val + +/* ahe being &HvARRAY(h)[i] */ +#define hv_isequal(ahe, hek) \ + HEK_HASH(hek) == ahe->hent_hash \ + && HeKLEN(ahe->hent_he) == HEK_LEN(hek) \ + && memEQ(HeKEY(ahe->hent_he), HEK_KEY(hek), HEK_LEN(hek)) + +#define hv_foreach(h, kvar, vvar, code) \ + { U32 __i; \ + for (__i = hv_begin(h); __i != hv_end(h); ++__i) { \ + if (hv_empty(h,__i)) continue; \ + (kvar) = hv_key(h,__i); \ + (vvar) = hv_val(h,__i); \ + code; \ + } \ + } +#define hv_foreach_value(h, vvar, code) \ + { U32 __i; \ + for (__i = hv_begin(h); __i != hv_end(h); ++__i) { \ + if (hv_empty(h,__i)) continue; \ + (vvar) = hv_val(h,__i); \ + code; \ + } \ + } + /* * ex: set ts=8 sts=4 sw=4 et: */ diff --git a/hv_func.h b/hv_func.h index 8bec52d758c..c360b03acd5 100644 --- a/hv_func.h +++ b/hv_func.h @@ -11,132 +11,18 @@ #ifndef PERL_SEEN_HV_FUNC_H /* compile once */ #define PERL_SEEN_HV_FUNC_H -/* use shorter names */ -#ifdef PERL_HASH_FUNC_ONE_AT_A_TIME -#define PERL_HASH_FUNC_OAAT -#endif -#ifdef PERL_HASH_FUNC_ONE_AT_A_TIME_HARD -#define PERL_HASH_FUNC_OAAT_HARD -#endif -#ifdef PERL_HASH_FUNC_MURMUR_HASH_64A -#define PERL_HASH_FUNC_MURMUR64A -#endif -#ifdef PERL_HASH_FUNC_MURMUR_HASH_64B -#define PERL_HASH_FUNC_MURMUR64B -#endif - #ifdef HAS_QUAD #define CAN64BITHASH #endif -#if !( defined(PERL_HASH_FUNC_SIPHASH) \ - || defined(PERL_HASH_FUNC_SDBM) \ - || defined(PERL_HASH_FUNC_DJB2) \ - || defined(PERL_HASH_FUNC_SUPERFAST) \ - || defined(PERL_HASH_FUNC_MURMUR3) \ - || defined(PERL_HASH_FUNC_OAAT) \ - || defined(PERL_HASH_FUNC_OAAT_HARD) \ - || defined(PERL_HASH_FUNC_MURMUR64A) \ - || defined(PERL_HASH_FUNC_MURMUR64B) \ - || defined(PERL_HASH_FUNC_FNV1A) \ - || defined(PERL_HASH_FUNC_FNV1A_YOSHIMITSUTRIAD) \ - || defined(PERL_HASH_FUNC_CRC32) \ - || defined(PERL_HASH_FUNC_METRO64CRC) \ - || defined(PERL_HASH_FUNC_METRO64) \ - || defined(PERL_HASH_FUNC_SPOOKY32) \ - || defined(PERL_HASH_FUNC_FARMHASH64) \ - ) -/* FNV1A and CRC32 are the fastest, - SPOOKY32, METRO64CRC and MURMUR3 the fastest of the stable ones. - See https://github.com/rurban/smhasher#smhasher - and https://github.com/rurban/perl-hash-stats - */ -# ifdef USE_CPERL -# define PERL_HASH_FUNC_FNV1A -# else -# ifdef CAN64BITHASH -# define PERL_HASH_FUNC_HYBRID_OAATHU_SIPHASH13 -# else -# define PERL_HASH_FUNC_ONE_AT_A_TIME_HARD -# endif -# endif -#endif - -#if defined(PERL_HASH_FUNC_SIPHASH) -# define PERL_HASH_FUNC "SIPHASH_2_4" -# define PERL_HASH_SEED_BYTES 16 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_siphash_2_4((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_SUPERFAST) -# define PERL_HASH_FUNC "SUPERFAST" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_superfast((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_MURMUR3) -# define PERL_HASH_FUNC "MURMUR3" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur3((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_DJB2) -# define PERL_HASH_FUNC "DJB2" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_djb2((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_SDBM) -# define PERL_HASH_FUNC "SDBM" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_sdbm((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_OAAT_HARD) -# define PERL_HASH_FUNC "ONE_AT_A_TIME_HARD" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time_hard((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_OAAT) -# define PERL_HASH_FUNC "ONE_AT_A_TIME" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_one_at_a_time((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_MURMUR64A) -# define PERL_HASH_FUNC "MURMUR_HASH_64A" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64a((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_MURMUR64B) -# define PERL_HASH_FUNC "MURMUR_HASH_64B" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_murmur_hash_64b((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_FNV1A) -# define PERL_HASH_FUNC "FNV1A" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_fnv1a((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_FNV1A_YOSHIMITSUTRIAD) -# define PERL_HASH_FUNC "FNV1A_YoshimitsuTRIAD" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_fnv1a_yt((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_CRC32) -# define PERL_HASH_FUNC "CRC32" -# define PERL_HASH_SEED_BYTES 4 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_crc32((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_METRO64CRC) -# define PERL_HASH_FUNC "METRO64CRC" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_metro64crc((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_METRO64) -# define PERL_HASH_FUNC "METRO64" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_metro64((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_SPOOKY32) -# define PERL_HASH_FUNC "SPOOKY32" -# define PERL_HASH_SEED_BYTES 16 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_spooky32((seed),(U8*)(str),(len)) -#elif defined(PERL_HASH_FUNC_FARMHASH64) -# define PERL_HASH_FUNC "FARMHASH64" -# define PERL_HASH_SEED_BYTES 8 -# define PERL_HASH_WITH_SEED(seed,hash,str,len) (hash)= S_perl_hash_farmhash64((seed),(U8*)(str),(len)) -#endif +/* double hashing with those 2 */ +#define PERL_HASH_FUNC_FNV1A +#define PERL_HASH_FUNC_DJB2 -#ifndef PERL_HASH_WITH_SEED -#error "No hash function defined!" -#endif -#ifndef PERL_HASH_SEED_BYTES -#error "PERL_HASH_SEED_BYTES not defined" -#endif -#ifndef PERL_HASH_FUNC -#error "PERL_HASH_FUNC not defined" -#endif +#define PERL_HASH_FUNC "FNV1A" +#define PERL_HASH_SEED_BYTES 4 +#define PERL_HASH_WITH_SEED(seed,hash,str,len) \ + (hash)= S_perl_hash_fnv1a((seed),(U8*)(str),(len)) #ifndef PERL_HASH_SEED # if defined(USE_HASH_SEED) @@ -156,1051 +42,42 @@ #include -/*----------------------------------------------------------------------------- - * Endianess, misalignment capabilities and util macros - * - * The following 3 macros are defined in this section. The other macros defined - * are only needed to help derive these 3. - * - * U8TO32_LE(x) Read a little endian unsigned 32-bit int - * UNALIGNED_SAFE Defined if unaligned access is safe - * ROTL32(x,r) Rotate x left by r bits - */ - -#if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ - || defined(_MSC_VER) || defined (__TURBOC__) -#define U8TO16_LE(d) (*((const U16 *) (d))) -#endif - -#if !defined (U8TO16_LE) -#define U8TO16_LE(d) ((((const U8 *)(d))[1] << 8)\ - +((const U8 *)(d))[0]) -#endif - - -#define UNALIGNED_SAFE 0 -/* Now find best way we can to READ_UINT32 */ -#if (BYTEORDER == 0x1234 || BYTEORDER == 0x12345678) && U32SIZE == 4 - /* CPU endian matches murmurhash algorithm, so read 32-bit word directly */ - #define U8TO32_LE(ptr) (*((const U32*)(ptr))) -#elif BYTEORDER == 0x4321 || BYTEORDER == 0x87654321 - /* TODO: Add additional cases below where a compiler provided bswap32 is available */ - #if defined(__GNUC__) && (__GNUC__>4 || (__GNUC__==4 && __GNUC_MINOR__>=3)) - #define U8TO32_LE(ptr) (__builtin_bswap32(*((U32*)(ptr)))) - #else - /* Without a known fast bswap32 we're just as well off doing this */ - #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - #define UNALIGNED_SAFE 1 - #endif -#else - /* Unknown endianess so last resort is to read individual bytes */ - #define U8TO32_LE(ptr) (ptr[0]|ptr[1]<<8|ptr[2]<<16|ptr[3]<<24) - /* Since we're not doing word-reads we can skip the messing about with realignment */ - #define UNALIGNED_SAFE 1 -#endif - -#ifdef CAN64BITHASH -#ifndef U64TYPE -/* This probably isn't going to work, but failing with a compiler error due to - lack of uint64_t is no worse than failing right now with an #error. */ -#define U64 uint64_t -#endif -#endif - -/* Find best way to ROTL32/ROTL64 */ -#if defined(_MSC_VER) - #include /* Microsoft put _rotl declaration in here */ - #define ROTL32(x,r) _rotl(x,r) - #ifdef CAN64BITHASH - #define ROTL64(x,r) _rotl64(x,r) - #endif -#else - /* gcc recognises this code and generates a rotate instruction for CPUs with one */ - #define ROTL32(x,r) (((U32)x << r) | ((U32)x >> (32 - r))) - #ifdef CAN64BITHASH - #define ROTL64(x,r) (((U64)x << r) | ((U64)x >> (64 - r))) - #endif -#endif - -#ifdef UV_IS_QUAD -#define ROTL_UV(x,r) ROTL64(x,r) -#else -#define ROTL_UV(x,r) ROTL32(x,r) -#endif - -/* This is SipHash by Jean-Philippe Aumasson and Daniel J. Bernstein. - * The authors claim it is relatively secure compared to the - * alternatives. But's by far the slowest of all suitable hash functions - * and its security relies in the non-exposability of the seed. - * With the seed it's still trivially brute-forcable. - * See http://perl11.org/blog/seed.html - * - * It is 64 bit only. - */ +/* From khash: + Use quadratic probing. When the capacity is power of 2, stepping function + i*(i+1)/2 guarantees to traverse each bucket. It is better than double + hashing on cache performance and is more robust than linear probing. -#if defined(PERL_HASH_FUNC_SIPHASH) -#ifdef CAN64BITHASH - -#define U8TO64_LE(p) \ - (((U64)((p)[0]) ) | \ - ((U64)((p)[1]) << 8) | \ - ((U64)((p)[2]) << 16) | \ - ((U64)((p)[3]) << 24) | \ - ((U64)((p)[4]) << 32) | \ - ((U64)((p)[5]) << 40) | \ - ((U64)((p)[6]) << 48) | \ - ((U64)((p)[7]) << 56)) - -#define SIPROUND \ - do { \ - v0 += v1; v1=ROTL64(v1,13); v1 ^= v0; v0=ROTL64(v0,32); \ - v2 += v3; v3=ROTL64(v3,16); v3 ^= v2; \ - v0 += v3; v3=ROTL64(v3,21); v3 ^= v0; \ - v2 += v1; v1=ROTL64(v1,17); v1 ^= v2; v2=ROTL64(v2,32); \ - } while(0) - -/* SipHash-2-4 */ - -PERL_STATIC_INLINE U32 -S_perl_hash_siphash_2_4(const unsigned char * const seed, const unsigned char *in, const STRLEN inlen) { - /* "somepseudorandomlygeneratedbytes" */ - U64 v0 = UINT64_C(0x736f6d6570736575); - U64 v1 = UINT64_C(0x646f72616e646f6d); - U64 v2 = UINT64_C(0x6c7967656e657261); - U64 v3 = UINT64_C(0x7465646279746573); - - U64 b; - U64 k0 = ((const U64*)seed)[0]; - U64 k1 = ((const U64*)seed)[1]; - U64 m; - const int left = inlen & 7; - const U8 *end = in + inlen - left; - - b = ( ( U64 )(inlen) ) << 56; - v3 ^= k1; - v2 ^= k0; - v1 ^= k1; - v0 ^= k0; - - for ( ; in != end; in += 8 ) - { - m = U8TO64_LE( in ); - v3 ^= m; - SIPROUND; - SIPROUND; - v0 ^= m; - } - - switch( left ) - { - case 7: b |= ( ( U64 )in[ 6] ) << 48; - case 6: b |= ( ( U64 )in[ 5] ) << 40; - case 5: b |= ( ( U64 )in[ 4] ) << 32; - case 4: b |= ( ( U64 )in[ 3] ) << 24; - case 3: b |= ( ( U64 )in[ 2] ) << 16; - case 2: b |= ( ( U64 )in[ 1] ) << 8; - case 1: b |= ( ( U64 )in[ 0] ); break; - case 0: break; - } - - v3 ^= b; - SIPROUND; - SIPROUND; - v0 ^= b; - - v2 ^= 0xff; - SIPROUND; - SIPROUND; - SIPROUND; - SIPROUND; - b = v0 ^ v1 ^ v2 ^ v3; - return (U32)(b & U32_MAX); -} -#endif /* defined(CAN64BITHASH) */ -#endif /* defined(PERL_HASH_FUNC_SIPHASH) */ - -/* FYI: This is the "Super-Fast" algorithm mentioned by Bob Jenkins in - * (http://burtleburtle.net/bob/hash/doobs.html) - * It is by Paul Hsieh (c) 2004 and is analysed here - * http://www.azillionmonkeys.com/qed/hash.html - * license terms are here: - * http://www.azillionmonkeys.com/qed/weblicense.html - */ - -#if defined(PERL_HASH_FUNC_SUPERFAST) -PERL_STATIC_INLINE U32 -S_perl_hash_superfast(const unsigned char * const seed, const unsigned char *str, STRLEN len) { - U32 hash = *((const U32*)seed) + (U32)len; - U32 tmp; - int rem= len & 3; - len >>= 2; - - assert(hash); - for (;len > 0; len--) { - hash += U8TO16_LE (str); - tmp = (U8TO16_LE (str+2) << 11) ^ hash; - hash = (hash << 16) ^ tmp; - str += 2 * sizeof (U16); - hash += hash >> 11; - } - - /* Handle end cases */ - switch (rem) { \ - case 3: hash += U8TO16_LE (str); - hash ^= hash << 16; - hash ^= str[sizeof (U16)] << 18; - hash += hash >> 11; - break; - case 2: hash += U8TO16_LE (str); - hash ^= hash << 11; - hash += hash >> 17; - break; - case 1: hash += *str; - hash ^= hash << 10; - hash += hash >> 1; - } - /* Force "avalanching" of final 127 bits */ - hash ^= hash << 3; - hash += hash >> 5; - hash ^= hash << 4; - hash += hash >> 17; - hash ^= hash << 25; - return (hash + (hash >> 6)); -} -#endif /* defined(PERL_HASH_FUNC_SUPERFAST) */ - -/*----------------------------------------------------------------------------- - * MurmurHash3 was written by Austin Appleby, and is placed in the public - * domain. - * - * This implementation was originally written by Shane Day, and is also public domain, - * and was modified to function as a macro similar to other perl hash functions by - * Yves Orton. - * - * This is a portable ANSI C implementation of MurmurHash3_x86_32 (Murmur3A) - * with support for progressive processing. - * - * If you want to understand the MurmurHash algorithm you would be much better - * off reading the original source. Just point your browser at: - * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp - * - * How does it work? - * - * We can only process entire 32 bit chunks of input, except for the very end - * that may be shorter. - * - * To handle endianess I simply use a macro that reads a U32 and define - * that macro to be a direct read on little endian machines, a read and swap - * on big endian machines, or a byte-by-byte read if the endianess is unknown. - */ + In theory, double hashing should be more robust than quadratic probing. + However, my implementation is probably not for large hash tables, because + the second hash function is closely tied to the first hash function, + which reduce the effectiveness of double hashing. -#if defined(PERL_HASH_FUNC_MURMUR3) -/*----------------------------------------------------------------------------- - * Core murmurhash algorithm macros */ - -#define MURMUR_C1 (0xcc9e2d51) -#define MURMUR_C2 (0x1b873593) -#define MURMUR_C3 (0xe6546b64) -#define MURMUR_C4 (0x85ebca6b) -#define MURMUR_C5 (0xc2b2ae35) - -/* This is the main processing body of the algorithm. It operates - * on each full 32-bits of input. */ -#define MURMUR_DOBLOCK(h1, k1) STMT_START { \ - k1 *= MURMUR_C1; \ - k1 = ROTL32(k1,15); \ - k1 *= MURMUR_C2; \ - \ - h1 ^= k1; \ - h1 = ROTL32(h1,13); \ - h1 = h1 * 5 + MURMUR_C3; \ -} STMT_END - - -/* Append unaligned bytes to carry, forcing hash churn if we have 4 bytes */ -/* cnt=bytes to process, h1=name of h1 var, c=carry, n=bytes in c, ptr/len=payload */ -#define MURMUR_DOBYTES(cnt, h1, c, n, ptr, len) STMT_START { \ - int MURMUR_DOBYTES_i = cnt; \ - while(MURMUR_DOBYTES_i--) { \ - c = c>>8 | *ptr++<<24; \ - n++; len--; \ - if(n==4) { \ - MURMUR_DOBLOCK(h1, c); \ - n = 0; \ - } \ - } \ -} STMT_END - - -/* now we create the hash function */ -PERL_STATIC_INLINE U32 -S_perl_hash_murmur3(const unsigned char * const seed, const unsigned char *ptr, STRLEN len) { - U32 h1 = *((const U32*)seed); - U32 k1; - U32 carry = 0; - - const unsigned char *end; - int bytes_in_carry = 0; /* bytes in carry */ - I32 total_length= (I32)len; - -#if UNALIGNED_SAFE - /* Handle carry: commented out as its only used in incremental mode - it never fires for us - int i = (4-n) & 3; - if(i && i <= len) { - MURMUR_DOBYTES(i, h1, carry, bytes_in_carry, ptr, len); - } - */ - - /* This CPU handles unaligned word access */ - /* Process 32-bit chunks */ - end = ptr + len/4*4; - for( ; ptr < end ; ptr+=4) { - k1 = U8TO32_LE(ptr); - MURMUR_DOBLOCK(h1, k1); - } -#else - /* This CPU does not handle unaligned word access */ - - /* Consume enough so that the next data byte is word aligned */ - STRLEN i = -PTR2IV(ptr) & 3; - if(i && i <= len) { - MURMUR_DOBYTES((int)i, h1, carry, bytes_in_carry, ptr, len); - } - - /* We're now aligned. Process in aligned blocks. Specialise for each possible carry count */ - end = ptr + len/4*4; - switch(bytes_in_carry) { /* how many bytes in carry */ - case 0: /* c=[----] w=[3210] b=[3210]=w c'=[----] */ - for( ; ptr < end ; ptr+=4) { - k1 = U8TO32_LE(ptr); - MURMUR_DOBLOCK(h1, k1); - } - break; - case 1: /* c=[0---] w=[4321] b=[3210]=c>>24|w<<8 c'=[4---] */ - for( ; ptr < end ; ptr+=4) { - k1 = carry>>24; - carry = U8TO32_LE(ptr); - k1 |= carry<<8; - MURMUR_DOBLOCK(h1, k1); - } - break; - case 2: /* c=[10--] w=[5432] b=[3210]=c>>16|w<<16 c'=[54--] */ - for( ; ptr < end ; ptr+=4) { - k1 = carry>>16; - carry = U8TO32_LE(ptr); - k1 |= carry<<16; - MURMUR_DOBLOCK(h1, k1); - } - break; - case 3: /* c=[210-] w=[6543] b=[3210]=c>>8|w<<24 c'=[654-] */ - for( ; ptr < end ; ptr+=4) { - k1 = carry>>8; - carry = U8TO32_LE(ptr); - k1 |= carry<<24; - MURMUR_DOBLOCK(h1, k1); - } - } -#endif - /* Advance over whole 32-bit chunks, possibly leaving 1..3 bytes */ - len -= len/4*4; - - /* Append any remaining bytes into carry */ - MURMUR_DOBYTES((int)len, h1, carry, bytes_in_carry, ptr, len); - - if (bytes_in_carry) { - k1 = carry >> ( 4 - bytes_in_carry ) * 8; - k1 *= MURMUR_C1; - k1 = ROTL32(k1,15); - k1 *= MURMUR_C2; - h1 ^= k1; - } - h1 ^= total_length; - - /* fmix */ - h1 ^= h1 >> 16; - h1 *= MURMUR_C4; - h1 ^= h1 >> 13; - h1 *= MURMUR_C5; - h1 ^= h1 >> 16; - return h1; -} -#endif /* defined(PERL_HASH_FUNC_MURMUR3) */ + Reference: http://research.cs.vt.edu/AVresearch/hashing/quadratic.php +*/ -#if defined(PERL_HASH_FUNC_DJB2) PERL_STATIC_INLINE U32 -S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { +S_perl_hash_fnv1a(const unsigned char * const seed, + const unsigned char *str, const STRLEN len) { const unsigned char * const end = (const unsigned char *)str + len; - U32 hash = *((const U32*)seed) + (U32)len; - assert(hash); + U32 hash = 0x811C9DC5 + *((U32*)seed); /* maybe also get rid of seed */ while (str < end) { - hash = ((hash << 5) + hash) + *str; - str++; + hash ^= *str++; + hash *= 16777619; } return hash; } -#endif /* defined(PERL_HASH_FUNC_DJB2) */ -#if defined(PERL_HASH_FUNC_SDBM) PERL_STATIC_INLINE U32 -S_perl_hash_sdbm(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { +S_perl_hash_djb2(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { const unsigned char * const end = (const unsigned char *)str + len; U32 hash = *((const U32*)seed) + (U32)len; assert(hash); while (str < end) { - hash = (hash << 6) + (hash << 16) - hash + *str; + hash = ((hash << 5) + hash) + *str; str++; } return hash; } -#endif /* defined(PERL_HASH_FUNC_SDBM) */ - -/* - ONE_AT_A_TIME_HARD is the 5.17+ recommend ONE_AT_A_TIME algorithm - * - ONE_AT_A_TIME is a 5.17+ tweak of ONE_AT_A_TIME_OLD to - * prevent strings of only \0 but different lengths from colliding - * - * Security-wise, from best to worst, - * ONE_AT_A_TIME_HARD > ONE_AT_A_TIME > ONE_AT_A_TIME_OLD - * There is a big drop-off in security between ONE_AT_A_TIME_HARD and - * ONE_AT_A_TIME - * */ - -/* This is the "One-at-a-Time" algorithm by Bob Jenkins - * from requirements by Colin Plumb. - * (http://burtleburtle.net/bob/hash/doobs.html) - * With seed/len tweak. - * */ -#if defined(PERL_HASH_FUNC_ONE_AT_A_TIME) -PERL_STATIC_INLINE U32 -S_perl_hash_one_at_a_time(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { - const unsigned char * const end = (const unsigned char *)str + len; - U32 hash = *((const U32*)seed) + (U32)len; - assert(hash); - while (str < end) { - hash += *str++; - hash += (hash << 10); - hash ^= (hash >> 6); - } - hash += (hash << 3); - hash ^= (hash >> 11); - return (hash + (hash << 15)); -} -#endif /* defined(PERL_HASH_FUNC_ONE_AT_A_TIME) */ - -/* Derived from "One-at-a-Time" algorithm by Bob Jenkins */ -#if defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) -PERL_STATIC_INLINE U32 -S_perl_hash_one_at_a_time_hard(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { - const unsigned char * const end = (const unsigned char *)str + len; - U32 hash = *((const U32*)seed) + (U32)len; - assert(hash); - - while (str < end) { - hash += (hash << 10); - hash ^= (hash >> 6); - hash += *str++; - } - - hash += (hash << 10); - hash ^= (hash >> 6); - hash += seed[4]; - - hash += (hash << 10); - hash ^= (hash >> 6); - hash += seed[5]; - - hash += (hash << 10); - hash ^= (hash >> 6); - hash += seed[6]; - - hash += (hash << 10); - hash ^= (hash >> 6); - hash += seed[7]; - - hash += (hash << 10); - hash ^= (hash >> 6); - - hash += (hash << 3); - hash ^= (hash >> 11); - return (hash + (hash << 15)); -} -#endif /* defined(PERL_HASH_FUNC_ONE_AT_A_TIME_HARD) */ - -#ifdef CAN64BITHASH - -#ifdef PERL_HASH_FUNC_MURMUR64A -/* This code is from Austin Appleby and is in the public domain. - Altered by Yves Orton to match Perl's hash interface, and to - return a 32 bit hash. - - Note uses unaligned 64 bit loads - will NOT work on machines with - strict alignment requirements. - - Also this code may not be suitable for big-endian machines. -*/ - -/* a 64 bit hash where we only use the low 32 bits */ -PERL_STATIC_INLINE U32 -S_perl_hash_murmur_hash_64a (const unsigned char * const seed, const unsigned char *str, const STRLEN len) -{ - const U64 m = UINT64_C(0xc6a4a7935bd1e995); - const int r = 47; - U64 h = *((const U64*)seed) ^ len; - const U64 * data = (const U64 *)str; - const U64 * end = data + (len/8); - const unsigned char * data2; - - while(data != end) - { - U64 k = *data++; - - k *= m; - k ^= k >> r; - k *= m; - - h ^= k; - h *= m; - } - - data2 = (const unsigned char *)data; - - switch(len & 7) - { - case 7: h ^= (U64)(data2[6]) << 48; /* fallthrough */ - case 6: h ^= (U64)(data2[5]) << 40; /* fallthrough */ - case 5: h ^= (U64)(data2[4]) << 32; /* fallthrough */ - case 4: h ^= (U64)(data2[3]) << 24; /* fallthrough */ - case 3: h ^= (U64)(data2[2]) << 16; /* fallthrough */ - case 2: h ^= (U64)(data2[1]) << 8; /* fallthrough */ - case 1: h ^= (U64)(data2[0]); /* fallthrough */ - h *= m; - }; - - h ^= h >> r; - h *= m; - h ^= h >> r; - - /* was: return h; */ - return h & 0xFFFFFFFF; -} - -#endif - -#ifdef PERL_HASH_FUNC_MURMUR64B -/* This code is from Austin Appleby and is in the public domain. - Altered by Yves Orton to match Perl's hash interface and return - a 32 bit value - - Note uses unaligned 32 bit loads - will NOT work on machines with - strict alignment requirements. - - Also this code may not be suitable for big-endian machines. -*/ - -/* a 64-bit hash for 32-bit platforms where we only use the low 32 bits */ -PERL_STATIC_INLINE U32 -S_perl_hash_murmur_hash_64b (const unsigned char * const seed, const unsigned char *str, STRLEN len) -{ - const U32 m = 0x5bd1e995; - const int r = 24; - - U32 h1 = ((U32 *)seed)[0] ^ len; - U32 h2 = ((U32 *)seed)[1]; - - const U32 * data = (const U32 *)str; - - while(len >= 8) - { - U32 k1, k2; - k1 = *data++; - k1 *= m; k1 ^= k1 >> r; k1 *= m; - h1 *= m; h1 ^= k1; - len -= 4; - - k2 = *data++; - k2 *= m; k2 ^= k2 >> r; k2 *= m; - h2 *= m; h2 ^= k2; - len -= 4; - } - - if(len >= 4) - { - U32 k1 = *data++; - k1 *= m; k1 ^= k1 >> r; k1 *= m; - h1 *= m; h1 ^= k1; - len -= 4; - } - - switch(len) - { - case 3: h2 ^= ((unsigned char*)data)[2] << 16; /* fallthrough */ - case 2: h2 ^= ((unsigned char*)data)[1] << 8; /* fallthrough */ - case 1: h2 ^= ((unsigned char*)data)[0]; /* fallthrough */ - h2 *= m; - }; - - h1 ^= h2 >> 18; h1 *= m; - h2 ^= h1 >> 22; h2 *= m; - /* - The following code has been removed as it is unused - when only the low 32 bits are used. -- Yves - - h1 ^= h2 >> 17; h1 *= m; - - U64 h = h1; - - h = (h << 32) | h2; - */ - - return h2; -} -#endif -#endif /* defined(CAN64BITHASH) */ - -#ifdef PERL_HASH_FUNC_FNV1A -/* schmorp: without any experiments, fnv1a should be faster than - one-at-a-time, but should be easily beaten by murmur hash (for long - data), which would probably be preferable if I had more time - to add a portable version of it. */ -PERL_STATIC_INLINE U32 -S_perl_hash_fnv1a(const unsigned char * const seed, const unsigned char *str, const STRLEN len) { - const unsigned char * const end = (const unsigned char *)str + len; - U32 hash = 0x811C9DC5 + *((U32*)seed); /* maybe also get rid of seed */ - while (str < end) { - hash ^= *str++; - hash *= 16777619; - } - return hash; -} -#endif - -#ifdef PERL_HASH_FUNC_FNV1A_YOSHIMITSUTRIAD -/* faster unrolled fnv1a variant by sanmayce. http://www.sanmayce.com/Fastest_Hash/ - fixed for some same basic security problems. */ -PERL_STATIC_INLINE U32 -S_perl_hash_fnv1a_yt(const unsigned char * const s, const unsigned char *str, const STRLEN l) { - const U8 *p = (const U8 *)str; - STRLEN len = (STRLEN)l; - const U32 seed = *(U32*)s; - const U32 PRIME = 709607; - U32 hash32A = seed ^ 2166136261; - U32 hash32B = 2166136261 + len; - U32 hash32C = 2166136261; - - for (; len >= 3 * 2 * sizeof(U32); len -= 3 * 2 * sizeof(U32), p += 3 * 2 * sizeof(U32)) { - hash32A = (hash32A ^ (ROTL32(*(U32 *) (p + 0), 5) ^ *(U32 *) (p + 4))) * PRIME; - hash32B = (hash32B ^ (ROTL32(*(U32 *) (p + 8), 5) ^ *(U32 *) (p + 12))) * PRIME; - hash32C = (hash32C ^ (ROTL32(*(U32 *) (p + 16), 5) ^ *(U32 *) (p + 20))) * PRIME; - } - if (p != str) { - hash32A = (hash32A ^ ROTL32(hash32C, 5)) * PRIME; - } - /* Cases 0..31 */ - if (len & 4 * sizeof(U32)) { - hash32A = (hash32A ^ (ROTL32(*(U32 *) (p + 0), 5) ^ *(U32 *) (p + 4))) * PRIME; - hash32B = (hash32B ^ (ROTL32(*(U32 *) (p + 8), 5) ^ *(U32 *) (p + 12))) * PRIME; - p += 8 * sizeof(U16); - } - /* Cases 0..15 */ - if (len & 2 * sizeof(U32)) { - hash32A = (hash32A ^ *(U32 *) (p + 0)) * PRIME; - hash32B = (hash32B ^ *(U32 *) (p + 4)) * PRIME; - p += 4 * sizeof(U16); - } - /* Cases 0..7 */ - if (len & sizeof(U32)) { - hash32A = (hash32A ^ *(U16 *) (p + 0)) * PRIME; - hash32B = (hash32B ^ *(U16 *) (p + 2)) * PRIME; - p += 2 * sizeof(U16); - } - /* Cases 0..3 */ - if (len & sizeof(U16)) { - hash32A = (hash32A ^ *(U16 *) p) * PRIME; - p += sizeof(U16); - } - if (len & 1) - hash32A = (hash32A ^ *p) * PRIME; - - hash32A = (hash32A ^ ROTL32(hash32B, 5)) * PRIME; - return hash32A ^ (hash32A >> 16); -} -#endif - -#if defined(PERL_HASH_FUNC_CRC32) && (defined(__SSE4_2__) || defined(AARCH64_FL_CRC)) -#include - -/* Byte-boundary alignment issues */ -#define ALIGN_SIZE 0x08UL -#define ALIGN_MASK (ALIGN_SIZE - 1) -#define CALC_CRC(op, crc, type, buf, len) \ - do { \ - for (; (len) >= sizeof (type); (len) -= sizeof(type), buf += sizeof (type)) { \ - (crc) = op((crc), *(type *) (buf)); \ - } \ - } while(0) - -/* iSCSCI CRC32-C is using the HW intrinsics. By far the fastest, and - measured as one of the best hash functions, but is however very easy to break, - and has low qualities in smhasher. - See https://github.com/rurban/smhasher -*/ -PERL_STATIC_INLINE U32 -S_perl_hash_crc32(const unsigned char * const seed, const unsigned char *str, STRLEN len) { - const char* buf = (const char*)str; - U32 hash = *((U32*)seed); /* tested nok + len in variant .1 much higher collision costs */ - - /* Align the input to the word boundary */ - for (; (len > 0) && ((size_t)buf & ALIGN_MASK); len--, buf++) { - hash = _mm_crc32_u8(hash, *buf); - } - -#ifdef __x86_64__ - CALC_CRC(_mm_crc32_u64, hash, U64TYPE, buf, len); -#endif - CALC_CRC(_mm_crc32_u32, hash, U32, buf, len); - CALC_CRC(_mm_crc32_u16, hash, U16, buf, len); - CALC_CRC(_mm_crc32_u8, hash, U8, buf, len); - - return hash; -} -#endif - -#if defined(CAN64BITHASH) && (defined(PERL_HASH_FUNC_METRO64CRC) || defined(PERL_HASH_FUNC_METRO64)) -/* rotate right idiom recognized by compiler*/ -inline static U64TYPE rotate_right(U64TYPE v, unsigned k) { - return (v >> k) | (v << (64 - k)); -} -// unaligned reads, fast and safe on Nehalem and later microarchitectures -inline static U64TYPE read_u64(const void * const ptr) { - return *(U64TYPE*)ptr; -} -inline static U64TYPE read_u32(const void * const ptr) { - return (U64TYPE)(*(U32*)ptr); -} -inline static U64TYPE read_u16(const void * const ptr) { - return (U64TYPE)(*(U16*)ptr); -} -inline static U64TYPE read_u8 (const void * const ptr) { - return (U64TYPE)(*(U8*)ptr); -} -#endif - -/* metrohash is also optionally using the CRC32 HW intrinsics, - is almost as fast as CRC32, one of the best hash functions - and relatively secure. - cfarmhash for 32 bit would be a bit better though. */ -#if defined(PERL_HASH_FUNC_METRO64CRC) && \ - (defined(__SSE4_2__) || defined(AARCH64_FL_CRC)) -#include - -/* The MIT License (MIT) - Copyright (c) 2015 J. Andrew Rogers - Copyright (c) 2015 cPanel Inc. - See https://github.com/rurban/smhasher - */ -PERL_STATIC_INLINE U32 -S_perl_hash_metro64crc(const unsigned char * const seed, const unsigned char *str, STRLEN len) { - static const U64TYPE k0 = 0xC83A91E1; - static const U64TYPE k1 = 0x8648DBDB; - static const U64TYPE k2 = 0x7BDEC03B; - static const U64TYPE k3 = 0x2F5870A5; - - const U8 * ptr = (const U8*)(str); - const U8 * const end = ptr + len; - - U64TYPE hash = ((*(U64TYPE*)seed + k2) * k0) + len; - - if (len >= 32) { - U64TYPE v[4]; - v[0] = hash; - v[1] = hash; - v[2] = hash; - v[3] = hash; - - do { - v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr) * k0); ptr += 8; - v[1] ^= _mm_crc32_u64(v[1], read_u64(ptr) * k1); ptr += 8; - v[2] ^= _mm_crc32_u64(v[2], read_u64(ptr) * k2); ptr += 8; - v[3] ^= _mm_crc32_u64(v[3], read_u64(ptr) * k3); ptr += 8; - } while (ptr <= (end - 32)); - - v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; - v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; - v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; - v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; - hash += v[0] ^ v[1]; - } - if ((end - ptr) >= 16) { - U64TYPE v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1; - U64TYPE v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2; - v0 ^= rotate_right(v0 * k0, 35) + v1; - v1 ^= rotate_right(v1 * k3, 35) + v0; - hash += v1; - } - if ((end - ptr) >= 8) { - hash += _mm_crc32_u64(hash, read_u64(ptr)); ptr += 8; - hash ^= rotate_right(hash, 33) * k1; - } - if ((end - ptr) >= 4) { - hash ^= _mm_crc32_u64(hash, read_u32(ptr)); ptr += 4; - hash ^= rotate_right(hash, 15) * k1; - } - if ((end - ptr) >= 2) { - hash ^= _mm_crc32_u64(hash, read_u16(ptr)); ptr += 2; - hash ^= rotate_right(hash, 13) * k1; - } - if ((end - ptr) >= 1) { - hash ^= _mm_crc32_u64(hash, read_u8(ptr)); - hash ^= rotate_right(hash, 25) * k1; - } - hash ^= rotate_right(hash, 33); - hash *= k0; - hash ^= rotate_right(hash, 33); - - return (U32)hash; -} -#endif - -#if defined(PERL_HASH_FUNC_METRO64) && defined(CAN64BITHASH) -PERL_STATIC_INLINE U32 -S_perl_hash_metro64(const unsigned char * const seed, const unsigned char *str, STRLEN len) { - static const U64TYPE k0 = 0xC83A91E1; - static const U64TYPE k1 = 0x8648DBDB; - static const U64TYPE k2 = 0x7BDEC03B; - static const U64TYPE k3 = 0x2F5870A5; - - const U8 * ptr = (const U8*)(str); - const U8 * const end = ptr + len; - - U64TYPE hash = ((*(U64TYPE*)seed + k2) * k0) + len; - - if (len >= 32) { - U64TYPE v[4]; - v[0] = hash; - v[1] = hash; - v[2] = hash; - v[3] = hash; - - do { - v[0] += read_u64(ptr) * k0; ptr += 8; v[0] = rotate_right(v[0],29) + v[2]; - v[1] += read_u64(ptr) * k1; ptr += 8; v[1] = rotate_right(v[1],29) + v[3]; - v[2] += read_u64(ptr) * k2; ptr += 8; v[2] = rotate_right(v[2],29) + v[0]; - v[3] += read_u64(ptr) * k3; ptr += 8; v[3] = rotate_right(v[3],29) + v[1]; - } while (ptr <= (end - 32)); - - v[2] ^= rotate_right(((v[0] + v[3]) * k0) + v[1], 33) * k1; - v[3] ^= rotate_right(((v[1] + v[2]) * k1) + v[0], 33) * k0; - v[0] ^= rotate_right(((v[0] + v[2]) * k0) + v[3], 33) * k1; - v[1] ^= rotate_right(((v[1] + v[3]) * k1) + v[2], 33) * k0; - hash += v[0] ^ v[1]; - } - if ((end - ptr) >= 16) { - U64TYPE v0 = hash + (read_u64(ptr) * k0); ptr += 8; v0 = rotate_right(v0,33) * k1; - U64TYPE v1 = hash + (read_u64(ptr) * k1); ptr += 8; v1 = rotate_right(v1,33) * k2; - v0 ^= rotate_right(v0 * k0, 35) + v1; - v1 ^= rotate_right(v1 * k3, 35) + v0; - hash += v1; - } - if ((end - ptr) >= 8) { - hash += read_u64(ptr) * k3; ptr += 8; - hash ^= rotate_right(hash, 33) * k1; - } - if ((end - ptr) >= 4) { - hash += read_u32(ptr) * k3; ptr += 4; - hash ^= rotate_right(hash, 15) * k1; - } - if ((end - ptr) >= 2) { - hash += read_u16(ptr) * k3; ptr += 2; - hash ^= rotate_right(hash, 13) * k1; - } - if ((end - ptr) >= 1) { - hash += read_u8 (ptr) * k3; - hash ^= rotate_right(hash, 25) * k1; - } - - hash ^= rotate_right(hash, 33); - hash *= k0; - hash ^= rotate_right(hash, 33); - - return (U32)hash; -} -#endif - -#if defined(PERL_HASH_FUNC_SPOOKY32) && defined(CAN64BITHASH) - -/* Spooky Hash - A 128-bit noncryptographic hash, for checksums and table lookup - By Bob Jenkins. Public domain. - Oct 31 2010: published framework, disclaimer ShortHash isn't right - Nov 7 2010: disabled ShortHash - Oct 31 2011: replace End, ShortMix, ShortEnd, enable ShortHash again */ - -/* left rotate a 64-bit value by k bytes */ -PERL_STATIC_INLINE U64TYPE -Rot64(U64TYPE x, int k) { - return (x << k) | (x >> (64 - k)); -} - -/* The goal is for each bit of the input to expand into 128 bits of */ -/* apparent entropy before it is fully overwritten. */ -/* n trials both set and cleared at least m bits of h0 h1 h2 h3 */ -/* n: 2 m: 29 */ -/* n: 3 m: 46 */ -/* n: 4 m: 57 */ -/* n: 5 m: 107 */ -/* n: 6 m: 146 */ -/* n: 7 m: 152 */ -/* when run forwards or backwards */ -/* for all 1-bit and 2-bit diffs */ -/* with diffs defined by either xor or subtraction */ -/* with a base of all zeros plus a counter, or plus another bit, or random */ - -#define ShortMix(h0, h1, h2, h3) \ - h2 = Rot64(h2,50); h2 += h3; h0 ^= h2; \ - h3 = Rot64(h3,52); h3 += h0; h1 ^= h3; \ - h0 = Rot64(h0,30); h0 += h1; h2 ^= h0; \ - h1 = Rot64(h1,41); h1 += h2; h3 ^= h1; \ - h2 = Rot64(h2,54); h2 += h3; h0 ^= h2; \ - h3 = Rot64(h3,48); h3 += h0; h1 ^= h3; \ - h0 = Rot64(h0,38); h0 += h1; h2 ^= h0; \ - h1 = Rot64(h1,37); h1 += h2; h3 ^= h1; \ - h2 = Rot64(h2,62); h2 += h3; h0 ^= h2; \ - h3 = Rot64(h3,34); h3 += h0; h1 ^= h3; \ - h0 = Rot64(h0,5); h0 += h1; h2 ^= h0; \ - h1 = Rot64(h1,36); h1 += h2; h3 ^= h1 - -/* Mix all 4 inputs together so that h0, h1 are a hash of them all. */ - -/* For two inputs differing in just the input bits */ -/* Where "differ" means xor or subtraction */ -/* And the base value is random, or a counting value starting at that bit */ -/* The final result will have each bit of h0, h1 flip */ -/* For every input bit, */ -/* with probability 50 +- .3% (it is probably better than that) */ -/* For every pair of input bits, */ -/* with probability 50 +- .75% (the worst case is approximately that) */ -#define ShortEnd(h0, h1, h2, h3) \ - h3 ^= h2; h2 = Rot64(h2,15); h3 += h2; \ - h0 ^= h3; h3 = Rot64(h3,52); h0 += h3; \ - h1 ^= h0; h0 = Rot64(h0,26); h1 += h0; \ - h2 ^= h1; h1 = Rot64(h1,51); h2 += h1; \ - h3 ^= h2; h2 = Rot64(h2,28); h3 += h2; \ - h0 ^= h3; h3 = Rot64(h3,9); h0 += h3; \ - h1 ^= h0; h0 = Rot64(h0,47); h1 += h0; \ - h2 ^= h1; h1 = Rot64(h1,54); h2 += h1; \ - h3 ^= h2; h2 = Rot64(h2,32); h3 += h2; \ - h0 ^= h3; h3 = Rot64(h3,25); h0 += h3; \ - h1 ^= h0; h0 = Rot64(h0,63); h1 += h0; - -/* sc_const: a constant which: */ -/* * is not zero */ -/* * is odd */ -/* * is a not-very-regular mix of 1's and 0's */ -/* * does not need any other special mathematical properties */ -static const U64TYPE sc_const = 0xdeadbeefdeadbeefULL; - -PERL_STATIC_INLINE U32 -S_perl_hash_spooky32(const unsigned char * const seed, const unsigned char *str, STRLEN len) -{ - U64TYPE *hash1 = (U64TYPE*)seed; - U64TYPE *hash2 = (U64TYPE*)(seed+8); - U64TYPE buf[12]; - union - { - const U8 *p8; - U32 *p32; - U64TYPE *p64; - size_t i; - } u; - - u.p8 = (const U8 *)str; - - if (!UNALIGNED_SAFE && (u.i & 0x7)) - { - memcpy(buf, str, len); - u.p64 = buf; - } - - size_t remainder = len%32; - U64TYPE a = *hash1; - U64TYPE b = *hash2; - U64TYPE c = sc_const; - U64TYPE d = sc_const; - - if (len > 15) - { - const U64TYPE *end = u.p64 + (len/32)*4; - - // handle all complete sets of 32 bytes - for (; u.p64 < end; u.p64 += 4) - { - c += u.p64[0]; - d += u.p64[1]; - ShortMix(a,b,c,d); - a += u.p64[2]; - b += u.p64[3]; - } - - //Handle the case of 16+ remaining bytes. - if (remainder >= 16) - { - c += u.p64[0]; - d += u.p64[1]; - ShortMix(a,b,c,d); - u.p64 += 2; - remainder -= 16; - } - } - - // Handle the last 0..15 bytes, and its len - d = ((U64TYPE)len) << 56; - switch (remainder) - { - case 15: - d += ((U64TYPE)u.p8[14]) << 48; - case 14: - d += ((U64TYPE)u.p8[13]) << 40; - case 13: - d += ((U64TYPE)u.p8[12]) << 32; - case 12: - d += u.p32[2]; - c += u.p64[0]; - break; - case 11: - d += ((U64TYPE)u.p8[10]) << 16; - case 10: - d += ((U64TYPE)u.p8[9]) << 8; - case 9: - d += (U64TYPE)u.p8[8]; - case 8: - c += u.p64[0]; - break; - case 7: - c += ((U64TYPE)u.p8[6]) << 48; - case 6: - c += ((U64TYPE)u.p8[5]) << 40; - case 5: - c += ((U64TYPE)u.p8[4]) << 32; - case 4: - c += u.p32[0]; - break; - case 3: - c += ((U64TYPE)u.p8[2]) << 16; - case 2: - c += ((U64TYPE)u.p8[1]) << 8; - case 1: - c += (U64TYPE)u.p8[0]; - break; - case 0: - c += sc_const; - d += sc_const; - } - ShortEnd(a,b,c,d); - /* *hash1 = a; - *hash2 = b; */ - return (U32)a; -} -#endif - /* legacy - only mod_perl should be doing this. */ #ifdef PERL_HASH_INTERNAL_ACCESS diff --git a/mro_core.c b/mro_core.c index a05b7f12e16..5bb5b0657e9 100644 --- a/mro_core.c +++ b/mro_core.c @@ -1103,7 +1103,7 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, for (; riter <= xhv->xhv_max; riter++) { entry = AHe(HvARRAY(oldstash)[riter]); /* Iterate through the entries in this list */ - HE_EACH(oldstash, entry, { + HE_EACH(oldstash, riter, entry, { const char* key; I32 len; @@ -1178,7 +1178,7 @@ S_mro_gather_and_rename(pTHX_ HV * const stashes, HV * const seen_stashes, for (riter=0; riter <= xhv->xhv_max; riter++) { entry = AHe(HvARRAY(stash)[riter]); /* Iterate through the entries in this list */ - HE_EACH(stash, entry, { + HE_EACH(stash, riter, entry, { const char* key; I32 len; diff --git a/perl.c b/perl.c index abbef3b1807..1a34d99f305 100644 --- a/perl.c +++ b/perl.c @@ -1290,7 +1290,7 @@ perl_destruct(pTHXx) AHE * const array = HvARRAY(PL_strtab); HE *hent = AHe(array[0]); - HE_EACH(PL_strtab, hent, { + HE_EACH(PL_strtab, riter, hent, { if (hent && ckWARN_d(WARN_INTERNAL)) { HE * const next = HeNEXT(hent); Perl_warner(aTHX_ packWARN(WARN_INTERNAL), diff --git a/sv.c b/sv.c index facb9865519..177381f5bcd 100644 --- a/sv.c +++ b/sv.c @@ -9946,7 +9946,7 @@ Perl_sv_resetpvn(pTHX_ const char *s, STRLEN len, HV * const stash) } for (i = 0; i <= (I32) HvMAX(stash); i++) { HE *entry = AHe(HvARRAY(stash)[i]); - HE_EACH(hv, entry, { + HE_EACH(hv, i, entry, { GV *gv; SV *sv; @@ -16270,7 +16270,7 @@ S_find_hash_subscript(pTHX_ const HV *const hv, const SV *const val) array = HvARRAY(hv); for (i = HvMAX(hv); i >= 0; i--) { HE *entry = AHe(array[i]); - HE_EACH(hv, entry, { + HE_EACH(hv, i, entry, { if (HeVAL(entry) != val) continue; if (HeVAL(entry) == UNDEF || He_IS_PLACEHOLDER(entry))