diff --git a/common.mk b/common.mk index fe01da34277c54..e223f291626602 100644 --- a/common.mk +++ b/common.mk @@ -11199,6 +11199,7 @@ parser_st.$(OBJEXT): {$(VPATH)}parser_bits.h parser_st.$(OBJEXT): {$(VPATH)}parser_st.c parser_st.$(OBJEXT): {$(VPATH)}parser_st.h parser_st.$(OBJEXT): {$(VPATH)}parser_value.h +parser_st.$(OBJEXT): {$(VPATH)}st.c proc.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h proc.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h proc.$(OBJEXT): $(CCAN_DIR)/list/list.h diff --git a/parser_st.c b/parser_st.c index 7dbeaaf28a898f..b2df6cbad29dcb 100644 --- a/parser_st.c +++ b/parser_st.c @@ -1,105 +1,3 @@ -/* This is a public domain general purpose hash table package - originally written by Peter Moore @ UCB. - - The hash table data structures were redesigned and the package was - rewritten by Vladimir Makarov . */ - -/* The original package implemented classic bucket-based hash tables - with entries doubly linked for an access by their insertion order. - To decrease pointer chasing and as a consequence to improve a data - locality the current implementation is based on storing entries in - an array and using hash tables with open addressing. The current - entries are more compact in comparison with the original ones and - this also improves the data locality. - - The hash table has two arrays called *bins* and *entries*. - - bins: - ------- - | | entries array: - |-------| -------------------------------- - | index | | | entry: | | | - |-------| | | | | | - | ... | | ... | hash | ... | ... | - |-------| | | key | | | - | empty | | | record | | | - |-------| -------------------------------- - | ... | ^ ^ - |-------| |_ entries start |_ entries bound - |deleted| - ------- - - o The entry array contains table entries in the same order as they - were inserted. - - When the first entry is deleted, a variable containing index of - the current first entry (*entries start*) is changed. In all - other cases of the deletion, we just mark the entry as deleted by - using a reserved hash value. - - Such organization of the entry storage makes operations of the - table shift and the entries traversal very fast. - - o The bins provide access to the entries by their keys. The - key hash is mapped to a bin containing *index* of the - corresponding entry in the entry array. - - The bin array size is always power of two, it makes mapping very - fast by using the corresponding lower bits of the hash. - Generally it is not a good idea to ignore some part of the hash. - But alternative approach is worse. For example, we could use a - modulo operation for mapping and a prime number for the size of - the bin array. Unfortunately, the modulo operation for big - 64-bit numbers are extremely slow (it takes more than 100 cycles - on modern Intel CPUs). - - Still other bits of the hash value are used when the mapping - results in a collision. In this case we use a secondary hash - value which is a result of a function of the collision bin - index and the original hash value. The function choice - guarantees that we can traverse all bins and finally find the - corresponding bin as after several iterations the function - becomes a full cycle linear congruential generator because it - satisfies requirements of the Hull-Dobell theorem. - - When an entry is removed from the table besides marking the - hash in the corresponding entry described above, we also mark - the bin by a special value in order to find entries which had - a collision with the removed entries. - - There are two reserved values for the bins. One denotes an - empty bin, another one denotes a bin for a deleted entry. - - o The length of the bin array is at least two times more than the - entry array length. This keeps the table load factor healthy. - The trigger of rebuilding the table is always a case when we can - not insert an entry anymore at the entries bound. We could - change the entries bound too in case of deletion but than we need - a special code to count bins with corresponding deleted entries - and reset the bin values when there are too many bins - corresponding deleted entries - - Table rebuilding is done by creation of a new entry array and - bins of an appropriate size. We also try to reuse the arrays - in some cases by compacting the array and removing deleted - entries. - - o To save memory very small tables have no allocated arrays - bins. We use a linear search for an access by a key. - - o To save more memory we use 8-, 16-, 32- and 64- bit indexes in - bins depending on the current hash table size. - - o The implementation takes into account that the table can be - rebuilt during hashing or comparison functions. It can happen if - the functions are implemented in Ruby and a thread switch occurs - during their execution. - - This implementation speeds up the Ruby hash table benchmarks in - average by more 40% on Intel Haswell CPU. - -*/ - #include "parser_st.h" #include "parser_bits.h" @@ -111,8 +9,9 @@ # define FALSE 0 #endif -#define NOT_RUBY 1 +#undef NOT_RUBY #undef RUBY +#undef RUBY_EXPORT #undef MEMCPY #define MEMCPY(p1,p2,type,n) nonempty_memcpy((p1), (p2), (sizeof(type) * (n))) @@ -179,6 +78,8 @@ nonempty_memcpy(void *dest, const void *src, size_t n) #define st_init_numtable rb_parser_st_init_numtable #undef st_init_table_with_size #define st_init_table_with_size rb_parser_st_init_table_with_size +#undef st_init_existing_table_with_size +#define st_init_existing_table_with_size rb_parser_st_init_existing_table_with_size #undef st_insert #define st_insert rb_parser_st_insert #undef st_lookup @@ -243,1925 +144,8 @@ nonempty_memcpy(void *dest, const void *src, size_t n) #undef st_locale_insensitive_strncasecmp #define st_locale_insensitive_strncasecmp rb_parser_st_locale_insensitive_strncasecmp -/* The type of hashes. */ -typedef st_index_t st_hash_t; - -struct st_table_entry { - st_hash_t hash; - st_data_t key; - st_data_t record; -}; - -#define type_numhash st_hashtype_num -static const struct st_hash_type st_hashtype_num = { - st_numcmp, - st_numhash, -}; - -static int st_strcmp(st_data_t, st_data_t); -static st_index_t strhash(st_data_t); -static const struct st_hash_type type_strhash = { - st_strcmp, - strhash, -}; - -static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs); -static st_index_t strcasehash(st_data_t); -static const struct st_hash_type type_strcasehash = { - st_locale_insensitive_strcasecmp_i, - strcasehash, -}; - -/* Value used to catch uninitialized entries/bins during debugging. - There is a possibility for a false alarm, but its probability is - extremely small. */ -#define ST_INIT_VAL 0xafafafafafafafaf -#define ST_INIT_VAL_BYTE 0xafa - -#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0) -#define PTR_EQUAL(tab, ptr, hash_val, key_) \ - ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key)) - -/* As PTR_EQUAL only its result is returned in RES. REBUILT_P is set - up to TRUE if the table is rebuilt during the comparison. */ -#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key, res, rebuilt_p) \ - do { \ - unsigned int _old_rebuilds_num = (tab)->rebuilds_num; \ - res = PTR_EQUAL(tab, ptr, hash_val, key); \ - rebuilt_p = _old_rebuilds_num != (tab)->rebuilds_num; \ - } while (FALSE) - -/* Features of a table. */ -struct st_features { - /* Power of 2 used for number of allocated entries. */ - unsigned char entry_power; - /* Power of 2 used for number of allocated bins. Depending on the - table size, the number of bins is 2-4 times more than the - number of entries. */ - unsigned char bin_power; - /* Enumeration of sizes of bins (8-bit, 16-bit etc). */ - unsigned char size_ind; - /* Bins are packed in words of type st_index_t. The following is - a size of bins counted by words. */ - st_index_t bins_words; -}; - -/* Features of all possible size tables. */ -#if SIZEOF_ST_INDEX_T == 8 -#define MAX_POWER2 62 -static const struct st_features features[] = { - {0, 1, 0, 0x0}, - {1, 2, 0, 0x1}, - {2, 3, 0, 0x1}, - {3, 4, 0, 0x2}, - {4, 5, 0, 0x4}, - {5, 6, 0, 0x8}, - {6, 7, 0, 0x10}, - {7, 8, 0, 0x20}, - {8, 9, 1, 0x80}, - {9, 10, 1, 0x100}, - {10, 11, 1, 0x200}, - {11, 12, 1, 0x400}, - {12, 13, 1, 0x800}, - {13, 14, 1, 0x1000}, - {14, 15, 1, 0x2000}, - {15, 16, 1, 0x4000}, - {16, 17, 2, 0x10000}, - {17, 18, 2, 0x20000}, - {18, 19, 2, 0x40000}, - {19, 20, 2, 0x80000}, - {20, 21, 2, 0x100000}, - {21, 22, 2, 0x200000}, - {22, 23, 2, 0x400000}, - {23, 24, 2, 0x800000}, - {24, 25, 2, 0x1000000}, - {25, 26, 2, 0x2000000}, - {26, 27, 2, 0x4000000}, - {27, 28, 2, 0x8000000}, - {28, 29, 2, 0x10000000}, - {29, 30, 2, 0x20000000}, - {30, 31, 2, 0x40000000}, - {31, 32, 2, 0x80000000}, - {32, 33, 3, 0x200000000}, - {33, 34, 3, 0x400000000}, - {34, 35, 3, 0x800000000}, - {35, 36, 3, 0x1000000000}, - {36, 37, 3, 0x2000000000}, - {37, 38, 3, 0x4000000000}, - {38, 39, 3, 0x8000000000}, - {39, 40, 3, 0x10000000000}, - {40, 41, 3, 0x20000000000}, - {41, 42, 3, 0x40000000000}, - {42, 43, 3, 0x80000000000}, - {43, 44, 3, 0x100000000000}, - {44, 45, 3, 0x200000000000}, - {45, 46, 3, 0x400000000000}, - {46, 47, 3, 0x800000000000}, - {47, 48, 3, 0x1000000000000}, - {48, 49, 3, 0x2000000000000}, - {49, 50, 3, 0x4000000000000}, - {50, 51, 3, 0x8000000000000}, - {51, 52, 3, 0x10000000000000}, - {52, 53, 3, 0x20000000000000}, - {53, 54, 3, 0x40000000000000}, - {54, 55, 3, 0x80000000000000}, - {55, 56, 3, 0x100000000000000}, - {56, 57, 3, 0x200000000000000}, - {57, 58, 3, 0x400000000000000}, - {58, 59, 3, 0x800000000000000}, - {59, 60, 3, 0x1000000000000000}, - {60, 61, 3, 0x2000000000000000}, - {61, 62, 3, 0x4000000000000000}, - {62, 63, 3, 0x8000000000000000}, -}; - -#else -#define MAX_POWER2 30 - -static const struct st_features features[] = { - {0, 1, 0, 0x1}, - {1, 2, 0, 0x1}, - {2, 3, 0, 0x2}, - {3, 4, 0, 0x4}, - {4, 5, 0, 0x8}, - {5, 6, 0, 0x10}, - {6, 7, 0, 0x20}, - {7, 8, 0, 0x40}, - {8, 9, 1, 0x100}, - {9, 10, 1, 0x200}, - {10, 11, 1, 0x400}, - {11, 12, 1, 0x800}, - {12, 13, 1, 0x1000}, - {13, 14, 1, 0x2000}, - {14, 15, 1, 0x4000}, - {15, 16, 1, 0x8000}, - {16, 17, 2, 0x20000}, - {17, 18, 2, 0x40000}, - {18, 19, 2, 0x80000}, - {19, 20, 2, 0x100000}, - {20, 21, 2, 0x200000}, - {21, 22, 2, 0x400000}, - {22, 23, 2, 0x800000}, - {23, 24, 2, 0x1000000}, - {24, 25, 2, 0x2000000}, - {25, 26, 2, 0x4000000}, - {26, 27, 2, 0x8000000}, - {27, 28, 2, 0x10000000}, - {28, 29, 2, 0x20000000}, - {29, 30, 2, 0x40000000}, - {30, 31, 2, 0x80000000}, -}; - -#endif - -/* The reserved hash value and its substitution. */ -#define RESERVED_HASH_VAL (~(st_hash_t) 0) -#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0) - -/* Return hash value of KEY for table TAB. */ -static inline st_hash_t -do_hash(st_data_t key, st_table *tab) -{ - st_hash_t hash = (st_hash_t)(tab->type->hash)(key); - - /* RESERVED_HASH_VAL is used for a deleted entry. Map it into - another value. Such mapping should be extremely rare. */ - return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash; -} - -/* Power of 2 defining the minimal number of allocated entries. */ -#define MINIMAL_POWER2 2 - -#if MINIMAL_POWER2 < 2 -#error "MINIMAL_POWER2 should be >= 2" -#endif - -/* If the power2 of the allocated `entries` is less than the following - value, don't allocate bins and use a linear search. */ -#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4 - -/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */ -static int -get_power2(st_index_t size) -{ - unsigned int n = ST_INDEX_BITS - nlz_intptr(size); - if (n <= MAX_POWER2) - return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n; -#ifndef NOT_RUBY - /* Ran out of the table entries */ - rb_raise(rb_eRuntimeError, "st_table too big"); -#endif - /* should raise exception */ - return -1; -} - -/* Return value of N-th bin in array BINS of table with bins size - index S. */ -static inline st_index_t -get_bin(st_index_t *bins, int s, st_index_t n) -{ - return (s == 0 ? ((unsigned char *) bins)[n] - : s == 1 ? ((unsigned short *) bins)[n] - : s == 2 ? ((unsigned int *) bins)[n] - : ((st_index_t *) bins)[n]); -} - -/* Set up N-th bin in array BINS of table with bins size index S to - value V. */ -static inline void -set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) -{ - if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v; - else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v; - else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v; - else ((st_index_t *) bins)[n] = v; -} - -/* These macros define reserved values for empty table bin and table - bin which contains a deleted entry. We will never use such values - for an entry index in bins. */ -#define EMPTY_BIN 0 -#define DELETED_BIN 1 -/* Base of a real entry index in the bins. */ -#define ENTRY_BASE 2 - -/* Mark I-th bin of table TAB as empty, in other words not - corresponding to any entry. */ -#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN)) - -/* Values used for not found entry and bin with given - characteristics. */ -#define UNDEFINED_ENTRY_IND (~(st_index_t) 0) -#define UNDEFINED_BIN_IND (~(st_index_t) 0) - -/* Entry and bin values returned when we found a table rebuild during - the search. */ -#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1) -#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1) - -/* Mark I-th bin of table TAB as corresponding to a deleted table - entry. Update number of entries in the table and number of bins - corresponding to deleted entries. */ -#define MARK_BIN_DELETED(tab, i) \ - do { \ - set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \ - } while (0) - -/* Macros to check that value B is used empty bins and bins - corresponding deleted entries. */ -#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN) -#define DELETED_BIN_P(b) ((b) == DELETED_BIN) -#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN) - -/* Macros to check empty bins and bins corresponding to deleted - entries. Bins are given by their index I in table TAB. */ -#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) -#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) -#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i))) - -/* Macros for marking and checking deleted entries given by their - pointer E_PTR. */ -#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL) -#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL) - -/* Return bin size index of table TAB. */ -static inline unsigned int -get_size_ind(const st_table *tab) -{ - return tab->size_ind; -} - -/* Return the number of allocated bins of table TAB. */ -static inline st_index_t -get_bins_num(const st_table *tab) -{ - return ((st_index_t) 1)<bin_power; -} - -/* Return mask for a bin index in table TAB. */ -static inline st_index_t -bins_mask(const st_table *tab) -{ - return get_bins_num(tab) - 1; -} - -/* Return the index of table TAB bin corresponding to - HASH_VALUE. */ -static inline st_index_t -hash_bin(st_hash_t hash_value, st_table *tab) -{ - return hash_value & bins_mask(tab); -} - -/* Return the number of allocated entries of table TAB. */ -static inline st_index_t -get_allocated_entries(const st_table *tab) -{ - return ((st_index_t) 1)<entry_power; -} - -/* Return size of the allocated bins of table TAB. */ -static inline st_index_t -bins_size(const st_table *tab) -{ - return features[tab->entry_power].bins_words * sizeof (st_index_t); -} - -/* Mark all bins of table TAB as empty. */ -static void -initialize_bins(st_table *tab) -{ - memset(tab->bins, 0, bins_size(tab)); -} - -/* Make table TAB empty. */ -static void -make_tab_empty(st_table *tab) -{ - tab->num_entries = 0; - tab->entries_start = tab->entries_bound = 0; - if (tab->bins != NULL) - initialize_bins(tab); -} - -#ifdef HASH_LOG -#ifdef HAVE_UNISTD_H -#include -#endif -static struct { - int all, total, num, str, strcase; -} collision; - -/* Flag switching off output of package statistics at the end of - program. */ -static int init_st = 0; - -/* Output overall number of table searches and collisions into a - temporary file. */ -static void -stat_col(void) -{ - char fname[10+sizeof(long)*3]; - FILE *f; - if (!collision.total) return; - f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w"); - if (f == NULL) - return; - fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total, - ((double)collision.all / (collision.total)) * 100); - fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase); - fclose(f); -} -#endif - -/* Create and return table with TYPE which can hold at least SIZE - entries. The real number of entries which the table can hold is - the nearest power of two for SIZE. */ -st_table * -st_init_table_with_size(const struct st_hash_type *type, st_index_t size) -{ - st_table *tab; - int n; - -#ifdef HASH_LOG -#if HASH_LOG+0 < 0 - { - const char *e = getenv("ST_HASH_LOG"); - if (!e || !*e) init_st = 1; - } -#endif - if (init_st == 0) { - init_st = 1; - atexit(stat_col); - } -#endif - - n = get_power2(size); -#ifndef RUBY - if (n < 0) - return NULL; -#endif - tab = (st_table *) malloc(sizeof (st_table)); -#ifndef RUBY - if (tab == NULL) - return NULL; -#endif - tab->type = type; - tab->entry_power = n; - tab->bin_power = features[n].bin_power; - tab->size_ind = features[n].size_ind; - if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) - tab->bins = NULL; - else { - tab->bins = (st_index_t *) malloc(bins_size(tab)); -#ifndef RUBY - if (tab->bins == NULL) { - free(tab); - return NULL; - } -#endif - } - tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab) - * sizeof(st_table_entry)); -#ifndef RUBY - if (tab->entries == NULL) { - st_free_table(tab); - return NULL; - } -#endif - make_tab_empty(tab); - tab->rebuilds_num = 0; - return tab; -} - -size_t -st_table_size(const struct st_table *tbl) -{ - return tbl->num_entries; -} - -/* Create and return table with TYPE which can hold a minimal number - of entries (see comments for get_power2). */ -st_table * -st_init_table(const struct st_hash_type *type) -{ - return st_init_table_with_size(type, 0); -} - -/* Create and return table which can hold a minimal number of - numbers. */ -st_table * -st_init_numtable(void) -{ - return st_init_table(&type_numhash); -} - -/* Create and return table which can hold SIZE numbers. */ -st_table * -st_init_numtable_with_size(st_index_t size) -{ - return st_init_table_with_size(&type_numhash, size); -} - -/* Create and return table which can hold a minimal number of - strings. */ -st_table * -st_init_strtable(void) -{ - return st_init_table(&type_strhash); -} - -/* Create and return table which can hold SIZE strings. */ -st_table * -st_init_strtable_with_size(st_index_t size) -{ - return st_init_table_with_size(&type_strhash, size); -} - -/* Create and return table which can hold a minimal number of strings - whose character case is ignored. */ -st_table * -st_init_strcasetable(void) -{ - return st_init_table(&type_strcasehash); -} - -/* Create and return table which can hold SIZE strings whose character - case is ignored. */ -st_table * -st_init_strcasetable_with_size(st_index_t size) -{ - return st_init_table_with_size(&type_strcasehash, size); -} - -/* Make table TAB empty. */ -void -st_clear(st_table *tab) -{ - make_tab_empty(tab); - tab->rebuilds_num++; -} - -/* Free table TAB space. */ -void -st_free_table(st_table *tab) -{ - if (tab->bins != NULL) - free(tab->bins); - free(tab->entries); - free(tab); -} - -/* Return byte size of memory allocated for table TAB. */ -size_t -st_memsize(const st_table *tab) -{ - return(sizeof(st_table) - + (tab->bins == NULL ? 0 : bins_size(tab)) - + get_allocated_entries(tab) * sizeof(st_table_entry)); -} - -static st_index_t -find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key); - -static st_index_t -find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key); - -static st_index_t -find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key); - -static st_index_t -find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, - st_data_t key, st_index_t *bin_ind); - -#ifdef HASH_LOG -static void -count_collision(const struct st_hash_type *type) -{ - collision.all++; - if (type == &type_numhash) { - collision.num++; - } - else if (type == &type_strhash) { - collision.strcase++; - } - else if (type == &type_strcasehash) { - collision.str++; - } -} - -#define COLLISION (collision_check ? count_collision(tab->type) : (void)0) -#define FOUND_BIN (collision_check ? collision.total++ : (void)0) -#define collision_check 0 -#else -#define COLLISION -#define FOUND_BIN -#endif - -/* If the number of entries in the table is at least REBUILD_THRESHOLD - times less than the entry array length, decrease the table - size. */ -#define REBUILD_THRESHOLD 4 - -#if REBUILD_THRESHOLD < 2 -#error "REBUILD_THRESHOLD should be >= 2" -#endif - -/* Rebuild table TAB. Rebuilding removes all deleted bins and entries - and can change size of the table entries and bins arrays. - Rebuilding is implemented by creation of a new table or by - compaction of the existing one. */ -static void -rebuild_table(st_table *tab) -{ - st_index_t i, ni; - unsigned int size_ind; - st_table *new_tab; - st_table_entry *new_entries; - st_table_entry *curr_entry_ptr; - st_index_t *bins; - st_index_t bin_ind; - - if ((2 * tab->num_entries <= get_allocated_entries(tab) - && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) - || tab->num_entries < (1 << MINIMAL_POWER2)) { - /* Compaction: */ - tab->num_entries = 0; - if (tab->bins != NULL) - initialize_bins(tab); - new_tab = tab; - new_entries = tab->entries; - } - else { - /* This allocation could trigger GC and compaction. If tab is the - * gen_iv_tbl, then tab could have changed in size due to objects being - * freed and/or moved. Do not store attributes of tab before this line. */ - new_tab = st_init_table_with_size(tab->type, - 2 * tab->num_entries - 1); - new_entries = new_tab->entries; - } - - ni = 0; - bins = new_tab->bins; - size_ind = get_size_ind(new_tab); - st_index_t bound = tab->entries_bound; - st_table_entry *entries = tab->entries; - - for (i = tab->entries_start; i < bound; i++) { - curr_entry_ptr = &entries[i]; - PREFETCH(entries + i + 1, 0); - if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) - continue; - if (&new_entries[ni] != curr_entry_ptr) - new_entries[ni] = *curr_entry_ptr; - if (EXPECT(bins != NULL, 1)) { - bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash, - curr_entry_ptr->key); - set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE); - } - new_tab->num_entries++; - ni++; - } - if (new_tab != tab) { - tab->entry_power = new_tab->entry_power; - tab->bin_power = new_tab->bin_power; - tab->size_ind = new_tab->size_ind; - if (tab->bins != NULL) - free(tab->bins); - tab->bins = new_tab->bins; - free(tab->entries); - tab->entries = new_tab->entries; - free(new_tab); - } - tab->entries_start = 0; - tab->entries_bound = tab->num_entries; - tab->rebuilds_num++; -} - -/* Return the next secondary hash index for table TAB using previous - index IND and PERTURB. Finally modulo of the function becomes a - full *cycle linear congruential generator*, in other words it - guarantees traversing all table bins in extreme case. - - According the Hull-Dobell theorem a generator - "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if - o m and c are relatively prime - o a-1 is divisible by all prime factors of m - o a-1 is divisible by 4 if m is divisible by 4. - - For our case a is 5, c is 1, and m is a power of two. */ -static inline st_index_t -secondary_hash(st_index_t ind, st_table *tab, st_index_t *perturb) -{ - *perturb >>= 11; - ind = (ind << 2) + ind + *perturb + 1; - return hash_bin(ind, tab); -} - -/* Find an entry with HASH_VALUE and KEY in TABLE using a linear - search. Return the index of the found entry in array `entries`. - If it is not found, return UNDEFINED_ENTRY_IND. If the table was - rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ -static inline st_index_t -find_entry(st_table *tab, st_hash_t hash_value, st_data_t key) -{ - int eq_p, rebuilt_p; - st_index_t i, bound; - st_table_entry *entries; - - bound = tab->entries_bound; - entries = tab->entries; - for (i = tab->entries_start; i < bound; i++) { - DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p); - if (EXPECT(rebuilt_p, 0)) - return REBUILT_TABLE_ENTRY_IND; - if (eq_p) - return i; - } - return UNDEFINED_ENTRY_IND; -} - -/* Use the quadratic probing. The method has a better data locality - but more collisions than the current approach. In average it - results in a bit slower search. */ -/*#define QUADRATIC_PROBE*/ - -/* Return index of entry with HASH_VALUE and KEY in table TAB. If - there is no such entry, return UNDEFINED_ENTRY_IND. If the table - was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */ -static st_index_t -find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key) -{ - int eq_p, rebuilt_p; - st_index_t ind; -#ifdef QUADRATIC_PROBE - st_index_t d; -#else - st_index_t perturb; -#endif - st_index_t bin; - st_table_entry *entries = tab->entries; - - ind = hash_bin(hash_value, tab); -#ifdef QUADRATIC_PROBE - d = 1; -#else - perturb = hash_value; -#endif - FOUND_BIN; - for (;;) { - bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); - if (EXPECT(rebuilt_p, 0)) - return REBUILT_TABLE_ENTRY_IND; - if (eq_p) - break; - } - else if (EMPTY_BIN_P(bin)) - return UNDEFINED_ENTRY_IND; -#ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; -#else - ind = secondary_hash(ind, tab, &perturb); -#endif - COLLISION; - } - return bin; -} - -/* Find and return index of table TAB bin corresponding to an entry - with HASH_VALUE and KEY. If there is no such bin, return - UNDEFINED_BIN_IND. If the table was rebuilt during the search, - return REBUILT_TABLE_BIN_IND. */ -static st_index_t -find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key) -{ - int eq_p, rebuilt_p; - st_index_t ind; -#ifdef QUADRATIC_PROBE - st_index_t d; -#else - st_index_t perturb; -#endif - st_index_t bin; - st_table_entry *entries = tab->entries; - - ind = hash_bin(hash_value, tab); -#ifdef QUADRATIC_PROBE - d = 1; -#else - perturb = hash_value; -#endif - FOUND_BIN; - for (;;) { - bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (! EMPTY_OR_DELETED_BIN_P(bin)) { - DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p); - if (EXPECT(rebuilt_p, 0)) - return REBUILT_TABLE_BIN_IND; - if (eq_p) - break; - } - else if (EMPTY_BIN_P(bin)) - return UNDEFINED_BIN_IND; -#ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; -#else - ind = secondary_hash(ind, tab, &perturb); -#endif - COLLISION; - } - return ind; -} - -/* Find and return index of table TAB bin corresponding to an entry - with HASH_VALUE and KEY. The entry should be in the table - already. */ -static st_index_t -find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key) -{ - st_index_t ind; -#ifdef QUADRATIC_PROBE - st_index_t d; -#else - st_index_t perturb; -#endif - st_index_t bin; - - ind = hash_bin(hash_value, tab); -#ifdef QUADRATIC_PROBE - d = 1; -#else - perturb = hash_value; -#endif - FOUND_BIN; - for (;;) { - bin = get_bin(tab->bins, get_size_ind(tab), ind); - if (EMPTY_OR_DELETED_BIN_P(bin)) - return ind; -#ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; -#else - ind = secondary_hash(ind, tab, &perturb); -#endif - COLLISION; - } -} - -/* Return index of table TAB bin for HASH_VALUE and KEY through - BIN_IND and the pointed value as the function result. Reserve the - bin for inclusion of the corresponding entry into the table if it - is not there yet. We always find such bin as bins array length is - bigger entries array. Although we can reuse a deleted bin, the - result bin value is always empty if the table has no entry with - KEY. Return the entries array index of the found entry or - UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt - during the search, return REBUILT_TABLE_ENTRY_IND. */ -static st_index_t -find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value, - st_data_t key, st_index_t *bin_ind) -{ - int eq_p, rebuilt_p; - st_index_t ind; - st_hash_t curr_hash_value = *hash_value; -#ifdef QUADRATIC_PROBE - st_index_t d; -#else - st_index_t perturb; -#endif - st_index_t entry_index; - st_index_t first_deleted_bin_ind; - st_table_entry *entries; - - ind = hash_bin(curr_hash_value, tab); -#ifdef QUADRATIC_PROBE - d = 1; -#else - perturb = curr_hash_value; -#endif - FOUND_BIN; - first_deleted_bin_ind = UNDEFINED_BIN_IND; - entries = tab->entries; - for (;;) { - entry_index = get_bin(tab->bins, get_size_ind(tab), ind); - if (EMPTY_BIN_P(entry_index)) { - tab->num_entries++; - entry_index = UNDEFINED_ENTRY_IND; - if (first_deleted_bin_ind != UNDEFINED_BIN_IND) { - /* We can reuse bin of a deleted entry. */ - ind = first_deleted_bin_ind; - MARK_BIN_EMPTY(tab, ind); - } - break; - } - else if (! DELETED_BIN_P(entry_index)) { - DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p); - if (EXPECT(rebuilt_p, 0)) - return REBUILT_TABLE_ENTRY_IND; - if (eq_p) - break; - } - else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) - first_deleted_bin_ind = ind; -#ifdef QUADRATIC_PROBE - ind = hash_bin(ind + d, tab); - d++; -#else - ind = secondary_hash(ind, tab, &perturb); -#endif - COLLISION; - } - *bin_ind = ind; - return entry_index; -} - -/* Find an entry with KEY in table TAB. Return non-zero if we found - it. Set up *RECORD to the found entry record. */ -int -st_lookup(st_table *tab, st_data_t key, st_data_t *value) -{ - st_index_t bin; - st_hash_t hash = do_hash(key, tab); - - retry: - if (tab->bins == NULL) { - bin = find_entry(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - if (bin == UNDEFINED_ENTRY_IND) - return 0; - } - else { - bin = find_table_entry_ind(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - if (bin == UNDEFINED_ENTRY_IND) - return 0; - bin -= ENTRY_BASE; - } - if (value != 0) - *value = tab->entries[bin].record; - return 1; -} - -/* Find an entry with KEY in table TAB. Return non-zero if we found - it. Set up *RESULT to the found table entry key. */ -int -st_get_key(st_table *tab, st_data_t key, st_data_t *result) -{ - st_index_t bin; - st_hash_t hash = do_hash(key, tab); - - retry: - if (tab->bins == NULL) { - bin = find_entry(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - if (bin == UNDEFINED_ENTRY_IND) - return 0; - } - else { - bin = find_table_entry_ind(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - if (bin == UNDEFINED_ENTRY_IND) - return 0; - bin -= ENTRY_BASE; - } - if (result != 0) - *result = tab->entries[bin].key; - return 1; -} - -/* Check the table and rebuild it if it is necessary. */ -static inline void -rebuild_table_if_necessary (st_table *tab) -{ - st_index_t bound = tab->entries_bound; - - if (bound == get_allocated_entries(tab)) - rebuild_table(tab); -} - -/* Insert (KEY, VALUE) into table TAB and return zero. If there is - already entry with KEY in the table, return nonzero and update - the value of the found entry. */ -int -st_insert(st_table *tab, st_data_t key, st_data_t value) -{ - st_table_entry *entry; - st_index_t bin; - st_index_t ind; - st_hash_t hash_value; - st_index_t bin_ind; - int new_p; - - hash_value = do_hash(key, tab); - retry: - rebuild_table_if_necessary(tab); - if (tab->bins == NULL) { - bin = find_entry(tab, hash_value, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - new_p = bin == UNDEFINED_ENTRY_IND; - if (new_p) - tab->num_entries++; - bin_ind = UNDEFINED_BIN_IND; - } - else { - bin = find_table_bin_ptr_and_reserve(tab, &hash_value, - key, &bin_ind); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - new_p = bin == UNDEFINED_ENTRY_IND; - bin -= ENTRY_BASE; - } - if (new_p) { - ind = tab->entries_bound++; - entry = &tab->entries[ind]; - entry->hash = hash_value; - entry->key = key; - entry->record = value; - if (bin_ind != UNDEFINED_BIN_IND) - set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); - return 0; - } - tab->entries[bin].record = value; - return 1; -} - -/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have - entry with KEY before the insertion. */ -static inline void -st_add_direct_with_hash(st_table *tab, - st_data_t key, st_data_t value, st_hash_t hash) -{ - st_table_entry *entry; - st_index_t ind; - st_index_t bin_ind; - - rebuild_table_if_necessary(tab); - ind = tab->entries_bound++; - entry = &tab->entries[ind]; - entry->hash = hash; - entry->key = key; - entry->record = value; - tab->num_entries++; - if (tab->bins != NULL) { - bin_ind = find_table_bin_ind_direct(tab, hash, key); - set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); - } -} - -/* Insert (KEY, VALUE) into table TAB. The table should not have - entry with KEY before the insertion. */ -void -st_add_direct(st_table *tab, st_data_t key, st_data_t value) -{ - st_hash_t hash_value; - - hash_value = do_hash(key, tab); - st_add_direct_with_hash(tab, key, value, hash_value); -} - -/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If - there is already entry with KEY in the table, return nonzero and - update the value of the found entry. */ -int -st_insert2(st_table *tab, st_data_t key, st_data_t value, - st_data_t (*func)(st_data_t)) -{ - st_table_entry *entry; - st_index_t bin; - st_index_t ind; - st_hash_t hash_value; - st_index_t bin_ind; - int new_p; - - hash_value = do_hash(key, tab); - retry: - rebuild_table_if_necessary (tab); - if (tab->bins == NULL) { - bin = find_entry(tab, hash_value, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - new_p = bin == UNDEFINED_ENTRY_IND; - if (new_p) - tab->num_entries++; - bin_ind = UNDEFINED_BIN_IND; - } - else { - bin = find_table_bin_ptr_and_reserve(tab, &hash_value, - key, &bin_ind); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - new_p = bin == UNDEFINED_ENTRY_IND; - bin -= ENTRY_BASE; - } - if (new_p) { - key = (*func)(key); - ind = tab->entries_bound++; - entry = &tab->entries[ind]; - entry->hash = hash_value; - entry->key = key; - entry->record = value; - if (bin_ind != UNDEFINED_BIN_IND) - set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE); - return 0; - } - tab->entries[bin].record = value; - return 1; -} - -/* Create and return a copy of table OLD_TAB. */ -st_table * -st_copy(st_table *old_tab) -{ - st_table *new_tab; - - new_tab = (st_table *) malloc(sizeof(st_table)); -#ifndef RUBY - if (new_tab == NULL) - return NULL; -#endif - *new_tab = *old_tab; - if (old_tab->bins == NULL) - new_tab->bins = NULL; - else { - new_tab->bins = (st_index_t *) malloc(bins_size(old_tab)); -#ifndef RUBY - if (new_tab->bins == NULL) { - free(new_tab); - return NULL; - } -#endif - } - new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab) - * sizeof(st_table_entry)); -#ifndef RUBY - if (new_tab->entries == NULL) { - st_free_table(new_tab); - return NULL; - } -#endif - MEMCPY(new_tab->entries, old_tab->entries, st_table_entry, - get_allocated_entries(old_tab)); - if (old_tab->bins != NULL) - MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab)); - return new_tab; -} - -/* Update the entries start of table TAB after removing an entry - with index N in the array entries. */ -static inline void -update_range_for_deleted(st_table *tab, st_index_t n) -{ - /* Do not update entries_bound here. Otherwise, we can fill all - bins by deleted entry value before rebuilding the table. */ - if (tab->entries_start == n) { - st_index_t start = n + 1; - st_index_t bound = tab->entries_bound; - st_table_entry *entries = tab->entries; - while (start < bound && DELETED_ENTRY_P(&entries[start])) start++; - tab->entries_start = start; - } -} - -/* Delete entry with KEY from table TAB, set up *VALUE (unless - VALUE is zero) from deleted table entry, and return non-zero. If - there is no entry with KEY in the table, clear *VALUE (unless VALUE - is zero), and return zero. */ -static int -st_general_delete(st_table *tab, st_data_t *key, st_data_t *value) -{ - st_table_entry *entry; - st_index_t bin; - st_index_t bin_ind; - st_hash_t hash; - - hash = do_hash(*key, tab); - retry: - if (tab->bins == NULL) { - bin = find_entry(tab, hash, *key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - if (bin == UNDEFINED_ENTRY_IND) { - if (value != 0) *value = 0; - return 0; - } - } - else { - bin_ind = find_table_bin_ind(tab, hash, *key); - if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) - goto retry; - if (bin_ind == UNDEFINED_BIN_IND) { - if (value != 0) *value = 0; - return 0; - } - bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; - MARK_BIN_DELETED(tab, bin_ind); - } - entry = &tab->entries[bin]; - *key = entry->key; - if (value != 0) *value = entry->record; - MARK_ENTRY_DELETED(entry); - tab->num_entries--; - update_range_for_deleted(tab, bin); - return 1; -} - -int -st_delete(st_table *tab, st_data_t *key, st_data_t *value) -{ - return st_general_delete(tab, key, value); -} - -/* The function and other functions with suffix '_safe' or '_check' - are originated from the previous implementation of the hash tables. - It was necessary for correct deleting entries during traversing - tables. The current implementation permits deletion during - traversing without a specific way to do this. */ -int -st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value, - st_data_t never ATTRIBUTE_UNUSED) -{ - return st_general_delete(tab, key, value); -} - -/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and - return zero. Otherwise, remove the first entry in the table. - Return its key through KEY and its record through VALUE (unless - VALUE is zero). */ -int -st_shift(st_table *tab, st_data_t *key, st_data_t *value) -{ - st_index_t i, bound; - st_index_t bin; - st_table_entry *entries, *curr_entry_ptr; - st_index_t bin_ind; - - entries = tab->entries; - bound = tab->entries_bound; - for (i = tab->entries_start; i < bound; i++) { - curr_entry_ptr = &entries[i]; - if (! DELETED_ENTRY_P(curr_entry_ptr)) { - st_hash_t entry_hash = curr_entry_ptr->hash; - st_data_t entry_key = curr_entry_ptr->key; - - if (value != 0) *value = curr_entry_ptr->record; - *key = entry_key; - retry: - if (tab->bins == NULL) { - bin = find_entry(tab, entry_hash, entry_key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) { - entries = tab->entries; - goto retry; - } - curr_entry_ptr = &entries[bin]; - } - else { - bin_ind = find_table_bin_ind(tab, entry_hash, entry_key); - if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) { - entries = tab->entries; - goto retry; - } - curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind) - - ENTRY_BASE]; - MARK_BIN_DELETED(tab, bin_ind); - } - MARK_ENTRY_DELETED(curr_entry_ptr); - tab->num_entries--; - update_range_for_deleted(tab, i); - return 1; - } - } - if (value != 0) *value = 0; - return 0; -} - -/* See comments for function st_delete_safe. */ -void -st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED, - st_data_t never ATTRIBUTE_UNUSED) -{ -} - -/* Find entry with KEY in table TAB, call FUNC with pointers to copies - of the key and the value of the found entry, and non-zero as the - 3rd argument. If the entry is not found, call FUNC with a pointer - to KEY, a pointer to zero, and a zero argument. If the call - returns ST_CONTINUE, the table will have an entry with key and - value returned by FUNC through the 1st and 2nd parameters. If the - call of FUNC returns ST_DELETE, the table will not have entry with - KEY. The function returns flag of that the entry with KEY was in - the table before the call. */ -int -st_update(st_table *tab, st_data_t key, - st_update_callback_func *func, st_data_t arg) -{ - st_table_entry *entry = NULL; /* to avoid uninitialized value warning */ - st_index_t bin = 0; /* Ditto */ - st_table_entry *entries; - st_index_t bin_ind; - st_data_t value = 0, old_key; - int retval, existing; - st_hash_t hash = do_hash(key, tab); - - retry: - entries = tab->entries; - if (tab->bins == NULL) { - bin = find_entry(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - existing = bin != UNDEFINED_ENTRY_IND; - entry = &entries[bin]; - bin_ind = UNDEFINED_BIN_IND; - } - else { - bin_ind = find_table_bin_ind(tab, hash, key); - if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) - goto retry; - existing = bin_ind != UNDEFINED_BIN_IND; - if (existing) { - bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; - entry = &entries[bin]; - } - } - if (existing) { - key = entry->key; - value = entry->record; - } - old_key = key; - retval = (*func)(&key, &value, arg, existing); - switch (retval) { - case ST_CONTINUE: - if (! existing) { - st_add_direct_with_hash(tab, key, value, hash); - break; - } - if (old_key != key) { - entry->key = key; - } - entry->record = value; - break; - case ST_DELETE: - if (existing) { - if (bin_ind != UNDEFINED_BIN_IND) - MARK_BIN_DELETED(tab, bin_ind); - MARK_ENTRY_DELETED(entry); - tab->num_entries--; - update_range_for_deleted(tab, bin); - } - break; - } - return existing; -} - -/* Traverse all entries in table TAB calling FUNC with current entry - key and value and zero. If the call returns ST_STOP, stop - traversing. If the call returns ST_DELETE, delete the current - entry from the table. In case of ST_CHECK or ST_CONTINUE, continue - traversing. The function returns zero unless an error is found. - CHECK_P is flag of st_foreach_check call. The behavior is a bit - different for ST_CHECK and when the current element is removed - during traversing. */ -static inline int -st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg, - int check_p) -{ - st_index_t bin; - st_index_t bin_ind; - st_table_entry *entries, *curr_entry_ptr; - enum st_retval retval; - st_index_t i, rebuilds_num; - st_hash_t hash; - st_data_t key; - int error_p, packed_p = tab->bins == NULL; - - entries = tab->entries; - /* The bound can change inside the loop even without rebuilding - the table, e.g. by an entry insertion. */ - for (i = tab->entries_start; i < tab->entries_bound; i++) { - curr_entry_ptr = &entries[i]; - if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0)) - continue; - key = curr_entry_ptr->key; - rebuilds_num = tab->rebuilds_num; - hash = curr_entry_ptr->hash; - retval = (*func)(key, curr_entry_ptr->record, arg, 0); - - if (retval == ST_REPLACE && replace) { - st_data_t value; - value = curr_entry_ptr->record; - retval = (*replace)(&key, &value, arg, TRUE); - curr_entry_ptr->key = key; - curr_entry_ptr->record = value; - } - - if (rebuilds_num != tab->rebuilds_num) { - retry: - entries = tab->entries; - packed_p = tab->bins == NULL; - if (packed_p) { - i = find_entry(tab, hash, key); - if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - error_p = i == UNDEFINED_ENTRY_IND; - } - else { - i = find_table_entry_ind(tab, hash, key); - if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0)) - goto retry; - error_p = i == UNDEFINED_ENTRY_IND; - i -= ENTRY_BASE; - } - if (error_p && check_p) { - /* call func with error notice */ - retval = (*func)(0, 0, arg, 1); - return 1; - } - curr_entry_ptr = &entries[i]; - } - switch (retval) { - case ST_REPLACE: - break; - case ST_CONTINUE: - break; - case ST_CHECK: - if (check_p) - break; - case ST_STOP: - return 0; - case ST_DELETE: { - st_data_t key = curr_entry_ptr->key; - - again: - if (packed_p) { - bin = find_entry(tab, hash, key); - if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) - goto again; - if (bin == UNDEFINED_ENTRY_IND) - break; - } - else { - bin_ind = find_table_bin_ind(tab, hash, key); - if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) - goto again; - if (bin_ind == UNDEFINED_BIN_IND) - break; - bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE; - MARK_BIN_DELETED(tab, bin_ind); - } - curr_entry_ptr = &entries[bin]; - MARK_ENTRY_DELETED(curr_entry_ptr); - tab->num_entries--; - update_range_for_deleted(tab, bin); - break; - } - } - } - return 0; -} - -int -st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg) -{ - return st_general_foreach(tab, func, replace, arg, TRUE); -} - -struct functor { - st_foreach_callback_func *func; - st_data_t arg; -}; - -static int -apply_functor(st_data_t k, st_data_t v, st_data_t d, int _) -{ - const struct functor *f = (void *)d; - return f->func(k, v, f->arg); -} - -int -st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg) -{ - const struct functor f = { func, arg }; - return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE); -} - -/* See comments for function st_delete_safe. */ -int -st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg, - st_data_t never ATTRIBUTE_UNUSED) -{ - return st_general_foreach(tab, func, 0, arg, TRUE); -} - -/* Set up array KEYS by at most SIZE keys of head table TAB entries. - Return the number of keys set up in array KEYS. */ -static inline st_index_t -st_general_keys(st_table *tab, st_data_t *keys, st_index_t size) -{ - st_index_t i, bound; - st_data_t key, *keys_start, *keys_end; - st_table_entry *curr_entry_ptr, *entries = tab->entries; - - bound = tab->entries_bound; - keys_start = keys; - keys_end = keys + size; - for (i = tab->entries_start; i < bound; i++) { - if (keys == keys_end) - break; - curr_entry_ptr = &entries[i]; - key = curr_entry_ptr->key; - if (! DELETED_ENTRY_P(curr_entry_ptr)) - *keys++ = key; - } - - return keys - keys_start; -} - -st_index_t -st_keys(st_table *tab, st_data_t *keys, st_index_t size) -{ - return st_general_keys(tab, keys, size); -} - -/* See comments for function st_delete_safe. */ -st_index_t -st_keys_check(st_table *tab, st_data_t *keys, st_index_t size, - st_data_t never ATTRIBUTE_UNUSED) -{ - return st_general_keys(tab, keys, size); -} - -/* Set up array VALUES by at most SIZE values of head table TAB - entries. Return the number of values set up in array VALUES. */ -static inline st_index_t -st_general_values(st_table *tab, st_data_t *values, st_index_t size) -{ - st_index_t i, bound; - st_data_t *values_start, *values_end; - st_table_entry *curr_entry_ptr, *entries = tab->entries; - - values_start = values; - values_end = values + size; - bound = tab->entries_bound; - for (i = tab->entries_start; i < bound; i++) { - if (values == values_end) - break; - curr_entry_ptr = &entries[i]; - if (! DELETED_ENTRY_P(curr_entry_ptr)) - *values++ = curr_entry_ptr->record; - } - - return values - values_start; -} - -st_index_t -st_values(st_table *tab, st_data_t *values, st_index_t size) -{ - return st_general_values(tab, values, size); -} - -/* See comments for function st_delete_safe. */ -st_index_t -st_values_check(st_table *tab, st_data_t *values, st_index_t size, - st_data_t never ATTRIBUTE_UNUSED) -{ - return st_general_values(tab, values, size); -} - -#define FNV1_32A_INIT 0x811c9dc5 - -/* - * 32 bit magic FNV-1a prime - */ -#define FNV_32_PRIME 0x01000193 - -/* __POWERPC__ added to accommodate Darwin case. */ -#ifndef UNALIGNED_WORD_ACCESS -# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \ - defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \ - defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \ - defined(__mc68020__) -# define UNALIGNED_WORD_ACCESS 1 -# endif -#endif -#ifndef UNALIGNED_WORD_ACCESS -# define UNALIGNED_WORD_ACCESS 0 -#endif - -/* This hash function is quite simplified MurmurHash3 - * Simplification is legal, cause most of magic still happens in finalizator. - * And finalizator is almost the same as in MurmurHash3 */ -#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y)) -#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n))) - -#if ST_INDEX_BITS <= 32 -#define C1 (st_index_t)0xcc9e2d51 -#define C2 (st_index_t)0x1b873593 -#else -#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5); -#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f); -#endif - -#if defined(NO_SANITIZE) && RBIMPL_COMPILER_IS(GCC) -/* GCC warns about unknown sanitizer, which is annoying. */ -# include "internal/warnings.h" -# undef NO_SANITIZE -# define NO_SANITIZE(x, y) \ - COMPILER_WARNING_PUSH; \ - COMPILER_WARNING_IGNORED(-Wattributes); \ - __attribute__((__no_sanitize__(x))) y; \ - COMPILER_WARNING_POP -#endif - #ifndef NO_SANITIZE # define NO_SANITIZE(x, y) y #endif -NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k)); -NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h)); -NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h)); - -static inline st_index_t -murmur_step(st_index_t h, st_index_t k) -{ -#if ST_INDEX_BITS <= 32 -#define r1 (17) -#define r2 (11) -#else -#define r1 (33) -#define r2 (24) -#endif - k *= C1; - h ^= ROTL(k, r1); - h *= C2; - h = ROTL(h, r2); - return h; -} -#undef r1 -#undef r2 - -static inline st_index_t -murmur_finish(st_index_t h) -{ -#if ST_INDEX_BITS <= 32 -#define r1 (16) -#define r2 (13) -#define r3 (16) - const st_index_t c1 = 0x85ebca6b; - const st_index_t c2 = 0xc2b2ae35; -#else -/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */ -#define r1 (30) -#define r2 (27) -#define r3 (31) - const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9); - const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb); -#endif -#if ST_INDEX_BITS > 64 - h ^= h >> 64; - h *= c2; - h ^= h >> 65; -#endif - h ^= h >> r1; - h *= c1; - h ^= h >> r2; - h *= c2; - h ^= h >> r3; - return h; -} -#undef r1 -#undef r2 -#undef r3 - -st_index_t -st_hash(const void *ptr, size_t len, st_index_t h) -{ - const char *data = ptr; - st_index_t t = 0; - size_t l = len; - -#define data_at(n) (st_index_t)((unsigned char)data[(n)]) -#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0) -#if SIZEOF_ST_INDEX_T > 4 -#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4 -#if SIZEOF_ST_INDEX_T > 8 -#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \ - UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8 -#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16 -#endif -#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8 -#else -#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4 -#endif -#undef SKIP_TAIL - if (len >= sizeof(st_index_t)) { -#if !UNALIGNED_WORD_ACCESS - int align = (int)((st_data_t)data % sizeof(st_index_t)); - if (align) { - st_index_t d = 0; - int sl, sr, pack; - - switch (align) { -#ifdef WORDS_BIGENDIAN -# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ - t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2) -#else -# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \ - t |= data_at(n) << CHAR_BIT*(n) -#endif - UNALIGNED_ADD_ALL; -#undef UNALIGNED_ADD - } - -#ifdef WORDS_BIGENDIAN - t >>= (CHAR_BIT * align) - CHAR_BIT; -#else - t <<= (CHAR_BIT * align); -#endif - - data += sizeof(st_index_t)-align; - len -= sizeof(st_index_t)-align; - - sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align); - sr = CHAR_BIT * align; - - while (len >= sizeof(st_index_t)) { - d = *(st_index_t *)data; -#ifdef WORDS_BIGENDIAN - t = (t << sr) | (d >> sl); -#else - t = (t >> sr) | (d << sl); -#endif - h = murmur_step(h, t); - t = d; - data += sizeof(st_index_t); - len -= sizeof(st_index_t); - } - - pack = len < (size_t)align ? (int)len : align; - d = 0; - switch (pack) { -#ifdef WORDS_BIGENDIAN -# define UNALIGNED_ADD(n) case (n) + 1: \ - d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) -#else -# define UNALIGNED_ADD(n) case (n) + 1: \ - d |= data_at(n) << CHAR_BIT*(n) -#endif - UNALIGNED_ADD_ALL; -#undef UNALIGNED_ADD - } -#ifdef WORDS_BIGENDIAN - t = (t << sr) | (d >> sl); -#else - t = (t >> sr) | (d << sl); -#endif - - if (len < (size_t)align) goto skip_tail; -# define SKIP_TAIL 1 - h = murmur_step(h, t); - data += pack; - len -= pack; - } - else -#endif -#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED -#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t)) -#else -#define aligned_data data -#endif - { - do { - h = murmur_step(h, *(st_index_t *)aligned_data); - data += sizeof(st_index_t); - len -= sizeof(st_index_t); - } while (len >= sizeof(st_index_t)); - } - } - - t = 0; - switch (len) { -#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8 - /* in this case byteorder doesn't really matter */ -#if SIZEOF_ST_INDEX_T > 4 - case 7: t |= data_at(6) << 48; - case 6: t |= data_at(5) << 40; - case 5: t |= data_at(4) << 32; - case 4: - t |= (st_index_t)*(uint32_t*)aligned_data; - goto skip_tail; -# define SKIP_TAIL 1 -#endif - case 3: t |= data_at(2) << 16; - case 2: t |= data_at(1) << 8; - case 1: t |= data_at(0); -#else -#ifdef WORDS_BIGENDIAN -# define UNALIGNED_ADD(n) case (n) + 1: \ - t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1) -#else -# define UNALIGNED_ADD(n) case (n) + 1: \ - t |= data_at(n) << CHAR_BIT*(n) -#endif - UNALIGNED_ADD_ALL; -#undef UNALIGNED_ADD -#endif -#ifdef SKIP_TAIL - skip_tail: -#endif - h ^= t; h -= ROTL(t, 7); - h *= C2; - } - h ^= l; -#undef aligned_data - - return murmur_finish(h); -} - -st_index_t -st_hash_uint32(st_index_t h, uint32_t i) -{ - return murmur_step(h, i); -} - -NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i)); -st_index_t -st_hash_uint(st_index_t h, st_index_t i) -{ - i += h; -/* no matter if it is BigEndian or LittleEndian, - * we hash just integers */ -#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8 - h = murmur_step(h, i >> 8*8); -#endif - h = murmur_step(h, i); - return h; -} - -st_index_t -st_hash_end(st_index_t h) -{ - h = murmur_finish(h); - return h; -} - -#undef st_hash_start -st_index_t -rb_st_hash_start(st_index_t h) -{ - return h; -} - -static st_index_t -strhash(st_data_t arg) -{ - register const char *string = (const char *)arg; - return st_hash(string, strlen(string), FNV1_32A_INIT); -} - -int -st_locale_insensitive_strcasecmp(const char *s1, const char *s2) -{ - char c1, c2; - - while (1) { - c1 = *s1++; - c2 = *s2++; - if (c1 == '\0' || c2 == '\0') { - if (c1 != '\0') return 1; - if (c2 != '\0') return -1; - return 0; - } - if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A'; - if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A'; - if (c1 != c2) { - if (c1 > c2) - return 1; - else - return -1; - } - } -} - -int -st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n) -{ - char c1, c2; - size_t i; - - for (i = 0; i < n; i++) { - c1 = *s1++; - c2 = *s2++; - if (c1 == '\0' || c2 == '\0') { - if (c1 != '\0') return 1; - if (c2 != '\0') return -1; - return 0; - } - if (('A' <= c1) && (c1 <= 'Z')) c1 += 'a' - 'A'; - if (('A' <= c2) && (c2 <= 'Z')) c2 += 'a' - 'A'; - if (c1 != c2) { - if (c1 > c2) - return 1; - else - return -1; - } - } - return 0; -} - -static int -st_strcmp(st_data_t lhs, st_data_t rhs) -{ - const char *s1 = (char *)lhs; - const char *s2 = (char *)rhs; - return strcmp(s1, s2); -} - -static int -st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs) -{ - const char *s1 = (char *)lhs; - const char *s2 = (char *)rhs; - return st_locale_insensitive_strcasecmp(s1, s2); -} - -NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t))); -static st_index_t -strcasehash(st_data_t arg) -{ - register const char *string = (const char *)arg; - register st_index_t hval = FNV1_32A_INIT; - - /* - * FNV-1a hash each octet in the buffer - */ - while (*string) { - unsigned int c = (unsigned char)*string++; - if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A'; - hval ^= c; - - /* multiply by the 32 bit FNV magic prime mod 2^32 */ - hval *= FNV_32_PRIME; - } - return hval; -} - -int -st_numcmp(st_data_t x, st_data_t y) -{ - return x != y; -} - -st_index_t -st_numhash(st_data_t n) -{ - enum {s1 = 11, s2 = 3}; - return (st_index_t)((n>>s1|(n<>s2)); -} +#include "st.c" diff --git a/parser_st.h b/parser_st.h index 6f722203801aa2..85db3b58479494 100644 --- a/parser_st.h +++ b/parser_st.h @@ -105,6 +105,7 @@ enum parser_st_retval {ST2_CONTINUE, ST2_STOP, ST2_DELETE, ST2_CHECK, ST2_REPLAC size_t rb_parser_st_table_size(const struct parser_st_table *tbl); parser_st_table *rb_parser_st_init_table(const struct parser_st_hash_type *); parser_st_table *rb_parser_st_init_table_with_size(const struct parser_st_hash_type *, parser_st_index_t); +parser_st_table *rb_parser_st_init_existing_table_with_size(parser_st_table *, const struct parser_st_hash_type *, parser_st_index_t); parser_st_table *rb_parser_st_init_numtable(void); parser_st_table *rb_parser_st_init_numtable_with_size(parser_st_index_t); parser_st_table *rb_parser_st_init_strtable(void); diff --git a/st.c b/st.c index 2fc600760cbfa5..7d44171e9740df 100644 --- a/st.c +++ b/st.c @@ -103,7 +103,7 @@ #ifdef NOT_RUBY #include "regint.h" #include "st.h" -#else +#elif defined RUBY_EXPORT #include "internal.h" #include "internal/bits.h" #include "internal/hash.h"