Skip to content

Commit

Permalink
add a hashtable implementation that supports O(1) removal
Browse files Browse the repository at this point in the history
The existing hashtable implementation (in hash.[ch]) uses open addressing
(i.e. resolve hash collisions by distributing entries across the table).
Thus, removal is difficult to implement with less than O(n) complexity.
Resolving collisions of entries with identical hashes (e.g. via chaining)
is left to the client code.

Add a hashtable implementation that supports O(1) removal and is slightly
easier to use due to builtin entry chaining.

Supports all basic operations init, free, get, put, remove and iteration.
Also includes ready-to-use hash functions based on the public domain FNV-1
algorithm (http://www.isthe.com/chongo/tech/comp/fnv).

The per-entry data structure (hashmap_entry) is meant to be piggybacked
onto the client's data structure to save memory. See test-hashmap.c for
usage examples.

The hashtable is resized by a factor of four when 80% full. With these
settings, average memory consumption is about 2/3 of hash.[ch], and
insertion is twice as fast (due to less frequent resizing).

Signed-off-by: Karsten Blees <blees@dcon.de>
  • Loading branch information
kblees committed Oct 24, 2012
1 parent 52d3a75 commit f7eb85c
Show file tree
Hide file tree
Showing 5 changed files with 804 additions and 0 deletions.
3 changes: 3 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ TEST_PROGRAMS_NEED_X += test-date
TEST_PROGRAMS_NEED_X += test-delta
TEST_PROGRAMS_NEED_X += test-dump-cache-tree
TEST_PROGRAMS_NEED_X += test-genrandom
TEST_PROGRAMS_NEED_X += test-hashmap
TEST_PROGRAMS_NEED_X += test-index-version
TEST_PROGRAMS_NEED_X += test-line-buffer
TEST_PROGRAMS_NEED_X += test-match-trees
Expand Down Expand Up @@ -644,6 +645,7 @@ LIB_H += gpg-interface.h
LIB_H += graph.h
LIB_H += grep.h
LIB_H += hash.h
LIB_H += hashmap.h
LIB_H += help.h
LIB_H += http.h
LIB_H += kwset.h
Expand Down Expand Up @@ -755,6 +757,7 @@ LIB_OBJS += gpg-interface.o
LIB_OBJS += graph.o
LIB_OBJS += grep.o
LIB_OBJS += hash.o
LIB_OBJS += hashmap.o
LIB_OBJS += help.o
LIB_OBJS += hex.o
LIB_OBJS += ident.o
Expand Down
209 changes: 209 additions & 0 deletions hashmap.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,209 @@
/*
* Generic implementation of hash-based key value mappings.
*/
#include "cache.h"
#include "hashmap.h"

/* FNV-1 offset basis and prime for the 32-bit variant of the algorithm. */
#define FNV32_BASE ((unsigned int) 0x811c9dc5)
#define FNV32_PRIME ((unsigned int) 0x01000193)

/*
 * One FNV-1 round: multiply the running hash by the FNV prime, then xor in
 * the next input byte (FNV-1a would xor first and multiply afterwards).
 */
static inline unsigned int fnv32_step(unsigned int hash, unsigned int c)
{
	return (hash * FNV32_PRIME) ^ c;
}

/*
 * Maps the ASCII letters 'a'..'z' to 'A'..'Z' and returns every other value
 * unchanged. No locale or multi-byte handling is involved.
 */
static inline unsigned int fnv32_fold_case(unsigned int c)
{
	return (c >= 'a' && c <= 'z') ? c - ('a' - 'A') : c;
}

/*
 * Case-sensitive FNV-1 hash of a 0-terminated string.
 * str: the string, must not be NULL
 * returns hash code (FNV32_BASE for the empty string)
 */
unsigned int strhash(const char *str)
{
	/* read bytes as unsigned so values >= 0x80 are not sign-extended */
	const unsigned char *p = (const unsigned char *) str;
	unsigned int hash = FNV32_BASE;

	for (; *p; p++)
		hash = fnv32_step(hash, *p);
	return hash;
}

/*
 * Case-insensitive FNV-1 hash of a 0-terminated string. Only the ASCII
 * letters a-z are folded, so strihash("abc") == strhash("ABC").
 * str: the string, must not be NULL
 * returns hash code
 */
unsigned int strihash(const char *str)
{
	const unsigned char *p = (const unsigned char *) str;
	unsigned int hash = FNV32_BASE;

	for (; *p; p++)
		hash = fnv32_step(hash, fnv32_fold_case(*p));
	return hash;
}

/*
 * Case-sensitive FNV-1 hash of a memory block. All len bytes are hashed,
 * including embedded 0 bytes.
 * buf: start of the memory block (not dereferenced if len is 0)
 * len: length of the memory block in bytes
 * returns hash code (FNV32_BASE if len is 0)
 */
unsigned int memhash(const void *buf, size_t len)
{
	/* keep the const qualifier: the buffer is only ever read */
	const unsigned char *p = buf;
	unsigned int hash = FNV32_BASE;

	for (; len; len--, p++)
		hash = fnv32_step(hash, *p);
	return hash;
}

/*
 * Case-insensitive FNV-1 hash of a memory block. Only the ASCII letters a-z
 * are folded; all len bytes are hashed, including embedded 0 bytes.
 * buf: start of the memory block (not dereferenced if len is 0)
 * len: length of the memory block in bytes
 * returns hash code (FNV32_BASE if len is 0)
 */
unsigned int memihash(const void *buf, size_t len)
{
	const unsigned char *p = buf;
	unsigned int hash = FNV32_BASE;

	for (; len; len--, p++)
		hash = fnv32_step(hash, fnv32_fold_case(*p));
	return hash;
}

/*
 * Number of buckets in the smallest table. bucket() masks the hash with
 * tablesize - 1, so table sizes have to be powers of two.
 */
#define HASHMAP_INITIAL_SIZE 64
/*
 * Shift count applied to tablesize on every resize, i.e. the table grows
 * or shrinks by a factor of 2^2 = 4.
 */
#define HASHMAP_GROW 2
/*
 * Grow when size * 1.25 > tablesize, i.e. the table is more than 80% full;
 * after growing by a factor of 4 it is about 20% full.
 */
#define HASHMAP_GROW_AT 1.25
/*
 * Shrink when size * 6 < tablesize, i.e. the table is less than 16.6% full;
 * after shrinking by a factor of 4 it is at most about 66.6% full.
 */
#define HASHMAP_SHRINK_AT 6

/*
 * Tells whether e1 and e2 denote the same key. Identical pointers always
 * match; otherwise the hash codes must agree and the map's comparison
 * function must return 0. Returns 1 on a match, 0 otherwise.
 */
static inline int entry_equals(const hashmap *map, const hashmap_entry *e1,
		const hashmap_entry *e2)
{
	if (e1 == e2)
		return 1;
	/* cheap rejection before calling into user code */
	if (e1->hash != e2->hash)
		return 0;
	return !map->cmpfn(e1, e2);
}

/*
 * Returns the index of the bucket that key belongs to: the hash code
 * masked with tablesize - 1.
 */
static inline unsigned int bucket(const hashmap *map, const hashmap_entry *key)
{
	const unsigned int mask = map->tablesize - 1;

	return key->hash & mask;
}

/*
 * Replaces the bucket array of map with a zeroed one of newsize buckets and
 * moves every entry from the old array into it. Entries are pushed onto the
 * front of their new chain. The old array is freed; the entries themselves
 * are only relinked.
 */
static void rehash(hashmap *map, unsigned int newsize)
{
	hashmap_entry **old_buckets = map->table;
	const unsigned int old_count = map->tablesize;
	unsigned int pos;

	/* install the new table first so that bucket() uses the new size */
	map->tablesize = newsize;
	map->table = xcalloc(sizeof(hashmap_entry*), map->tablesize);

	for (pos = 0; pos < old_count; pos++) {
		hashmap_entry *cur, *following;

		for (cur = old_buckets[pos]; cur; cur = following) {
			unsigned int dest = bucket(map, cur);

			/* remember the successor before relinking cur */
			following = cur->next;
			cur->next = map->table[dest];
			map->table[dest] = cur;
		}
	}
	free(old_buckets);
}

/*
 * Initializes a hashmap structure and allocates its (zeroed) bucket array.
 * map: hashmap to initialize
 * equals_function: function to test equality of hashmap entries
 * initial_size: expected number of entries, or 0 if unknown
 *
 * The table starts at HASHMAP_INITIAL_SIZE buckets and is enlarged in steps
 * of 2^HASHMAP_GROW until initial_size entries fit below the grow
 * threshold, or until another step would no longer fit in tablesize.
 */
void hashmap_init(hashmap *map, hashmap_cmp_fn equals_function,
		size_t initial_size)
{
	/*
	 * Largest tablesize that can be shifted left by HASHMAP_GROW without
	 * wrapping around. Without this bound a very large size hint makes
	 * tablesize wrap to 0, and the loop below never terminates.
	 */
	const unsigned int max_shiftable = ((unsigned int) -1) >> HASHMAP_GROW;

	map->size = 0;
	map->cmpfn = equals_function;

	/* calculate initial table size and allocate the table */
	map->tablesize = HASHMAP_INITIAL_SIZE;
	initial_size *= HASHMAP_GROW_AT;
	while (initial_size > map->tablesize &&
	       map->tablesize <= max_shiftable)
		map->tablesize <<= HASHMAP_GROW;
	map->table = xcalloc(sizeof(hashmap_entry*), map->tablesize);
}

/*
 * Releases the bucket array of map and resets the structure to all zeroes.
 * map: hashmap to free; nothing happens if it is NULL or has no table
 * free_function: if non-NULL, called once for every entry in the map
 *                before the table is released
 */
void hashmap_free(hashmap *map, hashmap_free_fn free_function)
{
	if (!map || !map->table)
		return;

	if (free_function) {
		hashmap_iter it;
		hashmap_entry *victim;

		/*
		 * The iterator has already stored the successor when it hands
		 * out an entry, so the callback may release that entry.
		 */
		hashmap_iter_init(map, &it);
		for (victim = hashmap_iter_next(&it); victim;
		     victim = hashmap_iter_next(&it))
			free_function(victim);
	}

	free(map->table);
	memset(map, 0, sizeof(*map));
}

/*
 * Looks up the entry matching key.
 * map: the hashmap
 * key: entry whose hash code selects the bucket and which is compared
 *      against the entries in that bucket's chain
 * returns the first matching entry in the chain, or NULL if there is none
 */
hashmap_entry *hashmap_get(const hashmap *map, const hashmap_entry *key)
{
	hashmap_entry *cur;

	for (cur = map->table[bucket(map, key)]; cur; cur = cur->next)
		if (entry_equals(map, cur, key))
			break;
	/* cur is NULL here when the chain was exhausted without a match */
	return cur;
}

/*
 * Adds entry to map, or replaces the entry that currently matches it.
 * map: the hashmap
 * entry: the entry to add; its hash code must already be set
 * returns NULL if no matching entry existed, otherwise the matching entry
 * (which is entry itself if that very object was already in the map)
 */
hashmap_entry *hashmap_put(hashmap *map, hashmap_entry *entry)
{
	hashmap_entry **head = &map->table[bucket(map, entry)];
	hashmap_entry **link = head;
	hashmap_entry *old;

	/* follow the chain; link ends up pointing at the match or at a NULL */
	while (*link && !entry_equals(map, *link, entry))
		link = &(*link)->next;
	old = *link;

	if (!old) {
		/* no match: push the new entry onto the front of the chain */
		entry->next = *head;
		*head = entry;

		/* fix size and rehash if appropriate */
		map->size++;
		if (map->size * HASHMAP_GROW_AT > map->tablesize)
			rehash(map, map->tablesize << HASHMAP_GROW);
		return NULL;
	}

	if (old != entry) {
		/* splice the new entry in where the old one was linked */
		*link = entry;
		entry->next = old->next;
		old->next = NULL;
	}
	return old;
}

/*
 * Unlinks the entry matching key from map.
 * map: the hashmap
 * key: key of the entry to remove
 * returns the removed entry with its chain pointer cleared, or NULL if no
 * entry matched; the entry itself is not freed
 */
hashmap_entry *hashmap_remove(hashmap *map, const hashmap_entry *key)
{
	hashmap_entry **link = &map->table[bucket(map, key)];
	hashmap_entry *found;

	/* follow the chain; link ends up pointing at the match or at a NULL */
	while (*link && !entry_equals(map, *link, key))
		link = &(*link)->next;

	found = *link;
	if (!found)
		return NULL;

	/* bypass the entry, whether it sat at the chain head or further in */
	*link = found->next;
	found->next = NULL;

	/* fix size and rehash if appropriate; never shrink below the minimum */
	map->size--;
	if (map->tablesize > HASHMAP_INITIAL_SIZE &&
	    map->size * HASHMAP_SHRINK_AT < map->tablesize)
		rehash(map, map->tablesize >> HASHMAP_GROW);
	return found;
}

/*
 * Positions iter before the first bucket of map: no pending chain entry,
 * and bucket 0 is the next one to be examined.
 */
void hashmap_iter_init(hashmap *map, hashmap_iter *iter)
{
	iter->next = NULL;
	iter->tablepos = 0;
	iter->map = map;
}

/*
 * Advances the iterator.
 * iter: hashmap iterator
 * returns the next entry, or NULL once every bucket has been visited
 */
hashmap_entry *hashmap_iter_next(hashmap_iter *iter)
{
	hashmap_entry *e = iter->next;

	/* current chain exhausted: scan forward to the next non-empty bucket */
	while (!e) {
		if (iter->tablepos >= iter->map->tablesize)
			return NULL;
		e = iter->map->table[iter->tablepos++];
	}

	/* store the successor now, before the caller gets hold of e */
	iter->next = e->next;
	return e;
}
161 changes: 161 additions & 0 deletions hashmap.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
#ifndef HASHMAP_H
#define HASHMAP_H

/*
* Generic implementation of hash-based key value mappings.
* Supports basic operations get, put, remove and iteration.
*
* Also contains a set of ready-to-use hash functions for strings, using the
* FNV-1 algorithm (see http://www.isthe.com/chongo/tech/comp/fnv).
*/

/*
 * Case-sensitive FNV-1 hash of 0-terminated string.
 * buf: the 0-terminated string to hash
 * returns hash code
 */
extern unsigned int strhash(const char *buf);

/*
 * Case-insensitive FNV-1 hash of 0-terminated string. Only the ASCII
 * letters a-z are folded to upper case before hashing.
 * buf: the 0-terminated string to hash
 * returns hash code
 */
extern unsigned int strihash(const char *buf);

/*
 * Case-sensitive FNV-1 hash of a memory block. Every one of the len bytes
 * is hashed, including 0 bytes.
 * buf: start of the memory block
 * len: length of the memory block in bytes
 * returns hash code
 */
extern unsigned int memhash(const void *buf, size_t len);

/*
 * Case-insensitive FNV-1 hash of a memory block. Only the ASCII letters
 * a-z are folded to upper case before hashing.
 * buf: start of the memory block
 * len: length of the memory block in bytes
 * returns hash code
 */
extern unsigned int memihash(const void *buf, size_t len);

/*
 * Hashmap entry data structure, intended to be used as first member of user
 * data structures. Consists of a pointer and an int. Ideally it should be
 * followed by an int-sized member to prevent unused memory on 64-bit systems
 * due to alignment.
 */
typedef struct hashmap_entry {
/* next entry in the same bucket's chain, or NULL at the end of the chain */
struct hashmap_entry *next;
/* hash code of the entry; selects the bucket and pre-filters comparisons */
unsigned int hash;
} hashmap_entry;

/*
 * User-supplied function to test two hashmap entries for equality, shall
 * return 0 if the entries are equal. This function is always called with
 * non-NULL parameters that have the same hash code. It is not called when
 * both parameters would be the same pointer; such entries are treated as
 * equal without consulting it.
 */
typedef int (*hashmap_cmp_fn)(const hashmap_entry*, const hashmap_entry*);

/*
 * User-supplied function to free a hashmap entry. hashmap_free() calls it
 * once for every entry that is still in the map.
 */
typedef void (*hashmap_free_fn)(const hashmap_entry*);

/*
 * Hashmap data structure, use with hashmap_* functions.
 */
typedef struct hashmap {
/* array of tablesize bucket heads; each is a chain of entries or NULL */
hashmap_entry **table;
/* equality function supplied to hashmap_init() */
hashmap_cmp_fn cmpfn;
/* size: number of entries in the map; tablesize: number of buckets */
unsigned int size, tablesize;
} hashmap;

/*
 * Hashmap iterator data structure, use with hashmap_iter_* functions.
 */
typedef struct hashmap_iter {
/* the map being iterated */
hashmap *map;
/* entry to hand out next from the current chain, or NULL if exhausted */
hashmap_entry *next;
/* index of the next bucket to examine */
unsigned int tablepos;
} hashmap_iter;

/*
 * Initializes a hashmap_entry structure.
 * entry: pointer to the entry to initialize
 * hash: hash code of the entry, e.g. as returned by strhash() or memhash()
 *
 * The hash code is taken as unsigned int, the type of the hash field and of
 * the hash functions' return values, so that no value has to pass through
 * a signed int on the way in. The chain pointer is reset to NULL; the entry
 * is not added to any map.
 */
static inline void hashmap_entry_init(hashmap_entry *entry, unsigned int hash)
{
	entry->hash = hash;
	entry->next = NULL;
}

/*
 * Initializes a hashmap structure and allocates its bucket table.
 * map: hashmap to initialize
 * equals_function: function to test equality of hashmap entries
 * initial_size: expected number of entries, or 0 if unknown; the table is
 *               sized up front so that this many entries stay below the
 *               grow threshold
 */
extern void hashmap_init(hashmap *map, hashmap_cmp_fn equals_function,
size_t initial_size);

/*
 * Frees a hashmap structure and allocated memory. Afterwards the structure
 * is zeroed. Does nothing if map is NULL or has no table.
 * map: hashmap to free
 * free_function: optional function to free the hashmap entries; if NULL,
 *                the entries are left untouched
 */
extern void hashmap_free(hashmap *map, hashmap_free_fn free_function);

/*
 * Returns the hashmap entry for the specified key, or NULL if not found.
 * map: the hashmap
 * key: key of the entry to look up; its hash code must be initialized
 * returns matching hashmap entry, or NULL if not found
 */
extern hashmap_entry *hashmap_get(const hashmap *map, const hashmap_entry *key);

/*
 * Adds or replaces a hashmap entry.
 * map: the hashmap
 * entry: the entry to add or replace
 * returns previous entry, or NULL if the entry is new. If the very same
 * entry object is already in the map, the map is left unchanged and that
 * entry is returned. A replaced entry is unlinked but not freed.
 */
extern hashmap_entry *hashmap_put(hashmap *map, hashmap_entry *entry);

/*
 * Removes a hashmap entry matching the specified key.
 * map: the hashmap
 * key: key of the entry to remove
 * returns removed entry, or NULL if not found. The removed entry is only
 * unlinked from the map; it is not freed.
 */
extern hashmap_entry *hashmap_remove(hashmap *map, const hashmap_entry *key);

/*
 * Initializes a hashmap iterator structure so that the following call to
 * hashmap_iter_next() starts scanning at the first bucket.
 * map: the hashmap
 * iter: hashmap iterator structure
 */
extern void hashmap_iter_init(hashmap *map, hashmap_iter *iter);

/*
 * Returns the next hashmap entry. The iterator stores the returned entry's
 * successor before returning, so it does not read the returned entry again
 * on later calls.
 * iter: hashmap iterator
 * returns next entry, or NULL if there are no more entries
 */
extern hashmap_entry *hashmap_iter_next(hashmap_iter *iter);

/*
 * Convenience wrapper: resets iter to the start of map and fetches the
 * first entry in one call.
 * map: the hashmap
 * iter: hashmap iterator
 * returns first entry, or NULL if there are no entries
 */
static inline hashmap_entry *hashmap_iter_first(hashmap *map,
		hashmap_iter *iter)
{
	hashmap_entry *first;

	hashmap_iter_init(map, iter);
	first = hashmap_iter_next(iter);
	return first;
}

#endif
Loading

0 comments on commit f7eb85c

Please sign in to comment.