Skip to content
Permalink
Browse files
Use SipHash hash function to mitigate HashDos attempts.
This change attempts to switch to an hash function which mitigates
the effects of the HashDoS attack (denial of service attack trying
to force data structures to worst case behavior) while at the same time
providing Redis with an hash function that does not expect the input
data to be word aligned, a condition no longer true now that sds.c
strings have a varialbe length header.

Note that it is possible sometimes that even using an hash function
for which collisions cannot be generated without knowing the seed,
special implementation details or the exposure of the seed in an
indirect way (for example the ability to add elements to a Set and
check the return in which Redis returns them with SMEMBERS) may
make the attacker's life simpler in the process of trying to guess
the correct seed, however the next step would be to switch to a
log(N) data structure when too many items in a single bucket are
detected: this seems like an overkill in the case of Redis.

SPEED REGRESION TESTS:

In order to verify that switching from MurmurHash to SipHash had
no impact on speed, a set of benchmarks involving fast insertion
of 5 million of keys were performed.

The result shows Redis with SipHash in high pipelining conditions
to be about 4% slower compared to using the previous hash function.
However this could partially be related to the fact that the current
implementation does not attempt to hash whole words at a time but
reads single bytes, in order to have an output which is endian-netural
and at the same time working on systems where unaligned memory accesses
are a problem.

Further X86 specific optimizations should be tested, the function
may easily get at the same level of MurMurHash2 if a few optimizations
are performed.
  • Loading branch information
antirez committed Feb 20, 2017
1 parent 9b05aaf commit adeed29a99dcd0efdbfe4dbd5da74e7b01966c67
Showing 12 changed files with 361 additions and 82 deletions.
@@ -128,7 +128,7 @@ endif

REDIS_SERVER_NAME=redis-server
REDIS_SENTINEL_NAME=redis-sentinel
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o
REDIS_SERVER_OBJ=adlist.o quicklist.o ae.o anet.o dict.o server.o sds.o zmalloc.o lzf_c.o lzf_d.o pqsort.o zipmap.o sha1.o ziplist.o release.o networking.o util.o object.o db.o replication.o rdb.o t_string.o t_list.o t_set.o t_zset.o t_hash.o config.o aof.o pubsub.o multi.o debug.o sort.o intset.o syncio.o cluster.o crc16.o endianconv.o slowlog.o scripting.o bio.o rio.o rand.o memtest.o crc64.o bitops.o sentinel.o notify.o setproctitle.o blocked.o hyperloglog.o latency.o sparkline.o redis-check-rdb.o geo.o lazyfree.o module.o evict.o expire.o geohash.o geohash_helper.o childinfo.o defrag.o siphash.o
REDIS_CLI_NAME=redis-cli
REDIS_CLI_OBJ=anet.o adlist.o redis-cli.o zmalloc.o release.o anet.o ae.o crc64.o
REDIS_BENCHMARK_NAME=redis-benchmark
@@ -1423,7 +1423,7 @@ void configGetCommand(client *c) {
/* We use the following dictionary type to store where a configuration
* option is mentioned in the old configuration file, so it's
* like "maxmemory" -> list of line numbers (first line is zero). */
unsigned int dictSdsCaseHash(const void *key);
uint64_t dictSdsCaseHash(const void *key);
int dictSdsKeyCaseCompare(void *privdata, const void *key1, const void *key2);
void dictSdsDestructor(void *privdata, void *val);
void dictListDestructor(void *privdata, void *val);
@@ -1029,8 +1029,6 @@ void sigsegvHandler(int sig, siginfo_t *info, void *secret) {
/* Log INFO and CLIENT LIST */
serverLogRaw(LL_WARNING|LL_RAW, "\n------ INFO OUTPUT ------\n");
infostring = genRedisInfoString("all");
infostring = sdscatprintf(infostring, "hash_init_value: %u\n",
dictGetHashFunctionSeed());
serverLogRaw(LL_WARNING|LL_RAW, infostring);
serverLogRaw(LL_WARNING|LL_RAW, "\n------ CLIENT LIST OUTPUT ------\n");
clients = getAllClientsInfoString();
@@ -37,11 +37,11 @@

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>
#include <sys/time.h>
#include <ctype.h>

#include "dict.h"
#include "zmalloc.h"
@@ -71,77 +71,28 @@ static int _dictInit(dict *ht, dictType *type, void *privDataPtr);

/* -------------------------- hash functions -------------------------------- */

static uint32_t dict_hash_function_seed = 5381;
static uint8_t dict_hash_function_seed[16];

void dictSetHashFunctionSeed(uint32_t seed) {
dict_hash_function_seed = seed;
void dictSetHashFunctionSeed(uint8_t *seed) {
memcpy(dict_hash_function_seed,seed,sizeof(dict_hash_function_seed));
}

uint32_t dictGetHashFunctionSeed(void) {
uint8_t *dictGetHashFunctionSeed(void) {
return dict_hash_function_seed;
}

/* MurmurHash2, by Austin Appleby
* Note - This code makes a few assumptions about how your machine behaves -
* 1. We can read a 4-byte value from any address without crashing
* 2. sizeof(int) == 4
*
* And it has a few limitations -
*
* 1. It will not work incrementally.
* 2. It will not produce the same results on little-endian and big-endian
* machines.
*/
unsigned int dictGenHashFunction(const void *key, int len) {
/* 'm' and 'r' are mixing constants generated offline.
They're not really 'magic', they just happen to work well. */
uint32_t seed = dict_hash_function_seed;
const uint32_t m = 0x5bd1e995;
const int r = 24;

/* Initialize the hash to a 'random' value */
uint32_t h = seed ^ len;

/* Mix 4 bytes at a time into the hash */
const unsigned char *data = (const unsigned char *)key;

while(len >= 4) {
uint32_t k = *(uint32_t*)data;

k *= m;
k ^= k >> r;
k *= m;
/* The default hashing function uses SipHash implementation
* in siphash.c. */

h *= m;
h ^= k;
uint64_t siphash(const uint8_t *in, const size_t inlen, const uint8_t *k);
uint64_t siphash_nocase(const uint8_t *in, const size_t inlen, const uint8_t *k);

data += 4;
len -= 4;
}

/* Handle the last few bytes of the input array */
switch(len) {
case 3: h ^= data[2] << 16;
case 2: h ^= data[1] << 8;
case 1: h ^= data[0]; h *= m;
};

/* Do a few final mixes of the hash to ensure the last few
* bytes are well-incorporated. */
h ^= h >> 13;
h *= m;
h ^= h >> 15;

return (unsigned int)h;
uint64_t dictGenHashFunction(const void *key, int len) {
return siphash(key,len,dict_hash_function_seed);
}

/* And a case insensitive hash function (based on djb hash) */
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
unsigned int hash = (unsigned int)dict_hash_function_seed;

while (len--)
hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
return hash;
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len) {
return siphash_nocase(buf,len,dict_hash_function_seed);
}

/* ----------------------------- API implementation ------------------------- */
@@ -56,7 +56,7 @@ typedef struct dictEntry {
} dictEntry;

typedef struct dictType {
unsigned int (*hashFunction)(const void *key);
uint64_t (*hashFunction)(const void *key);
void *(*keyDup)(void *privdata, const void *key);
void *(*valDup)(void *privdata, const void *obj);
int (*keyCompare)(void *privdata, const void *key1, const void *key2);
@@ -168,15 +168,15 @@ void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
unsigned int dictGenHashFunction(const void *key, int len);
unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);
void dictDisableResize(void);
int dictRehash(dict *d, int n);
int dictRehashMilliseconds(dict *d, int ms);
void dictSetHashFunctionSeed(unsigned int initval);
unsigned int dictGetHashFunctionSeed(void);
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
unsigned int dictGetHash(dict *d, const void *key);
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, unsigned int hash);
@@ -41,7 +41,7 @@ int dictStringKeyCompare(void *privdata, const void *key1, const void *key2) {
return strcmp(key1,key2) == 0;
}

unsigned int dictStringHash(const void *key) {
uint64_t dictStringHash(const void *key) {
return dictGenHashFunction(key, strlen(key));
}

@@ -3264,7 +3264,7 @@ void *RM_GetBlockedClientPrivateData(RedisModuleCtx *ctx) {
/* server.moduleapi dictionary type. Only uses plain C strings since
* this gets queries from modules. */

unsigned int dictCStringKeyHash(const void *key) {
uint64_t dictCStringKeyHash(const void *key) {
return dictGenHashFunction((unsigned char*)key, strlen((char*)key));
}

@@ -379,7 +379,7 @@ void sentinelSimFailureCrash(void);

/* ========================= Dictionary types =============================== */

unsigned int dictSdsHash(const void *key);
uint64_t dictSdsHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
void releaseSentinelRedisInstance(sentinelRedisInstance *ri);

@@ -482,16 +482,16 @@ int dictObjKeyCompare(void *privdata, const void *key1,
return dictSdsKeyCompare(privdata,o1->ptr,o2->ptr);
}

unsigned int dictObjHash(const void *key) {
uint64_t dictObjHash(const void *key) {
const robj *o = key;
return dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
}

unsigned int dictSdsHash(const void *key) {
uint64_t dictSdsHash(const void *key) {
return dictGenHashFunction((unsigned char*)key, sdslen((char*)key));
}

unsigned int dictSdsCaseHash(const void *key) {
uint64_t dictSdsCaseHash(const void *key) {
return dictGenCaseHashFunction((unsigned char*)key, sdslen((char*)key));
}

@@ -513,7 +513,7 @@ int dictEncObjKeyCompare(void *privdata, const void *key1,
return cmp;
}

unsigned int dictEncObjHash(const void *key) {
uint64_t dictEncObjHash(const void *key) {
robj *o = (robj*) key;

if (sdsEncodedObject(o)) {
@@ -526,7 +526,7 @@ unsigned int dictEncObjHash(const void *key) {
len = ll2string(buf,32,(long)o->ptr);
return dictGenHashFunction((unsigned char*)buf, len);
} else {
unsigned int hash;
uint64_t hash;

o = getDecodedObject(o);
hash = dictGenHashFunction(o->ptr, sdslen((sds)o->ptr));
@@ -3639,7 +3639,9 @@ int main(int argc, char **argv) {
zmalloc_set_oom_handler(redisOutOfMemoryHandler);
srand(time(NULL)^getpid());
gettimeofday(&tv,NULL);
dictSetHashFunctionSeed(tv.tv_sec^tv.tv_usec^getpid());
char hashseed[16];
getRandomHexChars(hashseed,sizeof(hashseed));
dictSetHashFunctionSeed((uint8_t*)hashseed);
server.sentinel_mode = checkForSentinelMode(argc,argv);
initServerConfig();
moduleInitModulesSystem();
@@ -1765,7 +1765,7 @@ unsigned long LFUGetTimeInMinutes(void);
uint8_t LFULogIncr(uint8_t value);

/* Keys hashing / comparison functions for dict.c hash tables. */
unsigned int dictSdsHash(const void *key);
uint64_t dictSdsHash(const void *key);
int dictSdsKeyCompare(void *privdata, const void *key1, const void *key2);
void dictSdsDestructor(void *privdata, void *val);

0 comments on commit adeed29

Please sign in to comment.