Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit ff7c3e1
Showing
9 changed files
with
721 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
*~ | ||
.eunit | ||
c_src/*.o | ||
doc | ||
ebin | ||
priv |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
## Description | ||
|
||
The **erlang-murmurhash** library provides [MurmurHash](http://code.google.com/p/smhasher/) hash functions implemented as Erlang NIFs. | ||
|
||
## Building and Installing | ||
|
||
The **erlang-murmurhash** library is built with [rebar](https://github.com/basho/rebar), which must be in the command `PATH`. | ||
|
||
## Erlang Version | ||
|
||
The **erlang-murmurhash** library requires Erlang R14B or later. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,276 @@ | ||
#include "MurmurHash2.h"

#include <string.h>
|
||
//----------------------------------------------------------------------------- | ||
// MurmurHash2, by Austin Appleby | ||
|
||
// Note - This code makes a few assumptions about how your machine behaves - | ||
|
||
// 1. We can read a 4-byte value from any address without crashing | ||
// 2. sizeof(int) == 4 | ||
|
||
// And it has a few limitations - | ||
|
||
// 1. It will not work incrementally. | ||
// 2. It will not produce the same results on little-endian and big-endian | ||
// machines. | ||
|
||
// MurmurHash2: 32-bit hash of the 'len' bytes starting at 'key'.
//
//   key  - bytes to hash; may have any alignment (words are copied out with
//          memcpy rather than read through a casted pointer).
//   len  - number of bytes to hash; expected to be non-negative.
//   seed - initial value folded into the hash state.
//
// Returns the 32-bit hash. Each 4-byte word is interpreted in the machine's
// native byte order, so results differ between little- and big-endian hosts.
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed )
{
    // 'm' and 'r' are mixing constants generated offline.
    // They're not really 'magic', they just happen to work well.
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    // Initialize the hash to a 'random' value
    uint32_t h = seed ^ (uint32_t)len;

    const unsigned char * data = (const unsigned char *)key;

    // Mix 4 bytes at a time into the hash
    while(len >= 4)
    {
        // memcpy avoids a misaligned / aliasing-violating load; compilers
        // turn it into a single word load where that is legal.
        uint32_t k;
        memcpy(&k, data, sizeof k);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array
    switch(len)
    {
    case 3: h ^= (uint32_t)data[2] << 16; /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;  /* fallthrough */
    case 1: h ^= (uint32_t)data[0];
            h *= m;
            break;
    default:
            break;
    }

    // Do a few final mixes of the hash to ensure the last few
    // bytes are well-incorporated.
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
||
//----------------------------------------------------------------------------- | ||
// MurmurHash2, 64-bit versions, by Austin Appleby | ||
|
||
// The same caveats as 32-bit MurmurHash2 apply here - beware of alignment | ||
// and endian-ness issues if used across multiple platforms. | ||
|
||
// 64-bit hash for 64-bit platforms | ||
|
||
// MurmurHash64A: 64-bit hash of the 'len' bytes starting at 'key', using
// 64-bit arithmetic throughout.
//
//   key  - bytes to hash; may have any alignment.
//   len  - number of bytes to hash; expected to be non-negative.
//   seed - initial value folded into the hash state.
//
// Returns the 64-bit hash. Each 8-byte word is interpreted in the machine's
// native byte order, so results differ between little- and big-endian hosts.
uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed )
{
    const uint64_t m = 0xc6a4a7935bd1e995ULL;
    const int r = 47;

    uint64_t h = seed ^ ((uint64_t)len * m);

    const unsigned char * data = (const unsigned char *)key;
    const unsigned char * end = data + (len / 8) * 8;

    // Mix 8 bytes at a time into the hash
    while(data != end)
    {
        // memcpy avoids a misaligned / aliasing-violating 64-bit load.
        uint64_t k;
        memcpy(&k, data, sizeof k);
        data += sizeof k;

        k *= m;
        k ^= k >> r;
        k *= m;

        h ^= k;
        h *= m;
    }

    // Fold in the 0..7 trailing bytes, least significant byte first.
    switch(len & 7)
    {
    case 7: h ^= (uint64_t)data[6] << 48; /* fallthrough */
    case 6: h ^= (uint64_t)data[5] << 40; /* fallthrough */
    case 5: h ^= (uint64_t)data[4] << 32; /* fallthrough */
    case 4: h ^= (uint64_t)data[3] << 24; /* fallthrough */
    case 3: h ^= (uint64_t)data[2] << 16; /* fallthrough */
    case 2: h ^= (uint64_t)data[1] << 8;  /* fallthrough */
    case 1: h ^= (uint64_t)data[0];
            h *= m;
            break;
    default:
            break;
    }

    // Final avalanche
    h ^= h >> r;
    h *= m;
    h ^= h >> r;

    return h;
}
|
||
|
||
// 64-bit hash for 32-bit platforms | ||
|
||
// MurmurHash64B: 64-bit hash of the 'len' bytes starting at 'key', computed
// with two interleaved 32-bit states so it only needs 32-bit arithmetic.
//
//   key  - bytes to hash; may have any alignment.
//   len  - number of bytes to hash; expected to be non-negative.
//   seed - low 32 bits seed the first state (xored with len), high 32 bits
//          seed the second state.
//
// Returns the first state in the high 32 bits and the second state in the
// low 32 bits. Words are interpreted in native byte order, so results differ
// between little- and big-endian hosts.
uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed )
{
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    uint32_t h1 = (uint32_t)seed ^ (uint32_t)len;
    uint32_t h2 = (uint32_t)(seed >> 32);

    const unsigned char * data = (const unsigned char *)key;

    // Consume 8 bytes per iteration: first word into h1, second into h2.
    while(len >= 8)
    {
        // memcpy avoids misaligned / aliasing-violating 32-bit loads.
        uint32_t k1;
        memcpy(&k1, data, sizeof k1);
        data += sizeof k1;
        k1 *= m; k1 ^= k1 >> r; k1 *= m;
        h1 *= m; h1 ^= k1;

        uint32_t k2;
        memcpy(&k2, data, sizeof k2);
        data += sizeof k2;
        k2 *= m; k2 ^= k2 >> r; k2 *= m;
        h2 *= m; h2 ^= k2;

        len -= 8;
    }

    // One remaining full word, if any, goes into h1.
    if(len >= 4)
    {
        uint32_t k1;
        memcpy(&k1, data, sizeof k1);
        data += sizeof k1;
        k1 *= m; k1 ^= k1 >> r; k1 *= m;
        h1 *= m; h1 ^= k1;
        len -= 4;
    }

    // The 0..3 trailing bytes go into h2.
    switch(len)
    {
    case 3: h2 ^= (uint32_t)data[2] << 16; /* fallthrough */
    case 2: h2 ^= (uint32_t)data[1] << 8;  /* fallthrough */
    case 1: h2 ^= (uint32_t)data[0];
            h2 *= m;
            break;
    default:
            break;
    }

    // Cross-mix the two states so each influences the other.
    h1 ^= h2 >> 18; h1 *= m;
    h2 ^= h1 >> 22; h2 *= m;
    h1 ^= h2 >> 17; h1 *= m;
    h2 ^= h1 >> 19; h2 *= m;

    return ((uint64_t)h1 << 32) | h2;
}
|
||
//----------------------------------------------------------------------------- | ||
// MurmurHash2A, by Austin Appleby | ||
|
||
// This is a variant of MurmurHash2 modified to use the Merkle-Damgard | ||
// construction. Bulk speed should be identical to Murmur2, small-key speed | ||
// will be 10%-20% slower due to the added overhead at the end of the hash. | ||
|
||
// This variant fixes a minor issue where null keys were more likely to | ||
// collide with each other than expected, and also makes the function | ||
// more amenable to incremental implementations. | ||
|
||
// Mixes word 'k' into hash state 'h'. Both arguments are modified. Relies on
// the constants 'm' and 'r' being defined in the scope where it is expanded.
#define mmix(h,k) do { (k) *= m; (k) ^= (k) >> r; (k) *= m; (h) *= m; (h) ^= (k); } while(0)

// MurmurHash2A: 32-bit hash of the 'len' bytes starting at 'key'. Unlike
// MurmurHash2, the length is mixed in at the end instead of being xored into
// the seed, and the trailing bytes go through the same mixing step as full
// words.
//
//   key  - bytes to hash; may have any alignment.
//   len  - number of bytes to hash; expected to be non-negative.
//   seed - initial hash state.
//
// Returns the 32-bit hash. Words are interpreted in native byte order, so
// results differ between little- and big-endian hosts.
uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed )
{
    const uint32_t m = 0x5bd1e995;
    const int r = 24;
    uint32_t l = (uint32_t)len;   // original length, mixed in last

    const unsigned char * data = (const unsigned char *)key;

    uint32_t h = seed;

    while(len >= 4)
    {
        // memcpy avoids a misaligned / aliasing-violating load.
        uint32_t k;
        memcpy(&k, data, sizeof k);

        mmix(h,k);

        data += 4;
        len -= 4;
    }

    // Gather the 0..3 trailing bytes into one word (zero if none).
    uint32_t t = 0;

    switch(len)
    {
    case 3: t ^= (uint32_t)data[2] << 16; /* fallthrough */
    case 2: t ^= (uint32_t)data[1] << 8;  /* fallthrough */
    case 1: t ^= (uint32_t)data[0];
            break;
    default:
            break;
    }

    mmix(h,t);
    mmix(h,l);

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
||
//----------------------------------------------------------------------------- | ||
// MurmurHashNeutral2, by Austin Appleby | ||
|
||
// Same as MurmurHash2, but endian- and alignment-neutral. | ||
// Half the speed though, alas. | ||
|
||
// MurmurHashNeutral2: 32-bit hash of the 'len' bytes starting at 'key'.
// Each 4-byte word is assembled byte by byte, least significant byte first,
// so the result does not depend on host byte order or on the alignment of
// 'key'.
//
//   key  - bytes to hash; may have any alignment.
//   len  - number of bytes to hash; expected to be non-negative.
//   seed - initial value folded into the hash state.
//
// Returns the 32-bit hash.
uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed )
{
    const uint32_t m = 0x5bd1e995;
    const int r = 24;

    uint32_t h = seed ^ (uint32_t)len;

    const unsigned char * data = (const unsigned char *)key;

    while(len >= 4)
    {
        // Widen each byte to uint32_t *before* shifting: an unsigned char
        // promotes to (signed) int, and shifting a value >= 0x80 left by 24
        // would overflow int, which is undefined behaviour.
        uint32_t k = (uint32_t)data[0]
                   | ((uint32_t)data[1] << 8)
                   | ((uint32_t)data[2] << 16)
                   | ((uint32_t)data[3] << 24);

        k *= m;
        k ^= k >> r;
        k *= m;

        h *= m;
        h ^= k;

        data += 4;
        len -= 4;
    }

    // Handle the last few bytes of the input array
    switch(len)
    {
    case 3: h ^= (uint32_t)data[2] << 16; /* fallthrough */
    case 2: h ^= (uint32_t)data[1] << 8;  /* fallthrough */
    case 1: h ^= (uint32_t)data[0];
            h *= m;
            break;
    default:
            break;
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
}
|
||
//----------------------------------------------------------------------------- | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
#ifndef MURMURHASH2_H
#define MURMURHASH2_H

#include <inttypes.h>

// All functions hash the 'len' bytes starting at 'key', folding 'seed' into
// the initial state, and return the resulting hash value.

// 32-bit MurmurHash2.
uint32_t MurmurHash2 ( const void * key, int len, uint32_t seed );

// 64-bit MurmurHash2 variant intended for 64-bit platforms.
uint64_t MurmurHash64A ( const void * key, int len, uint64_t seed );

// 64-bit MurmurHash2 variant intended for 32-bit platforms.
uint64_t MurmurHash64B ( const void * key, int len, uint64_t seed );

// 32-bit MurmurHash2 variant using the Merkle-Damgard construction.
uint32_t MurmurHash2A ( const void * key, int len, uint32_t seed );

// 32-bit MurmurHash2 variant that is endian- and alignment-neutral.
uint32_t MurmurHashNeutral2 ( const void * key, int len, uint32_t seed );

#endif // MURMURHASH2_H
Oops, something went wrong.