Skip to content

Commit

Permalink
add Google FarmHash, deprecating CityHash
Browse files Browse the repository at this point in the history
Good quality and speed; the best hash functions overall.
Note that the speeds in these docs were measured on a slow machine:
I took the Speed results from my MacBook Air laptop.
All the Speed tests need to be redone.
  • Loading branch information
Reini Urban committed Jun 17, 2015
1 parent b68a5e6 commit 0414c0c
Show file tree
Hide file tree
Showing 10 changed files with 9,487 additions and 19 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ ELSE()
ENDIF(SSE4_2_FOUND)

IF(CMAKE_SIZEOF_VOID_P EQUAL 8)
set(X64ONLY_SRC xxhash.c metrohash64.cpp metrohash128.cpp)
set(X64ONLY_SRC xxhash.c metrohash64.cpp metrohash128.cpp farmhash.cc FarmTest.cc)
ENDIF()

add_library(
Expand Down
19 changes: 19 additions & 0 deletions FarmTest.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/* We have conflicting uint128_t types in Types.h (Blob<128>) and farmhash.h */

#undef FARMHASH_UINT128_T_DEFINED
#include "farmhash.h"

/* Test wrapper: hashes the len bytes at key with the 32-bit seeded FarmHash
 * entry point and stores the uint32_t result through out. */
void FarmHash32_test ( const void * key, int len, uint32_t seed, void * out ) {
  using namespace NAMESPACE_FOR_HASH_FUNCTIONS;
  const char * const bytes = static_cast<const char *>(key);
  const size_t nbytes = static_cast<size_t>(len);
  uint32_t * const dest = static_cast<uint32_t *>(out);
  *dest = Hash32WithSeed(bytes, nbytes, seed);
}
/* Test wrapper: hashes the len bytes at key with the 64-bit seeded FarmHash
 * entry point and stores the uint64_t result through out.
 * The 32-bit seed is widened to 64 bits before the call. */
void FarmHash64_test ( const void * key, int len, uint32_t seed, void * out ) {
  using namespace NAMESPACE_FOR_HASH_FUNCTIONS;
  const char * const bytes = static_cast<const char *>(key);
  const size_t nbytes = static_cast<size_t>(len);
  const uint64_t wide_seed = static_cast<uint64_t>(seed);
  uint64_t * const dest = static_cast<uint64_t *>(out);
  *dest = Hash64WithSeed(bytes, nbytes, wide_seed);
}
void FarmHash128_test ( const void * key, int len, uint32_t seed, void * out ) {
using namespace NAMESPACE_FOR_HASH_FUNCTIONS;
uint128_t s((uint64_t)seed, (uint64_t)0UL);
uint128_t result = Hash128WithSeed((const char *)key, (size_t)len, s);
memcpy(out, &result, 128/8);
}
6 changes: 6 additions & 0 deletions Hashes.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#include "MurmurHash1.h"
#include "MurmurHash2.h"
#include "MurmurHash3.h"

#if defined(__x86_64__)
#include "xxhash.h"
#include "metrohash.h"
Expand Down Expand Up @@ -62,9 +63,13 @@ void SuperFastHash ( const void * key, int len, uint32_t seed, void * ou
void lookup3_test ( const void * key, int len, uint32_t seed, void * out );
void MurmurOAAT_test ( const void * key, int len, uint32_t seed, void * out );
void Crap8_test ( const void * key, int len, uint32_t seed, void * out );

// CityHash test entry points (CityHash is deprecated in favour of FarmHash).
void CityHash32_test ( const void * key, int len, uint32_t seed, void * out );
void CityHash64_test ( const void * key, int len, uint32_t seed, void * out );
void CityHash128_test ( const void * key, int len, uint32_t seed, void * out );
// FarmHash test entry points; defined in FarmTest.cc.
// Each hashes the len bytes at key with the given seed and writes the
// 32-, 64- or 128-bit result to out.
void FarmHash32_test ( const void * key, int len, uint32_t seed, void * out );
void FarmHash64_test ( const void * key, int len, uint32_t seed, void * out );
void FarmHash128_test ( const void * key, int len, uint32_t seed, void * out );

void SpookyHash32_test ( const void * key, int len, uint32_t seed, void * out );
void SpookyHash64_test ( const void * key, int len, uint32_t seed, void * out );
Expand Down Expand Up @@ -139,4 +144,5 @@ inline void metrohash128crc_1_test ( const void * key, int len, uint32_t seed, v
// Test wrapper: forwards key, len (widened to 64 bits), seed and the output
// buffer to metrohash128crc_2.
inline void metrohash128crc_2_test ( const void * key, int len, uint32_t seed, void * out ) {
  const uint8_t * const input = static_cast<const uint8_t *>(key);
  const uint64_t input_len = static_cast<uint64_t>(len);
  uint8_t * const digest = static_cast<uint8_t *>(out);
  metrohash128crc_2(input, input_len, seed, digest);
}

#endif
33 changes: 19 additions & 14 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,29 +24,33 @@ SMhasher
| superfast | 1532.52 | 34.72 | 28% bias, collisions, 30% distr |
| MurmurOAAT | 431.89 | 39.36 | 91% bias, 5273.01x collisions, 37% distr |
| Crap8 | 3064.24 | 23.01 | collisions, 99.998% distr |
| City32 | 3397.98 | 34.87 | |
| City64 | 9678.77 | 30.13 | 2 minor collisions |
| City128 | 9750.69 | 46.78 | |
| CityCrc128 | 12871.94 | 56.12 | |
| SipHash | 779.74 | 154.68 | |
| Spooky32 | 9223.07 | 51.10 | |
| Spooky64 | 9189.83 | 52.43 | |
| Spooky128 | 8883.58 | 52.30 | |
| Murmur2 | 3165.74 | 30.60 | 2.42% bias, collisions, 2% distrib |
| Murmur2A | 3064.14 | 37.15 | 1.7% bias, 81x coll, 1.7% distrib |
| Murmur2B | 5977.77 | 31.77 | 12.7% bias |
| Murmur2C | 4008.20 | 35.97 | 1.8% bias, collisions, 3.4% distrib |
| MurmurOAAT | 431.89 | 39.36 | 91% bias, collisions, distr |

| PMurHash32 | 1544.64 | 45.24 | |
| Murmur3A | 2282.61 | 35.11 | |
| Murmur3C | 3011.28 | 66.24 | |
| Murmur3F | 4415.51 | 43.96 | |
| MurmurOAAT | 431.89 | 39.36 | 91% bias, collisions, distr |
| PMurHash32 | 1544.64 | 45.24 | |
| City32 | 3397.98 | 34.87 | |
| City64 | 9678.77 | 30.13 | 2 minor collisions |
| City128 | 9750.69 | 46.78 | |
| CityCrc128 | 12871.94 | 56.12 | |
| FarmHash32 | 12852.46 | 27.88 | |
| FarmHash64 | 14164.64 | 29.82 | |
| FarmHash128 | 16153.90 | 44.99 | |
| SipHash | 779.74 | 154.68 | |
| Spooky32 | 9223.07 | 51.10 | |
| Spooky64 | 9189.83 | 52.43 | |
| Spooky128 | 8883.58 | 52.30 | |
| xxHash32 | 5780.04 | 35.02 | collisions with 4bit diff |
| xxHash64 | 7909.28 | 42.33 | |
| metrohash64_1 | 9305.80 | 34.34 | |
| metrohash64_2 | 9303.72 | 32.81 | |
| metrohash64crc_1 | 14215.93 | 25.77 | cyclic collisions with 8 byte |
| metrohash64crc_2 | 13538.51 | 31.93 | cyclic collisions with 8 byte |
| metrohash64crc_1 | 14215.93 | 25.77 | cyclic collisions 8 byte |
| metrohash64crc_2 | 13538.51 | 31.93 | cyclic collisions 8 byte |
| metrohash128_1 | 9281.99 | 41.60 | |
| metrohash128_2 | 9202.54 | 37.06 | |
| metrohash128crc_1 | 13657.21 | 37.44 | |
Expand All @@ -62,10 +66,11 @@ See also the old [https://code.google.com/p/smhasher/w/list](https://code.google

So the fastest hash functions on x86_64 without quality problems are:

* Metro
* FarmHash
* Metro (_but not 64crc yet, WIP_)
* Spooky32
* xxHash64
* City
* City (_deprecated_)

Hash functions for symbol tables or hash tables typically use 32-bit hashes,
databases and file systems typically use 64- or 128-bit hashes, and crypto now starts at 256 bits.
Expand Down
Loading

0 comments on commit 0414c0c

Please sign in to comment.