In [1]:
import math
import hashlib

In [2]:
# ## Setup methods

In [3]:
def method_1(istr):
    """Hash ``istr`` to 4 bytes with a hand-rolled multiplicative hash.

    The UTF-8 encoding of ``istr`` is extended with a fixed byte suffix, each
    byte is folded into an accumulator, and the accumulator is reduced modulo
    ``M`` and then modulo 0xFFFFFFFF.

    Args:
        istr: Text to hash.

    Returns:
        A 4-byte big-endian ``bytes`` value. Because the final reduction is
        ``% 0xFFFFFFFF``, the value 0xFFFFFFFF itself is never produced.
    """
    byte_list = istr.encode('utf-8') + b'j/3als-0423'
    maxVal = int.from_bytes([0xff] * 4, byteorder='big', signed=False)
    Q = 9874314047312
    M = 909090909090909091 # prime number
    result = 0
    for b in byte_list:
        # Reduce modulo M on every step. This yields the same final residue as
        # reducing once after the loop, but keeps the accumulator small instead
        # of letting it grow by roughly 44 bits per input byte.
        result = (result + result * Q + b * b) % M
    result = result % maxVal

    return result.to_bytes(4, byteorder='big', signed=False)

def method_md5(istr):
    """Return the raw MD5 digest (16 bytes) of ``istr`` encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_sha224(istr):
    """Return the raw SHA-224 digest (28 bytes) of ``istr`` encoded as UTF-8."""
    hasher = hashlib.sha224()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_sha512(istr):
    """Return the raw SHA-512 digest (64 bytes) of ``istr`` encoded as UTF-8."""
    hasher = hashlib.sha512()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_shake256(istr):
    """Return the first 4 bytes of SHAKE256 output for ``istr`` encoded as UTF-8."""
    hasher = hashlib.shake_256()
    hasher.update(istr.encode('utf-8'))
    # SHAKE is an extendable-output function, so the output length is requested here.
    return hasher.digest(4)

def display_result(buckets, ntests):
    """Print how evenly ``ntests`` samples were spread over ``buckets``.

    One line is printed per bucket in the form
    ``index: count, ratio, ±deviation``, where ``ratio`` is the bucket's share
    of ``ntests`` and ``deviation`` is its absolute distance from the uniform
    share ``1 / len(buckets)``. A blank line follows the table.

    Args:
        buckets: Sequence of per-bucket hit counts.
        ntests: Total number of samples the counts are measured against.

    Raises:
        ZeroDivisionError: If ``buckets`` is empty or ``ntests`` is 0.
    """
    ideal_ratio = 1.0 / len(buckets)

    # Walk the sequence that was passed in instead of relying on a
    # notebook-level bucket count, so the function works for any list length.
    for i, count in enumerate(buckets):
        ratio = count / float(ntests)
        print("{}: {}, {:.3f}, ±{:.3f}".format(i, count, ratio, math.fabs(ratio - ideal_ratio)))
    print("")

In [4]:
# Test setup: shared parameters for every distribution experiment below.
ntests = 23153      # number of generated input strings per experiment
nbuckets = 10       # number of equal-width buckets the hash range is split into
truncate_len = 4    # leading digest bytes kept when converting a hash to an int

# Largest unsigned value representable in truncate_len bytes (all bits set).
maxVal = (1 << (8 * truncate_len)) - 1

In [5]:
# ## Custom-made hash function

In [6]:
%%time
# Histogram of where the custom hash lands for ntests generated strings.
buckets = [0 for _ in range(nbuckets)]
for i in range(ntests):
    istr = "ab{}bc".format(i)
    digest = method_1(istr)
    val = int.from_bytes(digest[:truncate_len], byteorder='big', signed=False)

    # Scale the hash into [0, 1) and map it onto one of the equal-width buckets.
    ratio = val / float(maxVal)
    idx = math.floor(ratio * nbuckets)
    buckets[idx] = buckets[idx] + 1

display_result(buckets, ntests)

0: 2308, 0.100, ±0.000
1: 2345, 0.101, ±0.001
2: 2323, 0.100, ±0.000
3: 2155, 0.093, ±0.007
4: 2280, 0.098, ±0.002
5: 2298, 0.099, ±0.001
6: 2291, 0.099, ±0.001
7: 2348, 0.101, ±0.001
8: 2385, 0.103, ±0.003
9: 2420, 0.105, ±0.005

Wall time: 186 ms


In [7]:
# ## MD5

In [8]:
%%time
# Histogram of where the truncated MD5 digest lands for ntests generated strings.
buckets = [0]*nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_md5(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # When every truncated byte is 0xff, val == maxVal and ratio is exactly 1.0,
    # which would index one past the last bucket; clamp it into the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

display_result(buckets, ntests)

0: 2325, 0.100, ±0.000
1: 2311, 0.100, ±0.000
2: 2362, 0.102, ±0.002
3: 2301, 0.099, ±0.001
4: 2319, 0.100, ±0.000
5: 2314, 0.100, ±0.000
6: 2300, 0.099, ±0.001
7: 2299, 0.099, ±0.001
8: 2341, 0.101, ±0.001
9: 2281, 0.099, ±0.001

Wall time: 70 ms


In [9]:
# ## SHA224

In [10]:
%%time
# Histogram of where the truncated SHA-224 digest lands for ntests generated strings.
buckets = [0]*nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_sha224(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # When every truncated byte is 0xff, val == maxVal and ratio is exactly 1.0,
    # which would index one past the last bucket; clamp it into the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

display_result(buckets, ntests)

0: 2262, 0.098, ±0.002
1: 2278, 0.098, ±0.002
2: 2373, 0.102, ±0.002
3: 2305, 0.100, ±0.000
4: 2290, 0.099, ±0.001
5: 2329, 0.101, ±0.001
6: 2440, 0.105, ±0.005
7: 2320, 0.100, ±0.000
8: 2272, 0.098, ±0.002
9: 2284, 0.099, ±0.001

Wall time: 65.5 ms


In [11]:
# ## SHA512

In [12]:
%%time
# Histogram of where the truncated SHA-512 digest lands for ntests generated strings.
buckets = [0]*nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_sha512(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # When every truncated byte is 0xff, val == maxVal and ratio is exactly 1.0,
    # which would index one past the last bucket; clamp it into the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

display_result(buckets, ntests)

0: 2480, 0.107, ±0.007
1: 2300, 0.099, ±0.001
2: 2277, 0.098, ±0.002
3: 2238, 0.097, ±0.003
4: 2348, 0.101, ±0.001
5: 2321, 0.100, ±0.000
6: 2393, 0.103, ±0.003
7: 2331, 0.101, ±0.001
8: 2190, 0.095, ±0.005
9: 2275, 0.098, ±0.002

Wall time: 71 ms


In [13]:
# ## SHAKE256

In [14]:
%%time
# Histogram of where the SHAKE256 output lands for ntests generated strings.
buckets = [0]*nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_shake256(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # When every truncated byte is 0xff, val == maxVal and ratio is exactly 1.0,
    # which would index one past the last bucket; clamp it into the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

display_result(buckets, ntests)

0: 2289, 0.099, ±0.001
1: 2288, 0.099, ±0.001
2: 2352, 0.102, ±0.002
3: 2329, 0.101, ±0.001
4: 2303, 0.099, ±0.001
5: 2340, 0.101, ±0.001
6: 2340, 0.101, ±0.001
7: 2337, 0.101, ±0.001
8: 2343, 0.101, ±0.001
9: 2232, 0.096, ±0.004

Wall time: 61 ms
