In [1]:
import math
import hashlib

In [2]:
# ## Setup methods

In [3]:
def method_1(istr):
    """Hand-rolled 4-byte hash of a string.

    The input is encoded as UTF-8 and a fixed suffix is appended. The bytes
    are then folded into an accumulator with the recurrence
    ``acc = acc * (Q + 1) + b * b``, reduced modulo ``M`` and finally modulo
    ``0xFFFFFFFF``.

    Args:
        istr: The text to hash.

    Returns:
        bytes: Exactly 4 bytes, big-endian, unsigned. Because the final
        reduction is ``% 0xFFFFFFFF``, the value ``b'\\xff\\xff\\xff\\xff'``
        is never produced.
    """
    byte_list = istr.encode('utf-8') + b'j/3als-0423'
    maxVal = int.from_bytes([0xff] * 4, byteorder='big', signed=False)
    Q = 9874314047312
    M = 909090909090909091  # noted as a prime number by the original author
    result = 0
    for b in byte_list:
        # Equivalent to ``result += result * Q + b * b`` followed by a single
        # ``% M`` after the loop, but reducing on every step keeps the
        # intermediate integer bounded instead of growing with input length.
        result = (result * (Q + 1) + b * b) % M
    result = result % maxVal

    return result.to_bytes(4, byteorder='big', signed=False)

def method_md5(istr):
    """Return the raw 16-byte MD5 digest of ``istr`` encoded as UTF-8."""
    hasher = hashlib.md5()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_sha224(istr):
    """Return the raw 28-byte SHA-224 digest of ``istr`` encoded as UTF-8."""
    hasher = hashlib.sha224()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_sha512(istr):
    """Return the raw 64-byte SHA-512 digest of ``istr`` encoded as UTF-8."""
    hasher = hashlib.sha512()
    hasher.update(istr.encode('utf-8'))
    return hasher.digest()

def method_shake256(istr):
    """Return the first 4 bytes of SHAKE-256 output for ``istr`` encoded as UTF-8."""
    xof = hashlib.shake_256()
    xof.update(istr.encode('utf-8'))
    return xof.digest(4)

In [4]:
# Parameters shared by the bucket-distribution tests
ntests = 13153      # number of sample strings hashed per method
nbuckets = 5        # number of equal-width buckets the value range is split into
truncate_len = 4    # number of leading digest bytes kept from each hash
# Largest unsigned integer representable in truncate_len bytes (all bits set).
maxVal = (1 << (8 * truncate_len)) - 1

In [5]:
# ## Custom-made hash function

In [6]:
%%time
# Hash ntests strings of the form "ab<i>bc" with method_1, map the first
# truncate_len bytes of each digest onto [0, 1] and count how many land in
# each of nbuckets equal-width buckets.
buckets = [0] * nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_1(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # ratio is exactly 1.0 when val == maxVal, which would give idx == nbuckets
    # and an IndexError; clamp so that value is counted in the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

# One line per bucket: index, absolute count, share of all samples.
for bucket_idx, count in enumerate(buckets):
    print("{}: {}, {:.3f}".format(bucket_idx, count, count / float(ntests)))
print("")

0: 2656, 0.202
1: 2562, 0.195
2: 2584, 0.196
3: 2609, 0.198
4: 2742, 0.208

Wall time: 105 ms


In [7]:
# ## MD5

In [8]:
%%time
# Hash ntests strings of the form "ab<i>bc" with method_md5, map the first
# truncate_len bytes of each digest onto [0, 1] and count how many land in
# each of nbuckets equal-width buckets.
buckets = [0] * nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_md5(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # ratio is exactly 1.0 when the truncated digest is all 0xff bytes, which
    # would give idx == nbuckets and an IndexError; clamp to the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

# One line per bucket: index, absolute count, share of all samples.
for bucket_idx, count in enumerate(buckets):
    print("{}: {}, {:.3f}".format(bucket_idx, count, count / float(ntests)))
print("")

0: 2623, 0.199
1: 2625, 0.200
2: 2659, 0.202
3: 2598, 0.198
4: 2648, 0.201

Wall time: 47 ms


In [9]:
# ## SHA224

In [10]:
%%time
# Hash ntests strings of the form "ab<i>bc" with method_sha224, map the first
# truncate_len bytes of each digest onto [0, 1] and count how many land in
# each of nbuckets equal-width buckets.
buckets = [0] * nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_sha224(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # ratio is exactly 1.0 when the truncated digest is all 0xff bytes, which
    # would give idx == nbuckets and an IndexError; clamp to the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

# One line per bucket: index, absolute count, share of all samples.
for bucket_idx, count in enumerate(buckets):
    print("{}: {}, {:.3f}".format(bucket_idx, count, count / float(ntests)))
print("")

0: 2610, 0.198
1: 2620, 0.199
2: 2627, 0.200
3: 2682, 0.204
4: 2614, 0.199

Wall time: 40 ms


In [11]:
# ## SHA512

In [12]:
%%time
# Hash ntests strings of the form "ab<i>bc" with method_sha512, map the first
# truncate_len bytes of each digest onto [0, 1] and count how many land in
# each of nbuckets equal-width buckets.
buckets = [0] * nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_sha512(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # ratio is exactly 1.0 when the truncated digest is all 0xff bytes, which
    # would give idx == nbuckets and an IndexError; clamp to the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

# One line per bucket: index, absolute count, share of all samples.
for bucket_idx, count in enumerate(buckets):
    print("{}: {}, {:.3f}".format(bucket_idx, count, count / float(ntests)))
print("")

0: 2741, 0.208
1: 2517, 0.191
2: 2626, 0.200
3: 2683, 0.204
4: 2586, 0.197

Wall time: 40 ms


In [13]:
# ## Shake 256

In [14]:
%%time
# Hash ntests strings of the form "ab<i>bc" with method_shake256, map the first
# truncate_len bytes of each digest onto [0, 1] and count how many land in
# each of nbuckets equal-width buckets.
buckets = [0] * nbuckets
for i in range(ntests):
    istr = "ab{}bc".format(i)
    val = int.from_bytes(method_shake256(istr)[:truncate_len], byteorder='big', signed=False)
    ratio = val / float(maxVal)

    # ratio is exactly 1.0 when the truncated digest is all 0xff bytes, which
    # would give idx == nbuckets and an IndexError; clamp to the last bucket.
    idx = min(math.floor(ratio * nbuckets), nbuckets - 1)
    buckets[idx] += 1

# One line per bucket: index, absolute count, share of all samples.
for bucket_idx, count in enumerate(buckets):
    print("{}: {}, {:.3f}".format(bucket_idx, count, count / float(ntests)))
print("")

0: 2554, 0.194
1: 2717, 0.207
2: 2665, 0.203
3: 2586, 0.197
4: 2631, 0.200

Wall time: 33 ms
