In [1]:
#%autosave 0
# `display` and `HTML` are imported from the public `IPython.display` module;
# importing `display` from `IPython.core.display` is deprecated.
from IPython.display import HTML, display
# Stretch the notebook container to the full browser width.  `!important`
# has to come before the semicolon, otherwise it is not part of the
# `width` declaration and the browser discards it.
display(HTML('<style>.container { width:100% !important; } </style>'))

# Hash Tables (Open Hashing)

Given a string $s$ and the $\texttt{size}$ of the hash table, the function $\texttt{hash_code}(s, \texttt{size})$ calculates the hash code of $s$.  For a string $s = c_0c_1\cdots c_n$ of length $n+1$, this function is defined as follows:
$$ \texttt{hash_code}(s, \texttt{size}) = \left(\sum\limits_{i=0}^{n} \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; \texttt{size}  $$
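Computing the powers $128^i$ directly produces huge intermediate numbers (and would overflow in languages with fixed-width integers).  To avoid this, we define the partial sums $s_k$ for
$k=n+1,n,\cdots,1,0$ by backward induction, reducing modulo $\texttt{size}$ in every step: 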
  - $s_{n+1} = 0$,
  - $s_{k} = \bigl(\texttt{ord}(c_{k}) + s_{k+1} \cdot 128 \bigr) \;\texttt{%}\; \texttt{size}$.

Then we have
$$ s_0 = \left(\sum\limits_{i=0}^n \texttt{ord}(c_i) \cdot 128^i\right) \;\texttt{%}\; \texttt{size}. $$

In [None]:
def hash_code(s, size):
    """Return the hash code of the string `s` for a table with `size` buckets.

    The result equals (sum of ord(s[i]) * 128**i) % size.  It is computed
    iteratively from the last character to the first, reducing modulo `size`
    after every step.  Intermediate values therefore stay small, and keys of
    any length can be hashed without hitting the recursion limit or copying
    string slices.

    Parameters:
        s:    the string to hash; the empty string hashes to 0.
        size: the modulus, i.e. the number of buckets.

    Returns:
        An integer; for positive `size` it lies in range(size).

    Raises:
        ZeroDivisionError: if `size` is 0 and `s` is not empty.
    """
    code = 0
    # Backward induction: code_k = (ord(c_k) + 128 * code_{k+1}) % size.
    for char in reversed(s):
        code = (ord(char) + 128 * code) % size
    return code

Let us test this function.

In [None]:
# The result is reduced modulo 10613, so it must lie in range(10613).
hash_code('Exmatrikulator', 10613)

In [None]:
class HashTable:
    """Hash table whose buckets are Python lists of entries.

    Attributes set by the constructor:
      mSize    -- number of buckets
      mEntries -- number of entries currently stored
      mArray   -- list of mSize buckets, each one a separate empty list
      mAlpha   -- load factor (fixed at 2)
    """
    def __init__(self, n):
        """Create an empty table with `n` buckets."""
        self.mAlpha   = 2  # load factor
        self.mEntries = 0  # nothing stored yet
        self.mSize    = n
        # A comprehension gives every bucket its own list object.
        self.mArray   = [ [] for _ in range(n) ]

Hash tables work best if their size is a prime number.  Therefore, the class attribute `HashTable.Primes` stores a list of prime numbers.  
These numbers are organized so that `Primes[i+1]` is roughly twice as big as `Primes[i]`.

In [None]:
# Candidate table sizes in ascending order.  Every entry is roughly twice
# its predecessor, so picking the next entry about doubles the table.
HashTable.Primes = [
    3, 7, 13, 31, 61,
    127, 251, 509, 1021, 2039,
    4093, 8191, 16381, 32749, 65521,
    131071, 262139, 524287, 1048573, 2097143,
    4194301, 8388593, 16777213, 33554393, 67108859,
    134217689, 268435399, 536870909, 1073741789, 2147483647,
]

In [None]:
def find(self, key):
    """Return the value stored under `key`, or None if there is none.

    Only the bucket selected by hash_code(key, self.mSize) is searched;
    the value of the first pair whose key equals `key` is returned.
    """
    bucket = self.mArray[hash_code(key, self.mSize)]
    return next((value for stored, value in bucket if stored == key), None)

HashTable.find = find

In [None]:
def insert(self, key, value):
    """Associate `value` with `key`, replacing any value stored before.

    If the number of entries has reached mSize * mAlpha, the current
    counters are printed and the table is rehashed before the pair is
    stored.  mEntries is only incremented when `key` was not yet present.
    """
    limit = self.mSize * self.mAlpha
    if self.mEntries >= limit:
        print('mEntries =', self.mEntries, 'mSize =', self.mSize)
        self._rehash()
    # The bucket is looked up after a possible rehash, i.e. with the new size.
    bucket = self.mArray[hash_code(key, self.mSize)]
    for position, (stored, _) in enumerate(bucket):
        if stored == key:
            # Key already present: overwrite in place, count stays the same.
            bucket[position] = (key, value)
            return
    bucket.append((key, value))
    self.mEntries += 1
    
HashTable.insert = insert

In [None]:
def _rehash(self):
    """Move all entries into a table whose size is taken from HashTable.Primes.

    The new size is the first prime p in the list with
    p * mAlpha > mEntries.  All pairs are re-inserted into a temporary
    HashTable of that size, whose bucket array then replaces this table's
    array.  mEntries is left unchanged.
    """
    stored = self.mEntries
    for candidate in HashTable.Primes:
        if candidate * self.mAlpha > stored:
            prime = candidate
            break
    # NOTE: if no listed prime satisfies the condition, `prime` is unbound here.
    replacement = HashTable(prime)
    for key, value in (pair for bucket in self.mArray for pair in bucket):
        replacement.insert(key, value)
    self.mArray = replacement.mArray
    self.mSize  = prime
    
HashTable._rehash = _rehash

In [None]:
def delete(self, key):
    """Remove the pair stored under `key`, if there is one.

    Only the bucket selected by hash_code(key, self.mSize) is inspected.
    When a pair with this key is found, it is removed and mEntries is
    decremented; otherwise the table is left untouched.
    """
    bucket = self.mArray[hash_code(key, self.mSize)]
    hit = next(
        (position for position, (stored, _) in enumerate(bucket) if stored == key),
        None
    )
    if hit is not None:
        del bucket[hit]
        self.mEntries -= 1

HashTable.delete = delete

In [None]:
def __repr__(self):
    """Return one line per bucket in the form '<index>: <bucket list>'.

    Every line, including the last one, ends with a newline character.
    """
    return ''.join(
        f'{index}: {bucket}\n' for index, bucket in enumerate(self.mArray)
    )

HashTable.__repr__ = __repr__

In [None]:
# Create a table with 3 buckets and store the first entry.
t = HashTable(3)
t.insert('Adrian', 8)
t

In [None]:
t.insert('Benjamin', 24)
t

In [None]:
t.insert('Bereket', 1)
t

In [None]:
t.insert('Christian', 13)
t

In [None]:
# 'Christian' is already present: insert replaces the stored value
# and does not increase the number of entries.
t.insert('Christian', 14)
t

In [None]:
# Look up three keys that were inserted above.
t.find('Adrian'), t.find('Christian'), t.find('Benjamin')

In [None]:
t.insert('David', 22)
t

In [None]:
t.insert('Ephraim', 19)
t

In [None]:
# If the cells are run once in order, the table holds 6 entries in 3 buckets
# at this point, so this insert triggers a rehash to 7 buckets first.
t.insert('Erwin', 26)
t

In [None]:
t.insert('Felix', 4)
t

In [None]:
t.insert('Florian', 9)
t

In [None]:
t.insert('Giorgio', 20)
t

In [None]:
t.insert('Jan', 7)
t

In [None]:
t.insert('Janis', 16)
t

In [None]:
t.insert('Josia', 18)
t

In [None]:
t.insert('Kai', 3)
t

In [None]:
# If the cells are run once in order, the table holds 14 entries in 7 buckets
# at this point, so this insert triggers a rehash to 13 buckets first.
t.insert('Lars', 21)
t

In [None]:
t.insert('Lucas', 0)
t

In [None]:
t.insert('Marcel', 5)
t

In [None]:
t.insert('Marius', 6)
t

In [None]:
t.insert('Markus', 17)
t

In [None]:
t.insert('Matthias', 10)
t

In [None]:
t.insert('Nick', 11)
t

In [None]:
t.insert('Patrick', 23)
t

In [None]:
t.insert('Petra', 27)
t

In [None]:
t.insert('Rene', 15)
t

In [None]:
t.insert('Sebastian', 25)
t

In [None]:
t.insert('Stefan', 2)
t

In [None]:
# Remove a single entry; the number of buckets is not changed by delete.
t.delete('Adrian')
t

In [None]:
# Delete all keys in the original order.  'Christian' is listed twice on
# purpose: deleting a key that is not in the table leaves it unchanged.
for name in [
    'Adrian', 'Benjamin', 'Bereket', 'Christian', 'Christian', 'David',
    'Ephraim', 'Erwin', 'Felix', 'Florian', 'Giorgio', 'Jan', 'Janis',
    'Josia', 'Kai', 'Lars', 'Lucas', 'Marcel', 'Marius', 'Markus',
    'Matthias', 'Nick', 'Patrick', 'Petra', 'Rene', 'Sebastian', 'Stefan',
]:
    t.delete(name)
t