# Hash Table


### Hash function
 * A hash function is any function that can be used to map data of arbitrary size onto data of a fixed size. The values returned by a hash function are called hash values, hash codes, digests, or simply hashes.

### Collision handling <br>
* <b>open addressing</b> : open addressing is a collision resolution strategy where collisions are resolved by storing the colliding key in a different location when the natural choice is full.<br>
 - linear probing
 - quadratic probing
 - random probing<br>
 (These 3 strategies all suffer from clustering; linear probing causes the first kind, primary clustering)
 - double hashing
 <p>
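* <b>closed addressing</b> : a closed addressing implementation (also called open hashing) is one in which every key stays at the index chosen by its hash function, and colliding keys are placed in an auxiliary collision set at that index, such as a linked list. (By contrast, a closed <i>hashing</i> implementation, which is another name for open addressing, is one in which the elements stay in the array itself.)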
 - chaining

In [9]:
class Hash_Linear:
    """Fixed-size open-addressing hash table using linear probing.

    Keys must support ``key % m`` (the examples use integers).  The table
    never grows and entries cannot be removed.
    """

    def __init__(self, m):
        """Create an empty table with ``m`` slots."""
        self.m = m
        # Each slot is either None (empty) or a ``[key, item]`` pair.
        self.h = [None] * m

    def isfull(self):
        """Return True when no slot is empty."""
        return None not in self.h

    def _probe(self, key):
        """Yield every slot index once, starting at ``key % m`` and wrapping."""
        if not self.h:
            # A zero-slot table has nothing to probe (and ``key % 0`` would raise).
            return
        start = key % self.m
        for j in range(self.m):
            yield (start + j) % self.m

    def insert(self, key, item):
        """Store ``item`` under ``key``.

        If ``key`` is already in the table its item is overwritten, so the
        new value is the one ``get`` returns.  When the key is new and no
        slot is free, ``"hash full"`` is printed and nothing is stored.
        """
        for idx in self._probe(key):
            slot = self.h[idx]
            if slot is None:
                self.h[idx] = [key, item]
                return
            if slot[0] == key:
                slot[1] = item
                return
        print("hash full")

    def get(self, key):
        """Return the item stored under ``key``.

        When the key is absent, ``"item not found"`` is printed and None is
        returned.
        """
        for idx in self._probe(key):
            slot = self.h[idx]
            if slot is None:
                # Entries are never removed, so an empty slot ends the
                # probe sequence: the key cannot lie beyond it.
                break
            if slot[0] == key:
                return slot[1]
        print("item not found")
        return None
            
            
# Thirteen keys for a thirteen-slot table, so the table ends up completely full.
x = [25, 37, 18, 55, 22, 35, 50, 63, 95, 32, 1, 13, 17]
h = Hash_Linear(13)

# Store every key with the label 'a<key>'.
for val in x:
    label = 'a' + str(val)
    h.insert(val, label)

# Read every key back and show what was stored for it.
for val in x:
    found = h.get(val)
    print(val, found)

# Look up a key that was never inserted.
h.get(101010)

25 a25
37 a37
18 a18
55 a55
22 a22
35 a35
50 a50
63 a63
95 a95
32 a32
1 a1
13 a13
17 a17
item not found


#### How to choose an appropriate value of M?

* A : use a prime number (found with the Sieve of Eratosthenes)
<center><img src=" https://drive.google.com/uc?id=15gH9j7yKoUcCwGooSZnZvW5B6N7qVdKa" width="500" height="300" ></center>

In [13]:
def getPrime(n):
    """Return the largest prime number that is less than or equal to ``n``.

    The primes are found with the Sieve of Eratosthenes.

    Args:
        n: Integer upper bound (inclusive).

    Returns:
        The largest prime ``p`` with ``p <= n``, as a plain ``int``.

    Raises:
        ValueError: If ``n`` is smaller than 2, because no prime exists.
    """
    import numpy as np

    if n < 2:
        raise ValueError("there is no prime number <= {}".format(n))

    # is_prime[i] stays True while i is still a prime candidate.
    is_prime = np.ones(n + 1, dtype=bool)
    is_prime[:2] = False

    limit = int(np.sqrt(n))
    # The bound is inclusive: when n is a perfect square p*p, the factor p
    # itself must be sieved or n would wrongly survive as "prime".
    for i in range(2, limit + 1):
        if is_prime[i]:
            # Smaller multiples of i were already removed by smaller factors.
            is_prime[i * i::i] = False

    return int(np.flatnonzero(is_prime)[-1])

def getM(n):
    """Return a table size M for ``n`` keys.

    M is whatever ``getPrime`` returns for three times ``n``.
    """
    return getPrime(n * 3)

# Show the result of getPrime for the bound 100.
largest = getPrime(100)
print(largest)
# Table size suggested for 100 keys (left as the cell's last expression).
getM(100)

97


293

### Methods that handle clustering more efficiently

* double hashing
$$ (h(key) + j*d(key)) \% M, j = 0,1,2, \cdots $$
<center><img src=" https://drive.google.com/uc?id=142rcrUHRJHDIV8n7kDK18Mbc2Rpimch0" width="500" height="300" ></center>  

 * d(key) = C - (key % C) 
 (C is a prime number smaller than M)

In [15]:
class DoubleHash:
    """Fixed-size open-addressing hash table using double hashing.

    The slot sequence for a key is ``(key % m + j * d(key)) % m`` for
    ``j = 0, 1, 2, ...`` with ``d(key) = c - key % c``.  ``m`` is the largest
    prime not exceeding three times the number of keys and ``c`` is the next
    smaller prime.  Because ``m`` is prime and ``1 <= d(key) <= c < m``, the
    sequence visits every slot exactly once.

    Keys must support ``%`` (the examples use integers).  Entries cannot be
    removed.
    """

    def __init__(self, x : list):
        """Create an empty table sized for the keys in ``x``.

        Only ``len(x)`` is used; the keys themselves are not inserted.
        """
        k = len(x)
        # Two primes are needed, so the bound is at least 3 (giving m=3, c=2)
        # even when x is empty.
        self.m, self.c = self.getPrime(max(3 * k, 3))
        # Each slot is either None (empty) or a ``[key, item]`` pair.
        self.h = [None] * self.m

    def getPrime(self, n):
        """Return the two largest primes ``<= n`` as ``(largest, second)``.

        Uses the Sieve of Eratosthenes.

        Raises:
            ValueError: If ``n`` is smaller than 3 (fewer than two primes).
        """
        import numpy as np

        if n < 3:
            raise ValueError("need n >= 3 to have two primes, got {}".format(n))

        is_prime = np.ones(n + 1, dtype=bool)
        is_prime[:2] = False
        limit = int(np.sqrt(n))
        # Inclusive bound: a perfect square p*p is only removed by sieving p.
        for j in range(2, limit + 1):
            if is_prime[j]:
                is_prime[j * j::j] = False
        primes = np.flatnonzero(is_prime)
        return int(primes[-1]), int(primes[-2])

    def _probe(self, key):
        """Yield the slot indices for ``key`` in double-hashing probe order."""
        start = key % self.m
        step = self.c - key % self.c
        for j in range(self.m):
            yield (start + j * step) % self.m

    def insert(self, key, item):
        """Store ``item`` under ``key``.

        If ``key`` is already in the table its item is overwritten.  When the
        key is new and no slot is free, ``"hash full"`` is printed and
        nothing is stored.
        """
        for idx in self._probe(key):
            slot = self.h[idx]
            if slot is None:
                self.h[idx] = [key, item]
                return
            if slot[0] == key:
                slot[1] = item
                return
        print("hash full")

    def isfull(self):
        """Return True when no slot is empty."""
        return None not in self.h

    def get(self, key):
        """Return the item stored under ``key``.

        When the key is absent, ``"item not found"`` is printed and None is
        returned.
        """
        for idx in self._probe(key):
            slot = self.h[idx]
            if slot is None:
                # Entries are never removed, so the key cannot lie beyond
                # the first empty slot of its probe sequence.
                break
            if slot[0] == key:
                return slot[1]
        print("item not found")
        return None
            
# Build a table sized for the keys in x, then store each key as 'a<key>'.
h = DoubleHash(x)

for val in x:
    label = 'a' + str(val)
    h.insert(val, label)

# Show both primes and the raw slot array, then look one key up.
print(h.m, h.c, h.h)
print(h.get(22))

37 31 [[37, 'a37'], [1, 'a1'], None, None, None, None, None, None, None, None, None, [32, 'a32'], None, [50, 'a50'], None, None, None, [17, 'a17'], [18, 'a18'], None, None, [95, 'a95'], [22, 'a22'], None, None, [25, 'a25'], [63, 'a63'], None, None, None, None, [13, 'a13'], [55, 'a55'], None, None, [35, 'a35'], None]
a22


### Chaining

In [17]:
class Node:
    """One entry of a singly linked list: a key, its value and a next-link."""

    def __init__(self, key = None, value = None):
        # A freshly created node is not linked to any successor yet.
        self.link = None
        self.key, self.value = key, value
        
class LinkedList:
    """Singly linked list of key/value nodes.

    An empty list is represented by a placeholder root node whose key is
    None; the first appended node replaces that placeholder.
    """

    def __init__(self):
        self.root = Node()

    def append(self, key, value):
        """Add a new node at the end of the list.

        Returns the number of links followed to reach the last node, which
        is 0 when the list was empty or held a single node.
        """
        fresh = Node(key, value)

        # The placeholder root means the list is still empty.
        if self.root.key is None:
            self.root = fresh
            return 0

        hops = 0
        tail = self.root
        while tail.link is not None:
            tail = tail.link
            hops += 1
        tail.link = fresh
        return hops

    def get(self, key):
        """Return the value of the first node whose key equals ``key``, else None."""
        node = self.root
        while node is not None:
            if node.key == key:
                return node.value
            node = node.link
        return None

class ChainHash:
    """Hash table that resolves collisions by chaining.

    Each slot of ``h`` is either None or a ``LinkedList`` holding every
    key/value pair whose key hashes to that slot.  The number of slots ``m``
    is the largest prime not exceeding three times the number of keys.
    Keys must support ``key % m`` (the examples use integers).
    """

    def __init__(self, x):
        """Create an empty table sized for the keys in ``x``.

        Only ``len(x)`` is used; the keys themselves are not inserted.
        """
        k = len(x)
        # The bound is at least 2 so a prime exists even when x is empty.
        self.m = self.getPrime(max(3 * k, 2))
        self.h = [None] * self.m

    def getPrime(self, n):
        """Return the largest prime ``<= n`` using the Sieve of Eratosthenes.

        Raises:
            ValueError: If ``n`` is smaller than 2, because no prime exists.
        """
        import numpy as np

        if n < 2:
            raise ValueError("there is no prime number <= {}".format(n))

        is_prime = np.ones(n + 1, dtype=bool)
        is_prime[:2] = False
        limit = int(np.sqrt(n))
        # Inclusive bound: a perfect square p*p is only removed by sieving p.
        for j in range(2, limit + 1):
            if is_prime[j]:
                is_prime[j * j::j] = False
        return int(np.flatnonzero(is_prime)[-1])

    def insert(self, key, value):
        """Append ``key``/``value`` to the chain of the slot ``key % m``.

        When the slot already has a chain, the key followed by the Korean
        word for "collision" is printed before appending.
        """
        idx = key % self.m
        if self.h[idx] is None:
            self.h[idx] = LinkedList()
        else:
            # "충돌" is Korean for "collision".
            print(key, "충돌")
        self.h[idx].append(key, value)

    def get(self, key):
        """Return the value stored under ``key``, or None when it is absent."""
        bucket = self.h[key % self.m]
        if bucket is None:
            # Nothing was ever inserted into this slot.
            return None
        return bucket.get(key)

# Two batches of keys; the table is sized from the first batch only.
x = [25, 37, 18, 55, 22, 35, 50, 63]
y = [26, 38, 19, 56, 23, 36, 51, 64]

h = ChainHash(x)

# Insert the first batch and then the second, each key labelled 'a<key>'.
for val in x + y:
    h.insert(val, 'a'+str(val))

# Look up a key from the second batch, then show the raw slot array.
print(h.get(64))
h.h

64 충돌
a64


[<__main__.LinkedList at 0x20b94f5d748>,
 None,
 <__main__.LinkedList at 0x20b86b93550>,
 <__main__.LinkedList at 0x20b94f5d438>,
 <__main__.LinkedList at 0x20b94f5d4e0>,
 <__main__.LinkedList at 0x20b94f5d780>,
 None,
 None,
 None,
 <__main__.LinkedList at 0x20b86b15d30>,
 <__main__.LinkedList at 0x20b94f5d9e8>,
 None,
 <__main__.LinkedList at 0x20b94f5d630>,
 <__main__.LinkedList at 0x20b94f5d5f8>,
 <__main__.LinkedList at 0x20b86b15f98>,
 <__main__.LinkedList at 0x20b94f5d898>,
 None,
 <__main__.LinkedList at 0x20b94f5da20>,
 <__main__.LinkedList at 0x20b86b15c18>,
 <__main__.LinkedList at 0x20b94f5d4a8>,
 None,
 None,
 <__main__.LinkedList at 0x20b94f5d8d0>]

### Two-way Chaining