# Hashtables

Hashtables are associative arrays. Basically like a dictionary, there is a record with a key and the values (data) <br>

You hash the key using the hash function to get the hash value that acts as the index to insert the new record into.
- This can be seen in the exmaple below, where the names of individuals are passed through the hashing function to get the index of the key value pair in the hashtable
- e.g. `'John Smith'` is passed through the hashing function, and the resulting index to insert the record into the hash table is 2

The hashing function that you use depends on the question, and what the question requirements are. You could be using inbuilt functions, choosing a random index based on some rules etc. 

<div style = 'background: white; height = auto'>
    <img src = 'https://upload.wikimedia.org/wikipedia/commons/thumb/7/7d/Hash_table_3_1_1_0_1_0_0_SP.svg/1200px-Hash_table_3_1_1_0_1_0_0_SP.svg.png' width = '800'>
</div>

## Hashtable by probing

When the hash function returns a hash value that is the same for different keys, what linear probing does is to search for the next avaliable spot next to the hash value index. This is though, prone to clustering where a lot of the records are placed in close proximity to each other

<img src = 'https://www.cs.emory.edu/~cheung/Courses/253/Syllabus/Map/FIGS/map44c.gif'>

This is a example of linear probing, i.e. if the spot that you want to slot in is full, you check the next slot for avaliability until you find a empty slot
- Once you pass the key through the hashing function, if the index that is specified already has a record, we then try to search for the next avaliable spot to fit the new record

In [1]:
class HashTable:

    def __init__(self, size):
        self.array = [None] * size # Array for hash table
        self.size = size

    def hash(self, key):
        # Depends on what algorithm you design
        # This hash algorithm is prone to clustering, where records are recorded in close proximity to each other
        hash_val = len(key) % self.size
        print(hash_val)

        return hash_val

    # Insertion algorithm of linear probing (open addressing - if the address is not open, search for another location
    # or closed hashing - enclosed within the original table)
    def insert(self, key, val):

        hash_val = self.hash(key)

        if self.array[hash_val] == None:
            self.array[hash_val] = (key, val)
            return 1
        
        new_hash_val = hash_val
        
        while self.array[new_hash_val] != None and new_hash_val != hash_val:
            new_hash_val = (hash_val + 1) % self.size

        if new_hash_val != hash_val:
            self.array[new_hash_val] = (key, val)
            return 1
        
        return -1

    # Search Probing

    def search(self, key, data):
        hash_val = self.hash(key)

        if self.array[hash_val][1] == data and self.array[hash_val][0] == key:
            return f'Found at {hash_val}, ({key} : {data})'
        
        probe = (hash_val + 1) % self.size

        while probe != hash_val:
            if self.array[probe] == data:
                return f'Found at {probe} ({key} : {data})'
            
            probe = (probe + 1) % self.size

        return -1
    
    def display(self):

        print(f'{"Index":<15} | {"Value"}')
        print('_'*100)

        index = 0
        for item in self.array:
            print(f'{index:<15} | {item}')
            index += 1


ht = HashTable(size = 20)
ht.insert('abcdefg', 5)
ht.insert('ab', 2)
ht.insert('abc', 6)
ht.insert('abcd', 8)

ht.display()

ht.search('ab', 2)

7
2
3
4
Index           | Value
____________________________________________________________________________________________________
0               | None
1               | None
2               | ('ab', 2)
3               | ('abc', 6)
4               | ('abcd', 8)
5               | None
6               | None
7               | ('abcdefg', 5)
8               | None
9               | None
10              | None
11              | None
12              | None
13              | None
14              | None
15              | None
16              | None
17              | None
18              | None
19              | None
2


'Found at 2, (ab : 2)'

## Hashtable by Chaining - array implementation

<div style= 'height: auto'>
    <img src = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcTfv8WNTc-Yuei6RrVKJu4sK_CeM8ILy27WgQhD_W1jsA&s' width = '700px'>
</div>

For chaining, instead of trying to probe for the next slot for avaliability, you just append it to a linked list / array at the hashtable record index.

Chaining addresses the potential collision in the hash table, where the hash function generates the same hash value for a different key. 

In [2]:
class HashTable: #Chaining using nested list
    
    def __init__(self, size):
        self.array = []
        self.size = size
        for i in range(size):
            self.array.append([])
        
    #Bad hash function algo, only for demo 
    #causes clustering, many records will be hashed in close proximity of one another
    #collision - 2 or more records are hashed to same location
    def hash_func(self, key): #depends on the algorithm designed for the system 
        hash_val = key % len(str(key))
        return hash_val    
    
    def insert(self, key, val):
        
        hash_val = self.hash_func(key)
        
        self.array[hash_val].append((key, val))
        
        return
    
    def search(self, key, data):
        
        hash_val = self.hash_func(key)
        
        count = 0
        for item in self.array[hash_val]:
            if item[0] == key and item[1] == data:
                return f'Found at {hash_val}, location {count}'
            count += 1
            
        return -1
    

## Hashtable using chaining - linked list implementation

In [3]:
#Chaining using Linked List
class Node:
    
    def __init__(self, key, val, nxt = None):
        self.key = key
        self.val = val
        self.nxt = nxt
        
class LinkedList:
    
    def __init__(self):
        self.head = None

    def __str__(self):
        stuff = ''

        current = self.head

        while current != None:
            stuff += f'({current.key} : {current.val}) --> ' 
            if current.nxt == None:
                stuff += f'({current.key}: {current.val})'
            current = current.nxt
            
        return stuff
        
    def insert(self, key, data):
        new_node = Node(key, data)
        
        if self.head == None:
            self.head = new_node
            
        else:
            new_node.nxt = self.head
            self.head = new_node

           
class HashTable:
    
    def __init__(self, size):
        self.array = []
        for i in range(size):
            bucket = LinkedList()
            self.array.append(bucket)
        self.size = size
        
    #Bad hash function algo, only for demo 
    #causes clustering, many records will be hashed in close proximity of one another
    #collision - 2 or more records are hashed to same location
        
    def hash_func(self, key): #depends on the algorithm designed for the system 
        hash_val = len(str(key)) % self.size
        return hash_val    
     
        
    def insert(self, key, val):
        
        hash_val = self.hash_func(key)
        
        self.array[hash_val].insert(key, val) #Calls the insert of the linked list
        return
    
    def search(self, key):

        hash_val = self.hash_func(key)

        bucket = self.array[hash_val]

        current = bucket.head

        location = 0
        while current.key != key:
            if current.nxt == None:
                return -1
            
            current = current.nxt
            location += 1

        return (hash_val - 1, current.val)
    
    def display(self):
        print(f'{"Index":<10} | {"Value"}')
        index = 0
        for ll in self.array:
            print(f'{index:<10} | {ll}')
            index += 1

hashtable = HashTable(5)
hashtable.insert('abc', 123)
hashtable.insert('def', 123)
hashtable.insert('abcdefg', 256)
hashtable.insert('abcd', 456)
hashtable.display()
hashtable.search('def')

Index      | Value
0          | 
1          | 
2          | (abcdefg : 256) --> (abcdefg: 256)
3          | (def : 123) --> (abc : 123) --> (abc: 123)
4          | (abcd : 456) --> (abcd: 456)


(2, 123)