# Imports

In [None]:
from pathlib import Path
import os
from plot_utils import set_title_axes_labels, fig_setup, finalize
import numpy as np
import matplotlib.pyplot as plt
import random

DATA_DIR = Path(os.getcwd()) / "data"

# Unicode -- character-to-integer encoding

In [None]:
# ord() maps a single character to its Unicode code point (an integer).
# Note that the digit character '1' has its own code point; it is not the
# integer 1.
print(f"Unicode value of 'c': {ord('c')}")
print(f"Unicode value of '1': {ord('1')}")

# Hashing functions

- [`ord()` documentation](https://docs.python.org/3/library/functions.html#ord)
- [`hash()` documentation](https://docs.python.org/3/library/functions.html#hash)

In [None]:
def unicode_sum_hash(key, N):
    """
    Hash `key` by adding up the Unicode code points of its characters and
    reducing the total modulo N.

    Because addition is order-independent, any two keys made of the same
    characters (e.g. "ab" and "ba") receive the same hash value.
    """
    # ord maps each character to its corresponding Unicode integer
    code_point_total = sum(map(ord, key))
    return code_point_total % N

def polynomial_rolling_hash(key, N, p=53, m=2**64):
    """
    Polynomial rolling hash of `key`, reduced to a bucket index in [0, N).

    Computes (sum_i ord(key[i]) * p**i) mod m, then reduces that modulo N.

    Parameters:
    - key: string (any iterable of single characters) to hash
    - N: number of buckets; the result is taken modulo N
    - p: base of the polynomial
    - m: modulus applied to the polynomial sum before reducing modulo N

    Guidelines:
    - set p to be a prime number roughly equal to the number of
        characters in the input alphabet
    - set m to be a quite large number, since the probability of two random
        strings colliding is about 1/m. Sometimes m=2^64 is chosen

    Returns the integer bucket index; an empty key hashes to 0.
    """
    s = 0
    power = 1  # holds p**i mod m for the current position i
    for ch in key:
        # Reduce modulo m at every step so the intermediate integers stay
        # bounded instead of growing with the length of the key. Modular
        # arithmetic guarantees the same result as reducing once at the end.
        s = (s + ord(ch) * power) % m
        power = (power * p) % m
    return s % N

def python_hash(key, N):
    """
    Map `key` to a bucket index by reducing Python's built-in hash() modulo N.
    """
    bucket_index = hash(key) % N
    return bucket_index

## Hash function uniformity

TO DO

# Hash tables

In [None]:
class KeyValue:
    """A (key, value) pair; used as the payload of a linked-list node."""

    def __init__(self, key, value):
        self.key, self.value = key, value

    def __repr__(self):
        # Human-readable form, e.g. "key: a; value: 1"
        return f"key: {self.key}; value: {self.value}"

class Node:
    """
    A single node of a singly linked list.

    Parameters:
    - data: payload stored in the node
    - next: the node that follows this one, or None if this is the last node
    """
    def __init__(self, data, next=None):
        self.data = data
        # Honor the caller-supplied successor (previously it was ignored and
        # the attribute was always set to None)
        self.next = next


class LinkedList:
    """Singly linked list that keeps a reference to its first node only."""

    def __init__(self):
        # An empty list has no head node
        self.head = None

    def insert_node_at_beginning(self, new_data):
        """
        Wrap `new_data` in a new node and make that node the new HEAD
        """
        # Remember what HEAD currently is so the new node can point to it
        previous_head = self.head

        # The freshly created node becomes HEAD ...
        self.head = Node(data=new_data)

        # ... and is linked in front of the old HEAD
        self.head.next = previous_head

class ChainedHashTable:
    """
    Hash table with linked list chaining to handle collisions

    Parameters:
    - N: number of buckets (length of the underlying array)
    - hash_fcn: callable invoked as hash_fcn(key=..., N=...) that returns the
        bucket index of a key

    Attributes:
    - array: list of N linked lists, one per bucket
    - num_added_keys: number of distinct keys currently stored
    """
    def __init__(self, N, hash_fcn):
        self.hash_fcn = hash_fcn
        self.N = N
        self.array = [LinkedList() for _ in range(N)]
        self.num_added_keys = 0

    def _find_node(self, key):
        """
        Return (hash_val, node), where hash_val is the bucket index of `key`
        and node is the linked-list node holding `key` (None if absent)
        """
        hash_val = self.hash_fcn(key=key, N=self.N)
        curr_node = self.array[hash_val].head
        while curr_node is not None:
            if curr_node.data.key == key:
                return hash_val, curr_node
            curr_node = curr_node.next
        return hash_val, None

    def insert(self, key, value):
        """
        Insert (key, value) pair into hash table. If the key is already
        present, its stored value is replaced instead of adding a second
        node for the same key.
        """
        new_data = KeyValue(key=key, value=value)
        hash_val, existing_node = self._find_node(key)
        print(f"Inserting {new_data} at index {hash_val}")

        if existing_node is not None:
            # Same key again: overwrite in place so the chain never holds
            # stale duplicates and the key count stays accurate
            existing_node.data = new_data
            return

        self.array[hash_val].insert_node_at_beginning(new_data=new_data)
        self.num_added_keys += 1

    def get(self, key):
        """
        Given a key, get the corresponding value if the key exists. Raise a
        KeyError if the key doesn't exist
        """
        _, node = self._find_node(key)
        if node is None:
            raise KeyError(f"key='{key}' doesn't exist in chained hash table")
        return node.data.value


# Build a chained hash table with 10 buckets that uses the polynomial
# rolling hash to map keys to bucket indices
N = 10
hash_fcn = polynomial_rolling_hash
cht = ChainedHashTable(N=N, hash_fcn=hash_fcn)

# Bucket indices of the three demo keys. The recorded output is 5, 5, 1,
# i.e. "Ron" and "Penstemon" collide and land in the same bucket.
hash_fcn(key="Ron", N=10)
hash_fcn(key="Penstemon", N=10)
hash_fcn(key="Lilac", N=10)

# Insert the three demo pairs (uncomment to run)
# cht.insert(key="Ron", value="Burgandy")
# cht.insert(key="Penstemon", value="Buttersworth")
# cht.insert(key="Lilac", value="Cornelius")

# Inspect the chains: new nodes are inserted at the head, so in bucket 5 the
# most recently inserted pair comes first
# cht.array[1].head.data
# cht.array[5].head.data
# cht.array[5].head.next.data

# Look up an existing key, then a missing one (the latter raises KeyError)
# cht.get(key="Ron")
# cht.get(key="Florence")

5

5

1

Inserting key: Ron; value: Burgandy at index 5
Inserting key: Penstemon; value: Buttersworth at index 5
Inserting key: Lilac; value: Cornelius at index 1


key: Lilac; value: Cornelius

key: Penstemon; value: Buttersworth

key: Ron; value: Burgandy

'Burgandy'

KeyError: "key='Florence' doesn't exist in chained hash table"