# Imports

In [19]:
from pathlib import Path
import os
from plot_utils import set_title_axes_labels, fig_setup, finalize
import numpy as np
import matplotlib.pyplot as plt
import random

# Data directory, resolved against the working directory at import time.
DATA_DIR = Path.cwd() / "data"

# Unicode -- character-to-integer encoding

In [7]:
# Show the Unicode code point of a letter and of a digit character,
# one message per line.
print(
    f"Unicode value of 'c': {ord('c')}",
    f"Unicode value of '1': {ord('1')}",
    sep="\n",
)

Unicode value of 'c': 99
Unicode value of '1': 49


# Hashing functions

- [`ord()` documentation](https://docs.python.org/3/library/functions.html#ord)
- [`hash()` documentation](https://docs.python.org/3/library/functions.html#hash)

In [8]:
def unicode_sum_hash(key, N):
    """
    Hash a string by summing the Unicode code points of its characters.

    The sum does not depend on character order, so any two keys made of
    the same characters (e.g. "ab" and "ba") always land in the same bucket.

    Parameters:
    - key: string (or other iterable of single characters) to hash
    - N: number of buckets; the result is reduced modulo N

    Returns the bucket index `sum(ord(c) for c in key) % N`. An empty key
    hashes to 0.
    """
    # ord(c) gets corresponding Unicode integer of character c
    return sum(ord(c) for c in key) % N

def polynomial_rolling_hash(key, N, p=53, m=2**64):
    """
    Hash a string as a polynomial in p evaluated modulo m, then reduced
    modulo N:

        (sum_i ord(key[i]) * p**i  mod m)  mod N

    Guidelines:
    - set p to be a prime number roughly equal to the number of
        characters in the input alphabet
    - set m to be a quite large number, since the probability of two
        random strings colliding is about 1/m. Sometimes m=2^64 is chosen

    Parameters:
    - key: string to hash
    - N: number of buckets; the final result is reduced modulo N
    - p: base of the polynomial
    - m: modulus applied to the polynomial before reducing modulo N

    Returns the bucket index. An empty key hashes to 0.
    """
    s = 0
    # Running value of p**i mod m. Reducing the power and the sum at every
    # step keeps all intermediates smaller than m instead of building an
    # ever-growing big integer; the final result is mathematically the same.
    power = 1
    for c in key:
        s = (s + ord(c) * power) % m
        power = (power * p) % m
    return s % N

def python_hash(key, N):
    """
    Map `key` to a bucket index using Python's built-in `hash()`,
    reduced modulo N.

    NOTE: per the Python documentation, `hash()` of str/bytes is salted
    per process unless PYTHONHASHSEED is set, so bucket indices for string
    keys are not expected to be reproducible across interpreter runs.
    """
    bucket = hash(key) % N
    return bucket

## Hash function uniformity

TO DO

# Hash tables

In [None]:
class KeyValue:
    """Plain record that pairs a key with its associated value."""

    def __init__(self, key, value):
        # Store both fields as public attributes.
        self.key, self.value = key, value

class Node:
    """Singly linked list node: a payload plus a link to the next node."""

    def __init__(self, data):
        # A freshly created node is not linked to anything yet.
        self.next = None
        self.data = data


class LinkedList:
    """
    Singly linked list that only tracks its head node.

    `head` is None for an empty list; otherwise it is the first node, and
    each node's `next` attribute leads to the following node (None at the
    end of the list).
    """

    def __init__(self):
        self.head = None

    def insert_node_at_beginning(self, new_data):
        """ 
        Insert a new node holding `new_data` at the beginning of the
        linked list
        """
        # Make new node point to what was head
        new_node = Node(data=new_data)
        new_node.next = self.head

        # Update what is head
        self.head = new_node

    def search(self, query_data):
        """ 
        Determine if there's a node in the linked list whose data == query_data.
        Return True if there is such a node and False otherwise
        (including when the list is empty).
        """
        # Walk every node from head until we run off the end of the list
        curr_node = self.head
        while curr_node is not None:
            if curr_node.data == query_data:
                return True
            curr_node = curr_node.next
        return False


class ChainHashTable:
    """
    Hash table with linked list chaining to handle collisions

    Parameters:
    - N: number of buckets; one linked list is created per bucket
    - hash_fcn: callable `hash_fcn(key, N)` returning a bucket index
        in range(N)

    Note: `insert` does not look for an existing entry with the same key.
    Inserting a key again adds a second node in front of the older one, so
    `get` returns the most recently inserted value and `num_added_keys`
    counts insert calls rather than distinct keys.
    """
    def __init__(self, N, hash_fcn):
        self.hash_fcn = hash_fcn
        self.N = N
        self.array = [ LinkedList() for i in range(N) ]
        self.num_added_keys = 0

    def insert(self, key, value):
        """
        Insert (key, value) pair into hash table
        """
        hash_idx = self.hash_fcn(key, self.N)

        # New entries go to the front of the bucket's chain
        self.array[hash_idx].insert_node_at_beginning(
            new_data=KeyValue(key=key, value=value)
        )
        self.num_added_keys += 1

    def get(self, key):
        """ 
        Given a key, return the corresponding value if the key exists,
        otherwise return None
        """
        hash_idx = self.hash_fcn(key, self.N)

        # Visit every node in the bucket's chain, including the last one.
        # An empty chain (head is None) skips the loop entirely.
        curr_node = self.array[hash_idx].head
        while curr_node is not None:
            if curr_node.data.key == key:
                return curr_node.data.value
            curr_node = curr_node.next

        return None

# Smoke test: store one pair, then look up a missing key followed by
# the key that was stored.
ht = ChainHashTable(N=1000, hash_fcn=polynomial_rolling_hash)
ht.insert(key="a", value=1)
for query_key in ("b", "a"):
    print(ht.get(query_key))

None
1
