# Chapter 3: Better Living Through Better Hashing

## Setup

In [1]:
from datetime import date
from typing import Any
import calendar
from matplotlib import pyplot as plt

## Associating Values with Keys

In [2]:
# Days per month in a non-leap year; position i describes months_array[i].
month_length = [
    31, 28, 31, 30,
    31, 30, 31, 31,
    30, 31, 30, 31,
]
# Month names in calendar order, kept parallel to month_length.
months_array = (
    "January February March April May June "
    "July August September October November December"
).split()

# Finding a month's length takes a linear search for its position first.
idx = months_array.index("February")
# NOTE(review): "Febrary" is misspelled in the message below; it is left
# untouched here because the recorded cell output carries the same spelling.
print(f"Febrary has {month_length[idx]} days")

Febrary has 28 days


In [3]:
# Code to print readable calendar for any month and year
def print_month(month: str, year: int):
    """Print a Sunday-first text calendar for ``month`` of ``year``.

    ``month`` must be one of the names in ``months_array``; any other value
    makes ``list.index`` raise ``ValueError``. February gets a 29th day when
    ``calendar.isleap(year)`` is true.
    """
    month_index = months_array.index(month)
    # date.weekday() numbers Monday as 0; shift by one so Sunday is column 0.
    column = (date(year, month_index + 1, 1).weekday() + 1) % 7

    last_day = month_length[month_index]
    if month_index == 1 and calendar.isleap(year):
        last_day += 1

    print(f"{month} {year}".center(20))
    print("Su Mo Tu We Th Fr Sa")
    # Pad the first row so that day 1 lands under its weekday.
    print("   " * column, end="")
    for day in range(1, last_day + 1):
        print(f"{day:2d} ", end="")
        column = (column + 1) % 7
        if column == 0:
            # Just printed a Saturday: start the next week's row.
            print()
    print()

In [4]:
print_month("February", 2024)

   February 2024    
Su Mo Tu We Th Fr Sa
             1  2  3 
 4  5  6  7  8  9 10 
11 12 13 14 15 16 17 
18 19 20 21 22 23 24 
25 26 27 28 29 


In [5]:
# Month name -> number of days (non-leap year), one quarter per row.
# A dict associates each key with its value directly, so no separate
# index lookup is needed.
days_in_months = {
    "January": 31, "February": 28, "March": 31,
    "April": 30, "May": 31, "June": 30,
    "July": 31, "August": 31, "September": 30,
    "October": 31, "November": 30, "December": 31,
}
print(f"April has {days_in_months['April']} days")

April has 30 days


In [6]:
def base26(word: str) -> int:
    """Interpret ``word`` as a base-26 number and return its integer value.

    The word is lower-cased first; each character contributes
    ``ord(ch) - ord("a")`` as a digit, with the first character being the
    most significant. An empty word yields 0. Characters outside ``a``-``z``
    are not rejected and simply contribute out-of-range digits.
    """
    letters = word.lower()
    # Walk from the least significant (last) character, weighting by 26**pos.
    return sum(
        (ord(ch) - ord("a")) * 26**pos
        for pos, ch in enumerate(reversed(letters))
    )

## A Hashtable Structure for (Key, Value) Pairs

In [7]:
class Entry:
    """A single (key, value) pair occupying one slot of a hashtable."""

    def __init__(self, key: Any, value: Any):
        # Plain public attributes: the tables compare `key` and overwrite
        # `value` directly.
        self.key, self.value = key, value


class Hashtable:
    """Fixed-size hashtable holding at most one entry per slot.

    A key is mapped to the slot ``hash(key) % M``. Collisions are detected
    but not resolved: storing a second, different key in an occupied slot
    raises ``RuntimeError``.
    """

    def __init__(self, M: int = 10):
        """Create a table with ``M`` empty slots."""
        self.table = [None] * M
        self.M = M

    def get(self, key: Any) -> Any:
        """Return the value stored for ``key``, or None if it is absent.

        The slot's stored key is compared with ``key`` so that a different
        key hashing to the same slot is reported as absent instead of
        returning that other key's value.
        """
        hc = hash(key) % self.M
        entry = self.table[hc]
        if entry and entry.key == key:
            return entry.value
        return None

    def put(self, key: Any, value: Any):
        """Store ``value`` under ``key``, replacing any value already stored.

        Raises:
            RuntimeError: if the slot for ``key`` holds a different key.
        """
        hc = hash(key) % self.M
        entry = self.table[hc]
        if entry:
            if entry.key == key:
                entry.value = value
            else:
                raise RuntimeError(f"Key collision: {key} and {entry.key}")
        else:
            self.table[hc] = Entry(key, value)

In [8]:
# A table with many more slots than keys, to make collisions unlikely.
table = Hashtable(M=1000)
table.put(key="April", value="30")
table.put(key="May", value="31")
table.put(key="June", value="30")

# "April" was stored above; "August" never was, so None is expected for it.
print(table.get("April"), table.get("August"), sep="\n")


30
None


## Detecting and Resolving Collisions with Linear Probing

In [9]:
# Hashtable with Linear Probing
class LinearProbingHashtable:
    """Open-addressing hashtable that resolves collisions by linear probing.

    Attributes:
        table: list of ``M`` slots, each either None or an entry.
        M: number of slots.
        N: number of entries stored.
    """

    def __init__(self, M: int = 10):
        """Create an empty table with ``M`` slots."""
        self.table = [None] * M
        self.M = M
        self.N = 0

    def _probe(self, key):
        """Return the index of the slot holding ``key``, else of the first
        empty slot met while stepping forward (with wrap-around) from
        ``hash(key) % M``."""
        slot = hash(key) % self.M
        while self.table[slot] and not (self.table[slot].key == key):
            slot = (slot + 1) % self.M
        return slot

    def get(self, key):
        """Return the value stored for ``key``, or None if it is absent."""
        found = self.table[self._probe(key)]
        return found.value if found else None

    def put(self, key, value):
        """Store ``value`` under ``key``, overwriting an existing value.

        Raises:
            RuntimeError: if ``key`` is new and the table already holds
                ``M - 1`` entries.
        """
        slot = self._probe(key)
        existing = self.table[slot]
        if existing:
            existing.value = value
            return

        # One slot is always left empty, which is what lets probing stop.
        if self.N >= self.M - 1:
            raise RuntimeError("Table is full")

        self.table[slot] = Entry(key, value)
        self.N += 1

## Separate Chaining with Linked Lists

In [10]:
class LinkedEntry:
    """Node of a singly linked chain: a (key, value) pair plus the node
    that follows it (None at the end of the chain)."""

    def __init__(self, key, value, next=None):
        self.key, self.value = key, value
        # `next` shadows the builtin inside this constructor only; the
        # parameter name is part of the signature and is kept as-is.
        self.next = next

In [12]:
class LinkedListHashtable:
    """Hashtable using separate chaining: each slot heads a linked list of
    the entries whose keys hash to that slot.

    Attributes:
        table: list of ``M`` chain heads (None for an empty chain).
        M: number of slots.
        N: number of entries stored.
    """

    def __init__(self, M: int = 10):
        """Create an empty table with ``M`` slots."""
        self.table = [None] * M
        self.M = M
        self.N = 0

    def _locate(self, key):
        """Walk the chain for ``key``.

        Returns ``(bucket, node, before)``: the slot index, the node holding
        ``key`` (None if absent) and the node preceding it in the chain
        (None when the match is the chain head).
        """
        bucket = hash(key) % self.M
        before, node = None, self.table[bucket]
        while node:
            if node.key == key:
                break
            before, node = node, node.next
        return bucket, node, before

    def get(self, key):
        """Return the value stored for ``key``, or None if it is absent."""
        _, node, _ = self._locate(key)
        return node.value if node else None

    def put(self, key, value):
        """Store ``value`` under ``key``, overwriting an existing value."""
        bucket, node, _ = self._locate(key)
        if node:
            node.value = value
            return

        # New keys are prepended, so the newest entry becomes the chain head.
        self.table[bucket] = LinkedEntry(key, value, self.table[bucket])
        self.N += 1

    def remove(self, key):
        """Unlink ``key`` and return its value; return None if it is absent."""
        bucket, node, before = self._locate(key)
        if not node:
            return None
        if before:
            before.next = node.next
        else:
            # The match was the head of the chain.
            self.table[bucket] = node.next
        self.N -= 1
        return node.value

## Growing Hashtables

In [None]:
class DynamicHashtable:
    """Separate-chaining hashtable that grows once it holds enough entries.

    Attributes:
        table: list of ``M`` chain heads (None for an empty chain).
        M: number of slots.
        N: number of entries stored.
        load_factor: fraction of ``M`` used to compute ``threshold``.
        threshold: ``put`` triggers a resize once ``N`` reaches this value.
    """

    def __init__(self, M: int = 10):
        """Create an empty table with ``M`` slots."""
        self.table = [None] * M
        self.M = M
        self.N = 0
        self.load_factor = 0.75
        self.threshold = min(M * self.load_factor, M - 1)

    def get(self, key):
        """Return the value stored for ``key``, or None if it is absent."""
        entry = self.table[hash(key) % self.M]
        while entry:
            if entry.key == key:
                return entry.value
            entry = entry.next
        return None

    def put(self, key, value):
        """Store ``value`` under ``key``, overwriting an existing value.

        After inserting a new key, the table is resized to ``2 * M + 1``
        slots if ``N`` has reached ``threshold``.
        """
        hash_code = hash(key) % self.M
        entry = self.table[hash_code]
        while entry:
            if entry.key == key:
                entry.value = value
                return
            entry = entry.next
        # New keys are prepended to the chain of their slot.
        self.table[hash_code] = LinkedEntry(key, value, self.table[hash_code])
        self.N += 1
        if self.N >= self.threshold:
            self.resize(2 * self.M + 1)

    def resize(self, new_size):
        """Rehash every entry into a table created with ``new_size`` slots."""
        # Slot indexes depend on M, so entries are re-inserted into a
        # temporary table whose storage is then adopted.
        temp = DynamicHashtable(new_size)
        for n in self.table:
            while n:
                temp.put(n.key, n.value)
                n = n.next
        self.table = temp.table
        self.M = temp.M
        # Unlike __init__, no `M - 1` cap is applied here.
        self.threshold = self.load_factor * self.M

## Perfect Hashing

In [16]:
# Lookup table with 12 entries, indexed by the results of hash_f (which are
# reduced modulo 12); perfect_hash adds two of its entries together.
G = [0, 8, 1, 4, 7, 10, 2, 0, 9, 11, 1, 5]
# Two weight lists of length 5, passed to hash_f as T; hash_f multiplies the
# character at position i by T[i % 5].
# NOTE(review): the values look tuned for one specific key set (presumably
# the twelve month names) - confirm before reusing with other keys.
s1 = [9, 4, 8, 6, 6]
s2 = [2, 10, 6, 3, 5]


def hash_f(key, T):
    """Return a weighted character sum of ``key`` reduced modulo 12.

    The character at position ``i`` has its code point multiplied by
    ``T[i % 5]``, so only the first five weights of ``T`` are ever used.
    An empty ``key`` hashes to 0.
    """
    total = 0
    for position, char in enumerate(key):
        total += T[position % 5] * ord(char)
    return total % 12


def perfect_hash(key):
    """Return a slot number in ``range(12)`` for ``key``.

    ``key`` is hashed twice with ``hash_f`` (once per weight list ``s1`` and
    ``s2``); each result selects an entry of ``G`` and the two entries are
    added modulo 12.
    """
    first = G[hash_f(key, s1)]
    second = G[hash_f(key, s2)]
    return (first + second) % 12

## Iterate over (key, value) pairs

In [17]:
def __iter__(self):
    """Yield a ``(key, value)`` tuple for every occupied slot of
    ``self.table``, in slot order (one entry per slot)."""
    for slot in self.table:
        if not slot:
            # Empty slot: nothing to report.
            continue
        yield slot.key, slot.value


def __iter__(self):
    """Yield a ``(key, value)`` tuple for every entry of every chain in
    ``self.table``: slots in order, each chain from head to tail."""
    for head in self.table:
        node = head
        while node:
            yield node.key, node.value
            node = node.next