Reading notes and partial solutions to [Data Structures and Algorithms in Python](https://blackwells.co.uk/bookshop/product/9781118290279?gC=f177369a3b&gclid=Cj0KCQjwhJrqBRDZARIsALhp1WTBIyoxeQGXedlVy80vsglvFbNkVf7jTP0Z0zXEIP87lfqbtb4_diYaAr8dEALw_wcB).

In [1]:
import random
from matplotlib import pyplot as plt
%matplotlib inline
import math
from datetime import datetime
import time
import numpy as np

# Maps, Hash Tables, and Skip Lists

## Maps and Dictionaries

### Map Abstract Base Class

In [6]:
from collections.abc import MutableMapping

class MapBase(MutableMapping):
    """Abstract base for the map classes, supplying a nonpublic _Item record."""

    class _Item:
        """Lightweight key-value record; comparisons look only at the key."""

        __slots__ = ("_key", "_value")

        def __init__(self, k, v):
            self._key, self._value = k, v

        def __eq__(self, other):
            # Two items are equal when their keys are equal; values are ignored.
            return self._key == other._key

        def __ne__(self, other):
            # Defined through ==, so the two operators can never disagree.
            return not (self == other)

        def __lt__(self, other):
            # Items are ordered by key.
            return self._key < other._key

### Unsorted Table Map

In [7]:
class UnsortedTableMap(MapBase):
    """Map backed by a plain Python list of items kept in no particular order."""

    def __init__(self):
        """Start with no items."""
        self._table = []  # holds MapBase._Item instances

    def __getitem__(self, k):
        """Look up the value stored under key k.

        Raises KeyError if k is not in the map.
        """
        for entry in self._table:
            if k == entry._key:
                return entry._value
        raise KeyError("Key Error: " + repr(k))

    def __setitem__(self, k, v):
        """Store value v under key k, replacing the existing value if present."""
        for entry in self._table:
            if k == entry._key:
                entry._value = v  # key already present: overwrite in place
                break
        else:
            # the scan finished without a match, so k is a new key
            self._table.append(self._Item(k, v))

    def __delitem__(self, k):
        """Remove the item stored under key k.

        Raises KeyError if k is not in the map.
        """
        for idx, entry in enumerate(self._table):
            if k == entry._key:
                del self._table[idx]  # drop the matching item and stop
                return
        raise KeyError("Key Error: " + repr(k))

    def __len__(self):
        """Return how many items the map holds."""
        return len(self._table)

    def __iter__(self):
        """Yield the map's keys in table order."""
        for entry in self._table:
            yield entry._key

## Hash Tables

Hash table is a common data structure used to implement a map.

### Hash function

Two parts of a hash function:

1. Hash code: $\text{keys} \to \mathbb{Z}$.
2. Compression function: $\mathbb{Z} \to [0, N-1]$.

#### Hash code

1. If the order of individual components is not significant: interpret the bits of the key as an integer. If the key has more bits than the hash code, we can either truncate it to the lower 32 bits, or combine the upper and lower 32 bits by summation or bitwise exclusive-or.
2. If the order is significant (e.g., strings): Polynomial hash code, cyclic shift hash code.

#### Compression function
1. Modulus (the division method): $x\to x \mod N$.
2. Multiply-Add-and-Divide (MAD method): $x \to ((ax+b) \bmod p) \bmod N$, where $p>N$ is a prime number, and $a, b\in [0, p-1]$, $a>0$.

### Collision-handling schemes

1. Separate chaining: Store colliding items in a separate container at each bucket. Load factor $\lambda$ should be $<0.9$, otherwise the hash table operations become inefficient.
2. Open addressing: Store collisions in the hash table itself. $\lambda < 2/3$.
    1. Linear probing (may cause linear clustering). $\lambda < 0.5$.
    2. Quadratic probing (may cause secondary clustering).
    3. Double hashing.
    
The hash table is resized when the load factor exceeds the recommended threshold to regain efficiency.

### Hash Map Abstract Base Class

In [10]:
class HashMapBase(MapBase):
    """Abstract base class for map using hash-table with MAD compression.

    Subclasses are expected to provide _bucket_getitem(j, k),
    _bucket_setitem(j, k, v), _bucket_delitem(j, k) and __iter__.
    _bucket_setitem is responsible for incrementing self._n when it adds a
    new key; __delitem__ here decrements self._n itself.
    """

    def __init__(self, cap=11, p=109345121):
        """Create an empty hash-table map.

        cap is the initial number of buckets; p is the prime used by the
        MAD compression function.
        """
        self._table = cap * [None]
        self._n = 0  # number of entries in the map
        self._prime = p  # prime for MAD compression
        # The file imports the module (`import random`), so randrange has to be
        # reached through it; a bare `randrange` is an undefined name.
        self._scale = 1 + random.randrange(p - 1)  # the scale constant, a, in [1, p-1]
        self._shift = random.randrange(p)  # the shift constant, b, in [0, p-1]

    def _hash_function(self, k):
        """Compute the bucket index for key k using the MAD method."""
        return (hash(k) * self._scale + self._shift) % self._prime % len(self._table)

    def __len__(self):
        """Return number of entries in the map."""
        return self._n

    def __getitem__(self, k):
        """Return value associated with key k (delegates to the bucket)."""
        j = self._hash_function(k)  # find the jth bucket
        return self._bucket_getitem(j, k)  # get item with key k in bucket j

    def __setitem__(self, k, v):
        """Assign value v to key k, growing the table when it gets too full."""
        j = self._hash_function(k)  # find the jth bucket
        self._bucket_setitem(j, k, v)  # subclass stores the item and maintains self._n
        if self._n > len(self._table) // 2:  # resize if needed to keep load factor <= 0.5
            self._resize(2 * len(self._table) - 1)

    def __delitem__(self, k):
        """Remove item associated with key k (delegates to the bucket)."""
        j = self._hash_function(k)
        self._bucket_delitem(j, k)  # delete item with key k from the jth bucket
        self._n -= 1  # only reached if the bucket deletion did not raise

    def _resize(self, c):
        """Resize bucket array to capacity c and re-insert every item."""
        old = list(self.items())  # snapshot before the table is replaced
        self._table = c * [None]
        self._n = 0  # reset; re-insertion below counts the items again
        for (k, v) in old:  # re-insert old k, v pair
            self[k] = v

### Hash Map with Separate Chaining

In [11]:
class ChainHashMap(HashMapBase):
    """Hash map that resolves collisions by separate chaining."""

    def _bucket_getitem(self, j, k):
        """Return the value stored under key k in the bucket at index j."""
        chain = self._table[j]
        if chain is not None:
            return chain[k]  # the bucket's own lookup raises KeyError on a miss
        raise KeyError("Key Error: " + repr(k))  # empty bucket: no match possible

    def _bucket_setitem(self, j, k, v):
        """Store value v under key k in the bucket at index j."""
        if self._table[j] is None:
            self._table[j] = UnsortedTableMap()  # first item for this bucket
        chain = self._table[j]
        size_before = len(chain)
        chain[k] = v
        if len(chain) > size_before:
            self._n += 1  # the bucket grew, so k was a new key

    def _bucket_delitem(self, j, k):
        """Remove the item with key k from the bucket at index j."""
        chain = self._table[j]
        if chain is not None:
            del chain[k]
            return
        raise KeyError("Key Error: " + repr(k))  # empty bucket: no match possible

    def __iter__(self):
        """Yield every key in the map, bucket by bucket."""
        for chain in self._table:
            if chain is None:
                continue  # nothing stored at this index
            yield from chain

### Hash Map with Linear Probing

In [14]:
class ProbeHashMap(HashMapBase):
    """Hash map implemented with linear probing for collision resolution."""
    _AVAIL = object()  # sentinel marks locations of previous deletions

    def _is_available(self, j):
        """Return True if index j is available in table.

        A slot is available when it has never been used (None) or when it
        holds the _AVAIL marker left behind by a deletion.
        """
        return self._table[j] is None or self._table[j] is ProbeHashMap._AVAIL

    def _find_slot(self, j, k):
        """Search for key k in the 'bucket' starting at index j.

        If match is found, return (True, index).
        Else, return (False, index for first available slot).

        The probe visits each slot at most once. Deletions leave _AVAIL
        markers rather than None, so a table can end up with no None slot at
        all; an unbounded probe for a missing key would then never terminate.
        """
        firstAvail = None
        for _ in range(len(self._table)):
            if self._is_available(j):  # slot is empty or was vacated by a deletion
                if firstAvail is None:
                    firstAvail = j  # remember the first slot an insertion could reuse
                if self._table[j] is None:
                    # a never-used slot ends the probe sequence: k is not in the table
                    return (False, firstAvail)
                # otherwise this is an _AVAIL marker: keep probing past it
            elif k == self._table[j]._key:  # found key k
                return (True, j)
            j = (j + 1) % len(self._table)  # wrap around the end of the table
        # every slot was inspected without finding k or a never-used slot
        return (False, firstAvail)

    def _bucket_getitem(self, j, k):
        """Find item with key k at 'bucket' starting at index j.

        Raises KeyError if k is not present.
        """
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError("Key Error: " + repr(k))
        return self._table[s]._value

    # Single leading underscore on purpose: a double underscore would be
    # name-mangled to _ProbeHashMap__bucket_setitem, and the call to
    # self._bucket_setitem in HashMapBase.__setitem__ would never find it.
    def _bucket_setitem(self, j, k, v):
        """Set item with key k to value v at 'bucket' starting at index j."""
        found, s = self._find_slot(j, k)
        if not found:
            self._table[s] = self._Item(k, v)  # insert new item at first available slot
            self._n += 1  # size has increased
        else:
            self._table[s]._value = v  # overwrite existing value

    def _bucket_delitem(self, j, k):
        """Delete item with key k at 'bucket' starting at index j.

        Raises KeyError if k is not present.
        """
        found, s = self._find_slot(j, k)
        if not found:
            raise KeyError("Key Error: " + repr(k))
        self._table[s] = ProbeHashMap._AVAIL  # mark position as deleted

    def __iter__(self):
        """Return an iteration of all keys in the map."""
        for j in range(len(self._table)):
            if not self._is_available(j):  # occupied: neither empty nor a deletion marker
                yield self._table[j]._key

## Sorted Maps

### Sorted Search Tables

In [None]:
class SortedTableMap(MapBase):
    """Map implementation using a sorted table.

    Items are kept in a Python list ordered by key; lookups locate a key
    with the binary search in _find_index.
    """

    def _find_index(self, k, low, high):
        """Return index of the leftmost item with key greater than or equal to k.

        Only indices low..high (inclusive) of the table are searched.
        Return high + 1 if there is no such item.
        """
        if high < low:  # no such item found
            return high + 1
        else:
            mid = (low + high) // 2
            if k == self._table[mid]._key:
                return mid  # exact match
            elif k < self._table[mid]._key:
                return self._find_index(k, low, mid - 1)  # search in the left of mid
            else:  # k > self._table[mid]._key
                return self._find_index(k, mid + 1, high)  # search in the right of mid

    def __init__(self):
        """Create an empty map."""
        self._table = []

    def __len__(self):
        """Return number of items in the map."""
        return len(self._table)

    def __getitem__(self, k):
        """Return value associated with key k.

        Raises KeyError if k is not in the map.
        """
        j = self._find_index(k, 0, len(self._table) - 1)
        if j == len(self._table) or self._table[j]._key != k:  # no exact match was found
            raise KeyError("Key Error: " + repr(k))
        return self._table[j]._value

    def __setitem__(self, k, v):
        """Assign value v to key k, overwriting existing value if present."""
        j = self._find_index(k, 0, len(self._table) - 1)
        if j < len(self._table) and self._table[j]._key == k:  # j holds an item with key k
            self._table[j]._value = v  # reassign value
        else:  # no such item was found
            self._table.insert(j, self._Item(k, v))  # insert at j to keep the table sorted

    def __delitem__(self, k):
        """Remove item associated with key k.

        Raises KeyError if k is not in the map.
        """
        j = self._find_index(k, 0, len(self._table) - 1)
        if j == len(self._table) or self._table[j]._key != k:  # no exact match was found
            raise KeyError("Key Error: " + repr(k))
        self._table.pop(j)  # delete item if exact match was found

    def __iter__(self):
        """Generate keys of the map ordered from min to max."""
        for item in self._table:  # self._table is sorted, so this is O(n)
            yield item._key

    def __reversed__(self):
        """Generate keys of the map ordered from max to min."""
        for item in reversed(self._table):
            yield item._key

    def find_min(self):
        """Return (key, value) pair with minimum key, or None if the map is empty."""
        if len(self._table) > 0:
            return (self._table[0]._key, self._table[0]._value)
        else:
            return None

    def find_max(self):
        """Return (key, value) pair with maximum key, or None if the map is empty."""
        if len(self._table) > 0:
            return (self._table[-1]._key, self._table[-1]._value)
        else:
            return None

    def find_ge(self, k):
        """Return (key, value) pair with least key >= k, or None if there is none."""
        j = self._find_index(k, 0, len(self._table) - 1)  # (j-1)'s key < k <= j's key
        if j < len(self._table):
            return (self._table[j]._key, self._table[j]._value)  # we want item with j's key
        else:
            return None

    def find_lt(self, k):
        """Return (key, value) pair with greatest key < k, or None if there is none."""
        j = self._find_index(k, 0, len(self._table) - 1)  # (j-1)'s key < k <= j's key
        if j > 0:
            return (self._table[j-1]._key, self._table[j-1]._value)  # we want item with (j-1)'s key
        else:
            return None

    def find_gt(self, k):
        """Return (key, value) pair with least key > k, or None if there is none."""
        j = self._find_index(k, 0, len(self._table) - 1)  # (j-1)'s key < k <= j's key
        if j < len(self._table) and self._table[j]._key == k:
            j += 1  # move past the exact match
        if j < len(self._table):
            return (self._table[j]._key, self._table[j]._value)  # first key strictly above k
        else:
            return None

    def find_range(self, start, stop):
        """Iterate all (key, value) pairs such that start <= key < stop.

        If start is None, iteration begins with the minimum key.
        If stop is None, iteration continues through the maximum key.
        """
        if start is None:
            j = 0
        else:
            j = self._find_index(start, 0, len(self._table) - 1)
        # The scan runs for both branches above; nesting it under the else
        # would make start=None yield nothing.
        while j < len(self._table) and (stop is None or self._table[j]._key < stop):
            yield (self._table[j]._key, self._table[j]._value)
            j += 1

## Sets

In [1]:
class MultiMap:
    """Multimap that keeps, for each key, a list of values in an underlying map."""
    _MapType = dict

    def __init__(self):
        """Set up an empty multimap."""
        self._map = self._MapType()  # key -> list of values stored under that key
        self._n = 0  # total number of (k, v) pairs

    def __iter__(self):
        """Yield every (k, v) pair held in the multimap."""
        for k, values in self._map.items():
            for v in values:  # each map value is itself a container of values for k
                yield (k, v)

    def add(self, k, v):
        """Insert the pair (k, v)."""
        # setdefault creates the per-key list on first use
        self._map.setdefault(k, []).append(v)
        self._n += 1

    def pop(self, k):
        """Remove and return some (k, v) pair stored under key k."""
        values = self._map[k]
        v = values.pop()
        if not values:
            del self._map[k]  # no values left, so drop the key entirely
        self._n -= 1
        return (k, v)

    def find(self, k):
        """Return some (k, v) pair stored under key k without removing it."""
        return (k, self._map[k][0])

    def find_all(self, k):
        """Yield all (k, v) pairs stored under key k (none if k is absent)."""
        for v in self._map.get(k, []):
            yield (k, v)