In [None]:
%pip install -q spytial-diagramming
from spytial import *
from spytial.annotations import *

In [None]:
# Setup for performance metrics
import random
from time import sleep

# Master switch for the benchmark cells: leave False for a normal run,
# flip to True to collect performance metrics.
RUN_PERF = False

# Number of iterations requested per benchmark (passed as `perf_iterations`).
PI = 20
# Structure sizes that each benchmark cell loops over.
SIZES = [5, 10, 25, 50]

# Common prefix of every metrics filename.
perf_base = "spytial_perf"


def get_perf_path(structure, size):
    """Return the metrics filename for one benchmark run.

    Args:
        structure: Name of the benchmarked structure (a string).
        size: Size of the benchmarked instance; formatted into the name.

    Returns:
        A string of the form ``<perf_base>_<structure>_<size>.json``.
    """
    return "_".join((perf_base, structure, f"{size}.json"))


In [None]:

@attribute(field='key')
@attribute(field='sdata')
class HashedData:
    """Record that pairs a table key with its satellite data."""

    def __init__(self, key, sdata):
        """Store ``key`` and ``sdata`` unchanged on the instance."""
        # Both fields are named in the @attribute decorators above
        # (presumably so the diagram shows them inline -- confirm in sPyTial docs).
        self.key, self.sdata = key, sdata

# Selector for the key -> value relation: pairs (k, v) such that some tuple t
# has t.t0 = k and a non-None t.t1 = v. Reused by two decorators below.
mapsto = "{k : int, v : (object - NoneType) | (some t : tuple | t.t1 not in NoneType and t.t1 = v and t.t0 = k)}"


# Direct Address
@hideAtom(selector='NoneType + DirectAddress + tuple + list + str + (int - (tuple.t0))')
@inferredEdge(selector=mapsto, name='maps-to')
@orientation(selector=mapsto, directions=['directlyRight'])
@group(selector='tuple.t0', name='T')
@orientation(selector='{x, y : (tuple.t0) | x < y}', directions=['directlyBelow'])
class DirectAddress:
    """Direct-address table: slot ``k`` of ``T`` holds the entry for key ``k``.

    Occupied slots hold the pair ``(k, v)``; empty slots hold ``None``.
    """

    def __init__(self, m):
        """Create a table with ``m`` slots, all initially empty."""
        self.T = [None for _ in range(m)]

    def insert(self, k, v):
        """Store ``v`` in slot ``k`` as the pair ``(k, v)``, replacing any previous entry."""
        entry = (k, v)
        self.T[k] = entry

    def search(self, k):
        """Return the ``(k, v)`` pair held in slot ``k``, or ``None`` if the slot is empty."""
        return self.T[k]

    def delete(self, k):
        """Empty slot ``k`` by resetting it to ``None``."""
        self.T[k] = None


![diradd](./img/direct-address-hash-table.png)

In [None]:
# Build a 10-slot direct-address table and store five records under their own keys.
N = 10
data = [
    HashedData(key=key, sdata=text)
    for key, text in [(2, "lorem"), (3, "ipsum"), (5, "dolor"), (7, "sit"), (8, "amet")]
]

da = DirectAddress(N)

# Each record is stored in the slot selected by its own key.
for d in data:
    da.insert(d.key, d)

diagram(da)

## Performance - Direct Address

In [None]:
# Benchmark diagramming of direct-address tables at every size in SIZES.
if not RUN_PERF:
    print("Performance testing skipped. Set RUN_PERF = True to enable.")
else:
    STRUCTURE = "direct_address"
    for size in SIZES:
        da = DirectAddress(size)
        # Fill about half of the slots, using distinct random keys.
        keys = random.sample(range(size), size // 2)

        for key in keys:
            hd = HashedData(key=key, sdata=f"data_{key}")
            da.insert(key, hd)

        diagram(
            da,
            method="browser",
            perf_path=get_perf_path(STRUCTURE, size),
            perf_iterations=PI,
            headless=True,
        )


# Now Chained Hash Table

In [None]:

@hideField(selector='Node', field='prev')
@attribute(field='key')
@attribute(field='val')
class Node:
    """Doubly linked list cell carrying one key/value pair."""

    __slots__ = ("key", "val", "prev", "next")

    def __init__(self, key, val=None):
        """Create a detached node; ``val`` defaults to ``None``."""
        self.key, self.val = key, val
        # Both links start as None; whoever owns the node wires them up later.
        self.prev = self.next = None

    def __repr__(self):
        """Return ``Node(key=..., val=...)`` (links are not shown)."""
        return "Node(key={0.key}, val={0.val})".format(self)
# Selector for atoms whose `next` and `prev` both point back at themselves,
# i.e. the sentinel of an empty bucket.
NEXT_PREV_IDN = "univ.((next & prev) & iden)"

@hideAtom(selector=f'NoneType + {NEXT_PREV_IDN} + list + str + int') # Hide empty slots
@attribute(field='m')
@group(selector="(((NoneType.~key) -> Node) & ^next) - iden", name='bucket')
@align(selector=f"{{a, b : Node-{NEXT_PREV_IDN} | a != b and (a.key + b.key) in NoneType }}", direction='vertical')
@group(selector='(NoneType.~key) - ((iden & next).Node)', name='T')
@atomColor(selector='(NoneType.~key) - ((iden & next).Node)', value='blue')
@orientation(selector='( ((NoneType.~key) -> Node) & next ) - iden', directions=['directlyRight'])
class HashTableChaining:
    """Hash table that resolves collisions by chaining (CLRS-style operations).

    ``T`` is a list of ``m`` buckets. Each bucket is a circular doubly linked
    list headed by a sentinel ``Node`` whose key and value are ``None``; an
    empty bucket is a sentinel linked to itself in both directions.
    """

    def __init__(self, m=8):
        """Create a table with ``m`` empty buckets (default 8)."""
        self.m = m
        self.T = [self._new_sentinel() for _ in range(m)]

    # ---- internals ----
    def _new_sentinel(self):
        """Return a fresh sentinel node whose ``prev`` and ``next`` are itself."""
        s = Node(key=None, val=None)
        s.prev = s.next = s          # circular sentinel
        return s

    def _h(self, k):
        """Map key ``k`` to a bucket index in ``range(self.m)``."""
        return hash(k) % self.m      # abstract h(.) in CLRS; fine for a minimal demo

    # ---- CLRS ops ----
    # CHAINED-HASH-SEARCH(T, k) -> pointer to element or None
    def search(self, k):
        """Return the first node in ``k``'s bucket whose key equals ``k``, else ``None``.

        Because ``insert`` adds at the head, the most recently inserted
        node wins when several nodes share a key.
        """
        s = self.T[self._h(k)]
        x = s.next
        # The chain is circular, so reaching the sentinel again means "not found".
        while x is not s:
            if x.key == k:
                return x
            x = x.next
        return None

    # CHAINED-HASH-INSERT(T, x)  (insert element object)
    # Head insert (like CLRS); does NOT check duplicates.
    def insert(self, x: Node):
        """Splice node ``x`` in directly after the sentinel of its bucket.

        No duplicate check is made, and ``x`` is assumed not to be linked
        into any chain already.
        """
        s = self.T[self._h(x.key)]
        # splice x right after sentinel (head insert)
        x.next = s.next
        x.prev = s
        s.next.prev = x
        s.next = x

    # CHAINED-HASH-DELETE(T, x)  (delete by pointer) -> bool
    # O(1) because we have prev/next.
    def delete(self, x: Node):
        """Unlink node ``x`` from its chain.

        Returns:
            ``True`` if ``x`` was unlinked, ``False`` if either of its links
            was already ``None`` (i.e. it is not currently in a chain).
        """
        if x.prev is None or x.next is None:
            return False  # not currently in a table/chain
        x.prev.next = x.next
        x.next.prev = x.prev
        # Reset the links so a second delete of the same node is a no-op.
        x.prev = x.next = None
        return True

    # (Optional) convenience method: delete by key using search
    def delete_key(self, k):
        """Delete the first node found for key ``k``; return whether one was removed."""
        x = self.search(k)
        # Compare against None explicitly instead of relying on the node's truthiness.
        return False if x is None else self.delete(x)

    # (Optional) quick view
    def __str__(self):
        """Return one line per bucket: ``[i] key:val -> key:val ...``.

        An empty bucket is shown as ``[i]`` followed by a middle dot (U+00B7).
        """
        lines = []
        for i, s in enumerate(self.T):
            items, x = [], s.next
            while x is not s:
                items.append(f"{x.key}:{x.val}")
                x = x.next
            # Written as an escape so the placeholder cannot be mis-encoded again.
            chain = " -> ".join(items) if items else "\u00b7"
            lines.append(f"[{i}] {chain}")
        return "\n".join(lines)



![chained](./img/chaining-hash-table.png)

In [None]:
# ---- Example usage ----
# With m=5 and small integer keys, 12, 7 and 42 all fall in bucket 2
# (hash(k) % 5), while 100 falls in bucket 0.
T = HashTableChaining(m=5)
a = Node(12, "A")
b = Node(7, "B")
c = Node(12, "C")    # same key as `a`: insert() does not reject duplicates
d = Node(42, "D")
e = Node(100, "E")
T.insert(a)
T.insert(b)
T.insert(c)
T.insert(d)
T.insert(e)
diagram(T)


## Performance - Hash Table Chaining

In [None]:
# Benchmark diagramming of chained hash tables at every size in SIZES.
if not RUN_PERF:
    print("Performance testing skipped. Set RUN_PERF = True to enable.")
else:
    STRUCTURE = "hash_table_chaining"
    for size in SIZES:
        ht = HashTableChaining(m=size)
        # `size` distinct keys for `size` buckets.
        keys = random.sample(range(1, 1000), size)

        # Insert nodes into the hash table
        for key in keys:
            node = Node(key=key, val=f"value_{key}")
            ht.insert(node)

        diagram(
            ht,
            method="browser",
            perf_path=get_perf_path(STRUCTURE, size),
            perf_iterations=PI,
            headless=True,
            timeout=500,
        )
