# Task 4: Hashtable with Separate Chaining

In [None]:
import sys

sys.path.append('..')

from ads import HashTableSC

## Basic Operations

In [None]:
# Basic operations demo: build a str -> int table of names and ages, then
# query it for one key that is present and one that is not.
ht = HashTableSC[str, int]()

# Parallel lists: names[i] is stored together with ages[i]
names = ['Liam', 'Olivia', 'Charlotte', 'Lucas', 'Mia']
ages = [34, 28, 42, 19, 31]

# strict=True raises ValueError if the two lists ever get out of sync,
# instead of silently dropping the unmatched trailing entries.
for name, age in zip(names, ages, strict=True):
    ht.put(name, age)

print("Size:", len(ht))
# 'Olivia' was inserted above; 'Emma' never was, so the 'Emma' lines show
# how contains()/get() report a missing key.
print("Contains 'Olivia':", ht.contains('Olivia'))
print("Contains 'Emma':", ht.contains('Emma'))
print("Get 'Olivia':", ht.get('Olivia'))
print("Get 'Emma':", ht.get('Emma'))

## Update Existing Key

In [None]:
# Overwrite the value stored under a key that is already in the table
age_before = ht.get('Olivia')
print("Before update:", age_before)

ht.put('Olivia', 35)

age_after = ht.get('Olivia')
print("After update:", age_after)

# Re-putting an existing key is expected to replace its value rather than
# add a second entry, so the reported size should not change.
size_after_update = len(ht)
print("Size (should be same):", size_after_update)

## Remove Operation

In [None]:
# Remove a key that was inserted earlier and show the effect on the table
lucas_result = ht.remove('Lucas')
print("Remove 'Lucas':", lucas_result)

size_after_removal = len(ht)
print("Size after removal:", size_after_removal)

lucas_still_present = ht.contains('Lucas')
print("Contains 'Lucas':", lucas_still_present)

# 'Emma' was never inserted; this shows what remove() reports for a missing key
emma_result = ht.remove('Emma')
print("Remove 'Emma' (doesn't exist):", emma_result)

## Keys and Values

In [None]:
# Dump everything currently stored in the table
all_keys = ht.keys()
print("All keys:", all_keys)

all_values = ht.values()
print("All values:", all_values)

## Collision Testing

Insert integer keys into a deliberately small table to observe how collisions are handled.

In [None]:
import random

# Create small table to force collisions: with only 5 buckets, 10 keys
# cannot all land in different buckets.
ht_int = HashTableSC[int, str](m=5)

# Draw 10 *distinct* keys. randint() may return the same key twice, and a
# put() on an existing key is expected to overwrite rather than add an entry,
# which would leave fewer than 10 items and make the message below wrong.
# Seeding keeps the drawn keys reproducible between runs.
random.seed(42)
for key in random.sample(range(1, 101), 10):
    ht_int.put(key, f"value_{key}")

print("Inserted 10 items into table of size 5")
print("Total items:", len(ht_int))
print("All keys:", sorted(ht_int.keys()))
print("\nNote: Multiple items hash to same buckets (collisions handled via chaining)")

## Performance with Larger Dataset

In [None]:
# Load a table of 31 buckets with 200 integer keys and report the average
# number of items per bucket (n/m).
ht_large = HashTableSC[int, int](m=31)

# Draw 200 *distinct* keys from 1..100_000. Independent randint() draws can
# repeat a key; a repeated put() is expected to overwrite instead of adding
# an entry, so the table could end up with fewer than the 200 items reported
# below. Seeding keeps the drawn keys reproducible between runs.
random.seed(42)
for key in random.sample(range(1, 100_001), 200):
    ht_large.put(key, key * 2)

print("Inserted 200 items into table of size 31")
print("Total items:", len(ht_large))
print("Average items per bucket:", len(ht_large) / ht_large.sz)

## Bucket Distribution Analysis

In [None]:
import matplotlib.pyplot as plt

# One chain length per bucket, in table order
chain_lengths = list(map(ht_large._len_chain, ht_large.table))

# Text summary of how evenly the keys are spread over the buckets
print("Chain length statistics:")
shortest_chain = min(chain_lengths)
longest_chain = max(chain_lengths)
mean_chain = sum(chain_lengths) / len(chain_lengths)
empty_bucket_count = chain_lengths.count(0)
print(f"  Min: {shortest_chain}")
print(f"  Max: {longest_chain}")
print(f"  Avg: {mean_chain:.2f}")
print(f"  Empty buckets: {empty_bucket_count}")

# Bar chart: one bar per bucket, with the n/m average drawn as a reference line
bucket_positions = range(len(chain_lengths))

plt.figure(figsize=(10, 5))
plt.bar(bucket_positions, chain_lengths, color='skyblue', edgecolor='black')
plt.xlabel('Bucket Index')
plt.ylabel('Chain Length')

item_count = len(ht_large)
bucket_count = ht_large.sz
title = f'Hashtable Chain Length Distribution (n={item_count}, m={bucket_count})'
plt.title(title)

expected_chain = item_count / bucket_count
plt.axhline(y=expected_chain, color='r', linestyle='--', label='Expected (n/m)')
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

## Custom Object Keys

In [None]:
from dataclasses import dataclass


# frozen=True, together with dataclass's default eq=True, makes dataclass
# generate __hash__ from the fields, so instances can be used as table keys.
@dataclass(frozen=True)  # frozen makes it hashable
class Person:
    """Immutable (name, age) record used as a hash table key in this demo."""
    # Both fields take part in the generated __eq__ and __hash__.
    name: str
    age: int

# Table keyed by Person instances, mapping each person to a job title
ht_person = HashTableSC[Person, str]()

p1 = Person("Alice", 30)
p2 = Person("Bob", 25)
p3 = Person("Charlie", 35)

# Insert in the order p1, p2, p3
for person, job_title in ((p1, "Engineer"), (p2, "Designer"), (p3, "Manager")):
    ht_person.put(person, job_title)

# Look two of them up again using the same key objects
alice_job = ht_person.get(p1)
print("Get Alice:", alice_job)
bob_job = ht_person.get(p2)
print("Get Bob:", bob_job)
print("\nNote: Works with any hashable Python object!")

## Empty Table Operations

In [None]:
# Exercise every read/remove operation on a table that has no entries
ht_empty = HashTableSC[str, int]()

empty_flag = ht_empty.is_empty()
print("Is empty:", empty_flag)

empty_size = len(ht_empty)
print("Size:", empty_size)

# 'test' was never inserted; the lines below show the missing-key results
missing_value = ht_empty.get('test')
print("Get from empty:", missing_value)

missing_present = ht_empty.contains('test')
print("Contains in empty:", missing_present)

missing_removed = ht_empty.remove('test')
print("Remove from empty:", missing_removed)