# 01_03: Dictionaries and sets

In [1]:
import math
import collections
import dataclasses
import datetime

import numpy as np
import pandas as pd
import matplotlib.pyplot as pp  

In [2]:
capitals = {'United States': 'Washington, DC', 'France': 'Paris', 'Italy': 'Rome'}

In [3]:
capitals

{'United States': 'Washington, DC', 'France': 'Paris', 'Italy': 'Rome'}

In [4]:
len(capitals), len({})

(3, 0)

In [5]:
capitals['Italy']

'Rome'

In [6]:
capitals['Spain'] = 'Madrid'

In [7]:
capitals

{'United States': 'Washington, DC',
 'France': 'Paris',
 'Italy': 'Rome',
 'Spain': 'Madrid'}

In [9]:
capitals['Germany'] if 'Germany' in capitals else 'KeyError'

'KeyError'

In [10]:
'Germany' in capitals, 'Italy' in capitals

(False, True)

In [11]:
morecapitals = {'Germany': 'Berlin', 'United Kingdom': 'London'}

In [12]:
{**capitals, **morecapitals}

{'United States': 'Washington, DC',
 'France': 'Paris',
 'Italy': 'Rome',
 'Spain': 'Madrid',
 'Germany': 'Berlin',
 'United Kingdom': 'London'}

In [13]:
capitals.update(morecapitals)

In [14]:
capitals

{'United States': 'Washington, DC',
 'France': 'Paris',
 'Italy': 'Rome',
 'Spain': 'Madrid',
 'Germany': 'Berlin',
 'United Kingdom': 'London'}

In [15]:
del capitals['United Kingdom']

In [16]:
capitals

{'United States': 'Washington, DC',
 'France': 'Paris',
 'Italy': 'Rome',
 'Spain': 'Madrid',
 'Germany': 'Berlin'}

In [17]:
birthdays = {(7,15): 'Michele', (3,14): 'Albert'}

In [19]:
birthdays

{(7, 15): 'Michele', (3, 14): 'Albert'}

In [18]:
birthdays[(7,15)]

'Michele'

In [None]:
hash('Italy'), hash((7,15))

In [None]:
for country in capitals:
    print(country)

In [None]:
for country in capitals.keys():
    print(country)

In [None]:
capitals.keys()

In [None]:
list(capitals.keys())

In [None]:
for capital in capitals.values():
    print(capital)

In [None]:
for country, capital in capitals.items():
    print(country, capital)

In [None]:
capitals.keys()

In [None]:
list(capitals.keys())

In [None]:
capitals_default = collections.defaultdict(lambda: "I don't know!")

In [None]:
capitals_default.update(capitals)

In [None]:
capitals_default['Canada']

In [None]:
continents = {'America', 'Europe', 'Asia', 'Oceania', 'Africa', 'Africa'}

In [None]:
continents

In [None]:
'Africa' in continents

In [None]:
continents.add('Antarctica')

In [None]:
continents.remove('Antarctica')

In [None]:
for c in continents:
    print(c)

**Example: dictionaries with various hashable key types**

Below is a compact example that uses many kinds of hashable objects as dictionary keys and demonstrates retrieval and iteration. Mutable objects like `list` are not hashable and cannot be used as keys.

In [None]:
from dataclasses import dataclass
import datetime

@dataclass(frozen=True)
class Point:
    x: int
    y: int

p = Point(1, 2)

d = {
    42: "an int key",
    3.14: "a float key",
    "name": "a str key",
    (1, 2): "a tuple key",
    frozenset({1, 2}): "a frozenset key",
    True: "a bool key",
    b'bytes': "a bytes key",
    datetime.date(2025, 12, 11): "a date key",
    p: "a frozen dataclass instance key",
}

print("Dictionary with mixed hashable keys:\n", d)
print("Access some values:\n", d[(1, 2)], d[datetime.date(2025, 12, 11)], d[p])

print('\nTypes of keys:')
for k in d:
    print(type(k), '->', k)
    if isinstance(k, Point):
        print('  This key is a Point with coordinates:', k.x, k.y)

# Note: the following would raise TypeError (unhashable):
# d[[1,2]] = 'list as key'  # TypeError: unhashable type: 'list'

# Special note: bool is a subclass of int, so True and 1 collide as keys:
d2 = {1: 'one', True: 'true'}
print('\nCollision example (1 and True):', d2)

Dictionary with mixed hashable keys:
 {42: 'an int key', 3.14: 'a float key', 'name': 'a str key', (1, 2): 'a tuple key', frozenset({1, 2}): 'a frozenset key', True: 'a bool key', b'bytes': 'a bytes key', datetime.date(2025, 12, 11): 'a date key', Point(x=1, y=2): 'a frozen dataclass instance key'}
Access some values:
 a tuple key a date key a frozen dataclass instance key

Types of keys:
<class 'int'> -> 42
<class 'float'> -> 3.14
<class 'str'> -> name
<class 'tuple'> -> (1, 2)
<class 'frozenset'> -> frozenset({1, 2})
<class 'bool'> -> True
<class 'bytes'> -> b'bytes'
<class 'datetime.date'> -> 2025-12-11
<class '__main__.Point'> -> Point(x=1, y=2)
  This key is a Point with coordinates: 1 2

Collision example (1 and True): {1: 'true'}


**Frozenset examples**

A `frozenset` is an immutable `set`. It supports non-mutating set operations and is hashable, so it can be used as a dictionary key or stored inside a `set`.

In [1]:
# frozenset demo
fs = frozenset([1, 2, 3])
print('fs:', fs)

# immutability: mutating methods don't exist (this will raise AttributeError)
try:
    fs.add(4)
except Exception as e:
    print('fs.add ->', type(e).__name__, e)

# non-mutating operations return new frozensets
print('union with {3,4}:', fs.union({3, 4}))
print('intersection with [2,3]:', fs.intersection([2, 3]))

# equality with set compares by membership
print('fs == set([1,2,3]):', fs == set([1, 2, 3]))

# frozenset can be used as a dict key or as an element of a set
d = {fs: 'value for frozenset key'}
print('dict access by frozenset key:', d[fs])

s = {frozenset([1]), frozenset([2, 3])}
print('set containing frozensets:', s)

# unhashable example (would raise TypeError):
# d[set([1,2])] = 'bad'  # TypeError: unhashable type: 'set' (not hashable)

# note: frozenset methods do not modify in-place; they return new frozensets
fs2 = fs.union([4])
print('fs after union (original unchanged):', fs, '-> new:', fs2)

fs: frozenset({1, 2, 3})
fs.add -> AttributeError 'frozenset' object has no attribute 'add'
union with {3,4}: frozenset({1, 2, 3, 4})
intersection with [2,3]: frozenset({2, 3})
fs == set([1,2,3]): True
dict access by frozenset key: value for frozenset key
set containing frozensets: {frozenset({1}), frozenset({2, 3})}
fs after union (original unchanged): frozenset({1, 2, 3}) -> new: frozenset({1, 2, 3, 4})


**Making classes immutable/hashable: dataclass vs manual dunder methods**

There are two main approaches:
1. **`@dataclass(frozen=True)`** — Simple, automatic immutability and hashability  
2. **Manual `__hash__` & `__eq__` overrides** — More control, but requires you to handle immutability separately

For finality (preventing subclassing), use `@final` from `typing` module.


In [11]:
# Approach 1: dataclass with frozen=True (simple, automatic)
from dataclasses import dataclass
from typing import final

@dataclass(frozen=True)
class PointDataclass:
    x: int
    y: int

p1 = PointDataclass(1, 2)
print("Dataclass approach:")
print("  p1:", p1)
print("  hash(p1):", hash(p1))
print("  Can use as dict key:", {p1: "success"})

# Try to mutate (will raise FrozenInstanceError)
try:
    p1.x = 5
except Exception as e:
    print(f"  p1.x = 5 -> {type(e).__name__}: {e}")

print()

# Approach 2: Manual __hash__ and __eq__ (more control, but more code)
class PointManual:
    def __init__(self, x: int, y: int):
        # Store in private attributes to prevent direct mutation
        self._x = x
        self._y = y
    
    @property
    def x(self):
        return self._x
    
    @property
    def y(self):
        return self._y
    
    def __eq__(self, other):
        if not isinstance(other, PointManual):
            return False
        return self._x == other._x and self._y == other._y
    
    def __hash__(self):
        return hash((self._x, self._y))
    
    def __repr__(self):
        return f"PointManual({self._x}, {self._y})"

p2 = PointManual(1, 2)
print("Manual approach:")
print("  p2:", p2)
print("  hash(p2):", hash(p2))
print("  Can use as dict key:", {p2: "success"})
print("  Equality works:", PointManual(1, 2) == p2)

# Attempting direct mutation is harder (properties prevent it)
try:
    p2.x = 5
except AttributeError as e:
    print(f"  p2.x = 5 -> AttributeError (property read-only)")

print()

# Approach 3: Using @final to prevent subclassing
@final
@dataclass(frozen=True)
class FinalPoint:
    x: int
    y: int

print("Final class (with @final decorator):")
print("  FinalPoint(3, 4):", FinalPoint(3, 4))
# Trying to subclass FinalPoint would raise TypeError at class definition time


Dataclass approach:
  p1: PointDataclass(x=1, y=2)
  hash(p1): -3550055125485641917
  Can use as dict key: {PointDataclass(x=1, y=2): 'success'}
  p1.x = 5 -> FrozenInstanceError: cannot assign to field 'x'

Manual approach:
  p2: PointManual(1, 2)
  hash(p2): -3550055125485641917
  Can use as dict key: {PointManual(1, 2): 'success'}
  Equality works: True
  p2.x = 5 -> AttributeError (property read-only)

Final class (with @final decorator):
  FinalPoint(3, 4): FinalPoint(x=3, y=4)


In [9]:
# IMPORTANT: @final only works at type-checking time, NOT at runtime!
# This means Mypy will complain, but Python won't prevent it.

# To actually enforce immutability at runtime, use __init_subclass__:
@dataclass(frozen=True)
class TrulyFinalPoint:
    x: int
    y: int
    
    def __init_subclass__(cls, **kwargs):
        raise TypeError(f"Cannot subclass {cls.__name__}; it is final")

print("Trying to subclass TrulyFinalPoint:")
try:
    class subTrulyFinalPoint(TrulyFinalPoint):
        pass
except TypeError as e:
    print(f"  ✓ Correctly raised: {e}")

print("\n@final (from typing) is only for static type checkers:")
print("  - Mypy will flag violations")
print("  - But Python allows subclassing at runtime")
print("  - Use __init_subclass__ to enforce at runtime")

Trying to subclass TrulyFinalPoint:
  ✓ Correctly raised: Cannot subclass subTrulyFinalPoint; it is final

@final (from typing) is only for static type checkers:
  - Mypy will flag violations
  - But Python allows subclassing at runtime
  - Use __init_subclass__ to enforce at runtime
