In [1]:
# dicts are used often in Python, so they have been highly optimized. Hash tables are the engines behind Python's high-performance dicts.

In [2]:
# Outline of the Chapter:
# Common dictionary methods
# Special handling for missing keys
# Variations of dict in the standard library
# The `set` and `frozenset` types
# How hash tables work
# Implications of the hash tables (key type limitations, unpredictable ordering, etc.)

In [3]:
# Definition: hashable: An object is hashable if it has a hash value that never changes during its lifetime (it needs a __hash__() method), and can be compared to other objects (it needs an __eq__() method). Hashable objects which compare equal must have the same hash value.
# The atomic immutable types (str, bytes, numeric types) are all hashable. A frozenset is always hashable, because its elements must be hashable by definition. A tuple is only hashable if all its items are hashable.
# See below:

In [6]:
# A tuple is hashable only when every item it contains is hashable.

# Nested tuple of ints: hashable.
tt = (1, 2, (30, 40))
print(hash(tt), "\n")

# A frozenset item is hashable too, so the enclosing tuple is as well.
tf = (1, 2, frozenset([30, 40]))
print(hash(tf), "\n")

# A list item is mutable and unhashable, so hashing the tuple fails.
# The TypeError is the point of the demonstration: catch it and show it,
# so the cell does not abort a "Restart & Run All".
tl = (1, 2, [30, 40])
try:
    print(hash(tl), "\n")
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

8027212646858338501 

985328935373711578 



TypeError: unhashable type: 'list'

In [8]:
# A dict can be built in several equivalent ways:
a = dict(one=1, two=2, three=3)                      # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                 # literal syntax
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))    # zipped keys and values
d = dict([('two', 2), ('one', 1), ('three', 3)])     # iterable of (key, value) pairs
e = dict({'three': 3, 'one': 1, 'two': 2})           # built from another mapping
# The pairs were given in different orders, yet all five dicts compare equal.
print(all(other == a for other in (b, c, d, e)))

True


In [10]:
# dict comprehensions (dictcomp)
# Same syntax family as list comprehensions (listcomp) and generator
# expressions (genexp), but each iteration produces a key: value pair.
# An example:
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]
# Build a dict from the list of (code, country) tuples, flipping each
# pair so that the country name becomes the key.
country_code = {name: dial for dial, name in DIAL_CODES}
print(country_code, "\n")
# Flip the pairs back to code -> country, keep only the codes below 66
# and upper-case the country names.
country_code_filtered = {
    dial: name.upper()
    for name, dial in country_code.items()
    if dial < 66
}
print(country_code_filtered)

{'China': 86, 'India': 91, 'United States': 1, 'Indonesia': 62, 'Brazil': 55, 'Pakistan': 92, 'Bangladesh': 880, 'Nigeria': 234, 'Russia': 7, 'Japan': 81} 

{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}


In [None]:
# Overview of common mapping methods.
# NOTE(review): this reads as a reference listing, not a runnable cell: k, it,
# initial, default, last, m and kwargs are not defined in the cells shown above
# and appear to be placeholders; the [bracketed] arguments mark optional
# parameters. Running it as-is would first empty d and then fail on k.
d.clear() # Remove all items
d.__contains__(k) # k in d
d.copy() # Shallow copy
d.__copy__() # Support for copy.copy - only for defaultdict
d.__delitem__(k) # del d[k] - remove item with key k
d.fromkeys(it, [initial]) # New mapping from keys in iterable, with optional initial value (None if omitted)
d.get(k, [default]) # Get item with key k, return default or None if missing
d.__getitem__(k) # d[k] - get item with key k
d.items() # Get view over items -- (key, value) pairs
d.__iter__() # Get iterator over keys
d.keys() # Get view over keys
d.__len__() # len(d) -- number of items
d.move_to_end(k, [last]) # OrderedDict only: move k to the last or first position (last is True by default)
d.pop(k, [default]) # Remove and return value at k; if k is missing, return default, or raise KeyError when no default is given
d.popitem() # Remove and return a (key, value) item; for dict the last inserted item (LIFO) since Python 3.7, arbitrary before
d.__reversed__() # Get iterator for keys from last to first inserted (OrderedDict; plain dict since Python 3.8)
d.setdefault(k, [default]) # If k in d, return d[k]; else set d[k] = default and return it
d.__setitem__(k, v) # d[k] = v -- put v at k
d.update(m, [**kwargs]) # Update d with items from mapping or iterable of (key, value) pairs
d.values() # Get view over values

In [6]:
# Handling missing keys, first attempt: dict.get
# d.get(k, default) is not always the best tool, especially when the
# fetched value has to be updated and stored back.

# Build a dict mapping each letter to the list of numbers drawn for it
import random
import string

random.seed(5)
string_index = {}
for i in range(50):
    k = random.choice(string.ascii_letters).upper()
    v = random.randint(0, 10)
    occurrences = string_index.get(k, [])  # a fresh list when k is new
    occurrences.append(v)
    string_index[k] = occurrences          # store back; required for a new k

# Print in alphabetic order: the keys are unique and already uppercase,
# so sorting the (letter, numbers) pairs orders them by letter.
for letter, numbers in sorted(string_index.items()):
    print(letter, numbers)


A [3, 4, 0, 2]
B [5, 5]
C [0]
D [3, 4]
E [5, 4, 5, 2]
H [0, 3]
I [1, 9, 2]
J [4, 6]
K [1, 3, 1, 2]
L [6, 0, 7, 0]
M [8, 10]
N [4, 7, 3, 2, 3]
P [0, 6]
R [10]
S [5, 3]
T [9, 5, 2]
V [5]
W [4]
X [7, 6]
Y [10, 4]
Z [0]


In [7]:
# The three lines that dealt with occurrences collapse into a single
# d.setdefault call: it returns the list stored at k, inserting an empty
# list first when k is missing, so the append needs no second lookup.
import random
import string

random.seed(5)
string_index = {}
for i in range(50):
    k = random.choice(string.ascii_letters).upper()
    v = random.randint(0, 10)
    string_index.setdefault(k, []).append(v)

# Print in alphabetic order: the keys are unique and already uppercase,
# so sorting the (letter, numbers) pairs orders them by letter.
for letter, numbers in sorted(string_index.items()):
    print(letter, numbers)


A [3, 4, 0, 2]
B [5, 5]
C [0]
D [3, 4]
E [5, 4, 5, 2]
H [0, 3]
I [1, 9, 2]
J [4, 6]
K [1, 3, 1, 2]
L [6, 0, 7, 0]
M [8, 10]
N [4, 7, 3, 2, 3]
P [0, 6]
R [10]
S [5, 3]
T [9, 5, 2]
V [5]
W [4]
X [7, 6]
Y [10, 4]
Z [0]
