# Chapter 3: Dictionaries and Sets

Here is a brief outline of this chapter: 
• Modern syntax to build and handle dicts and mappings, including enhanced unpacking and pattern matching 
• Common methods of mapping types 
• Special handling for missing keys 
• Variations of dict in the standard library 
• The set and frozenset types 
• Implications of hash tables in the behavior of sets and dictionaries

## Dict Comprehensions

In [None]:
codes = [
    ("a", 1),
    ("b", 2),
    ("c", 3),
    ("d", 4),
]

code_dict = {k: v for k, v in codes}

code_dict

{'a': 1, 'b': 2, 'c': 3, 'd': 4}

In [2]:
{v: k.upper() for k, v in sorted(codes, reverse=True) if v > 1}

{4: 'D', 3: 'C', 2: 'B'}

## Unpacking with **

In [None]:
def dump(**kwargs):
    return kwargs

dump(**code_dict, z=26, **{'e': 5, 'f': 6})

{'a': 1, 'b': 2, 'c': 3, 'd': 4, 'z': 26, 'e': 5, 'f': 6}

In [None]:
# | makes a new dict
d1 = {'a': 1, 'b': 3}
d2 = {'a': 2, 'b': 4, 'c': 6}
d1 | d2

{'a': 2, 'b': 4, 'c': 6}

In [6]:
d1 |= d2
d1 # |= updates in place

{'a': 2, 'b': 4, 'c': 6}

In [None]:
# match case pattern matching can also be done with mappings

In [8]:
# the ABCs that implement the mapping protocol are Mapping and MutableMapping
from collections.abc import Mapping, MutableMapping

my_dict = {}
isinstance(my_dict, Mapping), isinstance(my_dict, MutableMapping)

(True, True)

## Hashables

In [None]:
# An object is hashable if it has a constant hash value that doesn't change during its lifetime (__hash__) and can be compared to other objects (__eq__)
tt = (1, 2, 3) # hashable
tl = [1, 2, 3] # not hashable
tf = frozenset([1, 2, 3]) # hashable

## .setdefault() and defaultdict

In [None]:
# inserting or updating mutable values (setdefault)
import re
import sys

WORD_RE = re.compile(r'\w+')

# the following function indexes a text, making a dict where the keys are words and values are the positions where they occur
def index_text(text):
    index = {}
    with open(text, encoding="utf-8") as fp:
        for line_no, line in enumerate(fp, 1):
            for match in WORD_RE.finditer(line):
                word = match.group()
                column_no = match.start() + 1

                location = (line_no, column_no)
                index.setdefault(word, []).append(location) # this is the same as checking if word in index, setting an empty list if not, then appending
    
    return index

index = index_text('./zen.txt')
index = {k.lower(): v for k, v in sorted(index.items())}

{k: v for k, v in sorted(index.items())}

{'a': [(17, 48), (18, 53)],
 'although': [(9, 1), (14, 1), (16, 1)],
 'ambiguity': [(12, 16)],
 'and': [(13, 23)],
 'are': [(19, 12)],
 'aren': [(8, 15)],
 'at': [(14, 38)],
 'bad': [(17, 50)],
 'be': [(13, 14), (14, 27), (18, 50)],
 'beats': [(9, 23)],
 'beautiful': [(1, 1)],
 'better': [(1, 14),
  (2, 13),
  (3, 11),
  (4, 12),
  (5, 9),
  (6, 11),
  (15, 8),
  (16, 25)],
 'break': [(8, 40)],
 'cases': [(8, 9)],
 'complex': [(3, 23)],
 'complicated': [(4, 24)],
 'counts': [(7, 13)],
 'dense': [(6, 23)],
 'do': [(13, 64), (19, 48)],
 'dutch': [(14, 61)],
 'easy': [(18, 26)],
 'enough': [(8, 30)],
 'errors': [(10, 1)],
 'explain': [(17, 34), (18, 34)],
 'explicit': [(2, 1)],
 'explicitly': [(11, 8)],
 'face': [(12, 8)],
 'first': [(14, 41)],
 'flat': [(5, 1)],
 'good': [(18, 55)],
 'great': [(19, 28)],
 'guess': [(12, 52)],
 'hard': [(17, 26)],
 'honking': [(19, 20)],
 'idea': [(17, 54), (18, 60), (19, 34)],
 'if': [(17, 1), (18, 1)],
 'implementation': [(17, 8), (18, 8)],
 'implicit':

In [None]:
# you can also use defaultdict:
from collections import defaultdict

def index_text_default(text):
    index = defaultdict(list) # pass a callable to defaultdict that produces a default whenever a missing key is indexed
    with open(text, encoding="utf-8") as fp:
        for line_no, line in enumerate(fp, 1):
            for match in WORD_RE.finditer(line):
                word = match.group()
                column_no = match.start() + 1

                location = (line_no, column_no)
                index[word].append(location) # this is the same as checking if word in index, setting an empty list if not, then appending
    
    return index

# defaultdict uses __missing__, which you can implement yourself

# take the following class which searches for the str version of nonstring keys
class StrKeyDict0(dict):
    def __missing__(self, key):
        if isinstance(key, str):
            raise KeyError()
        return self[str(key)]
    
    def get(self, key, default=None):
        try:
            return self[key]
        except KeyError:
            return default
    
    def __contains__(self, key):
        return key in self.keys() or str(key) in self.keys()


## Other dict types

In [None]:
from collections import OrderedDict # ordereddict is pretty similar to dict now that dict preserves insertion orders, but there are a few small differences


In [3]:
from collections import ChainMap # lookups in a chain map are performed one at a time, "falling back" to the next one

d1 = dict(a=1, b=2)
d2 = dict(a=2, b=4, c=6)

chain = ChainMap(d1, d2)
chain['a'], chain['c']

(1, 6)

In [4]:
# counter counts occurrences. it can be used as a multiset
from collections import Counter

ct = Counter('abracadabra')
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})

In [5]:
ct.update('ababababazzzzaaaaa')
ct

Counter({'a': 15, 'b': 6, 'z': 4, 'r': 2, 'c': 1, 'd': 1})

In [6]:
ct.most_common(3)

[('a', 15), ('b', 6), ('z', 4)]

In [None]:
# shelf is used with pickle? it maps strings to objects persistently
# haven't used pickle before

In [12]:
# inherting from userdict instead of dict is preferred:
# see the improved version of strkeydict0
from collections import UserDict

class StrKeyDict(UserDict):
    def __missing__(self, key):
        if isinstance(key, str):
            raise KeyError(key)
        return(self[str(key)])

    def __contains__(self, key):
        return str(key) in self.data

    def __setitem__(self, key, item):
        self.data[str(key)] = item

In [13]:
userdict = StrKeyDict(a=1, b=2, c=4)
userdict[1] = 100
var = 'c'
userdict['a'], userdict[var], userdict[1]

(1, 4, 100)

In [16]:
# sometimes you may need immutable mappings

# in that case, we have a MappingProxyType
from types import MappingProxyType

d = {1: 'a'}
d_proxy = MappingProxyType(d)

d_proxy[2] = 3

TypeError: 'mappingproxy' object does not support item assignment

In [19]:
d[2] = 'b'
d_proxy, d_proxy[2]

(mappingproxy({1: 'a', 2: 'b'}), 'b')

## Views and Sets

In [None]:
# dict views

# keys(), values(), and items() are all views that are read-only and reduce overhead by not copying the entire set of items they're viewing
d = dict(a=10, b=20, c=30)
values = d.values()

values, len(values), reversed(values) # values() supports __len__, __iter__, and __reversed__

(dict_values([10, 20, 30]), 3, <dict_reversevalueiterator at 0x1096afb00>)

In [22]:
values[0] # you can't get individual items from a view

TypeError: 'dict_values' object is not subscriptable

In [23]:
# sets

l = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']
set(l) # sets remove duplicates, but don't necessarily preserve order

{'bacon', 'eggs', 'spam'}

In [25]:
# if you want to preserve order, you can do the following
list(dict.fromkeys(l).keys())

['spam', 'eggs', 'bacon']

In [26]:
# set operations:

s1 = {'a', 'e', 'g', 'alala', 10, 100}
s2 = {10, 100, 'a', 'c', 'd', '000o0o0', '100'}

s1 | s2, s1 & s2, s1 - s2, s1 ^ s2 # union, intersection, difference, symmetric difference 

({'000o0o0', 10, '100', 100, 'a', 'alala', 'c', 'd', 'e', 'g'},
 {10, 100, 'a'},
 {'alala', 'e', 'g'},
 {'000o0o0', '100', 'alala', 'c', 'd', 'e', 'g'})

In [28]:
for element in ('c', 'd', '000o0o0', '100'):
    s2.discard(element) # this is like .remove but doesn't give an error if it's not in the set

s1 > s2, s1 < s2 # proper superset and proper subset

(True, False)

In [None]:
# .keys() and .items() implement set operations:
d1.keys() | d2.keys(), d1.keys() & d2.keys()

({'a', 'b', 'c'}, {'a', 'b'})

In [32]:
# this also means they work with sets themselves
d1.keys() & s2

{'a'}

### Soapbox

Something interesting I read is that some people hack the global namespace so that they can paste JSON directly into the terminal (i.e. true = True)