## Chapter 3. Dictionaries and Sets

The dict type is a fundamental part of Python’s implementation.

Class and instance attributes, module namespaces, and function keyword arguments are
some of the core Python constructs represented by dictionaries in memory.

The "\_\_builtins\_\_.\_\_dict\_\_" stores all built-in types, objects, and functions.

In [3]:
# `__builtins__` is a CPython implementation detail: it is the module object
# only inside `__main__` and a plain dict in imported modules, where
# `__builtins__.__dict__` raises AttributeError. The `builtins` module exposes
# the same namespace reliably.
import builtins

print(builtins.__dict__)

All Rights Reserved.

Copyright (c) 2000 BeOpen.com.
All Rights Reserved.

Copyright (c) 1995-2001 Corporation for National Research Initiatives.
All Rights Reserved.

Copyright (c) 1991-1995 Stichting Mathematisch Centrum, Amsterdam.
All Rights Reserved., 'credits':     Thanks to CWI, CNRI, BeOpen.com, Zope Corporation and a cast of thousands
    for supporting Python development.  See www.python.org for more information., 'license': Type license() to see the full license text, 'help': Type help() for interactive help, or help(object) for help about object., 'execfile': <function execfile at 0x00000167CF7C8670>, 'runfile': <function runfile at 0x00000167CF873130>, '__IPYTHON__': True, 'display': <function display at 0x00000167CDDC8EE0>, 'get_ipython': <bound method InteractiveShell.get_ipython of <ipykernel.zmqshell.ZMQInteractiveShell object at 0x00000167CFB032B0>>}


In [4]:
# Five equivalent ways to build the same dict:
a = dict(one=1, two=2, three=3)  # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}  # dict literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))  # (key, value) pairs produced by zip
d = dict([('two', 2), ('one', 1), ('three', 3)])  # list of (key, value) tuples
e = dict({'three': 3, 'one': 1, 'two': 2})  # built from another dict
# Dict equality ignores insertion order, so all five compare equal.
print(a == b == c == d == e)

True


In [2]:
# Example 3-1. Examples of dict comprehensions

# Source data: a list of (dial code, country name) pairs.
dial_codes = [
    (880, 'Bangladesh'),
    (55, 'Brazil'),
    (86, 'China'),
    (91, 'India'),
    (62, 'Indonesia'),
    (81, 'Japan'),
    (234, 'Nigeria'),
    (92, 'Pakistan'),
    (7, 'Russia'),
    (1, 'United States'),
    ]

# Swap each pair so the country name becomes the key and the code the value.
country_code = {country: code for code, country in dial_codes}
print(country_code)

# Swap back to code -> name, upper-casing the names and keeping only codes below 70.
print({code: country.upper() for country, code in country_code.items() if code < 70})

{'Bangladesh': 880, 'Brazil': 55, 'China': 86, 'India': 91, 'Indonesia': 62, 'Japan': 81, 'Nigeria': 234, 'Pakistan': 92, 'Russia': 7, 'United States': 1}
{55: 'BRAZIL', 62: 'INDONESIA', 7: 'RUSSIA', 1: 'UNITED STATES'}


In [4]:
# Unpacking Mappings

def dump(**kwargs):
    """Return the keyword arguments received, collected into a dict."""
    collected = kwargs
    return collected

print(dump(**{'x': 1}, y=2, **{'z': 3}))

print({'a': 0, **{'x': 1}, 'y': 2, **{'z': 3, 'x': 4}})

{'x': 1, 'y': 2, 'z': 3}
{'a': 0, 'x': 4, 'y': 2, 'z': 3}


In [7]:
# Merging Mappings with |

d1 = {'a': 1, 'b': 3}
d2 = {'a': 2, 'b': 4, 'c': 6}
print(d1 | d2)
print(d2 | d1)

print(d1)
d1 |= d2
print(d1)

{'a': 2, 'b': 4, 'c': 6}
{'a': 1, 'b': 3, 'c': 6}
{'a': 1, 'b': 3}
{'a': 2, 'b': 4, 'c': 6}


In [5]:
# Example 3-2. creator.py: get_creators() extracts names of creators from media records

def get_creators(record: dict) -> list:
    match record:
        case {'type': 'book', 'api': 2, 'authors': [*names]}:
            return names
        case {'type': 'book', 'api': 1, 'author': name}:
            return [name]
        case {'type': 'book'}:
            raise ValueError(f"Invalid 'book' record: {record!r}")
        case {'type': 'movie', 'director': name}:
            return [name]
        case _:
            raise ValueError(f'Invalid record: {record!r}')

In [6]:
b1 = dict(api=1, author='Douglas Hofstadter',type='book', title='Gödel, Escher, Bach')
print(get_creators(b1))

from collections import OrderedDict
b2 = OrderedDict(api=2, type='book', title='Python in a Nutshell', authors='Martelli Ravenscroft Holden'.split())
print(get_creators(b2))

['Douglas Hofstadter']
['Martelli', 'Ravenscroft', 'Holden']


In [7]:
get_creators({'type': 'book', 'pages': 770})  # error case

ValueError: Invalid 'book' record: {'type': 'book', 'pages': 770}

In [8]:
get_creators('Spam, spam, spam')  # error case

ValueError: Invalid record: 'Spam, spam, spam'

In [8]:
food = dict(category='ice cream', flavor='vanilla', cost=199)

match food:
    case {'category': 'ice cream', **details}:  ## ** to capture extra key-value pairs
        print(f'Ice cream details: {details}')

Ice cream details: {'flavor': 'vanilla', 'cost': 199}


### Standard API of Mapping Types

In [14]:
from collections import abc  # collections.abc provides the Mapping and MutableMapping ABCs

my_dict = {}
# A plain dict passes both isinstance checks below.
print(isinstance(my_dict, abc.Mapping))
print(isinstance(my_dict, abc.MutableMapping))

True
True


In [16]:
tt = (1, 2, (30, 40))
print(hash(tt))

#tl = (1, 2, [30, 40])  # unhashable type : 'list'
#print(hash(tl))

tf = (1, 2, frozenset([30, 40]))
print(hash(tf))

-3907003130834322577
5149391500123939311


#### Inserting or Updating Mutable Values

In [17]:
# Example 3-4. index0.py uses dict.get to fetch and update a list of word occurrences from the index (a better solution is in Example 3-5)
"""Build an index mapping word -> list of occurrences"""

import sys
import re

# Raw string: in a normal literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
WORD_RE = re.compile(r'\w+')

index = {}
#with open(sys.argv[1], encoding='utf-8') as fp:
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # columns are 1-based as well
            location = (line_no, column_no)

            # this is ugly; it is coded like this to make a point
            occurrences = index.get(word, [])  # get the list of occurrences for word, or [] if not found
            occurrences.append(location)
            index[word] = occurrences  # store the (possibly new) list back in the index

# print in alphabetical order, ignoring case
for word in sorted(index, key=str.upper):
    print(word, index[word])

a [(19, 48), (20, 53)]
Although [(11, 1), (16, 1), (18, 1)]
ambiguity [(14, 16)]
and [(15, 23)]
are [(21, 12)]
aren [(10, 15)]
at [(16, 38)]
bad [(19, 50)]
be [(15, 14), (16, 27), (20, 50)]
beats [(11, 23)]
Beautiful [(3, 1)]
better [(3, 14), (4, 13), (5, 11), (6, 12), (7, 9), (8, 11), (17, 8), (18, 25)]
break [(10, 40)]
cases [(10, 9)]
complex [(5, 23)]
Complex [(6, 1)]
complicated [(6, 24)]
counts [(9, 13)]
dense [(8, 23)]
do [(15, 64), (21, 48)]
Dutch [(16, 61)]
easy [(20, 26)]
enough [(10, 30)]
Errors [(12, 1)]
explain [(19, 34), (20, 34)]
Explicit [(4, 1)]
explicitly [(13, 8)]
face [(14, 8)]
first [(16, 41)]
Flat [(7, 1)]
good [(20, 55)]
great [(21, 28)]
guess [(14, 52)]
hard [(19, 26)]
honking [(21, 20)]
idea [(19, 54), (20, 60), (21, 34)]
If [(19, 1), (20, 1)]
implementation [(19, 8), (20, 8)]
implicit [(4, 25)]
In [(14, 1)]
is [(3, 11), (4, 10), (5, 8), (6, 9), (7, 6), (8, 8), (17, 5), (18, 16), (19, 23), (20, 23)]
it [(15, 67), (19, 43), (20, 43)]
let [(21, 42)]
may [(16, 19),

In [21]:
# Example 3-5. index.py uses dict.setdefault to fetch and update a list of word occurrences from the index in a single line; contrast with Example 3-4
"""Build an index mapping word -> list of occurrences"""

import sys
import re

# Raw string: in a normal literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
WORD_RE = re.compile(r'\w+')

index = {}
#with open(sys.argv[1], encoding='utf-8') as fp:
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # columns are 1-based as well
            location = (line_no, column_no)

            # setdefault returns the list stored for word, first inserting []
            # when the word is missing, so fetch-and-append is a single step.
            index.setdefault(word, []).append(location)

## print in alphabetical order
#for word in sorted(index, key=str.upper):
#    print(word, index[word])
print('Zen', index['Zen'])


Zen [(1, 5)]


### Automatic Handling of Missing Keys
#### defaultdict: Another Take on Missing Keys

In [22]:
# Example 3-6. index_default.py: using an instance of defaultdict instead of the setdefault method
"""Build an index mapping word -> list of occurrences"""

import sys
import re
import collections

# Raw string: in a normal literal '\w' is an invalid escape sequence, which
# newer Python versions warn about.
WORD_RE = re.compile(r'\w+')

# list is the default factory: looking up a missing word creates an empty list.
index = collections.defaultdict(list)
#with open(sys.argv[1], encoding='utf-8') as fp:
with open('zen.txt', encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, 1):  # line numbers start at 1
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1  # columns are 1-based as well
            location = (line_no, column_no)
            index[word].append(location)

# print in alphabetical order
#for word in sorted(index, key=str.upper):
#    print(word, index[word])
print('Zen', index['Zen'])

Zen [(1, 5)]


#### The \_\_missing\_\_ Method

In [12]:
# Example 3-8. StrKeyDict0 converts nonstring keys to str on lookup (see tests in Example 3-7)

class StrKeyDict0(dict):
    """dict that retries a failed lookup with the key converted to str."""

    def __missing__(self, key):
        # Called by dict.__getitem__ whenever a key is not found.
        if not isinstance(key, str):
            # Retry once with the str form of the key.
            return self[str(key)]
        # A str key that is missing really is missing; raising here also
        # stops the retry above from recursing forever.
        raise KeyError(key)

    def get(self, key, default=None):
        # Delegate to self[key] so that __missing__ gets a chance to run.
        try:
            found = self[key]
        except KeyError:
            return default
        else:
            return found

    def __contains__(self, key):
        # Search self.keys() rather than `key in self`, which would call
        # this very method again.
        existing = self.keys()
        if key in existing:
            return True
        return str(key) in existing

In [13]:
# Example 3-7. When searching for a nonstring key, StrKeyDict0 converts it to str when it is not found

# Tests for item retrieval using `d[key]` notation::
d = StrKeyDict0([('2', 'two'), ('4', 'four')])
print(d['2'])
print(d['4'])
print(d[1])


two
four


KeyError: '1'

In [14]:
# Tests for item retrieval using `d.get(key)` notation::
print(d.get('2'))
print(d.get(4))
print(d.get(1, 'N/A'))

# Tests for the `in` operator::
print(2 in d)
print(1 in d)

two
four
N/A
True
False


### Variations of dict
#### collections.ChainMap

In [30]:
d1 = dict(a=1, b=3)
d2 = dict(a=2, b=4, c=6)

from collections import ChainMap
chain = ChainMap(d1, d2)
print(chain['a'])
print(chain['c'])

chain['c'] = -1  # Updates or insertions to a ChainMap only affect the first input mapping.
print(d1)
print(d2)

1
6
{'a': 1, 'b': 3, 'c': -1}
{'a': 2, 'b': 4, 'c': 6}


#### collections.Counter

In [35]:
ct = collections.Counter('abracadabra')
print(ct)
ct.update('aaaaazzz')
print(ct)
print(ct.most_common(3))

Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
Counter({'a': 10, 'z': 3, 'b': 2, 'r': 2, 'c': 1, 'd': 1})
[('a', 10), ('z', 3), ('b', 2)]


#### Subclassing UserDict Instead of dict

In [15]:
# Example 3-9. StrKeyDict always converts non-string keys to str—on insertion, update, and lookup

import collections

class StrKeyDict(collections.UserDict):
    """Mapping whose keys are stored as str on insertion and looked up as str."""

    def __missing__(self, key):
        if not isinstance(key, str):
            # Retry once with the str form of the key.
            return self[str(key)]
        # A missing str key is a genuine miss.
        raise KeyError(key)

    def __contains__(self, key):
        # Every stored key is a str, so checking the str form is enough.
        as_text = str(key)
        return as_text in self.data

    def __setitem__(self, key, item):
        # Convert the key to str before storing it in the underlying dict.
        as_text = str(key)
        self.data[as_text] = item

In [16]:
user_dict = StrKeyDict([('2', 'two'), ('4', 'four')])
print(user_dict['2'])
print(user_dict['4'])
print(user_dict[1])

two
four


KeyError: '1'

In [17]:
# Tests for item retrieval using `d.get(key)` notation::
print(user_dict.get('2'))
print(user_dict.get(4))
print(user_dict.get(1, 'N/A'))

# Tests for the `in` operator::
print(2 in user_dict)
print(1 in user_dict)

two
four
N/A
True
False


### Immutable Mappings

In [18]:
# Example 3-10. MappingProxyType builds a read-only mappingproxy instance from a dict
from types import MappingProxyType

d = {1: 'A'}
d_proxy = MappingProxyType(d)
print(d_proxy)
print(d_proxy[1])
# d_proxy[2] = 'x'  # TypeError: the proxy is read-only
d[2] = 'B'  # a change made to the underlying dict...
print(d_proxy)  # ...is visible through the proxy
print(d_proxy[2])

{1: 'A'}
A
{1: 'A', 2: 'B'}
B


In [23]:
# Example 3-11. The .values() method returns a view of the values in a dict

d = dict(a=10, b=20, c=30)
values = d.values()
print(values)
print(len(values))
print(list(values))
r_values= reversed(values)
print(list(r_values))
# values[0]  # Error: We can't use [] to get individual items from a view

dict_values([10, 20, 30])
3
[10, 20, 30]
[30, 20, 10]


In [24]:
d['z'] = 99
print(d)
print(values)

{'a': 10, 'b': 20, 'c': 30, 'z': 99}
dict_values([10, 20, 30, 99])


In [25]:
values_class = type({}.values())
# v = values_class()  # dict_keys, dict_values, and dict_items are internal, not available

TypeError: cannot create 'dict_values' instances

### Set Theory

In [49]:
# A set is a collection of unique objects. A basic use case is removing duplication:
l = ['spam', 'spam', 'eggs', 'spam', 'bacon', 'eggs']
print(set(l))
print(list(set(l)))
print(dict.fromkeys(l).keys())
print(list(dict.fromkeys(l).keys()))

{'eggs', 'bacon', 'spam'}
['eggs', 'bacon', 'spam']
dict_keys(['spam', 'eggs', 'bacon'])
['spam', 'eggs', 'bacon']


In [51]:
s = {1}
print(type(s))
print(s)
print(s.pop())
print(s)

frozenset(range(10))

<class 'set'>
{1}
1
set()


frozenset({0, 1, 2, 3, 4, 5, 6, 7, 8, 9})

In [52]:
# Example 3-15. Build a set of Latin-1 characters that have the word “SIGN” in their Unicode names
from unicodedata import name
{chr(i) for i in range(32, 256) if 'SIGN' in name(chr(i), '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

In [53]:
d1 = dict(a=1, b=2, c=3, d=4)
d2 = dict(b=20, d=40, e=50)
print(d1.keys() & d2.keys())

s = {'a', 'e', 'i'}
print(d1.keys() & s)
print(d1.keys() | s)

{'b', 'd'}
{'a'}
{'d', 'e', 'a', 'b', 'i', 'c'}
