# 1. Generic mapping types

In [1]:
# Keyword arguments: each argument name becomes a string key.
a = dict(one=1, two=2, three=3)
# Dict literal.
b = {'one': 1, 'two': 2, 'three': 3}
# Pairs produced by zipping a list of keys with a list of values.
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))
# List of (key, value) tuples, given in a different order.
d = dict([('two', 2), ('one', 1), ('three', 3)])
# Built from an existing dict, again in a different order.
e = dict({'three': 3, 'one': 1, 'two': 2})

In [2]:
a == b == c == d == e

True

# 2. dict comprehensions

In [3]:
# Ex 3-1 Examples of dict comprehensions
# Each entry is a (dial code, country name) pair.
DIAL_CODES = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Bangladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]
# Swap every pair so that the country name maps to its dial code.
country_code = {nation: dial for dial, nation in DIAL_CODES}

In [4]:
country_code

{'Bangladesh': 880,
 'Brazil': 55,
 'China': 86,
 'India': 91,
 'Indonesia': 62,
 'Japan': 81,
 'Nigeria': 234,
 'Pakistan': 92,
 'Russia': 7,
 'United States': 1}

In [6]:
# Invert country_code: the dial code becomes the key and the country name is upper-cased.
{code: country.upper() for country, code in country_code.items()}

{1: 'UNITED STATES',
 7: 'RUSSIA',
 55: 'BRAZIL',
 62: 'INDONESIA',
 81: 'JAPAN',
 86: 'CHINA',
 91: 'INDIA',
 92: 'PAKISTAN',
 234: 'NIGERIA',
 880: 'BANGLADESH'}

# 3. Overview of common mapping methods
## Handling missing keys with setdefault

In [None]:
# Ex 3-2 index0.py uses dict.get to fetch and update a list of word occurrences from the index
import sys
import re

# Raw string so that \w reaches the regex engine instead of being parsed as a
# (invalid) string escape sequence.
WORD_RE = re.compile(r'\w+')

# Maps each word to the list of (line_no, column_no) positions where it occurs.
index = {}
with open(sys.argv[1]) as fp:
    # Line and column numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1
            location = (line_no, column_no)
            # Fetch the existing list, or a fresh one if the word is new ...
            occurrences = index.get(word, [])
            occurrences.append(location)
            # ... and store it back: a fresh list is not yet in the index.
            index[word] = occurrences
# Print the words in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

In [None]:
# Ex 3-4 index.py uses dict.setdefault to fetch and update a list of word occurrences from the index in a single line
import sys
import re

# Raw string so that \w reaches the regex engine instead of being parsed as a
# (invalid) string escape sequence.
WORD_RE = re.compile(r'\w+')

# Maps each word to the list of (line_no, column_no) positions where it occurs.
index = {}
with open(sys.argv[1]) as fp:
    # Line and column numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1
            location = (line_no, column_no)
            # setdefault returns the list stored for word, inserting an empty
            # list first when the word is missing, so one lookup is enough.
            index.setdefault(word, []).append(location)
# Print the words in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

# 4. Mappings with flexible key lookup

## defaultdict: another take on missing keys

In [None]:
# Ex 3-5 uses collections.defaultdict(list) instead of dict.setdefault to fetch and update a list of word occurrences from the index
import sys
import re
import collections

# Raw string so that \w reaches the regex engine instead of being parsed as a
# (invalid) string escape sequence.
WORD_RE = re.compile(r'\w+')

# Missing keys are created on first access with list() as their value, so the
# loop below can append without checking whether the word is already present.
index = collections.defaultdict(list)
with open(sys.argv[1]) as fp:
    # Line and column numbers are 1-based.
    for line_no, line in enumerate(fp, 1):
        for match in WORD_RE.finditer(line):
            word = match.group()
            column_no = match.start() + 1
            location = (line_no, column_no)
            index[word].append(location)
# Print the words in case-insensitive alphabetical order.
for word in sorted(index, key=str.upper):
    print(word, index[word])

## The __missing__ method

In [8]:
# Ex 3-7 StrKeyDict0 converts non-string keys to str on lookup.
class StrKeyDict0(dict):
    """dict subclass that retries a failed lookup with ``str(key)``."""

    def __missing__(self, key):
        """Handle a key that ``d[key]`` did not find.

        A non-string key is looked up again as ``str(key)``; a string key
        that is missing raises ``KeyError``.
        """
        if not isinstance(key, str):
            return self[str(key)]
        # The key is already a str: stop here, otherwise the retry above
        # would call __missing__ again without end.
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]``, or ``default`` when that raises ``KeyError``."""
        # Go through self[key] so that the __missing__ fallback is applied.
        try:
            found = self[key]
        except KeyError:
            return default
        return found

    def __contains__(self, key):
        """Return True when ``key`` or ``str(key)`` is one of the stored keys."""
        # Test against the keys view rather than `in self`, which would call
        # this method again.
        stored = self.keys()
        if key in stored:
            return True
        return str(key) in stored

In [9]:
# Ex 3-6 When searching for a non-string key, StrKeyDict0 converts it to str when it is not found.

# Tests for item retrieval using 'd[key]' notation
d = StrKeyDict0([('2', 'two'), ('4', 'four')])

In [10]:
d['2'], d[4]

('two', 'four')

In [11]:
d[1]

KeyError: '1'

In [13]:
d.get('2'), d.get(4), d.get(1, 'N/A')

('two', 'four', 'N/A')

In [14]:
2 in d, 1 in d

(True, False)

## Variations of dict
- collections.OrderedDict
- collections.ChainMap
- collections.Counter

## Subclassing UserDict.

In [15]:
import collections

class StrKeyDict(collections.UserDict):
    """UserDict subclass that stores every key as ``str(key)``."""

    def __missing__(self, key):
        """Retry a failed lookup with ``str(key)``.

        A string key that is missing raises ``KeyError``.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Return True when ``str(key)`` is a key of the underlying ``data`` dict."""
        as_text = str(key)
        return as_text in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        as_text = str(key)
        self.data[as_text] = item

## Immutable mappings

In [16]:
from types import MappingProxyType
d = {1: 'A'}
# Wrap d in a mappingproxy; the proxy is built from d rather than from a copy.
d_proxy = MappingProxyType(d)
d_proxy

mappingproxy({1: 'A'})

In [17]:
d_proxy[1]

'A'

In [18]:
d_proxy[2] = 'x'

TypeError: 'mappingproxy' object does not support item assignment

In [19]:
d[2] = 'B'
d_proxy, d_proxy[2]

(mappingproxy({1: 'A', 2: 'B'}), 'B')

# 5. Set theory

## set literals

In [20]:
s = {1}
type(s)

set

In [21]:
s

{1}

In [23]:
s.pop()

1

In [24]:
s

set()

## set comprehensions

In [25]:
# Ex 3-13 Build a set of Latin-1 characters that have the word "SIGN" in their Unicode names.
from unicodedata import name
# name(char, '') falls back to '' for characters without a Unicode name,
# so those characters are simply filtered out.
{char for char in map(chr, range(32, 256)) if 'SIGN' in name(char, '')}

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

## Set operations

# 6. dict and set under the hood
## A performance experiment

## Hash tables in dictionaries

### Hashes and equality

### The hash table algorithm

## Practical consequences of how dict works
### #1: Keys must be hashable objects

### #2: dicts have significant memory overhead

### #3: Key search is very fast

### #4: Key ordering depends on insertion order

### #5: Adding items to a dict may change the order of existing keys

## How set works - practical consequences