# Chapter 3. Dictionaries and Sets 

## Generic Mapping Types 

In [1]:
# Five equivalent ways of building the same mapping. Dict equality ignores
# insertion order, so all five objects compare equal.
a = dict(one=1, two=2, three=3)                      # keyword arguments
b = {'one': 1, 'two': 2, 'three': 3}                 # dict literal
c = dict(zip(['one', 'two', 'three'], [1, 2, 3]))    # keys zipped with values
d = dict([('two', 2), ('one', 1), ('three', 3)])     # list of (key, value) pairs
e = dict({'three': 3, 'one': 1, 'two': 2})           # copy of another mapping

In [3]:
# Display the mapping that was built from keyword arguments.
a

{'one': 1, 'two': 2, 'three': 3}

In [2]:
# Chained comparison: True only if every adjacent pair of dicts is equal.
a==b==c==d==e

True

## dict Comprehensions 

**Example 3-1. Examples of dict comprehensions**

In [4]:
# (dial code, country name) pairs used by the comprehension examples.
Dial_codes = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Banladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

In [15]:
# Invert each (code, country) pair: the country name becomes the key and
# the dial code becomes the value.
country_code = {nation: dial for dial, nation in Dial_codes}

In [16]:
# Display the inverted mapping (country name -> dial code).
country_code

{'China': 86,
 'India': 91,
 'United States': 1,
 'Indonesia': 62,
 'Brazil': 55,
 'Pakistan': 92,
 'Banladesh': 880,
 'Nigeria': 234,
 'Russia': 7,
 'Japan': 81}

In [10]:
# Reverse the pairs again (code becomes the key), uppercase the country
# names, and keep only the items whose code is below 66.
{code: country.upper() for country,code in country_code.items()
 if code<66}

{1: 'UNITED STATES', 62: 'INDONESIA', 55: 'BRAZIL', 7: 'RUSSIA'}

## Overview of Common Mapping Methods 

### Handling missing keys with setdefault

**Example 3-4. index.py uses dict.setdefault to fetch and update the list of word occurrences from the index in a single line (compare with Example 3-2, index0.py, which does the same with dict.get)**

In [17]:
import sys 
import re 

In [18]:
# Precompiled pattern matching one run of word characters (\w+).
# The indexing cell refers to this pattern as WORD_RE, so the constant is
# defined under that name; the lowercase name is kept as an alias so that
# any existing reference to `word_re` keeps working.
WORD_RE = re.compile(r'\w+')
word_re = WORD_RE

In [None]:
# Build an index mapping each word to the list of (line, column) positions
# at which it occurs in the file named by sys.argv[1].
# NOTE(review): sys.argv[1] is presumably a text-file path supplied when this
# code runs as a script; inside a notebook kernel it may hold something
# else - confirm before running this cell.
index = {}
with open(sys.argv[1], encoding='utf-8') as fp:
    for line_no, line in enumerate(fp, start=1):
        for match in WORD_RE.finditer(line):
            # Both coordinates are 1-based: enumerate starts at 1 and the
            # match offset is shifted by one.
            location = (line_no, match.start() + 1)
            # setdefault returns the word's list, creating an empty one the
            # first time the word is seen, so a single lookup suffices.
            index.setdefault(match.group(), []).append(location)
            

In [None]:
# Print every word with its list of locations. key=str.upper makes the
# ordering case-insensitive without changing the words themselves.
for word in sorted(index, key=str.upper):
    print(word, index[word])

**Example 3-7. StrKeyDict0 converts non-string keys to str on lookup**

In [31]:
class StrKeyDict0(dict):
    """dict subclass whose lookups fall back to the str() form of the key.

    Keys are stored exactly as given; only lookups (``d[key]``,
    ``d.get(key)`` and ``key in d``) retry with ``str(key)`` when the key
    itself is not found.
    """

    def __missing__(self, key):
        """Retry a failed ``d[key]`` lookup with ``str(key)``.

        Raises KeyError when ``key`` is already a str: there is nothing left
        to convert, and retrying would recurse forever.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return ``self[key]`` or ``default`` if the lookup raises KeyError.

        Going through ``self[key]`` gives ``__missing__`` the chance to try
        the str form of the key.
        """
        try:
            value = self[key]
        except KeyError:
            return default
        else:
            return value

    def __contains__(self, key):
        """Return True if ``key`` or ``str(key)`` is among the stored keys."""
        # Test against self.keys() rather than `key in self`, which would
        # call this method again.
        stored = self.keys()
        return key in stored or str(key) in stored

In [32]:
# `collections` is imported in this cell because the Counter example runs
# before any earlier cell has imported the module; without it a fresh
# kernel executing the notebook top to bottom fails with NameError.
import collections

# Tally how many times each character occurs in the string.
ct = collections.Counter('abraxadabra')

In [33]:
# Display the tallies: one count per distinct character.
ct

Counter({'a': 5, 'b': 2, 'r': 2, 'x': 1, 'd': 1})

In [34]:
# Counter.update adds the new occurrences to the existing counts instead of
# replacing them. Note that re-running this cell adds them again.
ct.update('aaaaazzz')

In [35]:
# Display the tallies after the update.
ct

Counter({'a': 10, 'b': 2, 'r': 2, 'x': 1, 'd': 1, 'z': 3})

In [36]:
# The two most frequent items with their counts, highest count first.
ct.most_common(2)

[('a', 10), ('z', 3)]

**Example 3-8. StrKeyDict always converts non-string keys to str: on insertion, update, and lookup**

In [37]:
import collections 

In [38]:
class StrKeyDict(collections.UserDict):
    """UserDict subclass that stores every key as a str.

    Keys are converted with ``str()`` when items are set, and lookups with a
    non-string key fall back to the converted key.
    """

    def __missing__(self, key):
        """Retry a failed lookup with ``str(key)``.

        Raises KeyError when ``key`` is already a str, since no further
        conversion is possible.
        """
        if not isinstance(key, str):
            return self[str(key)]
        raise KeyError(key)

    def __contains__(self, key):
        """Return True if the str form of ``key`` is a stored key."""
        # All stored keys are str, so one check against the underlying
        # self.data dict is enough.
        text_key = str(key)
        return text_key in self.data

    def __setitem__(self, key, item):
        """Store ``item`` under ``str(key)``."""
        text_key = str(key)
        self.data[text_key] = item

### Set Theory

In [40]:
# A set keeps a single copy of each element, so building one from a list
# removes the duplicates.
l = ['spam','spam','eggs','spam']
set(l)

{'eggs', 'spam'}

In [42]:
# Convert back to a list of unique items. Sets are unordered, so the order
# of the resulting list should not be relied upon.
list(set(l))

['eggs', 'spam']

### Set comprehensions

**Example 3-13. Build a set of Latin-1 characters that have the word 'SIGN' in their Unicode names**

In [44]:
from unicodedata import name 

In [45]:
# Collect the characters with code points 32 through 255 whose Unicode name
# contains 'SIGN'. The second argument to name() is the value returned for
# characters that have no name, which avoids a ValueError for them.
{chr(i) for i in range(32,256) if 'SIGN' in name(chr(i),'')} 

{'#',
 '$',
 '%',
 '+',
 '<',
 '=',
 '>',
 '¢',
 '£',
 '¤',
 '¥',
 '§',
 '©',
 '¬',
 '®',
 '°',
 '±',
 'µ',
 '¶',
 '×',
 '÷'}

**Example 3-17. dialcodes.py fills three dictionaries with the same data sorted in different ways**

In [46]:
# (dial code, country name) pairs used to fill the three dictionaries below.
Dial_codes = [
    (86, 'China'),
    (91, 'India'),
    (1, 'United States'),
    (62, 'Indonesia'),
    (55, 'Brazil'),
    (92, 'Pakistan'),
    (880, 'Banladesh'),
    (234, 'Nigeria'),
    (7, 'Russia'),
    (81, 'Japan'),
]

In [48]:
# Fill the dict with the pairs in the order they are listed in Dial_codes.
d1 = dict(Dial_codes)
print('d1:', d1.keys())

d1: dict_keys([86, 91, 1, 62, 55, 92, 880, 234, 7, 81])


In [49]:
# Same pairs, inserted in ascending order of dial code: tuples compare by
# their first item, and the codes are all distinct.
d2 = dict(sorted(Dial_codes))
print('d2:', d2.keys())

d2: dict_keys([1, 7, 55, 62, 81, 86, 91, 92, 234, 880])


In [50]:
# Same pairs, inserted in alphabetical order of country name (x[1] is the
# second item of each pair).
d3 = dict(sorted(Dial_codes, key = lambda x:x[1]))
print('d3:', d3.keys())

d3: dict_keys([880, 55, 86, 91, 62, 81, 234, 92, 7, 1])


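**Adding items to a dict may change the order of existing keys** (this applied to Python 3.5 and earlier; since Python 3.7, dicts are guaranteed to preserve insertion order, as the key listings above show)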