In [1]:
from collections import defaultdict
from pprint import pprint

# defaultdict: grouping, accumulation, reverse one-to-many mapping

## accumulation

In [3]:
# Group values by key: a missing key is created with an empty list on first
# access, so every (person, item) pair can be appended without a key check.
d = defaultdict(list)

for person, item in [
    ('raymond', 'red'),
    ('rachel', 'blue'),
    ('raymond', 'mac'),
    ('rachel', 'pc'),
]:
    d[person].append(item)

In [4]:
pprint(d)

defaultdict(<class 'list'>,
            {'rachel': ['blue', 'pc'],
             'raymond': ['red', 'mac']})


In [8]:
# Convert to a plain dict so the output omits the defaultdict wrapper;
# width=20 forces pprint to wrap each list across several lines.
pprint(dict(d), width=20)

{'rachel': ['blue',
            'pc'],
 'raymond': ['red',
             'mac']}


## reverse one-to-many

In [10]:
# English word -> list of Spanish translations (one-to-many).
e2s = dict(
    one=['uno'],
    two=['dos'],
    three=['tres'],
    trio=['tres'],
    free=['libre', 'gratis'],
)

In [11]:
pprint(e2s)

{'free': ['libre', 'gratis'],
 'one': ['uno'],
 'three': ['tres'],
 'trio': ['tres'],
 'two': ['dos']}


In [12]:
s2e = defaultdict(list)

In [17]:
# Invert the one-to-many mapping: each Spanish word collects every English
# word that translates to it.
# The accumulator is (re)created in this cell so that re-running it starts
# from an empty mapping instead of appending duplicate entries to the lists
# left over from a previous run.
s2e = defaultdict(list)
for eng, spanwords in e2s.items():
    for span in spanwords:
        s2e[span].append(eng)

In [18]:
pprint(s2e)

defaultdict(<class 'list'>,
            {'dos': ['two'],
             'gratis': ['free'],
             'libre': ['free'],
             'tres': ['three', 'trio'],
             'uno': ['one']})


## reverse one-to-one

In [19]:
# English word -> single Spanish translation (one-to-one).
e2s = {'one': 'uno', 'two': 'dos', 'three': 'tres'}

In [20]:
pprint(e2s)

{'one': 'uno', 'three': 'tres', 'two': 'dos'}


In [24]:
# Swap keys and values. If two English words shared a translation, the later
# one would silently overwrite the earlier one in the result.
s2e = {spanish: english for english, spanish in e2s.items()}

In [26]:
pprint(s2e)

{'dos': 'two', 'tres': 'three', 'uno': 'one'}


# glob

In [27]:
import glob

In [29]:
# List the files matching *.ipynb, relative to the current working directory.
glob.glob("*.ipynb")

['01 foundational python .ipynb',
 '02 analyzing data using simulations and resampling.ipynb',
 '03 typehint, default dict, fsum.ipynb',
 '05 voting blocks.ipynb',
 'kmeans.ipynb']

# read file

In [31]:
# Read the whole file into one string, then print it once the file is closed.
with open("congress_votes_114-2016_s20.csv", encoding="utf-8") as f:
    contents = f.read()
print(contents)

Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016
person,state,district,vote,name,party
300002,TN,,Yea,Sen. Lamar Alexander [R],Republican
300011,CA,,Yea,Sen. Barbara Boxer [D],Democrat
300018,WA,,Yea,Sen. Maria Cantwell [D],Democrat
300019,DE,,Yea,Sen. Thomas Carper [D],Democrat
300023,MS,,Yea,Sen. Thad Cochran [R],Republican
300025,ME,,Yea,Sen. Susan Collins [R],Republican
300027,TX,,Yea,Sen. John Cornyn [R],Republican
300030,ID,,Yea,Sen. Michael Crapo [R],Republican
300038,IL,,Not Voting,Sen. Richard Durbin [D],Democrat
300041,WY,,Yea,Sen. Michael Enzi [R],Republican
300043,CA,,Yea,Sen. Dianne Feinstein [D],Democrat
300047,SC,,Not Voting,Sen. Lindsey Graham [R],Republican
300048,IA,,Yea,Sen. Charles “Chuck” Grassley [R],Republican
300052,UT,,Yea,Sen. Orrin Hatch [R],Republican
300055,OK,,Yea,Sen. James “Jim” Inhofe [R],Republican
300065,VT,,Yea,Sen. Patrick Leahy [D],Democrat
300071,AZ,,Yea,Sen. John McCain [R],Republican
300072,KY,,Yea,Se

# consume iterators

In [48]:
# Build an iterator over the characters of the string.
it = iter("abcdefg")

In [49]:
next(it)

'a'

In [50]:
next(it)

'b'

In [51]:
# list() drains whatever the iterator has not yet yielded; items already
# taken with next() are not produced again.
list(it)

['c', 'd', 'e', 'f', 'g']

# Read csv

In [52]:
import csv

In [53]:
# Parse the file with the csv module and print each row as a list of strings.
# newline="" hands line-ending handling to csv.reader, as the csv module
# documentation requires; without it, newlines embedded in quoted fields
# are not interpreted correctly.
with open("congress_votes_114-2016_s20.csv", encoding="utf-8", newline="") as f:
    for row in csv.reader(f):
        print(row)

['Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016']
['person', 'state', 'district', 'vote', 'name', 'party']
['300002', 'TN', '', 'Yea', 'Sen. Lamar Alexander [R]', 'Republican']
['300011', 'CA', '', 'Yea', 'Sen. Barbara Boxer [D]', 'Democrat']
['300018', 'WA', '', 'Yea', 'Sen. Maria Cantwell [D]', 'Democrat']
['300019', 'DE', '', 'Yea', 'Sen. Thomas Carper [D]', 'Democrat']
['300023', 'MS', '', 'Yea', 'Sen. Thad Cochran [R]', 'Republican']
['300025', 'ME', '', 'Yea', 'Sen. Susan Collins [R]', 'Republican']
['300027', 'TX', '', 'Yea', 'Sen. John Cornyn [R]', 'Republican']
['300030', 'ID', '', 'Yea', 'Sen. Michael Crapo [R]', 'Republican']
['300038', 'IL', '', 'Not Voting', 'Sen. Richard Durbin [D]', 'Democrat']
['300041', 'WY', '', 'Yea', 'Sen. Michael Enzi [R]', 'Republican']
['300043', 'CA', '', 'Yea', 'Sen. Dianne Feinstein [D]', 'Democrat']
['300047', 'SC', '', 'Not Voting', 'Sen. Lindsey Graham [R]', 'Republican']
['300048', 'IA', '', 

# Tuple unpacking

In [54]:
t = ("Raymond", "Hettinger", 54, "python@rcn.com")

In [55]:
type(t)

tuple

In [56]:
len(t)

4

In [57]:
# Unpack the 4-tuple into four names in one statement; the number of targets
# on the left must equal len(t).
fname, lname, age, email = t

In [58]:
fname

'Raymond'

In [59]:
lname

'Hettinger'

In [60]:
# Sample word lists for the loop-idiom examples, each built by splitting a
# whitespace-separated string.
# NOTE(review): 'houseton' is presumably a misspelling of 'houston'; it is
# kept unchanged because the recorded outputs reproduce it.
names, colors, cities = (
    words.split()
    for words in (
        'raymond rachel matthew',
        'red blue yellow',
        'austin dallas austin houseton chicago dallas austin',
    )
)

In [78]:
# Loop idioms: iterate over the items directly instead of indexing.
for name in names:
    print(name.upper())
    
# enumerate() pairs each item with a counter; start=1 makes it 1-based.
for i, name in enumerate(names, start=1):
    print(i, name)
    
# Backward: reversed() walks the list from last to first.
for color in reversed(colors):
    print(color)
    
# zip() walks both lists in lockstep, pairing items by position.
for name, color in zip(names, colors):
    print(name, color)
    
# Sort the colors by length by passing len as the sort key.
for color in sorted(colors, key=len):
    print(color)
    
# set() removes the duplicate cities; sorted() then orders them alphabetically.
for city in sorted(set(cities)):
    print(city)
    
# Functional style: dedupe -> sort -> reverse -> uppercase composed in a
# single expression; enumerate() numbers the results starting from 0.
for i, city in enumerate(map(str.upper, reversed(sorted(set(cities))))):
    print(i, city)

RAYMOND
RACHEL
MATTHEW
1 raymond
2 rachel
3 matthew
yellow
blue
red
raymond red
rachel blue
matthew yellow
red
blue
yellow
austin
chicago
dallas
houseton
0 HOUSETON
1 DALLAS
2 CHICAGO
3 AUSTIN


In [79]:
import collections

In [80]:
c = collections.Counter()

In [81]:
c

Counter()

In [82]:
# A Counter treats a missing key as a count of 0, so += 1 works without
# initializing 'red' first.
c['red'] += 1

In [83]:
c

Counter({'red': 1})