In [3]:
from collections import defaultdict
from pprint import pprint

In [None]:
# defaultdict is most useful during the accumulation / grouping phase: a missing
# key is created automatically on first access. Once accumulation is done,
# convert the result back to a regular dict.
# One-to-many relationships are modelled as dict(one -> list_of_many).

d = defaultdict(list)
for person, color in (("raymond", "red"), ("rachel", "black"), ("matthew", "yellow")):
    d[person].append(color)

pprint(d)

# Second pass: the keys already exist, so each append extends the existing list.
for person, computer in (("raymond", "mac"), ("rachel", "pc"), ("matthew", "pc")):
    d[person].append(computer)

pprint(d)

d = dict(d)  # back to a plain dict: looking up a missing key raises KeyError again

pprint(d)

defaultdict(<class 'list'>,
            {'matthew': ['yellow'],
             'rachel': ['black'],
             'raymond': ['red']})
defaultdict(<class 'list'>,
            {'matthew': ['yellow', 'pc'],
             'rachel': ['black', 'pc'],
             'raymond': ['red', 'mac']})
{'matthew': ['yellow', 'pc'],
 'rachel': ['black', 'pc'],
 'raymond': ['red', 'mac']}


In [14]:
# English -> Spanish, one-to-many: each English word maps to a list of translations.
e2s = dict(
    one=["uno"],
    two=["dos"],
    three=["tres"],
    trio=["tres"],
    free=["libre", "gratis"],
)

pprint(e2s)

# Invert the mapping: group every English word under each of its Spanish translations.
s2e = defaultdict(list)
translation_pairs = [
    (spanish, english)
    for english, translations in e2s.items()
    for spanish in translations
]
for spanish, english in translation_pairs:
    s2e[spanish].append(english)

pprint(s2e)

print("For one-to-one mapping we can use the following")

# With exactly one translation per word, swapping keys and values is enough.
e2s = {"one": "uno", "two": "dos", "three": "tres"}
print(e2s)
s2e = dict(zip(e2s.values(), e2s.keys()))
print(s2e)


{'free': ['libre', 'gratis'],
 'one': ['uno'],
 'three': ['tres'],
 'trio': ['tres'],
 'two': ['dos']}
defaultdict(<class 'list'>,
            {'dos': ['two'],
             'gratis': ['free'],
             'libre': ['free'],
             'tres': ['three', 'trio'],
             'uno': ['one']})
For one-to-one mapping we can use the following
{'one': 'uno', 'two': 'dos', 'three': 'tres'}
{'uno': 'one', 'dos': 'two', 'tres': 'three'}


In [None]:
# glob module: expands shell-style wildcard patterns into the matching path names
import glob

hidden_entries = glob.glob(".*")  # ".*" matches names in the current directory that start with a dot
hidden_entries

['.git', '.mypy_cache']

In [18]:
# Peek at the raw file: iterating a text file yields its lines with the trailing
# newline kept, so every record still has to be split on commas by hand.
with open("./congress_data/congress_votes_114-2016_s20.csv", encoding="utf") as file:
    print(list(file))

['Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016\n', 'person,state,district,vote,name,party\n', '300002,TN,,Yea,Sen. Lamar Alexander [R],Republican\n', '300011,CA,,Yea,Sen. Barbara Boxer [D],Democrat\n', '300018,WA,,Yea,Sen. Maria Cantwell [D],Democrat\n', '300019,DE,,Yea,Sen. Thomas Carper [D],Democrat\n', '300023,MS,,Yea,Sen. Thad Cochran [R],Republican\n', '300025,ME,,Yea,Sen. Susan Collins [R],Republican\n', '300027,TX,,Yea,Sen. John Cornyn [R],Republican\n', '300030,ID,,Yea,Sen. Michael Crapo [R],Republican\n', '300038,IL,,Not Voting,Sen. Richard Durbin [D],Democrat\n', '300041,WY,,Yea,Sen. Michael Enzi [R],Republican\n', '300043,CA,,Yea,Sen. Dianne Feinstein [D],Democrat\n', '300047,SC,,Not Voting,Sen. Lindsey Graham [R],Republican\n', '300048,IA,,Yea,Sen. Charles “Chuck” Grassley [R],Republican\n', '300052,UT,,Yea,Sen. Orrin Hatch [R],Republican\n', '300055,OK,,Yea,Sen. James “Jim” Inhofe [R],Republican\n', '300065,VT,,Yea,Sen. Patr

In [None]:
it = iter("abcdefg")
for step in range(2):  # advance past "a" and "b"
    next(it)
list(it)  # only the letters not yet consumed remain, so the list starts at "c", not "a"

['c', 'd', 'e', 'f', 'g']

In [24]:
import csv

# csv.reader does the comma splitting (and quote handling) that reading raw lines
# leaves to the caller; every row comes back as a list of strings.
# The csv documentation says files handed to csv.reader should be opened with
# newline="" so that newlines embedded in quoted fields are interpreted by the
# reader rather than translated by the text layer.
with open("./congress_data/congress_votes_114-2016_s20.csv", encoding="utf", newline="") as file:
    for row in csv.reader(file):
        print(row)

['Senate Vote #20 2016-02-10T17:11:00 - H.R. 757: North Korea Sanctions Enforcement Act of 2016']
['person', 'state', 'district', 'vote', 'name', 'party']
['300002', 'TN', '', 'Yea', 'Sen. Lamar Alexander [R]', 'Republican']
['300011', 'CA', '', 'Yea', 'Sen. Barbara Boxer [D]', 'Democrat']
['300018', 'WA', '', 'Yea', 'Sen. Maria Cantwell [D]', 'Democrat']
['300019', 'DE', '', 'Yea', 'Sen. Thomas Carper [D]', 'Democrat']
['300023', 'MS', '', 'Yea', 'Sen. Thad Cochran [R]', 'Republican']
['300025', 'ME', '', 'Yea', 'Sen. Susan Collins [R]', 'Republican']
['300027', 'TX', '', 'Yea', 'Sen. John Cornyn [R]', 'Republican']
['300030', 'ID', '', 'Yea', 'Sen. Michael Crapo [R]', 'Republican']
['300038', 'IL', '', 'Not Voting', 'Sen. Richard Durbin [D]', 'Democrat']
['300041', 'WY', '', 'Yea', 'Sen. Michael Enzi [R]', 'Republican']
['300043', 'CA', '', 'Yea', 'Sen. Dianne Feinstein [D]', 'Democrat']
['300047', 'SC', '', 'Not Voting', 'Sen. Lindsey Graham [R]', 'Republican']
['300048', 'IA', '', 

In [27]:
from collections import Counter

# A Counter treats missing keys as zero, so tallies can be incremented without
# initialising them first.
c = Counter()
for color, amount in (("red", 1), ("blue", 2)):
    c[color] += amount

print(c)        # the Counter repr lists entries from most to least common

print(dict(c))  # a plain dict keeps insertion order instead

Counter({'blue': 2, 'red': 1})
{'red': 1, 'blue': 2}


In [None]:
%%writefile congress.py
import csv
from collections import namedtuple, defaultdict
from pprint import pprint
import glob
from typing import NamedTuple, DefaultDict, Tuple

#Senator = namedtuple("Senator", ["name", "party", "state"])
Senator = NamedTuple("Senator", [("name", str), ("party", str), ("state", str)])
VotingValue = int
VotingHistory = Tuple[VotingValue, ...]
# Load votes which were arranged by topic and accumlate vote by senator
vote_value = {"Yea" : 1, "Nay" : -1, "Not Voting": 0}                           # type: Dict[str, VotingValue]
accumelated_record = defaultdict(list)                                          # type: DefaulDict[Senator, List[VotingValue]]
for filename in glob.glob("./congress_data/*.csv"):
    with open(filename, encoding="utf") as file:
        reader = csv.reader(file)
        vote_topic = next(reader)
        headers = next(reader)
        for person, state, district, vote, name, party in csv.reader(file):
            # person, state, district, vote, name, party
            senator = Senator(name, party, state)
            accumelated_record[senator].append(vote_value[vote])

# transform the record into a plain dict that maps to tuple of votes 
record = {senator: tuple(votes) for senator, votes in accumelated_record.items()}           # type: Dict[Senator, VotingHistory]

# Use k-means to locate the cluster centroids from patterns of votes 

pprint(record, width=500)

Overwriting congress.py


NameError: name 'record' is not defined