## List Comprehensions

We'll be working with legislators.csv, which records information on every historical member of the U.S. Congress. The file includes these columns:

- `last_name` - the legislator's last name
- `first_name` - the legislator's first name
- `birthday` - the legislator's birthday
- `gender` - the legislator's gender
- `type` - the chamber in which the legislator served - either Senate (`Sen`) or House of Representatives (`Rep`)
- `state` - the state the legislator represents
- `party` - the legislator's party affiliation

In [1]:
import csv
# Load every row of the CSV (header included) into a list of lists.
# The `with` block guarantees the file handle is closed once the rows are read.
# newline="" is what the csv module recommends for files passed to csv.reader.
# NOTE(review): encoding is set to UTF-8 because a name in the recorded output
# further down appears mis-decoded ('AndrÃ©'); confirm the file is UTF-8.
with open("legislators.csv", newline="", encoding="utf-8") as f:
    csvread = csv.reader(f)
    legislators = list(csvread)

__Preprocessing and handling errors__

In [2]:
# Fill in blank gender fields (column 3) with the default value 'M'.
for record in legislators:
    gender_missing = record[3] == ''
    if gender_missing:
        record[3] = 'M'

In [3]:
# Derive each legislator's birth year from the birthday column (column 2,
# formatted like "6/26/1938") and append it to the row as a new last column.
for row in legislators[1:]:
    parts = row[2].split('/')
    try:
        birth_year = int(parts[2])
    except (IndexError, ValueError):
        # Blank or malformed birthday: there is no third "/"-separated part,
        # or it is not a number. Record 0 as a placeholder.
        birth_year = 0
    row.append(birth_year)

In [4]:
# Label the new last column in the header row.
header = legislators[0]
header.append("birth_year")

In [5]:
# Replace placeholder birth years (0) with the birth year of the previous row.
# `last_value` starts at 1 so a placeholder in the very first data row still
# gets a non-zero value.
last_value = 1
for row in legislators[1:]:
    if row[7] == 0:
        row[7] = last_value
    # Update on every row (not only on replaced ones) so the next placeholder
    # inherits the most recent birth year seen.
    last_value = row[7]

In [8]:
# enumerate() example 1
# enumerate() yields (index, item) pairs, so the index can be reused to look
# up the matching entry in a second list.
ships = ["Andrea Doria", "Titanic", "Lusitania"]
cars = ["Ford Edsel", "Ford Pinto", "Yugo"]
for position, vessel in enumerate(ships):
    print(vessel)
    print(cars[position])

Andrea Doria
Ford Edsel
Titanic
Ford Pinto
Lusitania
Yugo


In [14]:
# enumerate() example 2
# Use the index from enumerate() to append the matching tree to each inner list.
things = [["apple", "monkey"], ["orange", "dog"], ["banana", "cat"]]
trees = ["cedar", "maple", "fig"]
for position, group in enumerate(things):
    group.append(trees[position])

print(things)

[['apple', 'monkey', 'cedar'], ['orange', 'dog', 'maple'], ['banana', 'cat', 'fig']]


In [19]:
# List comprehension example
# Each comprehension builds a new list by applying an expression to every price.
apple_prices = [100, 101, 102, 105]
apple_prices_doubled = [price * 2 for price in apple_prices]
apple_prices_lowered = [price - 100 for price in apple_prices]
print(apple_prices_doubled)
print(apple_prices_lowered)

[200, 202, 204, 210]
[0, 1, 2, 5]


__Find the most common first names among U.S. legislators of each gender (born in 1940 or later)__

In [20]:
# Preview the header row plus the first nine data rows.
legislators[:10]

[['last_name',
  'first_name',
  'birthday',
  'gender',
  'type',
  'state',
  'party',
  'birth_year'],
 ['Abercrombie', 'Neil', '6/26/1938', 'M', 'Rep', 'HI', 'D', 1938],
 ['Ackerman', 'Gary', '11/19/1942', 'M', 'Rep', 'NY', 'D', 1942],
 ['Aderholt', 'Robert', '7/22/1965', 'M', 'Rep', 'AL', 'R', 1965],
 ['Akaka', 'Daniel', '9/11/1924', 'M', 'Sen', 'HI', 'D', 1924],
 ['Allard', 'Wayne', '12/2/1943', 'M', 'Sen', 'CO', 'R', 1943],
 ['Andrews', 'Robert', '8/4/1957', 'M', 'Rep', 'NJ', 'D', 1957],
 ['Allen', 'Thomas', '4/16/1945', 'M', 'Rep', 'ME', 'D', 1945],
 ['Akin', 'W.', '7/5/1947', 'M', 'Rep', 'MO', 'R', 1947],
 ['Alexander', 'Lamar', '7/3/1940', 'M', 'Sen', 'TN', 'R', 1940]]

In [22]:
# Count first names (column 1) of legislators whose gender (column 3) is 'F'
# and whose birth year (last column) is 1940 or later.
name_counts = {}
for legislator in legislators[1:]:
    # Skip anyone who fails either filter; the gender check runs first.
    if legislator[3] != 'F' or legislator[-1] < 1940:
        continue
    first_name = legislator[1]
    name_counts[first_name] = name_counts.get(first_name, 0) + 1

print(name_counts)

{'Sandy': 1, 'Kelly': 1, 'Barbara': 3, 'Corrine': 1, 'Mary': 3, 'Tammy': 2, 'Shelley': 1, 'Marsha': 1, 'Virginia': 2, 'Melissa': 1, 'Michele': 1, 'Nancy': 2, 'Karen': 1, 'Diane': 1, 'Ann Marie': 1, 'Joyce': 1, 'Susan': 3, 'Julia': 1, 'Cheri': 1, 'Maria': 1, 'Donna': 2, 'Hillary': 1, 'Kathy': 1, 'Yvette': 1, 'Judy': 1, 'Diana': 1, 'Rosa': 1, 'Jo Ann': 2, 'Thelma': 1, 'Kathleen': 2, 'Suzan': 1, 'Anna': 1, 'Renee': 1, 'Elizabeth': 2, 'Marcia': 1, 'Lois': 1, 'Deb': 1, 'Kay': 3, 'Gabrielle': 1, 'Kirsten': 1, 'Tulsi': 1, 'Jane': 1, 'Stephanie': 2, 'Mazie': 1, 'Deborah': 2, 'Colleen': 1, 'Vicky': 1, 'Nan': 1, 'Jaime': 1, 'Janice': 2, 'Heidi': 1, 'Sheila': 1, 'Lynn': 1, 'Marcy': 1, 'Carolyn': 3, 'Amy': 1, 'Ann': 3, 'Suzanne': 1, 'Mary Jo': 1, 'Blanche': 1, 'Zoe': 1, 'Cynthia': 1, 'Michelle': 1, 'Patty': 1, 'Sue': 1, 'Betty': 2, 'Candice': 1, 'Marilyn': 1, 'Lisa': 1, 'Cathy': 1, 'Gwen': 1, 'Doris': 1, 'Claire': 1, 'Betsy': 1, 'Grace': 1, 'Kristi': 1, 'Gloria': 1, 'Chellie': 1, 'Ileana': 1, 'Luc

In [28]:
# Highest count in name_counts; None if the dictionary is empty.
max_value = max(name_counts.values(), default=None)
max_value

3

In [32]:
# Collect every name whose count equals the maximum (ties are all kept).
top_female_names = []
for name, count in name_counts.items():
    if count == max_value:
        top_female_names.append(name)

print(top_female_names)

['Barbara', 'Mary', 'Susan', 'Kay', 'Carolyn', 'Ann']


In [33]:
# items() example
# dict.items() yields (key, value) pairs that can be unpacked in the loop header.
plant_types = {"orchid": "flower", "cedar": "tree", "maple": "tree"}
for plant, kind in plant_types.items():
    print(plant)
    print(kind)

orchid
flower
cedar
tree
maple
tree


In [35]:
# Names whose count equals the maximum, built with a list comprehension.
# No empty-list initialisation is needed: the comprehension creates the list.
top_female_names = [name for name, count in name_counts.items() if count == max_value]
top_female_names

['Barbara', 'Mary', 'Susan', 'Kay', 'Carolyn', 'Ann']

In [37]:
# Count first names (column 1) of legislators whose gender (column 3) is "M"
# and whose birth year (last column) is 1940 or later.
male_name_counts = {}
for record in legislators[1:]:
    if record[3] == "M" and record[-1] >= 1940:
        first_name = record[1]
        # Start unseen names at zero, then count this occurrence.
        male_name_counts.setdefault(first_name, 0)
        male_name_counts[first_name] += 1
print(male_name_counts)

{'Gary': 3, 'Robert': 16, 'Wayne': 2, 'Thomas': 11, 'W.': 1, 'Lamar': 2, 'Rodney': 3, 'Jason': 2, 'Michael': 17, 'John': 40, 'Steve': 13, 'Justin': 1, 'Mark': 12, 'Spencer': 1, 'Richard': 9, 'Joe': 9, 'Charles': 12, 'Max': 1, 'Xavier': 1, 'Howard': 1, 'Joseph': 5, 'Brian': 4, 'Jeff': 8, 'Sanford': 1, 'Earl': 2, 'Roy': 1, 'Frederick': 1, 'Allen': 2, 'Kevin': 4, 'Sherrod': 1, 'Samuel': 1, 'Stephen': 3, 'Evan': 1, 'James': 13, 'Timothy': 4, 'Jo': 1, 'Rob': 2, 'George': 5, 'Dan': 2, 'Gus': 1, 'Bruce': 1, 'Vern': 1, 'Paul': 6, 'Bobby': 3, 'Scott': 7, 'Lou': 1, 'Rick': 3, 'Mo': 1, 'Larry': 3, 'Suzanne': 1, 'Ron': 7, 'Kerry': 1, 'Garland': 1, 'Jim': 8, 'Ami': 1, 'Ken': 2, 'Dave': 1, 'Christopher': 8, 'Benjamin': 1, 'Saxby': 1, 'Daniel': 6, 'Kent': 1, 'Jerry': 3, 'Elijah': 1, 'Ander': 1, 'Eric': 4, 'Shelley': 1, 'Wm.': 1, 'Dennis': 5, 'Tom': 11, 'Norm': 1, 'Ben': 4, 'Russ': 1, 'Emanuel': 1, 'K.': 1, 'Henry': 2, 'Bob': 7, 'AndrÃ©': 1, 'Donald': 3, 'Travis': 1, 'Bill': 9, 'Mike': 12, 'Gerald': 2

In [38]:
# Highest count in male_name_counts; None if the dictionary is empty.
max_value_m = max(male_name_counts.values(), default=None)
max_value_m

40

In [41]:
# Names whose count equals the maximum, built with a list comprehension.
# No empty-list initialisation is needed: the comprehension creates the list.
top_male_names = [name for name, count in male_name_counts.items() if count == max_value_m]
top_male_names

['John']