In [130]:
import numpy as np
import pandas as pd

from us import states

In [83]:
# Load the 2012, 2014, 2016 and 2018 results CSVs from the data/ folder

results_2012, results_2014, results_2016, results_2018 = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/2012_results.csv',
        'data/2014_results.csv',
        'data/2016_results.csv',
        'data/2018_results.csv',
    )
)

In [84]:
# Keep the four yearly frames in one list so later cells can loop over them
dfs = [
    results_2012,
    results_2014,
    results_2016,
    results_2018,
]

In [85]:
# Preview the first rows of the 2012 table
results_2012.head()

Unnamed: 0,Name,URL,State,District,Party,Terms
0,"Representative Adams, Alma S.",https://www.congress.gov/member/alma-adams/A00...,North Carolina,12.0,Democratic,House: 2014-Present
1,"Representative Aderholt, Robert B.",https://www.congress.gov/member/robert-aderhol...,Alabama,4.0,Republican,House: 1997-Present
2,"Representative Alexander, Rodney",https://www.congress.gov/member/rodney-alexand...,Louisiana,5.0,Republican,House: 2003-2013
3,"Representative Amash, Justin",https://www.congress.gov/member/justin-amash/A...,Michigan,3.0,Republican,House: 2011-Present
4,"Representative Amodei, Mark E.",https://www.congress.gov/member/mark-amodei/A0...,Nevada,2.0,Republican,House: 2011-Present


In [87]:
# Map every state object in states.STATES to its FIPS code

st = states.STATES
states_dict = {state_obj: state_obj.fips for state_obj in st}

In [88]:
# Re-key the mapping by the string form of each state object,
# giving a plain {state name: FIPS code} dictionary

fips = {
    str(state_obj): fips_code
    for state_obj, fips_code in states_dict.items()
}

In [89]:
# Show the state-name -> FIPS-code mapping
fips

{'Alabama': '01',
 'Alaska': '02',
 'Arizona': '04',
 'Arkansas': '05',
 'California': '06',
 'Colorado': '08',
 'Connecticut': '09',
 'Delaware': '10',
 'District of Columbia': '11',
 'Florida': '12',
 'Georgia': '13',
 'Hawaii': '15',
 'Idaho': '16',
 'Illinois': '17',
 'Indiana': '18',
 'Iowa': '19',
 'Kansas': '20',
 'Kentucky': '21',
 'Louisiana': '22',
 'Maine': '23',
 'Maryland': '24',
 'Massachusetts': '25',
 'Michigan': '26',
 'Minnesota': '27',
 'Mississippi': '28',
 'Missouri': '29',
 'Montana': '30',
 'Nebraska': '31',
 'Nevada': '32',
 'New Hampshire': '33',
 'New Jersey': '34',
 'New Mexico': '35',
 'New York': '36',
 'North Carolina': '37',
 'North Dakota': '38',
 'Ohio': '39',
 'Oklahoma': '40',
 'Oregon': '41',
 'Pennsylvania': '42',
 'Rhode Island': '44',
 'South Carolina': '45',
 'South Dakota': '46',
 'Tennessee': '47',
 'Texas': '48',
 'Utah': '49',
 'Vermont': '50',
 'Virginia': '51',
 'Washington': '53',
 'West Virginia': '54',
 'Wisconsin': '55',
 'Wyoming': '56'}

In [90]:
# State names (the keys of `fips`) as a list
states_list = [*fips]

In [91]:
# List of values to drop from results data
#
# Collect every 'State' entry, across all four tables, that is not one of the
# names in `states_list` (the keys of `fips`). No other cell in this notebook
# assigns `drop_list`, so without this definition the cell raises NameError
# on a fresh kernel.
# NOTE(review): reconstructed from the displayed output (five entries per
# results table) -- confirm it matches the original, now-missing definition.

drop_list = [
    state_name
    for frame in dfs
    for state_name in frame['State']
    if state_name not in states_list
]
drop_list

['Guam',
 'Virgin Islands',
 'American Samoa',
 'Puerto Rico',
 'Northern Mariana Islands',
 'Guam',
 'Puerto Rico',
 'Virgin Islands',
 'American Samoa',
 'Northern Mariana Islands',
 'Guam',
 'Puerto Rico',
 'Virgin Islands',
 'American Samoa',
 'Northern Mariana Islands',
 'Puerto Rico',
 'Virgin Islands',
 'American Samoa',
 'Northern Mariana Islands',
 'Guam']

In [None]:
# De-duplicate the values to drop
drop_set = {*drop_list}

In [93]:
# Row count of each data frame before dropping above values

for frame in dfs:
    print(frame.shape[0])

450
447
456
440


In [97]:
# Drop above values from each data frame
#
# Every frame in `dfs` is filtered, not just one hard-coded position: the FIPS
# lookup later in the notebook indexes `fips` with each 'State' value and would
# raise KeyError on any row in `drop_set` that was left behind.
# Rows are removed in place so that results_2012 ... results_2018, which are
# the same objects held in `dfs`, see the change as well.

drop_indices = []
for frame in dfs:
    # Boolean mask instead of Series.iteritems(), which pandas 2.0 removed.
    drop_indices = frame.index[frame['State'].isin(drop_set)].tolist()
    frame.drop(drop_indices, inplace=True)
# `drop_indices` is left holding the row labels removed from the last frame.

In [98]:
# Row count of each data frame after removing above values

for row_count in map(len, dfs):
    print(row_count)

445
442
451
435


In [69]:
# Row labels collected in drop_indices by the drop cell
drop_indices

[29, 64, 124, 317, 360]

In [99]:
# Look up each row's state name in `fips` and store the codes in a new
# 'FIPS' column; a name that is missing from `fips` raises KeyError.
for results in dfs:
    fips_codes = [fips[state_name] for state_name in results['State']]
    results['FIPS'] = fips_codes

In [104]:
# Preview the first 10 rows of the 2018 table, including the new FIPS column
results_2018.head(10)

Unnamed: 0,Name,URL,State,District,Party,Terms,FIPS
0,"Representative Abraham, Ralph Lee",https://www.congress.gov/member/ralph-abraham/...,Louisiana,5.0,Republican,House: 2015-Present,22
1,"Representative Adams, Alma S.",https://www.congress.gov/member/alma-adams/A00...,North Carolina,12.0,Democratic,House: 2014-Present,37
2,"Representative Aderholt, Robert B.",https://www.congress.gov/member/robert-aderhol...,Alabama,4.0,Republican,House: 1997-Present,1
3,"Representative Aguilar, Pete",https://www.congress.gov/member/pete-aguilar/A...,California,31.0,Democratic,House: 2015-Present,6
4,"Representative Allen, Rick W.",https://www.congress.gov/member/rick-allen/A00...,Georgia,12.0,Republican,House: 2015-Present,13
5,"Representative Allred, Colin Z.",https://www.congress.gov/member/colin-allred/A...,Texas,32.0,Democratic,House: 2019-Present,48
6,"Representative Amash, Justin",https://www.congress.gov/member/justin-amash/A...,Michigan,3.0,Republican,House: 2011-Present,26
7,"Representative Amodei, Mark E.",https://www.congress.gov/member/mark-amodei/A0...,Nevada,2.0,Republican,House: 2011-Present,32
8,"Representative Armstrong, Kelly",https://www.congress.gov/member/kelly-armstron...,North Dakota,,Republican,House: 2019-Present,38
9,"Representative Arrington, Jodey C.",https://www.congress.gov/member/jodey-arringto...,Texas,19.0,Republican,House: 2017-Present,48


In [110]:
# Replace every missing value with 0, in place, in all four yearly tables

for yearly_results in (results_2012, results_2014, results_2016, results_2018):
    yearly_results.fillna(value=0, inplace=True)

In [116]:
# For joining later: the state-name column becomes 'Oldstate' and the FIPS
# code column takes over the name 'State'

column_renames = {'State': 'Oldstate', 'FIPS': 'State'}

results_2012, results_2014, results_2016, results_2018 = (
    yearly_results.rename(columns=column_renames)
    for yearly_results in (results_2012, results_2014, results_2016, results_2018)
)

In [118]:
# Rebuild the list: the rename step assigned new DataFrames to these names,
# so the previous list no longer refers to the renamed tables
dfs = [
    results_2012,
    results_2014,
    results_2016,
    results_2018,
]

In [119]:
# Cast the district number and the state code to integers in every table

for frame in dfs:
    for column in ('District', 'State'):
        frame[column] = frame[column].astype(int)

In [121]:
# Renumber the rows of every table in place; the previous row labels are
# kept as a regular column (removed again further down as 'index')
for frame in dfs:
    frame.reset_index(drop=False, inplace=True)

In [123]:
# Tag every row of each results table with its election year

yearly_tables = (results_2012, results_2014, results_2016, results_2018)
for election_year, yearly_results in zip((2012, 2014, 2016, 2018), yearly_tables):
    yearly_results['Year'] = election_year

In [128]:
# Remove the 'index' column from each yearly table, in place
for yearly_results in (results_2012, results_2014, results_2016, results_2018):
    yearly_results.drop(columns=['index'], inplace=True)

In [140]:
# Create full results table
#
# Stack the four yearly tables with pd.concat; DataFrame.append, used here
# before, was removed in pandas 2.0. Each table keeps its own row labels,
# exactly as the chained append calls did, so labels repeat across years.

results_data = pd.concat([results_2012, results_2014, results_2016, results_2018])

In [141]:
# Total number of rows in the combined table
len(results_data)

1773

In [142]:
# Write the combined table to disk; the row labels are written as the first column
results_data.to_csv('results_data.csv', index=True)