In [1]:
import numpy as np
import csv
import pandas as pd
from datetime import datetime
import pickle

## Senate

In [24]:
# Read the Senate returns (the CSV is ISO-8859-1 encoded) and list its columns.
senate_df = pd.read_csv(
    'data/1976-2018-senate.csv',
    encoding="ISO-8859-1",
)
print(senate_df.columns)

Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
       'office', 'district', 'stage', 'special', 'candidate', 'party',
       'writein', 'mode', 'candidatevotes', 'totalvotes', 'unofficial',
       'version'],
      dtype='object')


In [25]:
# Maps lower-cased state name -> party of the row with the most votes in that
# state's most recent election year (None when no row has a positive count).
state_to_party = {}

# Cast the numeric columns once up front instead of once per state.
# Missing vote counts become 0 so they can never be selected as the winner
# (int(NaN) used to raise here); unparsable values still raise loudly.
senate_typed = senate_df.assign(
    year=senate_df['year'].astype(int),
    candidatevotes=pd.to_numeric(senate_df['candidatevotes']).fillna(0),
)

# Get each state's most recent party that won.
# groupby yields the states in sorted order and hands us each state's rows
# directly, so no full-frame scan per state is needed.
# NOTE(review): every row of the latest year competes together; nothing here
# separates rows by the 'special' or 'stage' columns -- confirm that is intended.
for state, state_rows in senate_typed.groupby('state'):
    latest = state_rows[state_rows['year'] == state_rows['year'].max()]
    votes = latest['candidatevotes'].to_numpy()

    # argmax returns the first maximum, matching a strict '>' scan on ties.
    # Selection is positional within `latest`, so it does not depend on the
    # frame's index labels (the previous .iloc[label] lookup did).
    top = votes.argmax()
    state_to_party[state.lower()] = latest['party'].iloc[top] if votes[top] > 0 else None

print(len(state_to_party))
print(state_to_party)

50
{'delaware': 'democrat', 'idaho': 'republican', 'nebraska': 'republican', 'iowa': 'republican', 'new hampshire': 'democrat', 'nevada': 'democrat', 'california': 'democrat', 'montana': 'democrat', 'massachusetts': 'democrat', 'new jersey': 'democrat', 'hawaii': 'democrat', 'texas': 'republican', 'pennsylvania': 'democrat', 'maine': 'independent', 'utah': 'republican', 'illinois': 'democrat', 'georgia': 'republican', 'north carolina': 'republican', 'missouri': 'republican', 'kentucky': 'republican', 'florida': 'republican', 'mississippi': 'republican', 'arizona': 'democrat', 'alabama': 'republican', 'vermont': 'independent', 'oklahoma': 'republican', 'west virginia': 'democrat', 'wisconsin': 'democrat', 'south carolina': 'republican', 'minnesota': 'democratic-farmer-labor', 'tennessee': 'republican', 'connecticut': 'democrat', 'oregon': 'democrat', 'north dakota': 'republican', 'washington': 'democrat', 'new mexico': 'democrat', 'colorado': 'democrat', 'alaska': 'republican', 'michiga

## House

In [36]:
# Read the House returns (the CSV is ISO-8859-1 encoded) and list its columns.
house_df = pd.read_csv(
    'data/1976-2018-house2.csv',
    encoding="ISO-8859-1",
)
print(house_df.columns)

Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
       'office', 'district', 'stage', 'runoff', 'special', 'candidate',
       'party', 'writein', 'mode', 'candidatevotes', 'totalvotes',
       'unofficial', 'version'],
      dtype='object')


In [40]:
# Re-label district 0 as district 1; every other district value is untouched.
# NOTE(review): presumably 0 marks single-seat (at-large) states -- confirm
# against the dataset's codebook.
house_df['district'] = house_df['district'].mask(house_df['district'].eq(0), 1)

In [42]:
# Build a per-district key by appending the district value to the state name
# (no separator), then report how many distinct keys exist.
house_df['state_dist'] = house_df['state'].astype(str).add(house_df['district'].astype(str))
print(house_df['state_dist'].nunique())

495


In [43]:
# Maps lower-cased "<state><district>" key -> party of the row with the most
# votes in that district's most recent election year (None when no row has a
# positive count). Districts whose latest year is before 2012 are left out.
housedist_to_party = {}

# Cast the numeric columns once up front instead of once per district.
# Missing vote counts become 0 so they can never be selected as the winner
# (int(NaN) used to raise here); unparsable values still raise loudly.
house_typed = house_df.assign(
    year=house_df['year'].astype(int),
    candidatevotes=pd.to_numeric(house_df['candidatevotes']).fillna(0),
)

# Get each district's most recent party that won.
# groupby yields the district keys in sorted order and hands us each
# district's rows directly, so no full-frame scan per district is needed.
# NOTE(review): the winner is the single row with the most votes; if one
# candidate is spread over several rows their counts are not combined, and
# rows are not separated by 'special'/'runoff'/'stage' -- confirm intended.
for housedist, dist_rows in house_typed.groupby('state_dist'):
    most_recent_year = dist_rows['year'].max()

    # to filter obsolete districts
    if most_recent_year < 2012:
        continue

    latest = dist_rows[dist_rows['year'] == most_recent_year]
    votes = latest['candidatevotes'].to_numpy()

    # argmax returns the first maximum, matching a strict '>' scan on ties.
    # Selection is positional within `latest`, so it does not depend on the
    # frame's index labels (the previous .iloc[label] lookup did).
    top = votes.argmax()
    housedist_to_party[housedist.lower()] = latest['party'].iloc[top] if votes[top] > 0 else None

print(len(housedist_to_party))
print(housedist_to_party)

435
{'virginia6': 'republican', 'new jersey6': 'democrat', 'texas23': 'republican', 'california35': 'democrat', 'pennsylvania5': 'democrat', 'virginia3': 'democrat', 'ohio4': 'republican', 'alabama2': 'republican', 'illinois1': 'democrat', 'texas8': 'republican', 'illinois12': 'republican', 'new york5': 'democrat', 'new york14': 'democrat', 'massachusetts3': 'democrat', 'maryland4': 'democrat', 'nebraska2': 'republican', 'arizona2': 'democrat', 'california48': 'democrat', 'california4': 'republican', 'south carolina2': 'republican', 'connecticut4': 'democrat', 'south carolina3': 'republican', 'south carolina7': 'republican', 'oregon4': 'democrat', 'washington4': 'republican', 'nebraska1': 'republican', 'louisiana1': 'republican', 'michigan6': 'republican', 'massachusetts5': 'democrat', 'new york3': 'democrat', 'california46': 'democrat', 'florida4': 'republican', 'california5': 'democrat', 'pennsylvania9': 'republican', 'california16': 'democrat', 'north carolina12': 'democrat', 'calif

In [44]:
# Persist both lookup tables as a two-element list: [senate map, house map].
# The context manager guarantees the file is flushed and closed even if
# pickling fails part-way through.
with open('baselines.pkl', 'wb') as baselines_file:
    pickle.dump([state_to_party, housedist_to_party], baselines_file)