In [1]:
import numpy as np
import csv
import pandas as pd
from datetime import datetime
import pickle

## Senate

In [2]:
# Load the Senate returns (file is decoded as ISO-8859-1) and show which
# columns are available.
senate_df = pd.read_csv(
    'data/1976-2018-senate.csv',
    encoding="ISO-8859-1",
)
print(senate_df.columns)

Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
       'office', 'district', 'stage', 'special', 'candidate', 'party',
       'writein', 'mode', 'candidatevotes', 'totalvotes', 'unofficial',
       'version'],
      dtype='object')


In [3]:
state_to_party = {}

# Normalise the two columns compared below once, instead of re-casting them on
# every loop iteration. Vote counts that are missing or unparsable count as
# zero so a single bad row cannot abort the whole cell.
senate_races = senate_df.assign(
    year=senate_df['year'].astype(int),
    candidatevotes=pd.to_numeric(senate_df['candidatevotes'], errors='coerce').fillna(0),
)

# Get each state's most recent party that won
# (groupby iterates the states in sorted order and keeps each state's rows in
# their original file order.)
for state, state_rows in senate_races.groupby('state'):
    latest = state_rows[state_rows['year'] == state_rows['year'].max()]

    # Pick the winner by position *within* the latest-year slice. Looking rows
    # up in the full frame with `.iloc` and index labels only works while the
    # frame still has its default 0..n-1 index.
    votes = latest['candidatevotes'].to_numpy()
    top = votes.argmax()  # first row wins a tie

    # A party is recorded only if somebody actually received votes.
    state_to_party[state.lower()] = latest['party'].iloc[top] if votes[top] > 0 else None

print(len(state_to_party))
print(state_to_party)

50
{'rhode island': 'democrat', 'massachusetts': 'democrat', 'virginia': 'democrat', 'utah': 'republican', 'wyoming': 'republican', 'hawaii': 'democrat', 'nebraska': 'republican', 'texas': 'republican', 'vermont': 'independent', 'washington': 'democrat', 'pennsylvania': 'democrat', 'south carolina': 'republican', 'oklahoma': 'republican', 'louisiana': 'republican', 'oregon': 'democrat', 'kansas': 'republican', 'ohio': 'democrat', 'mississippi': 'republican', 'minnesota': 'democratic-farmer-labor', 'florida': 'republican', 'new york': 'democrat', 'new jersey': 'democrat', 'kentucky': 'republican', 'illinois': 'democrat', 'alaska': 'republican', 'colorado': 'democrat', 'north carolina': 'republican', 'new mexico': 'democrat', 'connecticut': 'democrat', 'montana': 'democrat', 'south dakota': 'republican', 'nevada': 'democrat', 'maryland': 'democrat', 'new hampshire': 'democrat', 'west virginia': 'democrat', 'indiana': 'republican', 'georgia': 'republican', 'alabama': 'republican', 'missou

## House

In [4]:
# Load the House returns (file is decoded as ISO-8859-1) and show which
# columns are available.
house_df = pd.read_csv(
    'data/1976-2018-house2.csv',
    encoding="ISO-8859-1",
)
print(house_df.columns)

Index(['year', 'state', 'state_po', 'state_fips', 'state_cen', 'state_ic',
       'office', 'district', 'stage', 'runoff', 'special', 'candidate',
       'party', 'writein', 'mode', 'candidatevotes', 'totalvotes',
       'unofficial', 'version'],
      dtype='object')


In [5]:
# Recode district 0 as district 1.
# NOTE(review): presumably 0 marks single-seat (at-large) states - confirm
# against the dataset codebook.
house_df['district'] = house_df['district'].replace(to_replace=0, value=1)

In [6]:
# Build one key per district by gluing the state name to the district number
# (e.g. state "X" + district 3 -> "X3"), then report how many distinct keys
# the file contains.
house_df['state_dist'] = house_df['state'].astype(str).str.cat(house_df['district'].astype(str))
print(house_df['state_dist'].nunique())

495


In [7]:
housedist_to_party = {}

# Districts whose latest recorded election is older than this year are
# treated as obsolete and left out of the mapping.
FIRST_CURRENT_YEAR = 2012

# Normalise the two columns compared below once, instead of re-casting them on
# every loop iteration. Vote counts that are missing or unparsable count as
# zero so a single bad row cannot abort the whole cell.
house_races = house_df.assign(
    year=house_df['year'].astype(int),
    candidatevotes=pd.to_numeric(house_df['candidatevotes'], errors='coerce').fillna(0),
)

# Get each house district's most recent party that won
# (groupby iterates the district keys in sorted order and keeps each
# district's rows in their original file order.)
for housedist, district_rows in house_races.groupby('state_dist'):
    most_recent_year = district_rows['year'].max()

    # to filter obsolete districts
    if most_recent_year < FIRST_CURRENT_YEAR:
        continue
    latest = district_rows[district_rows['year'] == most_recent_year]

    # Pick the winner by position *within* the latest-year slice. Looking rows
    # up in the full frame with `.iloc` and index labels only works while the
    # frame still has its default 0..n-1 index.
    votes = latest['candidatevotes'].to_numpy()
    top = votes.argmax()  # first row wins a tie

    # A party is recorded only if somebody actually received votes.
    housedist_to_party[housedist.lower()] = latest['party'].iloc[top] if votes[top] > 0 else None

print(len(housedist_to_party))
print(housedist_to_party)

435
{'california1': 'republican', 'texas7': 'democrat', 'hawaii1': 'democrat', 'new york4': 'democrat', 'iowa4': 'republican', 'west virginia1': 'republican', 'texas25': 'republican', 'massachusetts4': 'democrat', 'kentucky4': 'republican', 'arizona4': 'republican', 'maryland6': 'democrat', 'maryland8': 'democrat', 'florida26': 'democrat', 'texas3': 'republican', 'georgia14': 'republican', 'texas23': 'republican', 'west virginia2': 'republican', 'arkansas2': 'republican', 'oregon5': 'democrat', 'pennsylvania15': 'republican', 'california14': 'democrat', 'nebraska1': 'republican', 'mississippi2': 'democrat', 'new york11': 'democrat', 'pennsylvania6': 'democrat', 'new york26': 'democrat', 'ohio12': 'republican', 'georgia6': 'democrat', 'california5': 'democrat', 'tennessee3': 'republican', 'arizona3': 'democrat', 'louisiana5': 'republican', 'illinois3': 'democrat', 'california25': 'democrat', 'texas36': 'republican', 'california26': 'democrat', 'california53': 'democrat', 'new york16': '

In [8]:
# Persist both lookup tables as a two-element list: [senate, house].
# The `with` block guarantees the file is flushed and closed even if
# pickling fails part-way through.
with open('baselines.pkl', 'wb') as baselines_file:
    pickle.dump([state_to_party, housedist_to_party], baselines_file)