In [10]:
import pandas as pd

In [114]:
# Raw presidential returns. Later cells read the `year`, `state`, `state_po`,
# `party`, `candidatevotes` and `totalvotes` columns from this frame.
results_df = pd.read_csv("1976-2016-president.csv")


In [115]:
# Lookup table used across the notebook: a DataFrame indexed by full state
# name with a single `state_po` (postal abbreviation) column. It is built from
# the 2016 rows only and de-duplicated, so later cells can `.join(states)` to
# re-key name-indexed data by abbreviation.
states = (
    results_df.loc[results_df["year"] == 2016, ["state", "state_po"]]
    .drop_duplicates()
    .set_index("state")
)

In [116]:
# Elections averaged into each state's baseline partisan lean, and the two
# parties being compared. `parties` is reused by later cells.
years = [1992, 1996, 2000, 2004, 2008, 2012]
parties = ['democrat', 'republican']

# NOTE(review): some (year, state, party) combinations appear more than once
# in the raw data (e.g. MD 2004) -- presumably minor same-party entries such
# as write-ins or faithless-elector rows; confirm against the data source.
# Rows with a vote share of 1% or less are dropped so each combination is
# unique; set_index(verify_integrity=True) raises if a duplicate survives.
major_party_rows = results_df[
    results_df['year'].isin(years) & results_df['party'].isin(parties)
]

lean_2016 = (
    major_party_rows
    # .assign returns a new frame, so the column is never written onto a
    # filtered slice of results_df (avoids SettingWithCopyWarning).
    .assign(
        vote_share=lambda df: (100 * (df['candidatevotes'] / df['totalvotes'])).round(2)
    )
    .loc[lambda df: df['vote_share'] > 1]
    .filter(items=['year', 'state_po', 'party', 'vote_share'])
    .set_index(['year', 'state_po', 'party'], verify_integrity=True)
    # One row per (year, state_po); one vote_share column per party.
    .unstack(level='party')
)

# Positive values lean Democratic, negative values lean Republican.
lean_2016['partisan_lean'] = (
    lean_2016['vote_share']['democrat'] - lean_2016['vote_share']['republican']
)

# Mean lean per state across `years`. groupby sorts its keys, so the result is
# ordered by state_po; use .sort_values('partisan_lean') for a ranked view.
lean_by_state_2016 = lean_2016.groupby('state_po')['partisan_lean'].mean().to_frame()

lean_by_state_2016

Unnamed: 0_level_0,partisan_lean
state_po,Unnamed: 1_level_1
AK,-19.786667
AL,-16.338333
AR,-4.015
AZ,-5.676667
CA,15.87
CO,0.695
CT,15.353333
DC,79.5
DE,14.62
FL,0.411667


In [117]:
# Per-state 2016 campaign event counts for each ticket. Both files are keyed
# by full state name, so each is joined against the `states` lookup and
# re-indexed by postal abbreviation to line up with lean_by_state_2016.
# rename(errors="raise") fails loudly if a file has no `event_count` column.
dem_event_2016 = (
    pd.read_csv('2016_clinton_kaine_campaign_events.csv')
    .set_index('state')
    .rename(columns={"event_count": "dem_event_count"}, errors="raise")
    .join(states)
    .set_index('state_po')
)
rep_event_2016 = (
    pd.read_csv('2016_trump_pence_events.csv')
    .set_index('state')
    .rename(columns={"event_count": "rep_event_count"}, errors="raise")
    .join(states)
    .set_index('state_po')
)

# Drop any event columns left over from a previous run of this cell before
# joining; otherwise re-running raises on overlapping column names.
event_columns = [*dem_event_2016.columns, *rep_event_2016.columns]
lean_by_state_2016 = (
    lean_by_state_2016
    .drop(columns=event_columns, errors='ignore')
    .join(dem_event_2016)
    .join(rep_event_2016)
    # Left joins leave NaN for states absent from an events file; treat
    # those as zero events.
    .fillna({'dem_event_count': 0, 'rep_event_count': 0})
)

lean_by_state_2016

Unnamed: 0_level_0,partisan_lean,dem_event_count,rep_event_count
state_po,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AK,-19.786667,0.0,0.0
AL,-16.338333,0.0,0.0
AR,-4.015,0.0,0.0
AZ,-5.676667,3.0,7.0
CA,15.87,0.0,1.0
CO,0.695,3.0,16.0
CT,15.353333,0.0,1.0
DC,79.5,0.0,0.0
DE,14.62,0.0,0.0
FL,0.411667,36.0,35.0


In [118]:

# Actual 2016 two-party margin per state (Democratic share minus Republican
# share, in percentage points), added to lean_by_state_2016 as `margin`.
# Depends on `parties` defined in the partisan-lean cell.
margin_2016 = (
    results_df[results_df['year'].isin([2016]) & results_df['party'].isin(parties)]
    # .assign returns a new frame, so the column is never written onto a
    # filtered slice of results_df (avoids SettingWithCopyWarning).
    .assign(
        vote_share=lambda df: (100 * (df['candidatevotes'] / df['totalvotes'])).round(2)
    )
    # NOTE(review): drops same-party rows with <= 1% share (the original note
    # attributes these to a few votes for Sanders/Kasich in 2016) so that the
    # (year, state_po, party) index below is unique.
    .loc[lambda df: df['vote_share'] > 1]
    .filter(items=['year', 'state_po', 'party', 'vote_share'])
    .set_index(['year', 'state_po', 'party'], verify_integrity=True)
    .unstack(level='party')
)

margin_2016['margin'] = (
    margin_2016['vote_share']['democrat'] - margin_2016['vote_share']['republican']
)

margin_2016 = (
    margin_2016.groupby('state_po')['margin'].mean().to_frame()
    .sort_values(by=['margin'])
)

# Drop a `margin` column left over from a previous run of this cell before
# joining; otherwise re-running raises on the overlapping column name.
lean_by_state_2016 = (
    lean_by_state_2016
    .drop(columns=['margin'], errors='ignore')
    .join(margin_2016)
)

lean_by_state_2016

Unnamed: 0_level_0,partisan_lean,dem_event_count,rep_event_count,margin
state_po,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AK,-19.786667,0.0,0.0,-14.73
AL,-16.338333,0.0,0.0,-27.72
AR,-4.015,0.0,0.0,-26.92
AZ,-5.676667,3.0,7.0,-3.54
CA,15.87,0.0,1.0,30.11
CO,0.695,3.0,16.0,4.91
CT,15.353333,0.0,1.0,13.64
DC,79.5,0.0,0.0,86.41
DE,14.62,0.0,0.0,11.43
FL,0.411667,36.0,35.0,-1.2


In [111]:
from sklearn import linear_model
import statsmodels.api as sm




Unnamed: 0_level_0,vote_diff,dem_event_count,rep_event_count,margin
state_po,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
AK,-19.786667,0.0,0.0,-14.73
AL,-16.338333,0.0,0.0,-27.72
AR,-4.015,0.0,0.0,-26.92
AZ,-5.676667,3.0,7.0,-3.54
CA,15.87,0.0,1.0,30.11
CO,0.695,3.0,16.0,4.91
CT,15.353333,0.0,1.0,13.64
DC,79.5,0.0,0.0,86.41
DE,14.62,0.0,0.0,11.43
FL,0.411667,36.0,35.0,-1.2
