In [44]:
import scipy.stats
import matplotlib.pyplot as plt


from scipy import stats
from scipy.optimize import curve_fit
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import seaborn as sns

import pandas as pd
import numpy as np
import folium

%matplotlib inline



We will analyze the electoral margins for each state over time.

In [2]:
indDataset = pd.read_csv('indResults.csv')
indDataset.head()

Unnamed: 0,State,1976D,1976R,1980R,1980D,1984R,1984D,1988R,1988D,1992D,...,2000R,2000D,2004R,2004D,2008D,2008R,2012D,2012R,2016D,2016R
0,Alabama,659170.0,504070.0,654192.0,636730.0,872849.0,551899.0,815576.0,549506.0,690080.0,...,941173.0,692611.0,1176394.0,693933.0,813479.0,1266546.0,795696.0,1255925.0,729547.0,1318255.0
1,Alaska,44058.0,71555.0,86112.0,41842.0,138377.0,62007.0,119251.0,72584.0,78294.0,...,167398.0,79004.0,190889.0,111025.0,123594.0,193841.0,122640.0,164676.0,116454.0,163387.0
2,Arizona,295602.0,418642.0,529688.0,246843.0,681416.0,333854.0,702541.0,454029.0,543050.0,...,781652.0,685341.0,1104294.0,893524.0,1034707.0,1230111.0,1025232.0,1233654.0,1161167.0,1252401.0
3,Arkansas,499614.0,268753.0,403164.0,398041.0,534774.0,338646.0,466578.0,349237.0,505823.0,...,472940.0,422768.0,572898.0,469953.0,422310.0,638017.0,394409.0,647744.0,380494.0,684872.0
4,California,3742284.0,3882244.0,4524858.0,3083661.0,5467009.0,3922519.0,5054917.0,4702233.0,5121325.0,...,4567429.0,5861203.0,5509826.0,6745485.0,8274473.0,5011781.0,7854285.0,4839958.0,8753788.0,4483810.0


Lets see if there are any missing values to deal with.

In [3]:
indDataset.isnull().sum()
#Not at all

State    0
1976D    0
1976R    0
1980R    0
1980D    0
1984R    0
1984D    0
1988R    0
1988D    0
1992D    0
1992R    0
1996D    0
1996R    0
2000R    0
2000D    0
2004R    0
2004D    0
2008D    0
2008R    0
2012D    0
2012R    0
2016D    0
2016R    0
dtype: int64

Compute the percentage margins for each state, out of the total 2 party vote. This is done bc no third-party candidate has won a state in a presidential election since 1968. Additionally, it enables to better understand long-term trends in the partisan lean of the states.

In [4]:
for year in range(1976, 2020, 4):
    totalVotes = indDataset[str(year)+"D"] + indDataset[str(year)+"R"]
    indDataset[str(year)+'D%'] = indDataset[str(year)+"D"]/totalVotes
    indDataset[str(year)+'R%'] = indDataset[str(year)+"R"]/totalVotes

indDataset.head()  

Unnamed: 0,State,1976D,1976R,1980R,1980D,1984R,1984D,1988R,1988D,1992D,...,2000D%,2000R%,2004D%,2004R%,2008D%,2008R%,2012D%,2012R%,2016D%,2016R%
0,Alabama,659170.0,504070.0,654192.0,636730.0,872849.0,551899.0,815576.0,549506.0,690080.0,...,0.423931,0.576069,0.371022,0.628978,0.391091,0.608909,0.387838,0.612162,0.356259,0.643741
1,Alaska,44058.0,71555.0,86112.0,41842.0,138377.0,62007.0,119251.0,72584.0,78294.0,...,0.320631,0.679369,0.367737,0.632263,0.389352,0.610648,0.426847,0.573153,0.416143,0.583857
2,Arizona,295602.0,418642.0,529688.0,246843.0,681416.0,333854.0,702541.0,454029.0,543050.0,...,0.467174,0.532826,0.44725,0.55275,0.456861,0.543139,0.453866,0.546134,0.4811,0.5189
3,Arkansas,499614.0,268753.0,403164.0,398041.0,534774.0,338646.0,466578.0,349237.0,505823.0,...,0.471993,0.528007,0.450643,0.549357,0.398283,0.601717,0.378456,0.621544,0.357149,0.642851
4,California,3742284.0,3882244.0,4524858.0,3083661.0,5467009.0,3922519.0,5054917.0,4702233.0,5121325.0,...,0.56203,0.43797,0.550413,0.449587,0.622784,0.377216,0.618728,0.381272,0.661282,0.338718


In [5]:
#Computing partisan lean for each state for each election year
for year in range(1976, 2020, 4):
    realPercentage = indDataset.iloc[51][str(year)+'D%']
    indDataset['Partisan Lean '+str(year)] = indDataset[str(year)+'D%'] - realPercentage

indDataset.head()  

Unnamed: 0,State,1976D,1976R,1980R,1980D,1984R,1984D,1988R,1988D,1992D,...,Partisan Lean 1980,Partisan Lean 1984,Partisan Lean 1988,Partisan Lean 1992,Partisan Lean 1996,Partisan Lean 2000,Partisan Lean 2004,Partisan Lean 2008,Partisan Lean 2012,Partisan Lean 2016
0,Alabama,659170.0,504070.0,654192.0,636730.0,872849.0,551899.0,815576.0,549506.0,690080.0,...,0.04629,-0.020938,-0.05844,-0.07276,-0.084691,-0.07875,-0.116538,-0.145798,-0.131801,-0.154874
1,Alaska,44058.0,71555.0,86112.0,41842.0,138377.0,62007.0,119251.0,72584.0,78294.0,...,-0.119938,-0.098863,-0.082618,-0.100292,-0.151638,-0.18205,-0.119823,-0.147536,-0.092792,-0.094989
2,Arizona,295602.0,418642.0,529688.0,246843.0,681416.0,333854.0,702541.0,454029.0,543050.0,...,-0.129067,-0.079471,-0.068419,-0.047568,-0.035115,-0.035506,-0.04031,-0.080027,-0.065772,-0.030033
3,Arkansas,499614.0,268753.0,403164.0,398041.0,534774.0,338646.0,466578.0,349237.0,505823.0,...,0.049856,-0.02058,-0.032901,0.065373,0.046176,-0.030687,-0.036918,-0.138606,-0.141183,-0.153984
4,California,3742284.0,3882244.0,4524858.0,3083661.0,5467009.0,3922519.0,5054917.0,4702233.0,5121325.0,...,-0.041656,0.009451,0.020942,0.050618,0.02481,0.059349,0.062853,0.085896,0.099089,0.150149


Now that we have all the partisan lean metrics computed, lets create an interactive widget to display the partisan lean for each
state from 1976 to 2016. To do this, we will filter the dataset to get rid of the Total Value row. Then 
we will create a json file to contain all of the geometries for the US states.

In [6]:
state_geo = r'us-states.json'
indDataset = indDataset[indDataset.State != 'Total Value']

The function constructMap contains code to create a choropleth map of the partisan lean for each state. It then will
return the map. We can use the interact function along with a slider widget to let the user slide and see how the partisan lean changes from election year to election year.

In [49]:
def constructMap(year):    
    foliumMap = folium.Map(location=[48, -102], zoom_start=3)
    yearCol = 'Partisan Lean ' + str(year)
    legendCol ='Partisan Lean ' + str(year)
    foliumMap.choropleth(geo_path=state_geo, data=indDataset,
             columns=['State', yearCol],
             key_on='feature.properties.name', threshold_scale=[-.10, -.05, 0, .05, .10],
             fill_color='RdBu', fill_opacity=0.7, line_opacity=0.2,
             legend_name=legendCol)
    return foliumMap

In [50]:
slider = widgets.IntSlider(value=1976, min=1976, max=2016, step=4)
interact(constructMap, year=slider)
None

Feel free to play around with this widget and see what trends you notice! Regions of interest include the Northeast
and the Southeast.

In [10]:
indDataset.columns.values

array(['State', '1976D', '1976R', '1980R', '1980D', '1984R', '1984D',
       '1988R', '1988D', '1992D', '1992R', '1996D', '1996R', '2000R',
       '2000D', '2004R', '2004D', '2008D', '2008R', '2012D', '2012R',
       '2016D', '2016R', '1976D%', '1976R%', '1980D%', '1980R%', '1984D%',
       '1984R%', '1988D%', '1988R%', '1992D%', '1992R%', '1996D%',
       '1996R%', '2000D%', '2000R%', '2004D%', '2004R%', '2008D%',
       '2008R%', '2012D%', '2012R%', '2016D%', '2016R%',
       'Partisan Lean 1976', 'Partisan Lean 1980', 'Partisan Lean 1984',
       'Partisan Lean 1988', 'Partisan Lean 1992', 'Partisan Lean 1996',
       'Partisan Lean 2000', 'Partisan Lean 2004', 'Partisan Lean 2008',
       'Partisan Lean 2012', 'Partisan Lean 2016'], dtype=object)

Lets do some linear/logistic regressions on the trends from 2000 to 2016, and calculate the partisan lean of each state that way.

In [31]:
stateNum = 0
averageRSquared = 0.0
for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    slope, intercept, r_value, p_value, std_err = stats.linregress(yearX, yearY)
    averageRSquared += r_value**2
    stateNum += 1
    
averageRSquared /= 51.0  
print(stateNum)
print("Average R Squared: " + str(averageRSquared))   

51
Average R Squared: 0.46945152595


In [53]:
stateNum = 0
averageRSquared = 0.0

def func(x, a, b, c):
    return (x/(c+np.exp(-b*x)))

for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    popt, pcov = curve_fit(func, yearX, yearY)
    residuals = yearY - f(yearX, popt)
    ss_res = numpy.sum(residuals**2)
    
    ss_tot = numpy.sum((ydata-numpy.mean(ydata))**2)
    r_squared = 1 - (ss_res / ss_tot)

    averageRSquared += r_squared
    stateNum += 1
    
averageRSquared /= 51.0  
print(stateNum)
print("Average R Squared: " + str(averageRSquared))  

SyntaxError: invalid syntax (<ipython-input-53-2df50632ab97>, line 16)