In [1]:
import scipy.stats
import matplotlib.pyplot as plt


from scipy import stats
from scipy.optimize import curve_fit
from ipywidgets import interact, interactive, fixed
import ipywidgets as widgets
import seaborn as sns

import pandas as pd
import numpy as np
import folium

%matplotlib inline



We will analyze the electoral margins for each state over time.

In [2]:
indDataset = pd.read_csv('indResults.csv')
indDataset

Unnamed: 0,State,1976D,1976R,1980R,1980D,1984R,1984D,1988R,1988D,1992D,...,2000R,2000D,2004R,2004D,2008D,2008R,2012D,2012R,2016D,2016R
0,Alabama,659170.0,504070.0,654192.0,636730.0,872849.0,551899.0,815576.0,549506.0,690080.0,...,941173.0,692611.0,1176394.0,693933.0,813479.0,1266546.0,795696.0,1255925.0,729547.0,1318255.0
1,Alaska,44058.0,71555.0,86112.0,41842.0,138377.0,62007.0,119251.0,72584.0,78294.0,...,167398.0,79004.0,190889.0,111025.0,123594.0,193841.0,122640.0,164676.0,116454.0,163387.0
2,Arizona,295602.0,418642.0,529688.0,246843.0,681416.0,333854.0,702541.0,454029.0,543050.0,...,781652.0,685341.0,1104294.0,893524.0,1034707.0,1230111.0,1025232.0,1233654.0,1161167.0,1252401.0
3,Arkansas,499614.0,268753.0,403164.0,398041.0,534774.0,338646.0,466578.0,349237.0,505823.0,...,472940.0,422768.0,572898.0,469953.0,422310.0,638017.0,394409.0,647744.0,380494.0,684872.0
4,California,3742284.0,3882244.0,4524858.0,3083661.0,5467009.0,3922519.0,5054917.0,4702233.0,5121325.0,...,4567429.0,5861203.0,5509826.0,6745485.0,8274473.0,5011781.0,7854285.0,4839958.0,8753788.0,4483810.0
5,Colorado,460353.0,584367.0,652264.0,367973.0,821818.0,454974.0,728177.0,621453.0,629681.0,...,883748.0,738227.0,1101255.0,1001732.0,1288633.0,1073629.0,1323101.0,1185243.0,1338870.0,1202484.0
6,Connecticut,647895.0,719261.0,677210.0,541732.0,890877.0,569597.0,750241.0,676584.0,682318.0,...,561094.0,816015.0,693826.0,857488.0,997772.0,629428.0,905083.0,634892.0,897572.0,673215.0
7,Delaware,122596.0,109831.0,111252.0,105754.0,152190.0,101656.0,139639.0,108647.0,126054.0,...,137288.0,180068.0,171660.0,200152.0,255459.0,152374.0,242584.0,165484.0,235603.0,185127.0
8,D.C.,137818.0,27873.0,23313.0,130231.0,29009.0,180408.0,27590.0,159407.0,192619.0,...,18073.0,171923.0,21256.0,202970.0,245800.0,17367.0,267070.0,21381.0,282830.0,12723.0
9,Florida,1636000.0,1469531.0,2046951.0,1419475.0,2730350.0,1448816.0,2618885.0,1656701.0,2072698.0,...,2912790.0,2912253.0,3964522.0,3583544.0,4282074.0,4045624.0,4237756.0,4163447.0,4504975.0,4617886.0


Lets see if there are any missing values to deal with.

In [3]:
indDataset.isnull().sum()
#Not at all

State    0
1976D    0
1976R    0
1980R    0
1980D    0
1984R    0
1984D    0
1988R    0
1988D    0
1992D    0
1992R    0
1996D    0
1996R    0
2000R    0
2000D    0
2004R    0
2004D    0
2008D    0
2008R    0
2012D    0
2012R    0
2016D    0
2016R    0
dtype: int64

Compute the percentage margins for each state, out of the total 2 party vote. This is done bc no third-party candidate has won a state in a presidential election since 1968. Additionally, it enables to better understand long-term trends in the partisan lean of the states.

In [4]:
for year in range(1976, 2020, 4):
    totalVotes = indDataset[str(year)+"D"] + indDataset[str(year)+"R"]
    indDataset[str(year)+'D%'] = indDataset[str(year)+"D"]/totalVotes
    indDataset[str(year)+'R%'] = indDataset[str(year)+"R"]/totalVotes

indDataset  

Unnamed: 0,State,1976D,1976R,1980R,1980D,1984R,1984D,1988R,1988D,1992D,...,2000D%,2000R%,2004D%,2004R%,2008D%,2008R%,2012D%,2012R%,2016D%,2016R%
0,Alabama,659170.0,504070.0,654192.0,636730.0,872849.0,551899.0,815576.0,549506.0,690080.0,...,0.423931,0.576069,0.371022,0.628978,0.391091,0.608909,0.387838,0.612162,0.356259,0.643741
1,Alaska,44058.0,71555.0,86112.0,41842.0,138377.0,62007.0,119251.0,72584.0,78294.0,...,0.320631,0.679369,0.367737,0.632263,0.389352,0.610648,0.426847,0.573153,0.416143,0.583857
2,Arizona,295602.0,418642.0,529688.0,246843.0,681416.0,333854.0,702541.0,454029.0,543050.0,...,0.467174,0.532826,0.44725,0.55275,0.456861,0.543139,0.453866,0.546134,0.4811,0.5189
3,Arkansas,499614.0,268753.0,403164.0,398041.0,534774.0,338646.0,466578.0,349237.0,505823.0,...,0.471993,0.528007,0.450643,0.549357,0.398283,0.601717,0.378456,0.621544,0.357149,0.642851
4,California,3742284.0,3882244.0,4524858.0,3083661.0,5467009.0,3922519.0,5054917.0,4702233.0,5121325.0,...,0.56203,0.43797,0.550413,0.449587,0.622784,0.377216,0.618728,0.381272,0.661282,0.338718
5,Colorado,460353.0,584367.0,652264.0,367973.0,821818.0,454974.0,728177.0,621453.0,629681.0,...,0.455141,0.544859,0.476338,0.523662,0.545508,0.454492,0.52748,0.47252,0.526833,0.473167
6,Connecticut,647895.0,719261.0,677210.0,541732.0,890877.0,569597.0,750241.0,676584.0,682318.0,...,0.592557,0.407443,0.552749,0.447251,0.613183,0.386817,0.587726,0.412274,0.571415,0.428585
7,Delaware,122596.0,109831.0,111252.0,105754.0,152190.0,101656.0,139639.0,108647.0,126054.0,...,0.567401,0.432599,0.538315,0.461685,0.626381,0.373619,0.59447,0.40553,0.559986,0.440014
8,D.C.,137818.0,27873.0,23313.0,130231.0,29009.0,180408.0,27590.0,159407.0,192619.0,...,0.904877,0.095123,0.905203,0.094797,0.934008,0.065992,0.925876,0.074124,0.956952,0.043048
9,Florida,1636000.0,1469531.0,2046951.0,1419475.0,2730350.0,1448816.0,2618885.0,1656701.0,2072698.0,...,0.499954,0.500046,0.474763,0.525237,0.514197,0.485803,0.504423,0.495577,0.493812,0.506188


In [5]:
#Computing partisan lean for each state for each election year
for year in range(1976, 2020, 4):
    realPercentage = indDataset.iloc[51][str(year)+'D%']
    print(realPercentage)
    indDataset['Partisan Lean '+str(year)] = indDataset[str(year)+'D%'] - realPercentage


0.510522856723
0.446946585584
0.408303802565
0.460984413294
0.534549190617
0.547352633198
0.50268045035
0.487560132418
0.536888475121
0.51963862252
0.511132908798


Now that we have all the partisan lean metrics computed, lets create an interactive widget to display the partisan lean for each
state from 1976 to 2016. To do this, we will filter the dataset to get rid of the Total Value row. Then 
we will create a json file to contain all of the geometries for the US states.

In [6]:
state_geo = r'us-states.json'
indDataset = indDataset[indDataset.State != 'Total Value']

The function constructMap contains code to create a choropleth map of the partisan lean for each state. It then will
return the map. We can use the interact function along with a slider widget to let the user slide and see how the partisan lean changes from election year to election year.

In [7]:
def constructMap(year):    
    foliumMap = folium.Map(location=[48, -102], zoom_start=3)
    yearCol = 'Partisan Lean ' + str(year)
    legendCol ='Partisan Lean ' + str(year)
    foliumMap.choropleth(geo_path=state_geo, data=indDataset,
             columns=['State', yearCol],
             key_on='feature.properties.name', threshold_scale=[-.10, -.05, 0, .05, .10],
             fill_color='RdBu', fill_opacity=0.7, line_opacity=0.2,
             legend_name=legendCol)
    return foliumMap

In [8]:
slider = widgets.IntSlider(value=1976, min=1976, max=2016, step=4)
interact(constructMap, year=slider)
None

Feel free to play around with this widget and see what trends you notice! Regions of interest include the Northeast
and the Southeast.

In [9]:
indDataset.columns.values

array(['State', '1976D', '1976R', '1980R', '1980D', '1984R', '1984D',
       '1988R', '1988D', '1992D', '1992R', '1996D', '1996R', '2000R',
       '2000D', '2004R', '2004D', '2008D', '2008R', '2012D', '2012R',
       '2016D', '2016R', '1976D%', '1976R%', '1980D%', '1980R%', '1984D%',
       '1984R%', '1988D%', '1988R%', '1992D%', '1992R%', '1996D%',
       '1996R%', '2000D%', '2000R%', '2004D%', '2004R%', '2008D%',
       '2008R%', '2012D%', '2012R%', '2016D%', '2016R%',
       'Partisan Lean 1976', 'Partisan Lean 1980', 'Partisan Lean 1984',
       'Partisan Lean 1988', 'Partisan Lean 1992', 'Partisan Lean 1996',
       'Partisan Lean 2000', 'Partisan Lean 2004', 'Partisan Lean 2008',
       'Partisan Lean 2012', 'Partisan Lean 2016'], dtype=object)

Lets do some linear/logistic regressions on the trends from 2000 to 2016, and calculate the partisan lean of each state that way.

In [10]:
stateNum = 0
averageRSquared = 0.0
for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    slope, intercept, r_value, p_value, std_err = stats.linregress(yearX, yearY)
    averageRSquared += r_value**2
    stateNum += 1
    
averageRSquared /= 51.0  
print(stateNum)
print("Average R Squared: " + str(averageRSquared))   

51
Average R Squared: 0.46945152595


In [11]:
stateNum = 0
averageRSquared = 0.0

def func(x, a, b):
    return a*np.log(x) + b

for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    popt, pcov = curve_fit(func, yearX, yearY, maxfev=10000)
    residuals = yearY - func(yearX, *popt)
    ss_res = np.sum(residuals**2)
    
    ss_tot = np.sum((yearY - np.mean(yearY))**2)
    r_squared = 1 - (ss_res / ss_tot)

    averageRSquared += r_squared
    stateNum += 1
    
averageRSquared /= 51.0  
print(stateNum)
print("Average R Squared: " + str(averageRSquared))  

51
Average R Squared: 0.459112478968


In [12]:
#Lets use logarithmic regression bc it seems existing trends will probably not last forever
stateNum = 0
for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    popt, pcov = curve_fit(func, yearX, yearY, maxfev=10000)
    for year in range(2020, 2064, 4):
        value = func(i, *popt)
        i += 1
        indDataset.set_value(stateNum, 'Partisan Lean ' + str(year), value)
    stateNum += 1    

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [13]:
indDataset.columns.values

array(['State', '1976D', '1976R', '1980R', '1980D', '1984R', '1984D',
       '1988R', '1988D', '1992D', '1992R', '1996D', '1996R', '2000R',
       '2000D', '2004R', '2004D', '2008D', '2008R', '2012D', '2012R',
       '2016D', '2016R', '1976D%', '1976R%', '1980D%', '1980R%', '1984D%',
       '1984R%', '1988D%', '1988R%', '1992D%', '1992R%', '1996D%',
       '1996R%', '2000D%', '2000R%', '2004D%', '2004R%', '2008D%',
       '2008R%', '2012D%', '2012R%', '2016D%', '2016R%',
       'Partisan Lean 1976', 'Partisan Lean 1980', 'Partisan Lean 1984',
       'Partisan Lean 1988', 'Partisan Lean 1992', 'Partisan Lean 1996',
       'Partisan Lean 2000', 'Partisan Lean 2004', 'Partisan Lean 2008',
       'Partisan Lean 2012', 'Partisan Lean 2016', 'Partisan Lean 2020',
       'Partisan Lean 2024', 'Partisan Lean 2028', 'Partisan Lean 2032',
       'Partisan Lean 2036', 'Partisan Lean 2040', 'Partisan Lean 2044',
       'Partisan Lean 2048', 'Partisan Lean 2052', 'Partisan Lean 2056',
       'Parti

In [14]:
#logarithmic trend widget- 1976-2060
slider = widgets.IntSlider(value=1976, min=1976, max=2060, step=4)
interact(constructMap, year=slider)
None

In [15]:
#trend here seems to be relatively stable, aside from the southeast becoming more of a swing region
#and Missippi and indiana starting to trend more liberal. Yet the Upper Midwest still leans somewhat Democratic.

In [16]:
#Lets look at using the linear trend for extrapolation
stateNum = 0
for state in indDataset['State']:
    yearX = []
    yearY = []
    i = 1
    for year in range(2000, 2020, 4):
        yearX.append(i)
        yearY.append(indDataset.iloc[stateNum]['Partisan Lean ' + str(year)])
        i += 1
    
    slope, intercept, r_value, p_value, std_err = stats.linregress(yearX, yearY)
    for year in range(2020, 2064, 4):
        value = (slope * i) + intercept
        i += 1
        indDataset.set_value(stateNum, 'Partisan Lean ' + str(year), value)
    stateNum += 1   

In [17]:
#logarithmic trend widget- 1976-2060
slider = widgets.IntSlider(value=1976, min=1976, max=2060, step=4)
interact(constructMap, year=slider)
None