In [1]:
# import all of the libraries
import pandas as pd
import numpy as np
import requests
import plotly.graph_objs as go
import plotly.offline as py
import plotly.express as px
from plotly.subplots import make_subplots
import re
#import dash
#import dash_core_components as dcc
#import dash_html_components as html
#import chart_studio.plotly as py
#import matplotlib.pyplot as plt
#import seaborn as sns
#%matplotlib inline
#import folium
#from folium import plugins
#import ipywidgets
#import geocoder
#from branca.element import Figure

In [2]:
# Load the per-candidate presidential returns into the main dataframe `df`.
# Data source:
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/42MVDX
df = pd.read_csv('data/1976-2016-president.csv')
#df.head()

In [3]:
# share of the total vote (in that row's state and year) won by each candidate
df['pct_vote'] = df['candidatevotes'].div(df['totalvotes']).astype('float64')

# democratic-farmer-labor is a special case for MN: count it as democrat
df = df.replace({'party': 'democratic-farmer-labor'}, 'democrat')

# every party other than the two major ones is collapsed into 'other'
is_major_party = df['party'].isin(['republican', 'democrat'])
df.loc[~is_major_party, 'party'] = 'other'

# reformat candidate names from "LastName, FirstName" to "FirstName LastName"
name_parts = df['candidate'].str.split(', ')
df['candidate'] = name_parts.str[::-1].str.join(' ')

# drop unnecessary columns
df = df.drop(columns=['version', 'notes'])

# drop erroneous duplicate rows (same year/state/candidate), keeping the first
df = df.drop_duplicates(subset=['year', 'state', 'candidate'])

In [4]:
# load electoral results csv
results_ec = pd.read_csv('data/electoral_college.csv')

# Extract the bare candidate name ("First [M. ]Last" or "Other") for easier
# matching in the main dataset. The pattern contains three capture groups, so
# str.extract returns a three-column DataFrame; column 0 is the outermost group
# (the whole match). Assigning the full DataFrame to the single 'candidate'
# column raises ValueError on current pandas, so select column 0 explicitly.
# NOTE(review): the "." in "\w. " is unescaped and matches any character, not
# only a literal period; left unchanged so existing matches stay the same.
results_ec['candidate'] = results_ec['candidate'].str.extract(r'((\w+ (\w. )?\w+)|Other)')[0]

# fix a couple of names so they match the spelling used in the main dataset
name_fixes = {
    'John F. Kerry': 'John Kerry',
    'George Bush': 'George H.W. Bush',
    'Gerald R. Ford': 'Gerald Ford',
    'Michael S. Dukakis': 'Michael Dukakis',
    'Walter F. Mondale': 'Walter Mondale',
    'William J. Clinton': 'Bill Clinton',
    'Albert Gore': 'Al Gore',
    'Barack Obama': 'Barack H. Obama',
    'Donald Trump': 'Donald J. Trump',
}
results_ec = results_ec.replace({'candidate': name_fixes})



In [5]:
# add electoral votes results into main dataframe; rows with no matching
# (year, state, candidate) entry in results_ec get NaN in the merged columns
df = df.merge(results_ec, on=['year', 'state', 'candidate'], how='left')

# Unmatched candidates received no electoral votes. Assign the result back:
# calling fillna(inplace=True) on a column selection acts on an intermediate
# object, which pandas warns about and which does not update df when
# Copy-on-Write is enabled.
df['electoral_votes'] = df['electoral_votes'].fillna(0)

# populate total_electoral_votes: fill the gaps within each (year, state) group
# with that group's maximum value
df['total_electoral_votes'] = (
    df.groupby(['year', 'state'])['total_electoral_votes']
      .transform(lambda x: x.fillna(x.max()))
)

#df.head()

In [6]:
# load state population csv (merged into df on year/state in the next cell)
state_populations = pd.read_csv('data/state_populations.csv')


In [7]:
# add state populations into main dataframe
# (left join on year/state: every row of df is kept, unmatched rows get NaN)
df = df.merge(state_populations, on = ['year','state'], how = 'left')

In [8]:
# preview the first rows of the merged dataframe
df.head()

Unnamed: 0,year,state,state_po,state_fips,state_cen,state_ic,office,candidate,party,writein,candidatevotes,totalvotes,pct_vote,total_electoral_votes,electoral_votes,pop
0,1976,Alabama,AL,1,63,41,US President,Jimmy Carter,democrat,False,659170,1182850,0.557273,9.0,9.0,3713998.8
1,1976,Alabama,AL,1,63,41,US President,Gerald Ford,republican,False,504070,1182850,0.426149,9.0,0.0,3713998.8
2,1976,Alabama,AL,1,63,41,US President,Lester Maddox,other,False,9198,1182850,0.007776,9.0,0.0,3713998.8
3,1976,Alabama,AL,1,63,41,US President,"Benjamin """"Ben"""" Bubar",other,False,6669,1182850,0.005638,9.0,0.0,3713998.8
4,1976,Alabama,AL,1,63,41,US President,Gus Hall,other,False,1954,1182850,0.001652,9.0,0.0,3713998.8


In [9]:
def _democrat_pct(pct_votes):
    """Add up pct_vote over the democrat rows of one (year, state_po) group.

    `pct_votes` is the group's pct_vote Series; its index is used to look the
    full rows up in `df` so the party column can be filtered.
    """
    group_rows = df.loc[pct_votes.index]
    return sum(group_rows.query("party == 'democrat'").pct_vote)


# Per year and state: the row label of the candidate with the most votes
# (stored under 'candidatevotes') and the democrat vote share (for plotting).
winner = df.groupby(['year', 'state_po'], as_index=False).agg(
    {'candidatevotes': 'idxmax', 'pct_vote': _democrat_pct}
)

# look up the party of each winning row
winning_rows = winner['candidatevotes'].tolist()
winner['party'] = df.loc[winning_rows, 'party'].tolist()

# map parties to numeric ids for colouring plots later on
party_ids = {'democrat': 1.0, 'republican': 0.0}
winner['party_id'] = winner['party'].map(party_ids)
winner.head()

Unnamed: 0,year,state_po,candidatevotes,pct_vote,party,party_id
0,1976,AK,7,0.356531,republican,0.0
1,1976,AL,0,0.557273,democrat,1.0
2,1976,AR,19,0.649617,democrat,1.0
3,1976,AZ,11,0.398,republican,0.0
4,1976,CA,23,0.479548,republican,0.0


In [10]:
def new_electoral_count(df_groupby, threshold=.05):
    """Allocate each group's electoral votes in proportion to vote share.

    For every group (intended: one state in one election year) each candidate
    whose ``pct_vote`` exceeds ``threshold`` receives
    ``int(pct_vote * total_electoral_votes)`` votes; everyone else receives 0.
    Votes left over after truncation are handed out one at a time, in row
    order and wrapping around, to the candidates that already hold at least
    one vote, until the group's total is reached.

    Parameters
    ----------
    df_groupby : iterable of (key, DataFrame)
        Typically ``df.groupby([...])``. Each frame needs the columns
        ``pct_vote`` and ``total_electoral_votes``.
    threshold : float, default 0.05
        Minimum vote share (exclusive) needed to receive electoral votes.

    Returns
    -------
    pandas.DataFrame
        All groups stacked in iteration order with their original index and
        an added integer column ``new_electoral_vote``. An empty DataFrame is
        returned when there are no groups. The input frames are not modified.

    Notes
    -----
    Rows with a missing ``pct_vote`` or ``total_electoral_votes`` get 0 votes.
    When no candidate in a group holds a vote there is nobody to give the
    leftover to, so that group stays at 0 rather than dividing by zero.
    """
    allocated_groups = []
    for _, group in df_groupby:
        # work on a copy so the caller's data never gains the new column
        group = group.copy()

        raw_share = group['pct_vote'] * group['total_electoral_votes']
        eligible = group['pct_vote'] > threshold
        # truncate toward zero; below-threshold and missing shares become 0
        votes = raw_share.where(eligible, 0).fillna(0).astype('int').to_numpy().copy()

        state_electoral_votes = group['total_electoral_votes'].max()
        leftover = state_electoral_votes - votes.sum()
        recipients = np.flatnonzero(votes > 0)
        # a NaN leftover compares False here, so such groups are skipped
        if leftover > 0 and len(recipients) > 0:
            # closed form of the round-robin: everyone gets `every`, and the
            # first `extra` recipients get one more
            every, extra = divmod(int(np.ceil(leftover)), len(recipients))
            votes[recipients] += every
            votes[recipients[:extra]] += 1

        group['new_electoral_vote'] = votes
        allocated_groups.append(group)

    if not allocated_groups:
        return pd.DataFrame()
    # a single concat replaces the removed, quadratic DataFrame.append loop
    return pd.concat(allocated_groups)

# allocate electoral votes proportionally within every (year, state) group
test_df = new_electoral_count(df.groupby(['year','state_po']))

# Later cells read df['new_electoral_vote'], which would otherwise not exist
# and raise KeyError. Copy the allocation back into df (aligned on the index).
df['new_electoral_vote'] = test_df['new_electoral_vote']


In [11]:
# yearly totals: proportional allocation next to the recorded electoral votes
test_df.groupby('year')[['new_electoral_vote', 'electoral_votes']].sum()

Unnamed: 0_level_0,new_electoral_vote,electoral_votes
year,Unnamed: 1_level_1,Unnamed: 2_level_1
1976,538,537.0
1980,538,538.0
1984,538,538.0
1988,538,537.0
1992,538,538.0
1996,538,538.0
2000,538,537.0
2004,538,537.0
2008,538,538.0
2012,538,538.0


In [None]:
# add column to calculate electoral vote as percentage of totalvotes
# df['new_electoral_vote'] = df['pct_vote']*df['total_electoral_votes']
# df['new_electoral_vote'].fillna(0, inplace = True)
# df['new_electoral_vote'] = df['new_electoral_vote'].round().astype('int')

In [13]:
# population and votes cast per electoral vote, computed for every row of df
electors = df['total_electoral_votes']
df['electoral_vote_value_pop'] = df['pop'].div(electors)
df['electoral_vote_value_voters'] = df['totalvotes'].div(electors)


In [None]:
# electoral college winner, current method:
# total electoral votes per candidate and year ...
gp = results_ec.groupby(['year', 'candidate'], as_index=False).agg({'electoral_votes': 'sum'})

# ... then, for each year, the row label and value of the largest total
electoral_winner = gp.groupby('year').agg({'electoral_votes': ['idxmax', 'max']})

# resolve the winning row labels to candidate names
top_rows = electoral_winner[('electoral_votes', 'idxmax')].tolist()
electoral_winner['candidate'] = gp.loc[top_rows, 'candidate'].tolist()

# flatten the two-level column index, e.g. ('electoral_votes', 'max') -> 'electoral_votesmax'
electoral_winner.columns = [''.join(levels) for levels in electoral_winner.columns]

In [None]:
# national popular vote winner:
# total votes per candidate and year ...
gp = df.groupby(['year', 'candidate'], as_index=False).agg({'candidatevotes': 'sum'})

# ... then, for each year, the row label and value of the largest total
popular_winner = gp.groupby(['year']).agg({'candidatevotes': ['idxmax', 'max']})

# resolve the winning row labels to candidate names
top_rows = popular_winner[('candidatevotes', 'idxmax')].tolist()
popular_winner['candidate'] = gp.loc[top_rows, 'candidate'].tolist()

# flatten the two-level column index, e.g. ('candidatevotes', 'max') -> 'candidatevotesmax'
popular_winner.columns = [''.join(levels) for levels in popular_winner.columns]

In [None]:
# electoral college winner when votes are allocated by popular vote share:
# total proportional electoral votes per candidate and year ...
gp = df.groupby(['year', 'candidate'], as_index=False).agg({'new_electoral_vote': 'sum'})

# ... then, for each year, the row label and value of the largest total
new_electoral_winner = gp.groupby('year').agg({'new_electoral_vote': ['idxmax', 'max']})

# resolve the winning row labels to candidate names
top_rows = new_electoral_winner[('new_electoral_vote', 'idxmax')].tolist()
new_electoral_winner['candidate'] = gp.loc[top_rows, 'candidate'].tolist()

# flatten the two-level column index, e.g. ('new_electoral_vote', 'max') -> 'new_electoral_votemax'
new_electoral_winner.columns = [''.join(levels) for levels in new_electoral_winner.columns]

In [None]:
# create winner matrix: one row per year with the winning total and candidate
# under each of the three methods, joined on the shared year index
electoral_part = electoral_winner[['electoral_votesmax', 'candidate']]
popular_part = popular_winner[['candidatevotesmax', 'candidate']]
proportional_part = new_electoral_winner[['new_electoral_votemax', 'candidate']]

winner_matrix = (
    electoral_part
    # both sides carry a 'candidate' column, so the suffixes keep them apart
    .join(popular_part, lsuffix = "_electoral", rsuffix = "_popular")
    .join(proportional_part)
)



In [None]:
# clean-up winning matrix: readable column names, then year becomes a column
matrix_columns = [
    'electoral_votes', 'electoral_candidate',
    'popular_votes', 'popular_candidate',
    'new_electoral_votes', 'new_electoral_candidate',
]
winner_matrix.columns = matrix_columns
winner_matrix = winner_matrix.reset_index()

In [None]:
#https://support.sisense.com/hc/en-us/community/posts/360038301533-Plotly-Choropleth-With-Slider-Map-Charts-Over-Time-
# State-level choropleth of the democrat vote share with a slider over years.

# your color-scale
scl = [[0.0, 'red'],[.50, '#5700c9'], [1,'#0515a8']]

# Build one trace per election year. Only the first trace starts visible so the
# initial picture matches the slider's initial position (active=0); otherwise
# every year is drawn on top of the others until the slider is first moved.
# NOTE(review): the colorbar is titled 'Party' although z is the democrat vote
# share; title left unchanged.
election_years = list(winner['year'].unique())
data_slider = []
for position, year in enumerate(election_years):
    df_segmented = winner[winner['year'] == year]
    data_slider.append(dict(
        type='choropleth',
        locations=df_segmented['state_po'],
        z=df_segmented['pct_vote'].astype(float),
        locationmode='USA-states',
        colorscale=scl,
        colorbar={'title': 'Party'},
        visible=(position == 0)))

# One slider step per year: show that year's trace and hide all the others.
# Labels come from the data instead of assuming a fixed four-year cadence
# starting at a hard-coded year.
steps = []
for position, year in enumerate(election_years):
    shown = [False] * len(data_slider)
    shown[position] = True
    steps.append(dict(method='restyle',
                      args=['visible', shown],
                      label='Year {}'.format(year)))

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

layout = dict(title ='Presidential Wins', geo=dict(scope='usa',
                       projection={'type': 'albers usa'}),
              sliders=sliders)

fig = dict(data=data_slider, layout=layout)
py.plot(fig)

In [None]:
# Dashboard with a year slider: pie of the national vote split (top left),
# popular-vote-winner card (bottom left) and a state choropleth of the
# democrat vote share (right, spanning both rows).
fig = make_subplots(rows=2, cols=2, specs=[[{"type": "pie"},{"type": "choropleth","rowspan":2}],
                                           [{"type": "indicator"},None ]],
                   subplot_titles=("National Split", "% Democratic Vote"), )

scl = [[0.0, 'red'],[.50, '#5700c9'], [1,'#0515a8']]

# every election year contributes exactly this many traces, in this order:
# choropleth, pie, indicator
TRACES_PER_YEAR = 3

election_years = list(winner['year'].unique())
for position, year in enumerate(election_years):
    # only the first year's traces start visible, matching the slider's
    # initial position; otherwise all years are drawn on top of each other
    is_first_year = position == 0

    # Choropleth for each year
    df_segmented = winner[winner['year'] == year]
    fig.add_trace(
        go.Choropleth(
            locations = df_segmented['state_po'],
            z=df_segmented['pct_vote'].astype(float),
            locationmode='USA-states',
            colorscale = scl,
            colorbar= {'title':'Party'}, text = df_segmented['party'],
            # hoverinfo is a '+'-joined flag list; a Python list would be read
            # as one setting per data point instead
            hoverinfo = "location+z+text",
            visible = is_first_year), row = 1, col = 2)

    # Pie Chart for each year (parties sorted alphabetically to line up with
    # the colour list)
    full_election_year = df[df['year'] == year].groupby('party',as_index = False).agg({'candidatevotes':'sum'}).sort_values(by = 'party')
    fig.add_trace(go.Pie(values = full_election_year['candidatevotes'],
           labels = full_election_year['party'], marker = {'colors':['blue','green','red']},
           visible = is_first_year), row = 1, col = 1)

    # Card: popular_winner is indexed by year and holds that year's popular
    # vote winner; the previously referenced national_winner is not defined
    # anywhere in the visible notebook.
    fig.add_trace(
        go.Indicator(
            mode = "number",
            value = year,
            title = {"text": f"Popular Vote Winner <br><span style='font-size:0.8em;color:gray'>{popular_winner.loc[year,'candidate']} </span>"},
            visible = is_first_year), row =2, col = 1)

# One slider step per year that shows that year's three traces and hides the
# rest. Labels are taken from the data rather than derived arithmetically from
# the trace index.
steps = []
for position, year in enumerate(election_years):
    shown = [False] * len(fig.data)
    first_trace = position * TRACES_PER_YEAR
    shown[first_trace:first_trace + TRACES_PER_YEAR] = [True] * TRACES_PER_YEAR
    steps.append(dict(method='restyle',
                      args=['visible', shown],
                      label='Year {}'.format(year)))

sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

fig.update_layout(sliders=sliders, title ='Presidential Wins')
fig.update_geos(scope='usa', projection={'type': 'albers usa'})
fig.update_layout(legend= {'yanchor':"top",  'y':0.99, 'xanchor':"left",'x':0.01})
#fig.update_layout(showlegend = False)
#fig.update_layout(width=1000,height=800)
py.plot(fig)

In [None]:
# animated (one frame per year) choropleth of the democrat vote share,
# written to a standalone HTML file
scl = [[0.0, 'red'],[.50, '#5700c9'], [1,'#0515a8']]
choropleth_options = dict(
    locations='state_po',
    locationmode="USA-states",
    color='pct_vote',
    scope="usa",
    animation_frame ="year",
    color_continuous_scale = scl,
    title = "Percentage of Votes for Democrats",
)
fig = px.choropleth(winner, **choropleth_options)

py.plot(fig, filename = 'choropleth_election_map.html')

In [None]:
# total proportionally allocated electoral votes per election year
# (requires df to already contain a 'new_electoral_vote' column)
df.groupby(['year']).agg({'new_electoral_vote':'sum'})

In [None]:
# Rename the six result columns of winner_matrix. Once the clean-up cell has
# run reset_index(), the frame also has a leading 'year' column, and assigning
# exactly six names then fails with a length-mismatch ValueError. Only the
# trailing six columns are renamed, so this works before or after that step.
result_columns = ['electoral_votes','electoral_candidate','popular_votes','popular_candidate','new_electoral_votes','new_electoral_candidate']
leading_columns = list(winner_matrix.columns[:-len(result_columns)])
winner_matrix.columns = leading_columns + result_columns

In [None]:
# display the winner matrix
winner_matrix

In [None]:
# per candidate and year: recorded electoral votes vs. the proportional allocation
compare_electoral_college = df.groupby(['year','candidate'], as_index = False).agg({'electoral_votes':'sum','new_electoral_vote':'sum'})

fig = go.Figure()

# one marker series per allocation method: (column, marker symbol, legend name)
election_year = compare_electoral_college['year']
marker_series = [
    ('electoral_votes', 'circle', 'Traditional Electoral College'),
    ('new_electoral_vote', 'diamond', 'Popular Vote Electoral College'),
]
for column, symbol, label in marker_series:
    fig.add_trace(go.Scatter(mode = 'markers', x = election_year,
                             y = compare_electoral_college[column],
                             marker_symbol = symbol, name = label))

# horizontal reference line at the 270 votes needed to win
x = list(range(1974,2020,4))
y = [270]*len(x)
fig.add_trace(go.Scatter(x=x, y=y, mode='lines', name = '270 to win'))


In [21]:
# population per electoral vote by state, coloured by election year
px.scatter(df, x ='state' , y = 'electoral_vote_value_pop', color = 'year')

In [22]:
# votes cast per electoral vote by state, coloured by election year
px.scatter(df, x ='state' , y = 'electoral_vote_value_voters', color = 'year')