<a href="https://colab.research.google.com/github/sasukewong/US-Electrion-Poll/blob/main/US_Election_Poll_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [57]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import warnings

# Display every column when a DataFrame is rendered.
pd.set_option('display.max_columns', None)

# Suppress all Python warnings for the rest of the session.
# NOTE: this also hides pandas warnings such as SettingWithCopyWarning.
warnings.filterwarnings("ignore")

# States treated as swing states throughout the notebook (listed alphabetically).
swing_state = [
    "Arizona",
    "Georgia",
    "Michigan",
    "Nevada",
    "North Carolina",
    "Pennsylvania",
    "Wisconsin",
]

# Poll columns selected before the polls are aggregated.
keep = [
    'state',
    'end_date',
    'sample_size',
    'party',
    'answer',
    'pct',
]

## State and State Code

In [58]:
# Lookup table of US states; its two columns are renamed to `state` and `code`.
state_code = (
    pd.read_csv("https://raw.githubusercontent.com/jasonong/List-of-US-States/master/states.csv")
    .set_axis(['state', 'code'], axis=1)
)

## Electoral College Votes

In [59]:
import requests
from bs4 import BeautifulSoup
import csv

# Scrape the Electoral College allocation table from the National Archives.
# Every non-empty <td> becomes one string in `data`, in document order; the
# next cell splits those strings on " - " into a state and a vote count.
url = "https://www.archives.gov/electoral-college/allocation"

# A timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops an HTTP error page from being parsed as data.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = BeautifulSoup(response.content, "html.parser")
table = soup.find("table")
if table is None:
    raise ValueError(f"No <table> element found at {url}")

data = []
for row in table.find_all("tr"):
    # Iterate over whatever <td> cells the row has instead of indexing
    # cells[0..2]; header rows (<th> only) or short rows no longer raise
    # IndexError, and extra columns are not silently dropped.
    for cell in row.find_all("td"):
        entry = cell.text.strip()
        if entry:
            data.append(entry)

In [60]:
# Turn the scraped "State - N ..." strings into a two-column frame:
#   state -> text before the first " - "
#   votes -> first run of digits in the text after it (kept as a string here;
#            it is cast to int later, after merging with the poll results)
scraped = pd.DataFrame(data, columns=['c'])
parts = scraped['c'].str.split(" - ", expand=True)  # split once, reuse both halves
votes = pd.DataFrame({
    'state': parts[0],
    # Raw string: '\d' in a normal string literal is an invalid escape sequence.
    'votes': parts[1].str.extract(r'(\d+)', expand=False),
})

## Poll data cleansing function

In [61]:
def poll_data_cleasing(df, state_codes=None):
  """Aggregate raw poll rows into two-party vote shares per state.

  Each DEM/REP poll row is converted into an approximate respondent count
  (``sample_size * pct / 100``, rounded). Counts are summed per state and
  party, so larger polls contribute more, and each party's share of the
  combined DEM+REP total is reported as a percentage.

  Args:
    df: Poll rows with at least the columns ``state``, ``party``,
      ``sample_size`` and ``pct``. Rows whose party is not ``'DEM'`` or
      ``'REP'`` are ignored. The frame is not modified.
    state_codes: Optional lookup frame with ``state`` and ``code`` columns.
      Defaults to the notebook-level ``state_code`` table. States that do not
      appear in the lookup are dropped from the result.

  Returns:
    A DataFrame sorted by state with the columns ``state``, ``code``,
    ``DEM_votes``, ``REP_votes``, ``percent_rep`` and ``percent_dem``. If the
    result has no "District of Columbia" row, a fixed 90/10
    Democratic/Republican placeholder row is appended last.
  """
  if state_codes is None:
    # Fall back to the lookup table loaded at notebook level.
    state_codes = state_code

  # Work on a filtered copy so the caller's frame is never mutated.
  two_party = df.loc[
      df['party'].isin(['DEM', 'REP']),
      ['state', 'party', 'sample_size', 'pct'],
  ].copy()
  two_party['actual_number'] = (two_party['sample_size'] * two_party['pct'] / 100).round()

  # One column per party, zero where the row belongs to the other party.
  # Both columns always exist, even if only one party appears in the input.
  two_party['DEM_votes'] = two_party['actual_number'].where(two_party['party'] == 'DEM', 0.0)
  two_party['REP_votes'] = two_party['actual_number'].where(two_party['party'] == 'REP', 0.0)
  tallies = two_party.groupby('state', as_index=False)[['DEM_votes', 'REP_votes']].sum()

  # The inner join keeps only states present in the lookup table.
  result = tallies.merge(state_codes[['state', 'code']], how='inner', on='state')
  result = result[['state', 'code', 'DEM_votes', 'REP_votes']]

  combined = result['REP_votes'] + result['DEM_votes']
  result['percent_rep'] = result['REP_votes'] * 100 / combined
  result['percent_dem'] = result['DEM_votes'] * 100 / combined

  # Add the DC placeholder only when DC is absent, so it is never duplicated.
  # Built by column name so the vote counts and percentages cannot be swapped.
  if not (result['state'] == "District of Columbia").any():
    dc_row = pd.DataFrame([{
        'state': "District of Columbia",
        'code': "DC",
        'DEM_votes': 90.0,
        'REP_votes': 10.0,
        'percent_rep': 10.0,
        'percent_dem': 90.0,
    }])
    result = pd.concat([result, dc_row], ignore_index=True)
  return result

## Since the polls always underestimate the Republican vote, we apply a weighting based on how the 2016 and 2020 poll results compared with the real results


In [62]:
# FiveThirtyEight presidential polling averages file (1968-2016 per its name).
# Loaded here but not referenced again in the visible cells.
poll_history = pd.read_csv(
    "https://raw.githubusercontent.com/fivethirtyeight/data/refs/heads/master/polls/pres_pollaverages_1968-2016.csv"
)

### 2020 Election

In [63]:
# Actual 2020 results: Republican share of the two-party vote in each swing state.
election_results = pd.read_csv("https://raw.githubusercontent.com/sasukewong/US-Electrion-Poll/refs/heads/main/1976-2020-president.csv")

# Keep only 2020 rows for the two major parties; .copy() so the column
# assignments below act on an independent frame rather than a slice.
two_party_2020 = election_results.loc[
    (election_results['year'] == 2020)
    & election_results['party_simplified'].isin(['DEMOCRAT', 'REPUBLICAN']),
    ['state', 'party_simplified', 'candidatevotes'],
].copy()

# One vote column per party (zero for the other party's rows), summed per state.
is_republican = two_party_2020['party_simplified'] == 'REPUBLICAN'
two_party_2020['DEM_votes'] = two_party_2020['candidatevotes'].where(~is_republican, 0)
two_party_2020['REP_votes'] = two_party_2020['candidatevotes'].where(is_republican, 0)
election_results_2020 = (
    two_party_2020.groupby('state')[['DEM_votes', 'REP_votes']].sum().reset_index()
)
election_results_2020['percent_rep'] = (
    election_results_2020['REP_votes'] * 100
    / (election_results_2020['REP_votes'] + election_results_2020['DEM_votes'])
)

# State names in this file are upper-case. Map them back to the spelling used
# in `swing_state` by name, not by row position, so the result does not depend
# on the two lists happening to be in the same order.
swing_state_by_upper = {name.upper(): name for name in swing_state}
election_results_2020 = election_results_2020.loc[
    election_results_2020['state'].isin(list(swing_state_by_upper)),
    ['state', 'percent_rep'],
].copy()
election_results_2020['state'] = election_results_2020['state'].map(swing_state_by_upper)
election_results_2020

Unnamed: 0,state,percent_rep
2,Arizona,49.843168
10,Georgia,49.88067
22,Michigan,48.586441
28,Nevada,48.776883
33,North Carolina,50.684183
38,Pennsylvania,49.410795
49,Wisconsin,49.680937


In [64]:
# Historical polls: Republican two-party share per swing state.
poll_2020 = pd.read_csv("https://raw.githubusercontent.com/sasukewong/US-Electrion-Poll/refs/heads/main/president_polls_historical.csv")

# Actually apply the column selection (the bare `poll_2020[keep]` expression
# computed the subset and threw it away).
poll_2020 = poll_2020[keep]

# Candidate names are kept as notebook-level variables: the first answer
# listed for each party in this file.
republican_cadidate = poll_2020.loc[poll_2020['party'] == 'REP', 'answer'].iloc[0]
democrat_cadidate = poll_2020.loc[poll_2020['party'] == 'DEM', 'answer'].iloc[0]

poll_2020 = poll_data_cleasing(poll_2020)

# Keep the swing states only and rename the share so it does not collide with
# the actual-result column when the two frames are merged.
poll_2020 = (
    poll_2020.loc[poll_2020['state'].isin(swing_state), ['state', 'percent_rep']]
    .rename(columns={'percent_rep': 'percent_rep_poll'})
)

In [65]:
# Per-state correction: actual Republican share minus polled Republican share,
# in percentage points (positive when the polls understated the Republican vote).
weighting = pd.merge(poll_2020, election_results_2020, how='left', on='state')
weighting['rep_weight'] = weighting['percent_rep'].sub(weighting['percent_rep_poll'])
swing_state_weighting = weighting.loc[:, ['state', 'rep_weight']]

## ================================================================================

## 2024 Cycle Polls

In [66]:
# Load the current-cycle polls and parse `end_date` so it can be compared with timestamps.
president_polls = (
    pd.read_csv("https://raw.githubusercontent.com/sasukewong/US-Electrion-Poll/refs/heads/main/president_polls.csv")
    .assign(end_date=lambda polls: pd.to_datetime(polls["end_date"]))
)

In [67]:
# Polls that ended on or after 2024-07-21, reduced to the columns in `keep`.
recent_cutoff = pd.Timestamp(2024, 7, 21)
df = president_polls.loc[president_polls["end_date"] >= recent_cutoff, keep]

# First answer listed for each party among those polls.
republican_cadidate = df.loc[df['party'] == 'REP', 'answer'].values[0]
democrat_cadidate = df.loc[df['party'] == 'DEM', 'answer'].values[0]

df = poll_data_cleasing(df)

# Attach the swing-state correction (0 for every other state) and shift the
# two shares by it in opposite directions.
df = df.merge(swing_state_weighting, how='left', on='state').fillna(0)
df = df.assign(
    percent_rep=df['percent_rep'] + df['rep_weight'],
    percent_dem=df['percent_dem'] - df['rep_weight'],
)

In [68]:
# States in the lookup table that have no row in `df`: after the outer merge
# their `code_y` (the code column coming from `df`) is missing.
df_no_new_poll = state_code.merge(df, how='outer', on='state')
lacks_recent_poll = df_no_new_poll['code_y'].isna()
states_without_new_poll = df_no_new_poll.loc[lacks_recent_poll, 'state']

In [69]:
# Fallback for states without a recent poll: use every poll of theirs that
# ended on or after 2024-01-01, processed the same way as the recent polls.
fallback_rows = (
    (president_polls["end_date"] >= pd.Timestamp(2024, 1, 1))
    & president_polls["state"].isin(states_without_new_poll)
)
df_no_new_poll = president_polls.loc[fallback_rows, keep]

# Skip the fallback entirely when no state needs it; `.values[0]` on an empty
# selection would otherwise raise IndexError.
if not df_no_new_poll.empty:
    # NOTE(review): these reassignments replace the candidate names chosen from
    # the recent polls, and later cells use them as labels - confirm intended.
    republican_cadidate = df_no_new_poll.loc[df_no_new_poll['party'] == 'REP', 'answer'].values[0]
    democrat_cadidate = df_no_new_poll.loc[df_no_new_poll['party'] == 'DEM', 'answer'].values[0]
    df_no_new_poll = poll_data_cleasing(df_no_new_poll)
    df_no_new_poll = df_no_new_poll.merge(swing_state_weighting, how='left', on='state').fillna(0)
    df_no_new_poll['percent_rep'] = df_no_new_poll['percent_rep'] + df_no_new_poll['rep_weight']
    df_no_new_poll['percent_dem'] = df_no_new_poll['percent_dem'] - df_no_new_poll['rep_weight']
    df = pd.concat([df, df_no_new_poll])

# Both cleansing passes append a "District of Columbia" row, so the concat can
# hold it twice. Keep one row per state (the recent-poll row wins), which also
# makes re-running this cell idempotent.
df = (
    df.drop_duplicates(subset='state', keep='first')
    .sort_values(by='state')
    .reset_index(drop=True)
)

In [70]:
# Electoral votes won by each candidate: a state goes to whichever party has
# the larger adjusted share; an exact tie keeps the empty label ''.
winner = df.merge(votes, how='left', on='state').drop_duplicates()
winner['winning_party'] = ''
# .loc writes into `winner` itself. The chained form
# `winner['winning_party'][mask] = ...` assigns to a temporary and is a silent
# no-op under pandas Copy-on-Write.
winner.loc[winner['percent_dem'] > winner['percent_rep'], 'winning_party'] = democrat_cadidate
winner.loc[winner['percent_rep'] > winner['percent_dem'], 'winning_party'] = republican_cadidate
winner['votes'] = winner['votes'].astype(int)
winner = winner.groupby("winning_party")['votes'].sum().reset_index()
winner

Unnamed: 0,winning_party,votes
0,Biden,226
1,Trump,312


In [71]:
# Per-state winners (kept at state level this time) plus a 0/1 flag per margin
# band, and the electoral votes falling in each band for the stacked bar chart.
winner = df.merge(votes, how='left', on='state').drop_duplicates()
winner['winning_party'] = ''
# .loc instead of chained assignment, which is a silent no-op under pandas
# Copy-on-Write.
winner.loc[winner['percent_dem'] > winner['percent_rep'], 'winning_party'] = democrat_cadidate
winner.loc[winner['percent_rep'] > winner['percent_dem'], 'winning_party'] = republican_cadidate
winner['votes'] = winner['votes'].astype(int)

# Every band is defined on the Republican share (percentage points):
#   solid   : >= 55            (Dem: <= 45)
#   won     : (51, 55)         (Dem: (45, 49))
#   leaning : (50, 51]         (Dem: [49, 50))
# A share of exactly 50 falls in no band.
rep_share = winner['percent_rep']
margin_bands = {
    'rep_solid': rep_share >= 55,
    'dem_solid': rep_share <= 45,
    'rep_won': (rep_share > 51) & (rep_share < 55),
    'dem_won': (rep_share > 45) & (rep_share < 49),
    'rep_leaning': (rep_share > 50) & (rep_share <= 51),
    'dem_leaning': (rep_share >= 49) & (rep_share < 50),
}
winner = winner.assign(**{band: in_band.astype(int) for band, in_band in margin_bands.items()})

# One-element lists because each total becomes the `x` of a single bar segment.
dem_solid = [winner.loc[winner['dem_solid'] == 1, 'votes'].sum()]
rep_solid = [winner.loc[winner['rep_solid'] == 1, 'votes'].sum()]
rep_leaning = [winner.loc[winner['rep_leaning'] == 1, 'votes'].sum()]
dem_leaning = [winner.loc[winner['dem_leaning'] == 1, 'votes'].sum()]
rep_won = [winner.loc[winner['rep_won'] == 1, 'votes'].sum()]
dem_won = [winner.loc[winner['dem_won'] == 1, 'votes'].sum()]



In [72]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import pandas as pd

# Figure grid: the map fills both rows of the left column; the right column
# holds the stacked bar (top) and the per-state pie (bottom).
map_cell = {"type": "choropleth", "rowspan": 2, "colspan": 1}
bar_cell = {"type": "bar"}
pie_cell = {"type": "pie"}

fig = make_subplots(
    rows=2,
    cols=2,
    column_widths=[0.6, 0.4],  # left column : right column
    row_heights=[0.5, 0.5],    # top row : bottom row
    specs=[
        [map_cell, bar_cell],
        [None, pie_cell],      # None: slot already covered by the map's rowspan
    ],
)

# State map coloured by Republican share. The colour range is clamped to
# 45-55, so 50 sits at the white midpoint of the blue-to-red scale.
party_colorscale = [
    [0, '#000E89'],
    [0.42, 'royalblue'],
    [0.45, 'lightblue'],
    [0.5, 'white'],
    [0.55, '#eb757a'],
    [0.58, '#e8484e'],
    [1.0, '#e81b23'],
]
state_map = go.Choropleth(
    locations=df["code"],
    z=df["percent_rep"],
    locationmode='USA-states',
    text=df["state"],
    colorscale=party_colorscale,
    name="",
    showlegend=False,
    zmin=45,
    zmax=55,
)
fig.add_trace(state_map, row=1, col=1)

# Geo subplot settings: US-only scope and projection, white land, no ocean.
geo_settings = dict(
    scope="usa",
    projection_type="albers usa",
    showcoastlines=True,
    showland=True,
    lakecolor=None,
    landcolor="white",
    showocean=False,
)
fig.update_layout(geo=geo_settings)

# Electoral-vote bar: one horizontal segment per margin band, added from the
# most Democratic band to the most Republican one.
bar_segments = [
    ('Democrat Solid', dem_solid, '#000E89'),
    ('Democrat Won', dem_won, 'royalblue'),
    ('Democrat Leaning', dem_leaning, 'lightblue'),
    ('Republican Leaning', rep_leaning, '#eb757a'),
    ('Republican Won', rep_won, '#e8484e'),
    ('Republican Solid', rep_solid, '#e81b23'),
]
for segment_name, segment_votes, segment_color in bar_segments:
    fig.add_trace(
        go.Bar(
            x=segment_votes,
            name=segment_name,
            orientation='h',
            marker_color=segment_color,
        ),
        row=1, col=2,
    )

# Per-state pie charts with a dropdown that shows one state at a time.

# Number of traces already on the figure (the map and the bar segments).
# These stay visible whichever state is selected, so count them instead of
# hard-coding the number.
static_trace_count = len(fig.data)

# One pie per row of `df`, all in the same subplot. Only the first is visible
# initially, matching the dropdown's `active=0`; otherwise every pie would be
# drawn on top of the others until a button is clicked.
for idx, state_row in enumerate(df.itertuples(index=False)):
    fig.add_trace(
        go.Pie(
            labels=["Republican", "Democrat"],
            values=[state_row.percent_rep, state_row.percent_dem],
            # '#'-prefixed hex, consistent with the other colours in this figure.
            marker_colors=["#e81b23", "#000E89"],
            showlegend=False,
            visible=(idx == 0),
        ),
        row=2, col=2
    )

# One button per state: keep the static traces on and switch on only the pie
# at the same position as the button.
state_count = len(df)
state_buttons = [
    dict(
        label=state,
        method="update",
        args=[{"visible": [True] * static_trace_count
                          + [pie_idx == idx for pie_idx in range(state_count)]}],
    )
    for idx, state in enumerate(df['state'])
]
fig.update_layout(
    updatemenus=[
        dict(
            active=0,
            buttons=state_buttons,
            x=0.82,
            xanchor="center",
            y=0.50,
            yanchor="top"
        )
    ]
)

# Overall layout: stacked bars, dark theme, titles and electoral-vote totals.

# Total the electoral votes by comparing the two shares directly. Picking rows
# 0 and 1 of a groupby on the winner label depends on how the candidate names
# sort, breaks if a tie adds an empty label, and raises if one party wins
# no state.
dem_electoral_votes = winner.loc[winner['percent_dem'] > winner['percent_rep'], 'votes'].sum()
rep_electoral_votes = winner.loc[winner['percent_rep'] > winner['percent_dem'], 'votes'].sum()

heading_font = dict(size=20, color="white")

fig.update_layout(
    barmode='stack',
    showlegend=False,
    template="plotly_dark",
    margin=dict(r=10, t=100, b=40, l=60),
    annotations=[
        # Author credit, bottom-left corner.
        dict(
            text="By Yeung Sum Wong",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0,
            y=0),
        # Title above the map.
        dict(
            text="US Election by State",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.25,
            y=0.90,
            font=heading_font,
        ),
        # Title above the electoral-vote bar.
        dict(
            text="US Election 270 to Win",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.88,
            y=1.05,
            font=heading_font,
        ),
        dict(
            text=f"Democrat Got: {dem_electoral_votes}",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.79,
            y=0.95,
            font=heading_font,
        ),
        dict(
            text=f"Republican Got: {rep_electoral_votes}",
            showarrow=False,
            xref="paper",
            yref="paper",
            x=0.93,
            y=0.95,
            font=heading_font,
        )
    ],
    width=1800,
    height=900
)
fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

# Dashed vertical marker at 270 electoral votes on the bar subplot.
majority_line_style = dict(color="gray", width=2, dash="dash")
fig.add_shape(
    type="line",
    x0=270,
    x1=270,
    y0=-1,
    y1=1,
    line=majority_line_style,
    row=1,
    col=2,
)

fig.show()
