In [2]:
import json
import requests
from bs4 import BeautifulSoup
import re
from pprint import pprint
import pandas as pd

In [3]:
df = pd.read_csv('Presidential_Elections_Data - 2020.csv')

In [4]:
df

Unnamed: 0,STATE,TOTAL VOTES,Rep_Votes,Rep_Percent,Rep_EV,Dem_Votes,Dem_Percent,Dem_EV,Others_Votes,Others_Percent,YEAR
0,Alabama,2323282,1441170,62.03%,9,849624,36.57%,,32488,1.40%,2020
1,Alaska,359530,189951,52.83%,3,153778,42.77%,,15801,4.39%,2020
2,Arizona,3387326,1661686,49.06%,,1672143,49.36%,11,53497,1.58%,2020
3,Arkansas,1219069,760647,62.40%,6,423932,34.78%,,34490,2.83%,2020
4,California,17500881,6006429,34.32%,,11110250,63.48%,55,384202,2.20%,2020
...,...,...,...,...,...,...,...,...,...,...,...
309,Virginia,2739447,1437490,52.5,13,1217290,44.4,,59398,2.2,2000
310,Washington,2487433,1108864,44.6,,1247652,50.2,11,103002,4.1,2000
311,West Virginia,648124,336475,51.9,5,295497,45.6,,10680,1.6,2000
312,Wisconsin,2598607,1237279,47.6,,1242987,47.8,11,94070,3.6,2000


In [5]:
df = df.replace(to_replace=r'\*$', value='', regex=True)

df['Rep_Percent'] = pd.to_numeric(df['Rep_Percent'].str.rstrip('%'))
df['Dem_Percent'] = pd.to_numeric(df['Dem_Percent'].str.rstrip('%'))

df['Victory_Margin'] = df.apply(lambda row: abs(row['Rep_Percent'] - row['Dem_Percent']), axis=1)
df['Victory_Margin'] = df['Victory_Margin'].round(2)

In [6]:
df

Unnamed: 0,STATE,TOTAL VOTES,Rep_Votes,Rep_Percent,Rep_EV,Dem_Votes,Dem_Percent,Dem_EV,Others_Votes,Others_Percent,YEAR,Victory_Margin
0,Alabama,2323282,1441170,62.03,9,849624,36.57,,32488,1.40%,2020,25.46
1,Alaska,359530,189951,52.83,3,153778,42.77,,15801,4.39%,2020,10.06
2,Arizona,3387326,1661686,49.06,,1672143,49.36,11,53497,1.58%,2020,0.30
3,Arkansas,1219069,760647,62.40,6,423932,34.78,,34490,2.83%,2020,27.62
4,California,17500881,6006429,34.32,,11110250,63.48,55,384202,2.20%,2020,29.16
...,...,...,...,...,...,...,...,...,...,...,...,...
309,Virginia,2739447,1437490,52.50,13,1217290,44.40,,59398,2.2,2000,8.10
310,Washington,2487433,1108864,44.60,,1247652,50.20,11,103002,4.1,2000,5.60
311,West Virginia,648124,336475,51.90,5,295497,45.60,,10680,1.6,2000,6.30
312,Wisconsin,2598607,1237279,47.60,,1242987,47.80,11,94070,3.6,2000,0.20


In [7]:
df.to_csv('Presidential_Elections_With_Victory_Margins.csv', index=False)

In [8]:
def calculate_weighted_score(row):
    if row['YEAR'] == 2020:
        return row['Victory_Margin'] * 50
    elif row['YEAR'] == 2016:
        return row['Victory_Margin'] * 25
    elif row['YEAR'] == 2012:
        return row['Victory_Margin'] * 12.5
    elif row['YEAR'] == 2008:
        return row['Victory_Margin'] * 6.25
    elif row['YEAR'] == 2004:
        return row['Victory_Margin'] * 3.125
    elif row['YEAR'] == 2000:
        return row['Victory_Margin'] * 1.5625

# Apply the function to add the 'Weighted_Score' column
df['Weighted_Score'] = df.apply(calculate_weighted_score, axis=1).round(2)

df.head(20)

Unnamed: 0,STATE,TOTAL VOTES,Rep_Votes,Rep_Percent,Rep_EV,Dem_Votes,Dem_Percent,Dem_EV,Others_Votes,Others_Percent,YEAR,Victory_Margin,Weighted_Score
0,Alabama,2323282,1441170,62.03,9.0,849624,36.57,,32488,1.40%,2020,25.46,1273.0
1,Alaska,359530,189951,52.83,3.0,153778,42.77,,15801,4.39%,2020,10.06,503.0
2,Arizona,3387326,1661686,49.06,,1672143,49.36,11.0,53497,1.58%,2020,0.3,15.0
3,Arkansas,1219069,760647,62.4,6.0,423932,34.78,,34490,2.83%,2020,27.62,1381.0
4,California,17500881,6006429,34.32,,11110250,63.48,55.0,384202,2.20%,2020,29.16,1458.0
5,Colorado,3256952,1364607,41.9,,1804352,55.4,9.0,87993,2.70%,2020,13.5,675.0
6,Connecticut,1824280,715291,39.21,,1080680,59.24,7.0,28309,1.55%,2020,20.03,1001.5
7,Delaware,504010,200603,39.8,,296268,58.78,3.0,7139,1.42%,2020,18.98,949.0
8,District of Columbia,344356,18586,5.4,,317323,92.15,3.0,8447,2.45%,2020,86.75,4337.5
9,Florida,11067456,5668731,51.22,29.0,5297045,47.86,,101680,0.92%,2020,3.36,168.0


In [9]:
df_sorted = df.sort_values(by='Weighted_Score', ascending=True)

df_sorted

Unnamed: 0,STATE,TOTAL VOTES,Rep_Votes,Rep_Percent,Rep_EV,Dem_Votes,Dem_Percent,Dem_EV,Others_Votes,Others_Percent,YEAR,Victory_Margin,Weighted_Score
272,Florida,5963110,2912790,48.80,25,2912253,48.80,,97488,1.6,2000,0.00,0.00
294,New Mexico,598605,286417,47.80,,286783,47.90,5,21251,3.6,2000,0.10,0.16
312,Wisconsin,2598607,1237279,47.60,,1242987,47.80,11,94070,3.6,2000,0.20,0.31
278,Iowa,1315563,634373,48.20,,638517,48.50,7,29374,2.2,2000,0.30,0.47
186,Missouri,2925205,1445814,49.40,11,1441911,49.30,,,,2008,0.10,0.62
...,...,...,...,...,...,...,...,...,...,...,...,...,...
53,West Virginia,794652,545382,68.63,5,235984,29.70,,13286,1.67%,2020,38.93,1946.50
55,Wyoming,276765,193559,69.94,3,73491,26.55,,9715,3.51%,2020,43.39,2169.50
64,Dist. of Col.,311268,12723,4.10,,282830,90.90,3,4906,1.60%,2016,86.80,2170.00
32,CD-3,294831,222179,75.36,1,65854,22.34,,6798,2.31%,2020,53.02,2651.00


In [10]:
df_sorted.to_csv('Presidential_Elections_With_Weighted_Scores.csv', index=False)

In [11]:
# Determine the winning party for each state and year based on the Rep_Votes and Dem_Votes columns
df['Winner'] = df.apply(lambda row: 'Republicans' if row['Rep_Votes'] > row['Dem_Votes'] else 'Democrats', axis=1)

# Initialize the Flip Points column to 0.0 for all rows
df['Flip Points'] = 0.0

# Define the pairs of years to compare and the respective Flip Points values for each comparison
years_to_compare = [(2020, 2016, 50), (2016, 2012, 25), (2012, 2008, 12.5), (2008, 2004, 6.25), (2004, 2000, 3.125)]

# For each pair of years, check if the state has flipped and assign Flip Points
for year_current, year_previous, points in years_to_compare:
    # Filter the dataframe for the current and previous years
    df_current = df[df['YEAR'] == year_current]
    df_previous = df[df['YEAR'] == year_previous]
    
    # Merge the current and previous dataframes on STATE to compare the winners
    merged = df_current[['STATE', 'Winner']].merge(df_previous[['STATE', 'Winner']], on='STATE', suffixes=('_current', '_previous'))
    
    # Determine if a state has flipped by comparing the winners of the current and previous years
    merged['Flipped'] = merged['Winner_current'] != merged['Winner_previous']
    
    # For each state that has flipped, assign the appropriate Flip Points in the original dataframe
    for state, flipped in zip(merged['STATE'], merged['Flipped']):
        if flipped:
            df.loc[(df['YEAR'] == year_current) & (df['STATE'] == state), 'Flip Points'] = points

# Calculate the sum of Flip Points for each state
Flip_Score = df.groupby('STATE')['Flip Points'].sum().reset_index()

# Sort the states by the sum of their Flip Points in descending order
Flip_Score_sorted = Flip_Score.sort_values(by='Flip Points', ascending=False)

# Display the sorted Flip Score dataframe
Flip_Score_sorted

Unnamed: 0,STATE,Flip Points
4,CD-1,100.0
5,CD-2,100.0
38,North Dakota,78.125
42,Pennsylvania,75.0
53,Wisconsin,75.0
26,Michigan,75.0
46,Tennessee,65.625
2,Arizona,59.375
49,Vermont,56.25
44,South Carolina,56.25


In [12]:
Flip_Score_sorted.to_csv('Flip_Score_Sorted.csv', index=False)

In [13]:
flip_score_top_states = pd.read_csv('Flip_Score_Top_12_States.csv')

In [14]:
flip_score_top_states

Unnamed: 0,Rank,State,Flip Score
0,1,North Dakota,78.125
1,2,Pennsylvania,75.0
2,2,Wisconsin,75.0
3,2,Michigan,75.0
4,5,Tennessee,65.625
5,6,Arizona,59.375
6,7,Vermont,56.25
7,7,South Carolina,56.25
8,9,Georgia,50.0
9,9,Connecticut,50.0


In [16]:
flip_score_top_states['Flip Score'] = flip_score_top_states['Flip Score'].round(2)

flip_score_top_states

Unnamed: 0,Rank,State,Flip Score
0,1,North Dakota,78.12
1,2,Pennsylvania,75.0
2,2,Wisconsin,75.0
3,2,Michigan,75.0
4,5,Tennessee,65.62
5,6,Arizona,59.38
6,7,Vermont,56.25
7,7,South Carolina,56.25
8,9,Georgia,50.0
9,9,Connecticut,50.0


In [17]:
import altair as alt

In [23]:
flip_score_df_sorted = flip_score_top_states.sort_values(by='Flip Score', ascending=False)

ordered_states = flip_score_df_sorted['State'].tolist()

chart = alt.Chart(flip_score_df_sorted).mark_bar().encode(
    x=alt.X('State:N', sort=ordered_states),
    y='Flip Score:Q'
).properties(
    title='Flip Score by State',
    width=780  # Set the width to 780 pixels
)

chart.display()