# 2024 Election Data Playground

In [1]:
import geopandas as gpd
import numpy as np
from tqdm import tqdm 
import pandas as pd

In [None]:
# Read the precinct-level presidential results and give every precinct a
# representative point (the centroid of its polygon) plus an empty district slot.
# NOTE(review): the centroid is computed in whatever CRS the file loads with --
# confirm the precinct and district layers share a CRS before trusting the
# containment test below.
pres_results = gpd.read_file("precincts-with-results.topojson")
pres_results["centroid"] = pres_results["geometry"].centroid
pres_results["district"] = None

# Read the congressional district boundaries.
file_name = "/Users/ryandonaldson/ryan_sandbox/election_data_2024/national_cong118_boundary/national_cong118_boundary/national_cong118_boundary.shp"
districts = gpd.read_file(file_name)

# Tag each precinct with the CONG118 value of the district whose shape contains
# the precinct centroid. If a centroid falls inside more than one shape, the
# district visited later overwrites the earlier one; precincts matched by no
# district keep None.
for district_shape, district_code in tqdm(
    zip(districts["geometry"], districts["CONG118"]), total=len(districts)
):
    centroid_in_district = district_shape.contains(pres_results["centroid"])
    pres_results.loc[centroid_in_district, "district"] = district_code


100%|██████████| 435/435 [02:43<00:00,  2.66it/s]


In [3]:
# Read the House and Senate result tables.
# NOTE(review): these are CSV files read through gpd.read_file; later cells
# compare `cycle` against the string "2024", so the columns presumably arrive
# as strings -- confirm before doing arithmetic on them.
house_results = gpd.read_file(
    "/Users/ryandonaldson/ryan_sandbox/election_data_2024/election_results_house.csv"
)

file_name = "/Users/ryandonaldson/ryan_sandbox/election_data_2024/election_results_senate.csv"
senate_results = gpd.read_file(file_name)

# Sum the precinct-level vote counts within each congressional district.
# No placeholder columns are written to `districts` here: that name is rebound
# to a plain list at the end of this block, so running `districts[...] = None`
# a second time would raise TypeError, and the columns were never read anyway.
vote_columns = ["votes_dem", "votes_rep", "votes_total"]
pres_results_by_district = pres_results.groupby("district").agg(
    {column: "sum" for column in vote_columns}
)

# Each party's share of the district's total presidential vote.
pres_results_by_district["dem_perc_pres"] = pres_results_by_district["votes_dem"] / pres_results_by_district["votes_total"]
pres_results_by_district["rep_perc_pres"] = pres_results_by_district["votes_rep"] / pres_results_by_district["votes_total"]

# Prepare to rename AT-LARGE districts to "-01" for joining: move "district"
# out of the index into a regular column and pull the labels out as a list.
pres_results_by_district = pres_results_by_district.reset_index()
# Boolean mask of at-large districts (not used by the visible cells).
at_large_districts = pres_results_by_district["district"].str.contains("AT-LARGE")
# NOTE: from here on `districts` is a list of district labels, no longer the
# boundary table loaded from the shapefile.
districts = pres_results_by_district["district"].to_list()

def rename_func(x):
    """Return the district label with an at-large marker replaced by "-01".

    If the text "AT-LARGE" occurs anywhere in ``x``, the final 9 characters
    are dropped (the 8 characters of "AT-LARGE" plus one separator character)
    and "-01" is appended. Any other label is returned unchanged.
    """
    return x[:-9] + "-01" if "AT-LARGE" in x else x
# Run every district label through rename_func and write the renamed labels
# back onto the aggregated table.
districts = [rename_func(label) for label in districts]
pres_results_by_district["district"] = districts

# Show the table keyed by district (display only: the result is not assigned).
pres_results_by_district.set_index('district')

Unnamed: 0_level_0,votes_dem,votes_rep,votes_total,dem_perc_pres,rep_perc_pres
district,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
AR-01,74300,201263,280943,0.264466,0.716384
AR-02,133378,187500,328790,0.405663,0.570273
AR-03,114908,194657,318213,0.361104,0.611719
AR-04,81291,192704,279133,0.291227,0.690366
AZ-01,216232,230227,450724,0.479744,0.510794
...,...,...,...,...,...
WI-07,167604,266844,441113,0.379957,0.604933
WI-08,177232,247092,429848,0.412313,0.574836
WV-01,98066,259831,364662,0.268923,0.712526
WV-02,116243,273725,397728,0.292268,0.688222


In [17]:
# Start from the per-district presidential table. Every step below returns a
# new frame, so pres_results_by_district itself is left untouched.
results_by_district = pres_results_by_district

# Build a "<state>-<NN>" district key on the House results, then keep only the
# 2024 cycle and, from those, the rows whose ballot party is DEM.
# office_seat_name is sliced from character 9 onward and parsed as an integer
# (presumably values look like "District 7" -- TODO confirm against the CSV).
# .assign returns a new frame, so re-running this cell never writes a column
# into an already-filtered slice of house_results.
seat_suffix = house_results["office_seat_name"].apply(lambda seat: f"-{int(seat[9:]):02}")
house_results = house_results.assign(district=house_results["state_abbrev"] + seat_suffix)
house_results = house_results[house_results["cycle"] == "2024"]
house_results_dems = house_results[house_results["ballot_party"] == "DEM"]

# Left-join the House Democratic rows onto the presidential table by district,
# so districts without a matching House row are kept with missing values.
results_by_district = (
    results_by_district
    .set_index('district')
    .join(house_results_dems.set_index('district'), how='left', lsuffix='pres', rsuffix='house')
    .reset_index()
)

# Keep only the columns needed downstream.
results_by_district = results_by_district[["district", "dem_perc_pres", "rep_perc_pres", "candidate_name", "percent", "ranked_choice_round"]]

# Keep one row per district: the one with the highest House percent.
# `percent` comes from a CSV loaded via gpd.read_file and is presumably text
# (TODO confirm); sorting text would rank "9.1" above "55.3". The ordering is
# therefore done on a numeric copy, while the `percent` column itself keeps its
# original values. Rows whose percent is missing or unparsable sort first so
# they are only kept when a district has nothing better.
percent_as_number = pd.to_numeric(results_by_district["percent"], errors="coerce")
results_by_district = (
    results_by_district
    .assign(_percent_as_number=percent_as_number)
    .sort_values(by=["district", "_percent_as_number"], na_position="first")
    .drop_duplicates(subset="district", keep="last")
    .drop(columns="_percent_as_number")
)

# Sanity check: prints False when every district appears exactly once.
print(bool(results_by_district.duplicated(subset="district").any()))

False


In [13]:
# List the column labels of results_by_district.
results_by_district.columns

Index(['district', 'dem_perc_pres', 'rep_perc_pres', 'candidate_name',
       'percent', 'ranked_choice_round'],
      dtype='object')

In [11]:
# Sanity check: show any district labels that still carry the at-large marker.
# The marker is spelled "AT-LARGE" with a hyphen, matching the spelling that
# rename_func tests for; an empty result means every such label was renamed.
results_by_district.loc[results_by_district["district"].str.contains("AT-LARGE"), "district"]

Series([], Name: district, dtype: object)

In [None]:
# Rebind the working table to the presidential aggregates, then, for each
# district row, build a boolean mask of house_results rows whose state_abbrev
# equals the first two characters of the district label.
# NOTE(review): state_bool is overwritten on every iteration and is not used
# inside the loop, so this cell appears unfinished -- only the mask for the
# last row survives.
results_by_district = pres_results_by_district
for i, row in results_by_district.iterrows():
    state_bool = house_results["state_abbrev"] == row["district"][:2]

In [12]:
# List the column labels available on house_results.
house_results.columns

Index(['id', 'race_id', 'state_abbrev', 'state', 'office_id', 'office_name',
       'office_seat_name', 'cycle', 'stage', 'special', 'party',
       'politician_id', 'candidate_id', 'candidate_name', 'ballot_party',
       'ranked_choice_round', 'votes', 'percent', 'unopposed', 'winner',
       'alt_result_text', 'source', 'district'],
      dtype='object')