In [1]:
import geopandas as gpd
import numpy as np
import pandas as pd

In [2]:
# Load the precinct-level vote layer and make sure it is in EPSG:4326.
precincts_path = "precinct_2024_votes.geojson"
g = gpd.read_file(precincts_path)
# If the file carries no CRS metadata, label it as EPSG:4326 first (no
# coordinates are changed by set_crs); then reproject to EPSG:4326.
g = (g.set_crs(4326) if g.crs is None else g).to_crs(4326)

In [3]:
# Map the source column names to shorter, self-explanatory names.
rename_map = {
    "SRPREC_KEY": "prec_id",
    "SRPREC": "prec_name",
    "COUNTY_x": "county",
    "COUNTY_y": "county_dup",
    "ELECTION": "election",
    "PREC_TYPE": "prec_type",
    "FIPS_CODE": "fips",
    "USPDEM01": "dem_votes",
    "USPREP01": "rep_votes",
}
# DataFrame.rename skips mapping keys that are not present in the frame
# (errors="ignore" is its default), so no explicit filtering is needed.
g = g.rename(columns=rename_map)

In [4]:
# Preview the first rows to confirm the renamed columns.
g.head()

Unnamed: 0,index,prec_name,county,election,prec_type,fips,prec_id,county_dup,rep_votes,dem_votes,geometry
0,16572,181200,73,g24,SRPREC,6073,6073181200,37,503.0,716.0,"POLYGON ((-117.06416 32.79388, -117.06413 32.7..."
1,16573,505710,73,g24,SRPREC,6073,6073505710,37,346.0,450.0,"POLYGON ((-117.01473 32.72809, -117.01469 32.7..."
2,16574,428610,73,g24,SRPREC,6073,6073428610,37,354.0,243.0,"MULTIPOLYGON (((-117.2141 33.17722, -117.21405..."
3,16575,428611,73,g24,SRPREC,6073,6073428611,37,272.0,199.0,"MULTIPOLYGON (((-117.21068 33.19338, -117.2106..."
4,16576,125270,73,g24,SRPREC,6073,6073125270,37,408.0,639.0,"POLYGON ((-117.26545 32.84337, -117.26545 32.8..."


In [5]:
# Collapse the two county columns into a single "county" column.
# NOTE(review): the preview above shows county=73 next to county_dup=37, so the
# second column may use a different coding scheme rather than being a true
# duplicate -- confirm before relying on the fill below.
if "county_dup" in g.columns:
    if "county" in g.columns:
        # Both present: keep "county", using "county_dup" only to fill gaps.
        g["county"] = g["county"].fillna(g["county_dup"])
        g = g.drop(columns=["county_dup"], errors="ignore")
    else:
        # Only the secondary column exists: promote it to "county".
        g = g.rename(columns={"county_dup": "county"})

In [6]:
# Drop the leftover "index" column if it exists; errors="ignore" makes this a
# no-op when the column is absent.
g = g.drop(columns=["index"], errors="ignore")

In [7]:
# Make both vote columns integer-typed.
for vote_col in ("dem_votes", "rep_votes"):
    if vote_col not in g.columns:
        # Column missing entirely: create it filled with zeros.
        g[vote_col] = 0
        continue
    # Unparseable values become NaN, then NaN is counted as 0 votes.
    parsed = pd.to_numeric(g[vote_col], errors="coerce")
    g[vote_col] = parsed.fillna(0).astype(int)

In [8]:
# Preview the first rows after dropping helper columns and casting votes to int.
g.head()

Unnamed: 0,prec_name,county,election,prec_type,fips,prec_id,rep_votes,dem_votes,geometry
0,181200,73,g24,SRPREC,6073,6073181200,503,716,"POLYGON ((-117.06416 32.79388, -117.06413 32.7..."
1,505710,73,g24,SRPREC,6073,6073505710,346,450,"POLYGON ((-117.01473 32.72809, -117.01469 32.7..."
2,428610,73,g24,SRPREC,6073,6073428610,354,243,"MULTIPOLYGON (((-117.2141 33.17722, -117.21405..."
3,428611,73,g24,SRPREC,6073,6073428611,272,199,"MULTIPOLYGON (((-117.21068 33.19338, -117.2106..."
4,125270,73,g24,SRPREC,6073,6073125270,408,639,"POLYGON ((-117.26545 32.84337, -117.26545 32.8..."


In [9]:
# Compute two-party metrics.
# two_party: Democratic plus Republican votes per row.
g["two_party"] = g["dem_votes"] + g["rep_votes"]

# Rows with no two-party votes have undefined shares. Masking the zero
# denominator to NaN makes the division produce NaN for those rows instead of
# dividing by zero.
has_votes = g["two_party"] > 0
two_party_nonzero = g["two_party"].where(has_votes)
g["dem_share"] = g["dem_votes"] / two_party_nonzero
g["rep_share"] = g["rep_votes"] / two_party_nonzero

# lean: "D" when dem_share >= 0.5 (an exact tie is labelled "D"), otherwise "R".
# Rows without votes are left missing: comparing their NaN share against 0.5 is
# False, which previously mislabelled every empty precinct as "R".
lean_labels = pd.Series(np.where(g["dem_share"] >= 0.5, "D", "R"), index=g.index)
g["lean"] = lean_labels.where(has_votes)

In [10]:
# Restrict the frame to the columns needed downstream, in this order.
keep = [
    "prec_id",
    "dem_votes",
    "rep_votes",
    "two_party",
    "dem_share",
    "rep_share",
    "lean",
    "geometry",
]
# Any wanted column that is missing from the frame is skipped rather than raising.
present = [col for col in keep if col in g.columns]
g = g[present]

In [11]:
# Preview the first rows of the trimmed frame with the computed metrics.
g.head()

Unnamed: 0,prec_id,dem_votes,rep_votes,two_party,dem_share,rep_share,lean,geometry
0,6073181200,716,503,1219,0.587367,0.412633,D,"POLYGON ((-117.06416 32.79388, -117.06413 32.7..."
1,6073505710,450,346,796,0.565327,0.434673,D,"POLYGON ((-117.01473 32.72809, -117.01469 32.7..."
2,6073428610,243,354,597,0.407035,0.592965,R,"MULTIPOLYGON (((-117.2141 33.17722, -117.21405..."
3,6073428611,199,272,471,0.422505,0.577495,R,"MULTIPOLYGON (((-117.21068 33.19338, -117.2106..."
4,6073125270,639,408,1047,0.610315,0.389685,D,"POLYGON ((-117.26545 32.84337, -117.26545 32.8..."


In [12]:
# Write the cleaned frame to a GeoPackage (layer "precincts") for analytics use.
g.to_file(
    "precinct_2024_votes_clean.gpkg",
    driver="GPKG",
    layer="precincts",
)

In [13]:
# Write a lighter GeoJSON for web use with simplified geometries.
# The tolerance is in the layer's CRS units (degrees, since the frame was
# reprojected to EPSG:4326 at load time); tweak it to trade detail for size.
simplified_shapes = g.geometry.simplify(0.0002, preserve_topology=True)
g_web = g.copy()  # leave the full-resolution frame `g` untouched
g_web["geometry"] = simplified_shapes
g_web.to_file("precinct_2024_votes_web.geojson", driver="GeoJSON")

In [14]:
# Sanity check: total of the dem_votes column across all rows.
g["dem_votes"].sum()

np.int64(796925)

In [15]:
# Sanity check: total of the rep_votes column across all rows.
g["rep_votes"].sum()

np.int64(610662)

In [16]:
# Sanity check: total of the two_party column; it should equal the sum of the
# dem_votes and rep_votes totals.
g["two_party"].sum()

np.int64(1407587)

In [17]:
# Sanity check: number of rows with a missing prec_id (expected to be 0).
g["prec_id"].isna().sum()

np.int64(0)