In [1]:
import pandas as pd
import numpy as np

import geopandas as gpd
from shapely.geometry import Point

import matplotlib.pyplot as plt

import folium
from folium.features import GeoJsonTooltip

from pathlib import Path

In [3]:
pd.set_option("display.max_columns", 200)
pd.set_option("display.max_rows", 200)

In [4]:
BOUNDARIES_2010_PATH = Path("2010_boundaries.json")
BOUNDARIES_2020_PATH = Path("2020_boundaries.json")

In [7]:
ELECTION_2018_PATH = Path("Nov 2018 General results.csv")
ELECTION_2022_PATH = Path("Nov 2022 All results.csv")

In [8]:
BOUNDARIES_2010_PATH, BOUNDARIES_2020_PATH, ELECTION_2018_PATH.exists(), ELECTION_2022_PATH.exists()

(WindowsPath('2010_boundaries.json'),
 WindowsPath('2020_boundaries.json'),
 False,
 False)

In [12]:
def load_boundaries(path: Path) -> gpd.GeoDataFrame:
    gdf = gpd.read_file(path)
    gdf = gdf[gdf.geometry.notnull()].copy()
    gdf["geometry"] = gdf["geometry"].buffer(0)
    return gdf

In [13]:
gdf_2010 = load_boundaries(BOUNDARIES_2010_PATH)
gdf_2020 = load_boundaries(BOUNDARIES_2020_PATH)

DriverError: 2010_boundaries.json: No such file or directory

In [None]:
print("2010 boundaries:", gdf_2010.shape)
print("2020 boundaries:", gdf_2020.shape)
display(gdf_2010.head(3))

In [None]:
def show_join_candidates(gdf: gpd.GeoDataFrame, top_n: int = 30):
    candidates = []
    for c in gdf.columns:
        if c == "geometry":
            continue
        if any(k in c.lower() for k in ["geoid", "district", "dist", "name", "id", "num", "code"]):
            candidates.append(c)
    print("Candidate join columns:", candidates[:top_n])
    for c in candidates[:min(8, len(candidates))]:
        print(f"\n--- {c} (sample values) ---")
        display(gdf[[c]].head(5))

In [None]:
df18 = pd.read_csv(ELECTION_2018_PATH)
df22 = pd.read_csv(ELECTION_2022_PATH)

print("2018 raw:", df18.shape)
print("2022 raw:", df22.shape)
display(df18.head(3))

In [None]:
OFFICE_COL = "Office"
GOVERNOR_LABEL = "Governor"    

GEO_COL = "County"

REP_VOTES_COL = "Rep Votes"
DEM_VOTES_COL = "Dem Votes"

def prep_wide_election(df: pd.DataFrame, year: int) -> pd.DataFrame:
    d = df.copy()
    if OFFICE_COL in d.columns:
        d = d[d[OFFICE_COL].astype(str).str.contains(GOVERNOR_LABEL, case=False, na=False)].copy()
    keep = [GEO_COL, REP_VOTES_COL, DEM_VOTES_COL]

    d = d[keep].copy()
    d[REP_VOTES_COL] = pd.to_numeric(d[REP_VOTES_COL], errors="coerce").fillna(0).astype(int)
    d[DEM_VOTES_COL] = pd.to_numeric(d[DEM_VOTES_COL], errors="coerce").fillna(0).astype(int)

    d["total_votes"] = d[REP_VOTES_COL] + d[DEM_VOTES_COL]
    d["rep_share"] = np.where(d["total_votes"]>0, d[REP_VOTES_COL] / d["total_votes"], np.nan)
    d["dem_share"] = np.where(d["total_votes"]>0, d[DEM_VOTES_COL] / d["total_votes"], np.nan)
    d["year"] = year

    out = (d.groupby([GEO_COL, "year"], as_index=False).agg({REP_VOTES_COL:"sum", DEM_VOTES_COL:"sum", "total_votes":"sum"}))
    out["rep_share"] = np.where(out["total_votes"]>0, out[REP_VOTES_COL] / out["total_votes"], np.nan)
    out["dem_share"] = np.where(out["total_votes"]>0, out[DEM_VOTES_COL] / out["total_votes"], np.nan)
    return out

In [None]:
def prep_long_election(df: pd.DataFrame, year: int,
                      geo_col: str,
                      office_col: str,
                      governor_label: str,
                      votes_col: str,
                      party_col: str | None = None,
                      candidate_col: str | None = None) -> pd.DataFrame:
    d = df.copy()
    d = d[d[office_col].astype(str).str.contains(governor_label, case=False, na=False)].copy()
    d[votes_col] = pd.to_numeric(d[votes_col], errors="coerce").fillna(0).astype(int)

    if party_col and party_col in d.columns:
        party = d[party_col].astype(str).str.upper()
        d["party_clean"] = party
    elif candidate_col and candidate_col in d.columns:
        candidate_to_party = {}
        d["party_clean"] = d[candidate_col].astype(str).str.upper().map(candidate_to_party)
    else:
        raise ValueError("Need either a party column or a candidate column + a manual mapping.")
    rep_vals = {"REP", "REPUBLICAN", "R"}
    dem_vals = {"DEM", "DEMOCRATIC", "D"}

    d["is_rep"] = d["party_clean"].isin(rep_vals)
    d["is_dem"] = d["party_clean"].isin(dem_vals)

    agg = (d.groupby([geo_col], as_index=False).agg(rep_votes=(votes_col, lambda s: int(s[d.loc[s.index, "is_rep"]].sum())),
                  dem_votes=(votes_col, lambda s: int(s[d.loc[s.index, "is_dem"]].sum())),
                  total_votes=(votes_col, "sum")))

    agg["rep_share"] = np.where(agg["total_votes"]>0, agg["rep_votes"]/agg["total_votes"], np.nan)
    agg["dem_share"] = np.where(agg["total_votes"]>0, agg["dem_votes"]/agg["total_votes"], np.nan)
    agg["year"] = year
    return agg

In [None]:
boundaries = gdf_2020.copy() 
BOUNDARY_JOIN_COL = "GEOID"   
ELECTION_JOIN_COL = GEO_COL   

In [None]:
def plot_choropleth(gdf: gpd.GeoDataFrame, column: str, title: str):
    ax = gdf.plot(column=column, legend=True, figsize=(10, 8), missing_kwds={"color":"lightgrey"})
    ax.set_axis_off()
    ax.set_title(title)
    plt.show()

In [None]:
def folium_choropleth(gdf: gpd.GeoDataFrame,
                      value_col: str,
                      key_col: str,
                      tooltip_cols: list[str],
                      tooltip_aliases: list[str] | None = None,
                      map_title: str = "Map",
                      output_html: Path | None = None) -> folium.Map:
    if gdf.crs is None:
        gdf = gdf.set_crs(epsg=4326)
    if gdf.crs.to_epsg() != 4326:
        gdf = gdf.to_crs(epsg=4326)

    center = [gdf.geometry.centroid.y.mean(), gdf.geometry.centroid.x.mean()]
    m = folium.Map(location=center, zoom_start=7, tiles="cartodbpositron")

    tooltip = GeoJsonTooltip(fields=tooltip_cols, aliases=tooltip_aliases, localize=True)

    folium.Choropleth(
        geo_data=gdf.to_json(),
        data=gdf,
        columns=[key_col, value_col],
        key_on=f"feature.properties.{key_col}",
        fill_opacity=0.75,
        line_opacity=0.2,
        nan_fill_opacity=0.2,
        legend_name=value_col,
        name="choropleth",
    ).add_to(m)

    folium.GeoJson(
        gdf,
        tooltip=tooltip,
        name="tooltip-layer",
        style_function=lambda x: {"fillOpacity": 0, "color": "black", "weight": 0.4},
    ).add_to(m)

    folium.LayerControl().add_to(m)

    title_html = f"""<h3 style="position: fixed; top: 10px; left: 50px; z-index:9999; background: white; padding: 8px; border-radius: 6px;">
    {map_title}</h3>"""
    m.get_root().html.add_child(folium.Element(title_html))

    if output_html:
        m.save(str(output_html))
        print("Saved:", output_html)

    return m