In [1]:
import tempfile
import urllib
from pathlib import Path

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
%load_ext autoreload
%autoreload 2

In [3]:
# Load the presidential returns table from the working directory. Later cells
# read its `party_simplified`, `year`, `state`, `candidatevotes` and
# `totalvotes` columns.
data = pd.read_csv("1976-2020-president.csv")

In [4]:
def by_year_and_party(year, party):
    """Return one row per state for the given election year and party.

    Rows of the module-level ``data`` frame whose ``year`` and
    ``party_simplified`` equal the arguments are grouped by ``state``, and
    every column is reduced to its per-state maximum.

    NOTE(review): presumably ``max`` is meant to pick the main ballot line when
    a state has several rows for the same party -- confirm that a sum is not
    what is wanted.
    """
    matches_year = data.year == year
    matches_party = data.party_simplified == party
    selected_rows = data[matches_party & matches_year]
    return selected_rows.groupby("state").max()
def by_year(year):
    """Return each state's Democratic-minus-Republican margin for ``year``.

    The margin is the difference in ``candidatevotes`` between the two
    parties divided by the state's ``totalvotes``, so positive values mean the
    Democratic row had more votes. The result is a Series indexed by state.

    Raises:
        AssertionError: if the two parties' rows disagree on ``totalvotes``
            for any state.
    """
    democrat_rows = by_year_and_party(year, "DEMOCRAT")
    republican_rows = by_year_and_party(year, "REPUBLICAN")

    # Both party rows must report the same statewide total before either is
    # used as the denominator.
    totals_agree = republican_rows.totalvotes == democrat_rows.totalvotes
    assert totals_agree.all()

    vote_gap = democrat_rows.candidatevotes - republican_rows.candidatevotes
    return vote_gap / republican_rows.totalvotes

In [5]:
# State-level Democratic-minus-Republican margins for the two elections
# being compared.
margin_2004 = by_year(2004)
margin_2020 = by_year(2020)

In [6]:
# Pull every HTML table from the 2020 California results page.
tables_2020 = pd.read_html(
    "https://en.wikipedia.org/wiki/2020_United_States_presidential_election_in_California"
)

# Keep the one table whose first column label is ("County", "County"); the
# single-element unpacking raises if zero or several tables match.
(county_table_2020,) = [
    table for table in tables_2020 if list(table)[0] == ("County", "County")
]
by_county_2020 = county_table_2020.set_index(("County", "County"))

# Convert each candidate's "%" column from percent strings to fractions.
ca_2020_dem, ca_2020_gop = (
    by_county_2020[candidate, "%"].apply(
        lambda pct: float(pct.replace("%", "")) / 100
    )
    for candidate in ("Joe BidenDemocratic", "Donald TrumpRepublican")
)

# County-level Democratic-minus-Republican margin, ordered by county label.
ca_2020 = (ca_2020_dem - ca_2020_gop).sort_index()

In [7]:
# Pull every HTML table from the 2004 California results page.
tables_2004 = pd.read_html(
    "https://en.wikipedia.org/wiki/2004_United_States_presidential_election_in_California"
)

# Keep the one table whose first column label is "County"; the single-element
# unpacking raises if zero or several tables match.
(county_table_2004,) = [
    table for table in tables_2004 if list(table)[0] == "County"
]
by_county_2004 = county_table_2004.set_index("County")

# Convert each candidate's column from percent strings to fractions.
ca_2004_dem, ca_2004_gop = (
    by_county_2004[candidate].apply(
        lambda pct: float(pct.replace("%", "")) / 100
    )
    for candidate in ("Kerry", "Bush")
)

# County-level Democratic-minus-Republican margin, ordered by county label.
ca_2004 = (ca_2004_dem - ca_2004_gop).sort_index()

In [8]:
# The swing subtractions below (and the positional argmin lookup in later
# cells) are only meaningful when both years carry exactly the same labels in
# the same order. `Index.equals` returns False on a length mismatch, whereas an
# element-wise `==` would raise an unrelated ValueError before the assert runs.
assert ca_2004.index.equals(ca_2020.index), (
    "2004 and 2020 California county indexes differ"
)
assert margin_2004.index.equals(margin_2020.index), (
    "2004 and 2020 state indexes differ"
)

# Change in Democratic-minus-Republican margin from 2004 to 2020.
ca_swing = ca_2020 - ca_2004
margin_swing = margin_2020 - margin_2004

In [9]:
import us

In [10]:
def construct_shapefile(shapefile, key, weight_margin):
    """Label every row of ``shapefile`` with the state its county most resembles.

    For each county in ``ca_2004`` a distance to every state is computed as the
    mean of three absolute differences: the 2004 margin, the 2020 margin, and
    ``weight_margin`` times the 2004-to-2020 swing. The state with the smallest
    distance is chosen and its ``us`` abbreviation is stored for that county.

    Reads the module-level series ``ca_2004``, ``ca_2020``, ``ca_swing``,
    ``margin_2004``, ``margin_2020`` and ``margin_swing``. The state series are
    combined positionally, so they must share the same index order.

    Args:
        shapefile: frame with a ``NAME`` column of county labels; modified in
            place by adding (or overwriting) column ``key``.
        key: name of the column that receives the state abbreviations.
        weight_margin: multiplier applied to the swing term; ``0`` ignores the
            swing entirely.

    Returns:
        None.

    Raises:
        ValueError: if ``us.states.lookup`` cannot resolve a state name.
        KeyError: if ``shapefile.NAME`` contains a county absent from
            ``ca_2004``.
    """
    county_to_state = {}
    for county in ca_2004.index:
        # One distance per state, in the positional order of the state index.
        distances = np.mean(
            [
                np.abs(ca_2004[county] - margin_2004),
                np.abs(ca_2020[county] - margin_2020),
                weight_margin * np.abs(ca_swing[county] - margin_swing),
            ],
            0,
        )
        state_name = margin_2020.index[distances.argmin()]
        # The lookup is given the title-cased name, as in the original code.
        state = us.states.lookup(state_name.title())
        if state is None:
            # lookup() returns None on a miss; fail with a message that names
            # the offending state instead of an AttributeError on `.abbr`.
            raise ValueError(
                f"us.states.lookup could not resolve state {state_name!r} "
                f"(best match for county {county!r})"
            )
        county_to_state[county] = state.abbr
    shapefile[key] = shapefile.NAME.apply(lambda x: county_to_state[x])

In [11]:
# Tag each California county with its closest-matching state under three
# weightings of the swing term, then export the result.
shapefile = gpd.read_file("./shapefile/CA_Counties_TIGER2016.shp")
construct_shapefile(shapefile, "by_swing", 10000)  # swing term dominates the distance
construct_shapefile(shapefile, "by_margin", 0)  # swing term ignored; margins only
construct_shapefile(shapefile, "hybrid", 2)  # swing difference weighted 2x
# Create the output directory first so the export also works on a fresh
# checkout where `out/` does not exist yet.
Path("out").mkdir(parents=True, exist_ok=True)
shapefile.to_file("out/out.shp")