# Voting and registration in the 2020 election

In [4]:
%load_ext lab_black

In [5]:
import pandas as pd
import geopandas as gpd
import altair as alt
import matplotlib.pyplot as plt
import jenkspy
import json

%matplotlib inline

In [7]:
pd.options.display.max_columns = 50
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get voting data from the U.S. Census

In [8]:
# https://www.census.gov/data/tables/time-series/demo/voting-and-registration/p20-585.html

In [9]:
# table04a: Reported Voting and Registration for States: November 2020

In [10]:
# table04c: Reported Voting and Registration, by Age, for States: November 2020

In [11]:
base_url = "https://www2.census.gov/programs-surveys/cps/tables/p20/585/"

In [12]:
tables = ["table04a", "table04b", "table04c"]

### States

In [9]:
states = pd.read_excel(
    "https://www2.census.gov/programs-surveys/cps/tables/p20/585/table04a.xlsx",
    skiprows=6,
    skipfooter=7,
)

  warn("""Cannot parse header or footer so it will be ignored""")


In [10]:
states.columns = [
    "state",
    "tot_pop",
    "tot_czn_pop",
    "tot_reg",
    "pct_reg",
    "pct_reg_moe",
    "pct_reg_czn",
    "pct_reg_czn_moe",
    "tot_voted",
    "pct_voted",
    "pct_voted_moe",
    "pct_voted_czn",
    "pct_voted_czn_moe",
]

In [11]:
cols = ["tot_pop", "tot_czn_pop", "tot_czn_pop", "tot_voted"]

In [12]:
states["state"] = states["state"].str.title().str.replace(" Of ", " of ", regex=False)

In [13]:
states[["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]] = (
    states[["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]] * 1000
)

In [14]:
states.sort_values("pct_voted_czn", ascending=False).head()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
8,District of Columbia,576000,534000,464000,80.5,2.7,86.9,2.4,448000,77.8,2.8,84.0,2.6
30,New Jersey,6801000,5921000,5008000,73.6,2.2,84.6,1.9,4638000,68.2,2.3,78.3,2.2
23,Minnesota,4339000,4142000,3436000,79.2,2.5,82.9,2.4,3225000,74.3,2.7,77.9,2.7
37,Oregon,3369000,3242000,2590000,76.9,2.9,79.9,2.8,2402000,71.3,3.1,74.1,3.0
29,New Hampshire,1101000,1077000,843000,76.6,2.9,78.3,2.8,797000,72.4,3.0,74.0,3.0


In [15]:
states.sort_values("pct_voted_czn", ascending=False).tail()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
0,Alabama,3769000,3716000,2527000,67.0,3.1,68.0,3.1,2247000,59.6,3.3,60.5,3.3
41,South Dakota,659000,649000,437000,66.3,3.4,67.4,3.4,380000,57.7,3.5,58.5,3.5
36,Oklahoma,2942000,2800000,1884000,64.0,3.5,67.3,3.5,1631000,55.5,3.6,58.3,3.7
48,West Virginia,1397000,1379000,928000,66.4,3.4,67.3,3.4,773000,55.3,3.6,56.1,3.6
3,Arkansas,2283000,2195000,1361000,59.6,3.4,62.0,3.4,1186000,51.9,3.4,54.0,3.5


In [16]:
states["pct_voted_czn"] = (states["pct_voted_czn"] / 100).round(3)

### Set up breaks by citizen voting rate

In [17]:
breaks = jenkspy.jenks_breaks(list(states.pct_voted_czn), nb_class=7)

In [18]:
breaks

[0.54, 0.585, 0.626, 0.657, 0.685, 0.719, 0.741, 0.84]

In [19]:
def get_group(value):
    for i, b in enumerate(breaks):
        if value <= breaks[i + 1]:
            return i

In [20]:
states["pct_voted_czn_group"] = states.pct_voted_czn.apply(get_group)

In [21]:
states.head()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe,pct_voted_czn_group
0,Alabama,3769000,3716000,2527000,67.0,3.1,68.0,3.1,2247000,59.6,3.3,0.605,3.3,1
1,Alaska,528000,516000,383000,72.6,3.2,74.2,3.1,330000,62.4,3.4,0.638,3.4,2
2,Arizona,5638000,5075000,3878000,68.8,2.5,76.4,2.5,3649000,64.7,2.6,0.719,2.6,4
3,Arkansas,2283000,2195000,1361000,59.6,3.4,62.0,3.4,1186000,51.9,3.4,0.54,3.5,0
4,California,30342000,25946000,18001000,59.3,1.2,69.4,1.2,16893000,55.7,1.2,0.651,1.2,2


In [22]:
alt.Chart(states).mark_bar().encode(
    x=alt.X("pct_voted_czn", axis=alt.Axis(tickCount=5, format="%")),
    y=alt.Y("state", sort="-x"),
    color=alt.condition(
        alt.datum.state == "California", alt.value("#1a80c4"), alt.value("#8dc8f1")
    ),
).properties(height=900)

---

### Sex, Race and Hispanic Origin

In [23]:
# table04b: Reported Voting and Registration, by Sex, Race and Hispanic Origin, for States: November 2020
# https://www2.census.gov/programs-surveys/cps/tables/p20/585/table04b.xlsx

In [24]:
race_sex = pd.read_excel("input/raw/table04b.xlsx", skiprows=1, skipfooter=0)

  warn("""Cannot parse header or footer so it will be ignored""")


In [25]:
race_sex.columns = [
    "state",
    "race_sex",
    "tot_pop",
    "tot_czn_pop",
    "tot_reg",
    "pct_reg",
    "pct_reg_moe",
    "pct_reg_czn",
    "pct_reg_czn_moe",
    "tot_voted",
    "pct_voted",
    "pct_voted_moe",
    "pct_voted_czn",
    "pct_voted_czn_moe",
]

In [26]:
race_sex["state"] = (
    race_sex["state"]
    .str.title()
    .str.replace(" Of ", " of ", regex=False)
    .str.replace("Us", "U.S.", regex=False)
)

In [27]:
race_sex[["tot_czn_pop", "tot_reg", "tot_voted"]] = (
    race_sex[["tot_czn_pop", "tot_reg", "tot_voted"]]
    .replace("-", "0", regex=False)
    .astype(int)
)

In [28]:
race_sex[
    [
        "pct_reg",
        "pct_reg_moe",
        "pct_reg_czn",
        "pct_reg_czn_moe",
        "pct_voted",
        "pct_voted_moe",
        "pct_voted_czn",
        "pct_voted_czn",
        "pct_voted_czn_moe",
    ]
] = (
    race_sex[
        [
            "pct_reg",
            "pct_reg_moe",
            "pct_reg_czn",
            "pct_reg_czn_moe",
            "pct_voted",
            "pct_voted_moe",
            "pct_voted_czn",
            "pct_voted_czn",
            "pct_voted_czn_moe",
        ]
    ]
    .replace("B", "0", regex=False)
    .astype(float)
)

ValueError: Columns must be same length as key

In [None]:
race_sex[["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]] = (
    race_sex[["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]] * 1000
)

### Hispanic vote 

In [None]:
race_sex[race_sex["state"] == "California"]

In [None]:
latino_vs_white = race_sex[
    (race_sex["race_sex"] == "Hispanic (of any race)")
    | (race_sex["race_sex"] == "White non-Hispanic alone")
]

In [None]:
latino_vs_white.head()

In [None]:
latino_vs_white_pivot = pd.pivot_table(
    latino_vs_white,
    values="pct_voted_czn",
    index="state",
    columns="race_sex",
    aggfunc="mean",
).reset_index()

### Gap between Latino and Anglo turnout

In [None]:
latino_vs_white_gap = latino_vs_white_pivot[
    latino_vs_white_pivot["Hispanic (of any race)"] > 0
].copy()

In [None]:
latino_vs_white_gap["gap"] = (
    latino_vs_white_gap["White non-Hispanic alone"]
    - latino_vs_white_gap["Hispanic (of any race)"]
)

In [None]:
latino_vs_white_gap.sort_values("gap", ascending=False).head()

In [None]:
alt.Chart(latino_vs_white_gap).mark_bar().encode(
    x=alt.X("gap", title="Percentage point gap", axis=alt.Axis(tickCount=5)),
    y=alt.Y("state", title=" ", sort="-x"),
    color=alt.condition(
        alt.datum.state == "California", alt.value("#1a80c4"), alt.value("#8dc8f1")
    ),
).properties(
    height=650,
    title="Whites turned out at greater rates than Latinos in all but one state. What's the gap?",
)

In [None]:
latino_vs_white_gap

---

## Geography

### States map

In [None]:
state_geo = gpd.read_file("raw/states.geojson")
state_geo.columns = state_geo.columns.str.lower()

### Add A.P. states

In [None]:
ap_states = pd.read_csv("raw/ap_states.csv")

In [None]:
states_merge = state_geo.merge(ap_states, left_on="stusps", right_on="usps")

In [None]:
states_merge.head()

### Clean up

In [None]:
states_merge.drop(
    ["name_x", "stusps", "statefp", "statens", "affgeoid", "lsad", "aland", "awater"],
    axis=1,
    inplace=True,
)

In [None]:
states_merge.rename(columns={"name_y": "name"}, inplace=True)

In [None]:
states_merge = states_merge[["geoid", "name", "usps", "ap", "geometry"]]

In [None]:
lower48 = states_merge[
    (states_merge["name"] != "Hawaii") & (states_merge["name"] != "Alaska")
]

---

### Merge with voting stats for states

In [None]:
states_votes_geo = lower48.merge(states, right_on="state", left_on="name")

In [None]:
len(states_votes_geo)

In [None]:
states_votes_geo.sort_values("pct_voted_czn", ascending=False).head()

### Set up breaks by citizen voting rate

In [None]:
breaks = jenkspy.jenks_breaks(list(states_votes_geo.pct_voted_czn), nb_class=7)

In [None]:
breaks

In [None]:
def get_group(value):
    for i, b in enumerate(breaks):
        if value <= breaks[i + 1]:
            return i

In [None]:
states_votes_geo["pct_voted_czn_group"] = states_votes_geo.pct_voted_czn.apply(
    get_group
)

---

In [None]:
states_votes_geo.to_file("output/states_votes_geo.geojson", driver="GeoJSON")

In [None]:
geojson = json.loads(states_votes_geo.to_json())

In [None]:
features = alt.Data(values=geojson["features"])

In [None]:
base = (
    alt.Chart(features)
    .mark_geoshape(stroke="black", strokeWidth=0.1)
    .encode()
    .properties(width=600, height=800)
)

In [None]:
geoshape = alt.Chart(features).mark_geoshape(fill="lightgray", stroke="black")

In [None]:
pct_voted_czn = geoshape.encode(
    color=alt.Color(
        "properties.pct_voted_czn_group:N",
        scale=alt.Scale(
            domain=[0, 1, 2, 3, 4, 5, 6], range=lat.palette["schemes"]["ice-7"]
        ),
        legend=None,
    ),
)

In [None]:
(pct_voted_czn).properties(title="Pct voted").configure_view(strokeWidth=0)