# Voting and registration in the 2020 election

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import geopandas as gpd
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt
import jenkspy
import json

%matplotlib inline

In [3]:
# Register the theme shipped with altair_latimes and make it the active one.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Raise pandas' display limits so wide and long tables are not elided.
pd.set_option("display.max_columns", 50)
pd.set_option("display.max_rows", 1000)

# Remove Altair's row cap on chart data. Kept as the last expression so its
# return value remains the cell output.
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Get voting data from the U.S. Census

In [4]:
# https://www.census.gov/data/tables/time-series/demo/voting-and-registration/p20-585.html

In [5]:
# table04a: Reported Voting and Registration for States: November 2020

In [6]:
# table04c: Reported Voting and Registration, by Age, for States: November 2020

In [7]:
# Directory URL (with trailing slash) under which the Census table04*.xlsx
# workbooks for this release are published.
base_url = "https://www2.census.gov/programs-surveys/cps/tables/p20/585/"

In [8]:
# File stems of the three state-level workbooks: 04a (states), 04b (sex, race
# and Hispanic origin) and 04c (age).
# NOTE(review): no cell visible in this notebook iterates over `tables`.
tables = ["table04a", "table04b", "table04c"]

### States

In [9]:
# Load table04a (state-level totals). The URL is built from `base_url` so the
# download location is defined in exactly one place.
# skiprows / skipfooter presumably trim the workbook's title and footnote rows
# -- verify against the file if the layout changes.
states = pd.read_excel(
    base_url + "table04a.xlsx",
    skiprows=6,
    skipfooter=7,
)

In [10]:
# Replace the spreadsheet's header labels with short snake_case names.
# Prefixes/suffixes used below: tot_* are counts, pct_* are percentages,
# *_czn presumably means "citizen population base" and *_moe "margin of error"
# -- confirm against the workbook header.
states.columns = [
    "state",
    "tot_pop",
    "tot_czn_pop",
    "tot_reg",
    "pct_reg",
    "pct_reg_moe",
    "pct_reg_czn",
    "pct_reg_czn_moe",
    "tot_voted",
    "pct_voted",
    "pct_voted_moe",
    "pct_voted_czn",
    "pct_voted_czn_moe",
]

In [11]:
# The four count columns (the ones scaled by 1,000 a few cells below).
# Each column is listed exactly once; "tot_reg" belongs in this list.
cols = ["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]

In [12]:
# Title-case the state names, then put the connective back in lowercase
# (" Of " -> " of ", as in "District of Columbia").
title_cased = states["state"].str.title()
states["state"] = title_cased.str.replace(" Of ", " of ", regex=False)

In [13]:
# Multiply the four count columns by 1,000 (presumably the workbook reports
# counts in thousands -- confirm against the table notes).
# Not idempotent: re-running this cell scales the values again.
count_cols = ["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]
states[count_cols] = states[count_cols].mul(1000)

In [14]:
# Five rows with the highest citizen voting rate (pct_voted_czn).
states.sort_values("pct_voted_czn", ascending=False).head()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
8,District of Columbia,576000,534000,464000,80.5,2.7,86.9,2.4,448000,77.8,2.8,84.0,2.6
30,New Jersey,6801000,5921000,5008000,73.6,2.2,84.6,1.9,4638000,68.2,2.3,78.3,2.2
23,Minnesota,4339000,4142000,3436000,79.2,2.5,82.9,2.4,3225000,74.3,2.7,77.9,2.7
37,Oregon,3369000,3242000,2590000,76.9,2.9,79.9,2.8,2402000,71.3,3.1,74.1,3.0
29,New Hampshire,1101000,1077000,843000,76.6,2.9,78.3,2.8,797000,72.4,3.0,74.0,3.0


In [15]:
# Five rows with the lowest citizen voting rate (tail of the descending sort).
states.sort_values("pct_voted_czn", ascending=False).tail()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
0,Alabama,3769000,3716000,2527000,67.0,3.1,68.0,3.1,2247000,59.6,3.3,60.5,3.3
41,South Dakota,659000,649000,437000,66.3,3.4,67.4,3.4,380000,57.7,3.5,58.5,3.5
36,Oklahoma,2942000,2800000,1884000,64.0,3.5,67.3,3.5,1631000,55.5,3.6,58.3,3.7
48,West Virginia,1397000,1379000,928000,66.4,3.4,67.3,3.4,773000,55.3,3.6,56.1,3.6
3,Arkansas,2283000,2195000,1361000,59.6,3.4,62.0,3.4,1186000,51.9,3.4,54.0,3.5


In [16]:
# Turn the citizen voting rate from a percentage into a proportion rounded to
# three decimals (so the chart axis can use the "%" format).
# Not idempotent: re-running this cell divides by 100 again.
states["pct_voted_czn"] = states["pct_voted_czn"].div(100).round(3)

### Set up breaks by citizen voting rate

In [17]:
# Jenks class boundaries for the citizen voting rate: 7 classes, which yields
# a list of 8 boundary values (lowest value first, highest value last).
# NOTE(review): newer jenkspy releases appear to have renamed `nb_class` to
# `n_classes` -- confirm against the installed version.
breaks = jenkspy.jenks_breaks(list(states.pct_voted_czn), nb_class=7)

In [18]:
# Display the computed class boundaries.
breaks

[0.54, 0.585, 0.626, 0.657, 0.685, 0.719, 0.741, 0.84]

In [19]:
def get_group(value, bounds=None):
    """Return the zero-based class index that ``value`` falls into.

    Class ``i`` covers values up to and including ``bounds[i + 1]``, so a list
    of ``k + 1`` boundaries defines classes ``0 .. k - 1``.

    Parameters
    ----------
    value : float
        The number to classify.
    bounds : sequence of float, optional
        Ascending class boundaries. Defaults to the module-level ``breaks``
        list, looked up at call time.

    Returns
    -------
    int or None
        The class index, or ``None`` when ``value`` is missing (None / NaN).
        Values above the top boundary are placed in the last class and values
        below the bottom boundary in class 0, instead of raising IndexError.

    Raises
    ------
    ValueError
        If ``bounds`` holds fewer than two boundaries (no class is defined).
    """
    from bisect import bisect_left

    if bounds is None:
        bounds = breaks

    # NaN is the only value that is not equal to itself.
    if value is None or value != value:
        return None

    if len(bounds) < 2:
        raise ValueError("bounds must contain at least two break values")

    # Search only the interior upper bounds (indices 1 .. len - 2). bisect_left
    # keeps a value equal to a boundary in the lower class, matching `<=`.
    return bisect_left(bounds, value, 1, len(bounds) - 1) - 1

In [20]:
# Tag every row with the class index of its citizen voting rate.
states["pct_voted_czn_group"] = states["pct_voted_czn"].apply(get_group)

In [21]:
# Spot-check the first rows, including the new pct_voted_czn_group column.
states.head()

Unnamed: 0,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe,pct_voted_czn_group
0,Alabama,3769000,3716000,2527000,67.0,3.1,68.0,3.1,2247000,59.6,3.3,0.605,3.3,1
1,Alaska,528000,516000,383000,72.6,3.2,74.2,3.1,330000,62.4,3.4,0.638,3.4,2
2,Arizona,5638000,5075000,3878000,68.8,2.5,76.4,2.5,3649000,64.7,2.6,0.719,2.6,4
3,Arkansas,2283000,2195000,1361000,59.6,3.4,62.0,3.4,1186000,51.9,3.4,0.54,3.5,0
4,California,30342000,25946000,18001000,59.3,1.2,69.4,1.2,16893000,55.7,1.2,0.651,1.2,2


In [22]:
# Horizontal bar chart of the citizen voting rate per state, sorted by
# descending x value. California gets its own colour to stand out.
california_highlight = alt.condition(
    alt.datum.state == "California", alt.value("#1a80c4"), alt.value("#8dc8f1")
)
pct_axis = alt.Axis(tickCount=5, format="%")

alt.Chart(states).mark_bar().encode(
    x=alt.X("pct_voted_czn", axis=pct_axis),
    y=alt.Y("state", sort="-x"),
    color=california_highlight,
).properties(height=900)

---

### Sex, Race and Hispanic Origin

In [23]:
# table04b: Reported Voting and Registration, by Sex, Race and Hispanic Origin, for States: November 2020
# https://www2.census.gov/programs-surveys/cps/tables/p20/585/table04b.xlsx

In [24]:
# Load table04b (sex, race and Hispanic origin by state) from a local copy.
# NOTE(review): this path starts with "input/raw/" while the geography inputs
# further down are read from "raw/" -- confirm which directory is correct.
# NOTE(review): skiprows=1 / skipfooter=0 differ from the 6 / 7 used for
# table04a; presumably the local file was trimmed by hand -- verify.
race_sex = pd.read_excel("input/raw/table04b.xlsx", skiprows=1, skipfooter=0)

In [25]:
# Same short column names as the states table, plus "race_sex", which holds
# the demographic group label of each row (e.g. "Total", "Male",
# "Hispanic (of any race)").
race_sex.columns = [
    "state",
    "race_sex",
    "tot_pop",
    "tot_czn_pop",
    "tot_reg",
    "pct_reg",
    "pct_reg_moe",
    "pct_reg_czn",
    "pct_reg_czn_moe",
    "tot_voted",
    "pct_voted",
    "pct_voted_moe",
    "pct_voted_czn",
    "pct_voted_czn_moe",
]

In [26]:
# Normalise the state labels step by step:
#   1. title-case them,
#   2. restore the lowercase " of " (as in "District of Columbia"),
#   3. rewrite the "Us" produced by title-casing as "U.S.".
# Step 3 is a plain substring replacement, so it would touch any label
# containing the capitalised sequence "Us".
state_names = race_sex["state"].str.title()
state_names = state_names.str.replace(" Of ", " of ", regex=False)
race_sex["state"] = state_names.str.replace("Us", "U.S.", regex=False)

In [27]:
# Cells holding the "-" placeholder become "0" so that these three count
# columns can be cast to int.
int_cols = ["tot_czn_pop", "tot_reg", "tot_voted"]
race_sex[int_cols] = race_sex[int_cols].replace("-", "0", regex=False).astype(int)

In [28]:
# Percentage and margin-of-error columns to coerce to float. Every column is
# listed exactly once: a repeated label in a list-keyed assignment is
# redundant and can misbehave depending on the pandas version.
pct_cols = [
    "pct_reg",
    "pct_reg_moe",
    "pct_reg_czn",
    "pct_reg_czn_moe",
    "pct_voted",
    "pct_voted_moe",
    "pct_voted_czn",
    "pct_voted_czn_moe",
]

# Cells holding the "B" placeholder become "0" before the cast, so such rows
# carry 0.0 in every percentage column afterwards. Later cells rely on that
# (they filter on `> 0`), so 0.0 here means "no published rate", not 0%.
race_sex[pct_cols] = race_sex[pct_cols].replace("B", "0", regex=False).astype(float)

In [29]:
# Multiply the four count columns by 1,000, as done for the states table.
# Not idempotent: re-running this cell scales the values again.
count_cols = ["tot_pop", "tot_czn_pop", "tot_reg", "tot_voted"]
race_sex[count_cols] = race_sex[count_cols].mul(1000)

### Hispanic vote 

In [30]:
# All demographic rows for California.
race_sex.loc[race_sex["state"].eq("California")]

Unnamed: 0,state,race_sex,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
55,California,Total,30342000,25946000,18001000,59.3,1.2,69.4,1.2,16893000,55.7,1.2,65.1,1.2
56,California,Male,14786000,12580000,8549000,57.8,1.7,68.0,1.7,8012000,54.2,1.7,63.7,1.8
57,California,Female,15556000,13366000,9452000,60.8,1.6,70.7,1.6,8882000,57.1,1.6,66.5,1.7
58,California,White alone,21941000,18971000,13508000,61.6,1.4,71.2,1.4,12628000,57.6,1.4,66.6,1.4
59,California,White non-Hispanic alone,12090000,11685000,9133000,75.5,1.6,78.2,1.6,8711000,72.1,1.7,74.6,1.7
60,California,Black alone,1947000,1834000,1249000,64.1,4.3,68.1,4.3,1173000,60.3,4.4,64.0,4.4
61,California,Asian alone,5072000,3958000,2491000,49.1,2.8,62.9,3.1,2370000,46.7,2.8,59.9,3.2
62,California,Hispanic (of any race),11165000,8305000,5014000,44.9,2.0,60.4,2.3,4539000,40.7,2.0,54.6,2.4
63,California,White alone or in combination,22586000,19549000,13924000,61.6,1.3,71.2,1.3,13024000,57.7,1.4,66.6,1.4
64,California,Black alone or in combination,2139000,2021000,1371000,64.1,4.1,67.8,4.1,1295000,60.5,4.2,64.1,4.2


In [31]:
# Keep only the two groups being compared, in their original row order.
comparison_groups = ["Hispanic (of any race)", "White non-Hispanic alone"]
latino_vs_white = race_sex[race_sex["race_sex"].isin(comparison_groups)]

In [32]:
# Spot-check the filtered rows. Rows whose rates were the "B" placeholder show
# 0.0 in the percentage columns.
latino_vs_white.head()

Unnamed: 0,state,race_sex,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe
4,U.S.,White non-Hispanic alone,157442000,154827000,118389000,75.2,0.4,76.5,0.4,109830000,69.8,0.4,70.9,0.4
7,U.S.,Hispanic (of any race),42468000,30627000,18719000,44.1,1.0,61.1,1.1,16459000,38.8,0.9,53.7,1.1
15,Alabama,White non-Hispanic alone,2587000,2569000,1825000,70.6,3.6,71.0,3.6,1617000,62.5,3.9,63.0,3.9
18,Alabama,Hispanic (of any race),79000,53000,35000,0.0,0.0,0.0,0.0,30000,0.0,0.0,0.0,0.0
26,Alaska,White non-Hispanic alone,325000,323000,251000,77.2,3.8,77.5,3.8,230000,70.6,4.1,71.0,4.1


In [33]:
# Reshape to one row per state with one column per group, holding the citizen
# voting rate. Each state/group pair occurs once in the rows shown above, so
# "mean" just carries the value through.
latino_vs_white_pivot = latino_vs_white.pivot_table(
    index="state",
    columns="race_sex",
    values="pct_voted_czn",
    aggfunc="mean",
).reset_index()

### Gap between Latino and Anglo turnout

In [34]:
# Drop states whose Hispanic rate is 0 (the value substituted for the "B"
# placeholder earlier). Copy so the next cell can add a column safely.
# NOTE(review): the White non-Hispanic column is not filtered the same way.
has_latino_rate = latino_vs_white_pivot["Hispanic (of any race)"].gt(0)
latino_vs_white_gap = latino_vs_white_pivot.loc[has_latino_rate].copy()

In [35]:
# Gap in percentage points: White non-Hispanic rate minus Hispanic rate.
# Positive means the White non-Hispanic rate is the higher one.
latino_vs_white_gap["gap"] = latino_vs_white_gap["White non-Hispanic alone"].sub(
    latino_vs_white_gap["Hispanic (of any race)"]
)

In [36]:
# Five states with the widest gap.
latino_vs_white_gap.sort_values("gap", ascending=False).head()

race_sex,state,Hispanic (of any race),White non-Hispanic alone,gap
36,Oklahoma,30.3,65.0,34.7
40,South Carolina,38.3,69.0,30.7
15,Iowa,44.2,73.0,28.8
13,Illinois,46.8,72.9,26.1
10,Georgia,44.2,70.3,26.1


In [37]:
# Horizontal bar chart of the turnout gap per state, sorted by descending x
# value, with California in its own colour.
california_highlight = alt.condition(
    alt.datum.state == "California", alt.value("#1a80c4"), alt.value("#8dc8f1")
)
gap_bars = alt.Chart(latino_vs_white_gap).mark_bar().encode(
    x=alt.X("gap", title="Percentage point gap", axis=alt.Axis(tickCount=5)),
    y=alt.Y("state", title=" ", sort="-x"),
    color=california_highlight,
)

gap_bars.properties(
    height=650,
    title="Whites turned out at greater rates than Latinos in all but one state. What's the gap?",
)

In [38]:
# Show the gap table.
# NOTE(review): this displays the whole frame; consider .head() or a sorted
# slice to keep the output short.
latino_vs_white_gap

race_sex,state,Hispanic (of any race),White non-Hispanic alone,gap
2,Arizona,60.8,77.0,16.2
3,Arkansas,34.6,57.0,22.4
4,California,54.6,74.6,20.0
5,Colorado,51.1,71.9,20.8
6,Connecticut,56.4,71.0,14.6
9,Florida,52.7,66.8,14.1
10,Georgia,44.2,70.3,26.1
12,Idaho,46.3,67.5,21.2
13,Illinois,46.8,72.9,26.1
14,Indiana,44.0,62.0,18.0


---

## Geography

### States map

In [39]:
# Read the state boundary polygons and lowercase the column names so they can
# be referenced consistently below (e.g. "stusps", "geoid").
state_geo = gpd.read_file("raw/states.geojson")
state_geo.columns = state_geo.columns.str.lower()

### Add A.P.-style state abbreviations

In [40]:
# Lookup table used in the merge below; it is joined on its "usps" column and
# contributes the "name" and "ap" columns seen in the merged frame.
ap_states = pd.read_csv("raw/ap_states.csv")

In [41]:
# Attach the lookup columns to each polygon by matching "stusps" to "usps".
# merge() defaults to an inner join, so rows without a match on either side
# are dropped. Both inputs have a "name" column, hence name_x / name_y.
states_merge = state_geo.merge(ap_states, left_on="stusps", right_on="usps")

In [42]:
# Spot-check the merged geography frame.
states_merge.head()

Unnamed: 0,statefp,statens,affgeoid,geoid,stusps,name_x,lsad,aland,awater,geometry,name_y,usps,ap
0,23,1779787,0400000US23,23,ME,Maine,0,79885221885,11748755195,"MULTIPOLYGON (((-68.92401 43.88541, -68.87478 ...",Maine,ME,Maine
1,15,1779782,0400000US15,15,HI,Hawaii,0,16634100855,11777698394,"MULTIPOLYGON (((-156.04965 19.78045, -156.0062...",Hawaii,HI,Hawaii
2,4,1779777,0400000US04,4,AZ,Arizona,0,294198560125,1027346486,"MULTIPOLYGON (((-114.79968 32.59362, -114.8093...",Arizona,AZ,Ariz.
3,5,68085,0400000US05,5,AR,Arkansas,0,134771517596,2960191698,"MULTIPOLYGON (((-94.61792 36.49941, -94.36120 ...",Arkansas,AR,Ark.
4,10,1779781,0400000US10,10,DE,Delaware,0,5047194742,1398720828,"MULTIPOLYGON (((-75.77379 39.72220, -75.75323 ...",Delaware,DE,Del.


### Clean up

In [43]:
# Discard the attribute columns that are not needed downstream, along with the
# duplicate name / postal-code columns left over from the merge.
# Reassigning with errors="ignore" (rather than dropping in place) keeps this
# cell re-runnable: a second run no longer raises KeyError for columns that
# are already gone.
states_merge = states_merge.drop(
    columns=[
        "name_x",
        "stusps",
        "statefp",
        "statens",
        "affgeoid",
        "lsad",
        "aland",
        "awater",
    ],
    errors="ignore",
)

In [44]:
# The surviving name column came from the lookup table; give it a plain label.
states_merge = states_merge.rename(columns={"name_y": "name"})

In [45]:
# Fix the column order, with the geometry column last.
states_merge = states_merge[["geoid", "name", "usps", "ap", "geometry"]]

In [46]:
# Everything except Hawaii and Alaska.
non_contiguous = ["Hawaii", "Alaska"]
lower48 = states_merge[~states_merge["name"].isin(non_contiguous)]

---

### Merge with voting stats for states

In [47]:
# Join the voting table onto the polygons by full state name (inner join by
# default, so only names present in both frames survive).
states_votes_geo = lower48.merge(states, right_on="state", left_on="name")

In [48]:
# Row count after the join, as a sanity check on how many names matched.
len(states_votes_geo)

49

In [49]:
# Five mapped rows with the highest citizen voting rate.
states_votes_geo.sort_values("pct_voted_czn", ascending=False).head()

Unnamed: 0,geoid,name,usps,ap,geometry,state,tot_pop,tot_czn_pop,tot_reg,pct_reg,pct_reg_moe,pct_reg_czn,pct_reg_czn_moe,tot_voted,pct_voted,pct_voted_moe,pct_voted_czn,pct_voted_czn_moe,pct_voted_czn_group
7,11,District of Columbia,DC,D.C.,"MULTIPOLYGON (((-77.11976 38.93434, -77.04102 ...",District of Columbia,576000,534000,464000,80.5,2.7,86.9,2.4,448000,77.8,2.8,0.84,2.6,6
45,34,New Jersey,NJ,N.J.,"MULTIPOLYGON (((-75.55945 39.62981, -75.53514 ...",New Jersey,6801000,5921000,5008000,73.6,2.2,84.6,1.9,4638000,68.2,2.3,0.783,2.2,6
5,27,Minnesota,MN,Minn.,"MULTIPOLYGON (((-97.22904 49.00069, -96.93096 ...",Minnesota,4339000,4142000,3436000,79.2,2.5,82.9,2.4,3225000,74.3,2.7,0.779,2.7,6
19,41,Oregon,OR,Ore.,"MULTIPOLYGON (((-124.55244 42.84057, -124.4809...",Oregon,3369000,3242000,2590000,76.9,2.9,79.9,2.8,2402000,71.3,3.1,0.741,3.0,5
44,33,New Hampshire,NH,N.H.,"MULTIPOLYGON (((-72.55611 42.86625, -72.53147 ...",New Hampshire,1101000,1077000,843000,76.6,2.9,78.3,2.8,797000,72.4,3.0,0.74,3.0,5


### Set up breaks by citizen voting rate (mapped states only)

In [50]:
# Recompute the 7-class Jenks boundaries on the mapped subset only. This
# rebinds the module-level `breaks` that get_group reads.
# NOTE(review): newer jenkspy releases appear to have renamed `nb_class` to
# `n_classes` -- confirm against the installed version.
breaks = jenkspy.jenks_breaks(list(states_votes_geo.pct_voted_czn), nb_class=7)

In [51]:
# Display the recomputed class boundaries.
breaks

[0.54, 0.585, 0.639, 0.671, 0.705, 0.741, 0.783, 0.84]

In [52]:
def get_group(value, bounds=None):
    """Return the zero-based class index that ``value`` falls into.

    This redefines the helper of the same name from earlier in the notebook.
    Because ``breaks`` is looked up when the function is called, it uses
    whichever break list is bound at that moment.

    Class ``i`` covers values up to and including ``bounds[i + 1]``, so a list
    of ``k + 1`` boundaries defines classes ``0 .. k - 1``.

    Parameters
    ----------
    value : float
        The number to classify.
    bounds : sequence of float, optional
        Ascending class boundaries. Defaults to the module-level ``breaks``.

    Returns
    -------
    int or None
        The class index, or ``None`` when ``value`` is missing (None / NaN).
        Values above the top boundary are placed in the last class and values
        below the bottom boundary in class 0, instead of raising IndexError.

    Raises
    ------
    ValueError
        If ``bounds`` holds fewer than two boundaries (no class is defined).
    """
    from bisect import bisect_left

    if bounds is None:
        bounds = breaks

    # NaN is the only value that is not equal to itself.
    if value is None or value != value:
        return None

    if len(bounds) < 2:
        raise ValueError("bounds must contain at least two break values")

    # Search only the interior upper bounds (indices 1 .. len - 2). bisect_left
    # keeps a value equal to a boundary in the lower class, matching `<=`.
    return bisect_left(bounds, value, 1, len(bounds) - 1) - 1

In [53]:
# Overwrite the class index carried over from the states table with one based
# on the boundaries recomputed for the mapped subset.
mapped_rates = states_votes_geo["pct_voted_czn"]
states_votes_geo["pct_voted_czn_group"] = mapped_rates.apply(get_group)

---

In [54]:
# Write the joined polygons and voting statistics to disk as GeoJSON.
# Presumably the "output/" directory must already exist -- verify.
states_votes_geo.to_file("output/states_votes_geo.geojson", driver="GeoJSON")

In [55]:
# Serialise the frame to a GeoJSON string and parse it back into plain Python
# dicts/lists, so the features can be handed to Altair as inline data.
geojson = json.loads(states_votes_geo.to_json())

In [56]:
# Inline Altair data source holding one GeoJSON feature per row; the columns
# are addressed as "properties.<column>" in encodings.
features = alt.Data(values=geojson["features"])

In [57]:
# Outline-only map layer with a thin black stroke and no encodings.
# NOTE(review): `base` is not used by any later cell visible in this notebook.
base = (
    alt.Chart(features)
    .mark_geoshape(stroke="black", strokeWidth=0.1)
    .encode()
    .properties(width=600, height=800)
)

In [58]:
# Map layer with a light-gray default fill; encodings are added in the next cell.
geoshape = alt.Chart(features).mark_geoshape(fill="lightgray", stroke="black")

In [59]:
# Choropleth layer: colour each shape by its class index (0-6), mapping the
# seven classes onto the seven-step "ice-7" palette. The legend is hidden.
group_scale = alt.Scale(
    domain=list(range(7)),
    range=lat.palette["schemes"]["ice-7"],
)
group_color = alt.Color(
    "properties.pct_voted_czn_group:N",
    scale=group_scale,
    legend=None,
)
pct_voted_czn = geoshape.encode(color=group_color)

In [60]:
# Render the choropleth with a title and without the view's border stroke.
pct_voted_czn.properties(title="Pct voted").configure_view(strokeWidth=0)