# Filter polling place data to specific states and reliable jurisdictions

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Widen pandas' display limits so wide/long frames are shown without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)  # None = never truncate cell text

---

### States that filed amicus briefs

In [4]:
# Lookup from two-letter state code to full state name for the states in scope.
# Used below to derive `state_name` from the `state` column; codes that are not
# listed here map to NaN.
states = {
    "AK": "Alaska",
    "AZ": "Arizona",
    "AL": "Alabama",
    "GA": "Georgia",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TX": "Texas",
}

In [5]:
# Full state names to keep when filtering on `state_name`.
# NOTE(review): this duplicates the values of the `states` dict by hand; the
# two must be kept in sync if a state is added or removed.
state_filter = [
    "Alaska",
    "Arizona",
    "Alabama",
    "Georgia",
    "South Carolina",
    "South Dakota",
    "Texas",
]

In [6]:
# Load the cleaned polling-place table. `fips` and `year` are read as text
# (not numbers) so any leading zeros are kept and years can be compared
# against string literals such as "2016".
src = pd.read_csv(
    filepath_or_buffer="output/polling_places_analysis_clean.csv",
    dtype={"fips": str, "year": str},
)

In [7]:
# Translate state codes to full names; codes missing from `states` become NaN.
src = src.assign(state_name=src["state"].map(states))

In [8]:
# Sanity check: show the dtype pandas assigned to each column.
src.dtypes

fips                     object
place                    object
state                    object
total_reg_voters        float64
mail_ballots_sent       float64
poll_place_elect_day    float64
poll_place_early        float64
total_votes_cast        float64
year                     object
place_type               object
place_clean              object
state_name               object
dtype: object

---

In [9]:
# Keep only the rows whose mapped state name is one of the states of interest.
df_states = src.loc[src["state_name"].isin(state_filter)]

### How many jurisdictions in our states don't have polling place data?

In [10]:
# Number of 2012 rows with no Election Day polling place count.
int(
    (
        df_states["poll_place_elect_day"].isna() & df_states["year"].eq("2012")
    ).sum()
)

180

In [11]:
# Number of 2016 rows with no Election Day polling place count.
int(
    (
        df_states["poll_place_elect_day"].isna() & df_states["year"].eq("2016")
    ).sum()
)

37

In [12]:
# Number of 2020 rows with no Election Day polling place count.
int(
    (
        df_states["poll_place_elect_day"].isna() & df_states["year"].eq("2020")
    ).sum()
)

0

In [13]:
# Discard rows that have no Election Day polling place count.
df = df_states.dropna(subset=["poll_place_elect_day"])

In [14]:
# Rows remaining per election year after dropping missing counts.
df["year"].value_counts()

2020    608
2016    571
2012    428
Name: year, dtype: int64

---

In [15]:
# Reshape to one row per jurisdiction with a (measure, year) column for each
# value. `aggfunc="mean"` is pandas' default, spelled out here because it means
# duplicate (fips, place_clean, state_name, year) rows would be averaged.
df_pivot = df.pivot_table(
    values=["poll_place_elect_day", "total_reg_voters"],
    index=["fips", "place_clean", "state_name"],
    columns="year",
    aggfunc="mean",
).reset_index()

In [16]:
# Collapse the two-level column index into single "<measure>_<year>" names.
# The former index columns have an empty second level, so they come out with a
# trailing underscore (e.g. "fips_"); `.strip()` only trims whitespace and those
# names are renamed further down.
df_pivot.columns = [
    "_".join(levels).strip() for levels in df_pivot.columns.to_flat_index()
]

### Polling place raw change from 2016-2020

In [17]:
# Absolute change in Election Day polling places, 2020 minus 2016.
# The result is NaN wherever either year is missing.
df_pivot["poll_place_change_16_20"] = df_pivot["poll_place_elect_day_2020"].sub(
    df_pivot["poll_place_elect_day_2016"]
)

### Polling place percent change from 2016-2020

In [18]:
# Change expressed as a percentage of the 2016 count, rounded to 2 decimals.
# NOTE(review): a 2016 count of 0 would produce inf/NaN here — confirm none exist.
df_pivot["poll_place_pct_change_16_20"] = (
    df_pivot["poll_place_change_16_20"]
    .div(df_pivot["poll_place_elect_day_2016"])
    .mul(100)
    .round(2)
)

### Polling place rate 2016

In [19]:
# 2016 Election Day polling places per 1,000 registered voters (2 decimals).
df_pivot["poll_place_1k_2016"] = (
    df_pivot["poll_place_elect_day_2016"]
    .div(df_pivot["total_reg_voters_2016"])
    .mul(1000)
    .round(2)
)

### Polling place rate 2020

In [20]:
# 2020 Election Day polling places per 1,000 registered voters (2 decimals).
df_pivot["poll_place_1k_2020"] = (
    df_pivot["poll_place_elect_day_2020"]
    .div(df_pivot["total_reg_voters_2020"])
    .mul(1000)
    .round(2)
)

### Polling place rate change 2016-2020

In [21]:
# Percent change in the polling-places-per-1,000-voters rate, 2016 -> 2020.
#
# The rates are recomputed here at full precision instead of reusing the
# `poll_place_1k_*` columns: those are already rounded to 2 decimals, and
# dividing one rounded small number by another can shift the percent change
# noticeably. Rounding happens once, at the end.
rate_2016 = (
    df_pivot["poll_place_elect_day_2016"] / df_pivot["total_reg_voters_2016"] * 1000
)
rate_2020 = (
    df_pivot["poll_place_elect_day_2020"] / df_pivot["total_reg_voters_2020"] * 1000
)
df_pivot["poll_place_rate_change_2016_2020"] = (
    (rate_2020 - rate_2016) / rate_2016 * 100
).round(2)

In [22]:
# Net change across jurisdictions. Rows missing either year are NaN in this
# column and are skipped, so this total need not equal the difference between
# the two yearly totals.
df_pivot["poll_place_change_16_20"].sum()

-2136.0

In [23]:
# Total 2016 Election Day polling places over every row that has a 2016 value.
df_pivot["poll_place_elect_day_2016"].sum(skipna=True)

16381.0

In [24]:
# Total 2020 Election Day polling places over every row that has a 2020 value.
df_pivot["poll_place_elect_day_2020"].sum(skipna=True)

14643.0

In [25]:
# Remove the 2012 registration column, which the 2016-2020 comparison does not use.
# The original listed this same label twice; the duplicate is removed and the
# frame is reassigned instead of mutated in place.
# NOTE(review): the repeated label looks like a copy-paste slip — was
# "poll_place_elect_day_2012" meant to be dropped as well? Left in place here
# so the exported columns are unchanged.
df_pivot = df_pivot.drop(columns=["total_reg_voters_2012"])

In [26]:
# Give the identifier columns their final names, dropping the trailing
# underscore left over from flattening the column index.
df_pivot = df_pivot.rename(
    columns={
        "fips_": "fips",
        "place_clean_": "place",
        "state_name_": "state",
    }
)

In [27]:
# Order jurisdictions from largest rate increase to largest rate decrease;
# rows with a NaN rate change sort to the end (pandas' default).
df = df_pivot.sort_values(
    by="poll_place_rate_change_2016_2020",
    ascending=False,
).copy()

In [28]:
# Write the sorted pivot table to disk without the positional index.
df.to_csv(path_or_buf="output/polling_places_pivot.csv", index=False)