# Filter polling place data to specific states and reliable jurisdictions

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Raise pandas' display limits so wide and long frames render without truncation.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
pd.set_option("display.max_colwidth", None)  # None: never shorten cell text

---

### States that filed amicus briefs

In [4]:
# Postal abbreviation -> full state name for the states kept in this analysis.
states = dict(
    AK="Alaska",
    AZ="Arizona",
    AL="Alabama",
    GA="Georgia",
    SC="South Carolina",
    SD="South Dakota",
    TX="Texas",
)

In [5]:
# Full state names to keep. Derived from `states` rather than retyped, so the
# abbreviation map and the filter list cannot drift out of sync. Dict order is
# preserved, so the list has the same contents and order as before.
state_filter = list(states.values())

In [6]:
# Load the cleaned polling-place table. `fips` and `year` are read as text
# rather than parsed as numbers (later cells compare `year` to strings like "2012").
src = pd.read_csv(
    filepath_or_buffer="output/polling_places_analysis_clean.csv",
    dtype=dict(fips=str, year=str),
)

In [7]:
# Attach the full state name; abbreviations not present in `states` become NaN.
src = src.assign(state_name=src["state"].map(states))

In [8]:
# Inspect column dtypes; `fips` and `year` should show as object (string) per the read_csv dtype mapping.
src.dtypes

fips                     object
place                    object
state                    object
total_reg_voters        float64
mail_ballots_sent       float64
poll_place_elect_day    float64
poll_place_early        float64
total_votes_cast        float64
year                     object
place_type               object
place_clean              object
state_name               object
dtype: object

---

In [9]:
# Keep only the rows whose full state name is in the filter list.
df_states = src.loc[src["state_name"].isin(state_filter)]

### How many jurisdictions in our states don't have polling place data?

In [10]:
# Number of 2012 rows in the selected states with no Election Day polling-place count.
len(
    df_states.loc[
        df_states["year"].eq("2012") & df_states["poll_place_elect_day"].isna()
    ]
)

180

In [11]:
# Number of 2016 rows in the selected states with no Election Day polling-place count.
len(
    df_states.loc[
        df_states["year"].eq("2016") & df_states["poll_place_elect_day"].isna()
    ]
)

37

In [12]:
# Number of 2020 rows in the selected states with no Election Day polling-place count.
len(
    df_states.loc[
        df_states["year"].eq("2020") & df_states["poll_place_elect_day"].isna()
    ]
)

0

In [13]:
# Drop the rows that have no Election Day polling-place count.
df = df_states[df_states["poll_place_elect_day"].notna()]

In [14]:
# Rows remaining per election year after removing missing counts.
df["year"].value_counts()

2020    608
2016    571
2012    428
Name: year, dtype: int64

---

In [15]:
# Reshape to one row per jurisdiction with one column per election year.
# pivot_table aggregates with the mean by default, so duplicate
# (jurisdiction, year) rows would be averaged.
# NOTE(review): confirm each jurisdiction has at most one row per year.
df_pivot = pd.pivot_table(
    df,
    values="poll_place_elect_day",
    index=["fips", "place_clean", "state_name", "place_type"],
    columns="year",
)
df_pivot = df_pivot.reset_index()

In [16]:
# Absolute change in Election Day polling places from 2016 to 2020
# (NaN when either year is missing for the jurisdiction).
df_pivot["change_16_20"] = df_pivot["2020"].sub(df_pivot["2016"])

In [23]:
# Percent change relative to the 2016 count, rounded to two decimals.
# NOTE(review): a 2016 count of 0 would produce inf/NaN here — confirm none exist.
df_pivot["pct_change_16_20"] = (
    df_pivot["change_16_20"].div(df_pivot["2016"]).mul(100).round(2)
)

In [24]:
# Net change across jurisdictions. Series.sum skips NaN, so only jurisdictions
# with both a 2016 and a 2020 figure contribute; the result can therefore differ
# from the difference between the separate 2016 and 2020 column totals.
df_pivot["change_16_20"].sum()

-2115.0

In [25]:
# Total 2016 Election Day polling places over all jurisdictions reporting for 2016.
df_pivot.loc[:, "2016"].sum()

16381.0

In [26]:
# Total 2020 Election Day polling places over all jurisdictions reporting for 2020.
df_pivot.loc[:, "2020"].sum()

14643.0

In [27]:
# Preview the first five rows of the pivoted table.
df_pivot.head(n=5)

year,fips,place_clean,state_name,place_type,2012,2016,2020,change_16_20,pct_change_16_20
0,100100000,Autauga,Alabama,County,17.0,17.0,18.0,1.0,5.88
1,100300000,Baldwin,Alabama,County,46.0,87.0,50.0,-37.0,-42.53
2,100500000,Barbour,Alabama,County,17.0,21.0,16.0,-5.0,-23.81
3,100700000,Bibb,Alabama,County,8.0,16.0,8.0,-8.0,-50.0
4,100900000,Blount,Alabama,County,24.0,28.0,24.0,-4.0,-14.29


In [28]:
# Write the pivoted table to disk without the positional index column.
df_pivot.to_csv(
    path_or_buf="output/polling_places_pivot.csv",
    index=False,
)