# Polling place closures: data from the U.S. Election Assistance Commission [survey](https://www.eac.gov/research-and-data/datasets-codebooks-and-surveys)

In [1]:
# IPython line magic: load the `lab_black` extension into this kernel
# (presumably auto-formats code cells with Black — confirm it is installed).
%load_ext lab_black

In [2]:
import pandas as pd

In [3]:
# Widen pandas' display limits so wide survey tables are not truncated on screen.
pd.set_option("display.max_columns", 1000)
pd.set_option("display.max_rows", 1000)
# None removes the per-cell character limit.
pd.set_option("display.max_colwidth", None)

### 2020 survey

In [4]:
# Location of the 2020 survey CSV on the EAC website (adjacent literals are
# concatenated by Python into a single string).
url_2020 = (
    "https://www.eac.gov/sites/default/files/EAVS%202020/"
    "2020_EAVS_for_Public_Release_nolabel_V2.csv"
)

In [5]:
# Download the 2020 survey; FIPSCode is read as text rather than as a number.
src_2020 = pd.read_csv(
    filepath_or_buffer=url_2020,
    dtype={"FIPSCode": str},
)

In [6]:
# Keep the three identifier columns plus the five survey items used in this notebook,
# as an independent copy of the source frame.
columns_2020 = [
    "FIPSCode",
    "Jurisdiction_Name",
    "State_Abbr",
    "A1a",
    "C1a",
    "D1a",
    "D3a",
    "D4a",
]
places2020 = src_2020.loc[:, columns_2020].copy()

In [7]:
# Translate the 2020 survey item codes into the shared, descriptive column names.
# NOTE(review): the 2016 cell maps F1a -> total_votes_cast and D2a/D2e -> polling
# places, while 2020 uses D1a and D3a/D4a here — confirm against each year's codebook.
labels_2020 = {
    "FIPSCode": "fips",
    "State_Abbr": "state",
    "Jurisdiction_Name": "place",
    "A1a": "total_reg_voters",
    "C1a": "mail_ballots_sent",
    "D1a": "total_votes_cast",
    "D3a": "poll_place_elect_day",
    "D4a": "poll_place_early",
}
places2020 = places2020.rename(columns=labels_2020)

In [8]:
# Tag every row with its survey year (stored as text).
places2020 = places2020.assign(year="2020")

In [9]:
# Cast the five count columns to float; the column list is written once and reused
# on both sides of the assignment.
count_columns = [
    "total_reg_voters",
    "mail_ballots_sent",
    "total_votes_cast",
    "poll_place_elect_day",
    "poll_place_early",
]
places2020[count_columns] = places2020[count_columns].astype(float)

In [10]:
# Preview the first five rows of the cleaned 2020 table.
places2020.head(n=5)

Unnamed: 0,fips,place,state,total_reg_voters,mail_ballots_sent,total_votes_cast,poll_place_elect_day,poll_place_early,year
0,100100000,AUTAUGA COUNTY,AL,43695.0,1329.0,24217.0,18.0,1.0,2020
1,100300000,BALDWIN COUNTY,AL,176668.0,11147.0,96609.0,50.0,1.0,2020
2,100500000,BARBOUR COUNTY,AL,17850.0,726.0,9234.0,16.0,1.0,2020
3,100700000,BIBB COUNTY,AL,15014.0,332.0,9031.0,8.0,1.0,2020
4,100900000,BLOUNT COUNTY,AL,41927.0,1032.0,25823.0,24.0,1.0,2020


---

### 2016 survey

In [11]:
# Read the local 2016 survey file. FIPSCode is kept as text, and low_memory=False
# makes pandas parse the file in one pass instead of in chunks.
src_2016 = pd.read_csv(
    filepath_or_buffer="input/EAVS 2016 Final Data for Public Release v.3.csv",
    low_memory=False,
    dtype={"FIPSCode": str},
)

In [12]:
# Keep the three identifier columns plus the five survey items used in this notebook,
# as an independent copy of the source frame.
columns_2016 = [
    "FIPSCode",
    "JurisdictionName",
    "State",
    "A1a",
    "C1a",
    "F1a",
    "D2a",
    "D2e",
]
places2016 = src_2016.loc[:, columns_2016].copy()

In [13]:
# Translate the 2016 survey item codes into the shared, descriptive column names.
# NOTE(review): unlike the 2020 table, these count columns are not cast to float in
# this notebook — confirm their dtypes before comparing years.
labels_2016 = {
    "FIPSCode": "fips",
    "State": "state",
    "JurisdictionName": "place",
    "A1a": "total_reg_voters",
    "C1a": "mail_ballots_sent",
    "F1a": "total_votes_cast",
    "D2a": "poll_place_elect_day",
    "D2e": "poll_place_early",
}
places2016 = places2016.rename(columns=labels_2016)

In [14]:
# Tag every row with its survey year (stored as text).
places2016 = places2016.assign(year="2016")

---

### 2012 survey

#### Total registered voters

In [15]:
# Read the 2012 NVRA workbook; FIPSCode is kept as text.
src_2012a = pd.read_excel(
    io="input/2012EAVS_NVRAData.xlsx",
    dtype={"FIPSCode": str},
)

In [16]:
# Keep the join keys, the jurisdiction label and survey item QA1a, as an independent copy.
columns_2012a = ["State", "Jurisdiction", "FIPSCode", "QA1a"]
src_2012a = src_2012a.loc[:, columns_2012a].copy()

In [17]:
# Give survey item QA1a its descriptive column name.
src_2012a = src_2012a.rename(columns={"QA1a": "total_reg_voters"})

In [18]:
# Preview the first five rows.
src_2012a.head(n=5)

Unnamed: 0,State,Jurisdiction,FIPSCode,total_reg_voters
0,AK,ALASKA,200000000,579304.0
1,AL,AUTAUGA COUNTY,100100000,37170.0
2,AL,BALDWIN COUNTY,100300000,126703.0
3,AL,BARBOUR COUNTY,100500000,17318.0
4,AL,BIBB COUNTY,100700000,12819.0


#### Absentee ballots

In [19]:
# Read the 2012 UOCAVA workbook; FIPSCode is kept as text.
src_2012b = pd.read_excel(
    io="input/2012EAVS_UOCAVAData.xls",
    dtype={"FIPSCode": str},
)

In [20]:
# Keep the join keys, the jurisdiction label and survey item QB1a.
# .copy() makes this an independent frame (as the QA1a selection already does), so
# later in-place edits act on owned data and do not raise SettingWithCopyWarning.
# NOTE(review): this is the UOCAVA file (item QB1a), whereas the 2016/2020 cells take
# C1a for mail_ballots_sent — confirm the two items measure the same thing.
src_2012b = src_2012b[["State", "Jurisdiction", "FIPSCode", "QB1a"]].copy()

In [21]:
# Give survey item QB1a its descriptive column name.
src_2012b = src_2012b.rename(columns={"QB1a": "mail_ballots_sent"})

In [22]:
# Preview the first five rows.
src_2012b.head(n=5)

Unnamed: 0,State,Jurisdiction,FIPSCode,mail_ballots_sent
0,AK,ALASKA,200000000,11935.0
1,AL,AUTAUGA COUNTY,100100000,75.0
2,AL,BALDWIN COUNTY,100300000,253.0
3,AL,BARBOUR COUNTY,100500000,25.0
4,AL,BIBB COUNTY,100700000,3.0


#### Polling places

In [23]:
# Read the 2012 Section D workbook; FIPSCode is kept as text.
src_2012d = pd.read_excel(
    io="input/Excel Files-Part 1/Section D.xls",
    dtype={"FIPSCode": str},
)

In [24]:
# Keep the join keys, the jurisdiction label and survey items QD2a / QD2e.
# .copy() makes this an independent frame (as the QA1a selection already does), so
# later in-place edits act on owned data and do not raise SettingWithCopyWarning.
src_2012d = src_2012d[["State", "Jurisdiction", "FIPSCode", "QD2a", "QD2e"]].copy()

In [25]:
# Give survey items QD2a and QD2e their descriptive column names.
labels_2012d = {
    "QD2a": "poll_place_elect_day",
    "QD2e": "poll_place_early",
}
src_2012d = src_2012d.rename(columns=labels_2012d)

In [26]:
# Preview the first five rows.
# NOTE(review): values such as -999999.0 in the preview look like missing-data codes
# rather than real counts — confirm against the codebook before aggregating.
src_2012d.head(n=5)

Unnamed: 0,State,Jurisdiction,FIPSCode,poll_place_elect_day,poll_place_early
0,AK,ALASKA,200000000,533.0,75.0
1,AL,AUTAUGA COUNTY,100100000,17.0,-999999.0
2,AL,BALDWIN COUNTY,100300000,46.0,-999999.0
3,AL,BARBOUR COUNTY,100500000,17.0,-999999.0
4,AL,BIBB COUNTY,100700000,8.0,-999999.0


#### Votes cast

In [27]:
# Read the 2012 Section F workbook; FIPSCode is kept as text.
src_2012f = pd.read_excel(
    io="input/Section F.xls",
    dtype={"FIPSCode": str},
)

In [28]:
# Keep the join keys, the jurisdiction label and survey item QF1a.
# .copy() makes this an independent frame (as the QA1a selection already does), so
# later in-place edits act on owned data and do not raise SettingWithCopyWarning.
src_2012f = src_2012f[["State", "Jurisdiction", "FIPSCode", "QF1a"]].copy()

In [29]:
# Give survey item QF1a its descriptive column name.
src_2012f = src_2012f.rename(columns={"QF1a": "total_votes_cast"})

In [30]:
# Preview the first five rows.
src_2012f.head(n=5)

Unnamed: 0,State,Jurisdiction,FIPSCode,total_votes_cast
0,AK,ALASKA,200000000,302465.0
1,AL,AUTAUGA COUNTY,100100000,24065.0
2,AL,BALDWIN COUNTY,100300000,85873.0
3,AL,BARBOUR COUNTY,100500000,11534.0
4,AL,BIBB COUNTY,100700000,8454.0


### Merge the 2012 tables

In [31]:
from functools import reduce

In [32]:
# Join the four 2012 section tables into one row per (State, FIPSCode).
#
# Every section table carries its own "Jurisdiction" column. Merging them as-is makes
# pandas add _x/_y suffixes, and by the fourth table those suffixed names already
# exist on the left side; newer pandas versions reject the resulting duplicate
# column names with a MergeError. Only the first table's label is needed, so the
# redundant column is dropped from each right-hand table before merging.
#
# how="inner" keeps only jurisdictions present in all four tables.
merge_keys = ["State", "FIPSCode"]
first_section, *other_sections = [src_2012a, src_2012b, src_2012f, src_2012d]
src_2012_all = reduce(
    lambda left, right: pd.merge(
        left,
        right.drop(columns="Jurisdiction"),
        on=merge_keys,
        how="inner",
    ),
    other_sections,
    first_section,
)

In [33]:
# Map any suffixed Jurisdiction_x / Jurisdiction_y columns back onto the single
# "Jurisdiction" label; names that are not present are simply skipped by rename.
shared_label = "Jurisdiction"
src_2012_all = src_2012_all.rename(
    columns={"Jurisdiction_x": shared_label, "Jurisdiction_y": shared_label}
)

In [34]:
# Keep only the first occurrence of each column name.
is_repeated_name = src_2012_all.columns.duplicated()
src_2012_all = src_2012_all.loc[:, ~is_repeated_name]

In [35]:
# Preview the first five rows of the merged 2012 table.
src_2012_all.head(n=5)

Unnamed: 0,State,Jurisdiction,FIPSCode,total_reg_voters,mail_ballots_sent,total_votes_cast,poll_place_elect_day,poll_place_early
0,AK,ALASKA,200000000,579304.0,11935.0,302465.0,533.0,75.0
1,AL,AUTAUGA COUNTY,100100000,37170.0,75.0,24065.0,17.0,-999999.0
2,AL,BALDWIN COUNTY,100300000,126703.0,253.0,85873.0,46.0,-999999.0
3,AL,BARBOUR COUNTY,100500000,17318.0,25.0,11534.0,17.0,-999999.0
4,AL,BIBB COUNTY,100700000,12819.0,3.0,8454.0,8.0,-999999.0


In [36]:
# Work on an independent (deep) copy so the merged 2012 frame stays untouched.
places2012 = src_2012_all.copy(deep=True)

In [37]:
# Align the 2012 identifier columns with the names used for the other years.
labels_2012 = {
    "FIPSCode": "fips",
    "State": "state",
    "Jurisdiction": "place",
}
places2012 = places2012.rename(columns=labels_2012)

In [38]:
# Tag every row with its survey year (stored as text).
places2012 = places2012.assign(year="2012")

In [39]:
# Preview the first five rows of the cleaned 2012 table.
places2012.head(n=5)

Unnamed: 0,state,place,fips,total_reg_voters,mail_ballots_sent,total_votes_cast,poll_place_elect_day,poll_place_early,year
0,AK,ALASKA,200000000,579304.0,11935.0,302465.0,533.0,75.0,2012
1,AL,AUTAUGA COUNTY,100100000,37170.0,75.0,24065.0,17.0,-999999.0,2012
2,AL,BALDWIN COUNTY,100300000,126703.0,253.0,85873.0,46.0,-999999.0,2012
3,AL,BARBOUR COUNTY,100500000,17318.0,25.0,11534.0,17.0,-999999.0,2012
4,AL,BIBB COUNTY,100700000,12819.0,3.0,8454.0,8.0,-999999.0,2012


---

### Concatenate the dataframes

In [40]:
# Stack the three survey years into one long table with a fresh 0..n-1 index.
yearly_tables = [places2020, places2016, places2012]
src = pd.concat(yearly_tables, ignore_index=True)

In [41]:
# Left-pad every FIPS code with zeros to a fixed width of 10 characters
# (the previews above show 9-character codes such as 100100000).
FIPS_WIDTH = 10
src["fips"] = src["fips"].str.zfill(FIPS_WIDTH)

---

In [42]:
# Write the combined table to disk without the positional index column.
output_csv = "polling_places_analysis.csv"
src.to_csv(output_csv, index=False)