# Early voting statistics in battleground states

In [1]:
import json
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
from pandas.io.json import json_normalize
import bs4 as BeautifulSoup
from altair_saver import save
from altair import datum
import altair as alt
import altair_latimes as lat
import re
import urllib
from bs4 import BeautifulSoup
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")
pd.options.display.max_columns = 50
pd.options.display.max_rows = 34000
pd.set_option('display.max_colwidth', None)

## North Carolina

In [35]:
# Fetch the North Carolina page with headless Chrome and pull out the JSON
# payload that the following cells turn into the county table.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/NC.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Reuse `url` so the page being scraped is defined in exactly one place.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down; otherwise each run of this cell leaves a
    # headless Chrome process behind.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(html_table, 'html.parser')
# The payload used downstream is the sixth `application/json` script tag.
json_data = soup.find_all("script", type='application/json')[5].string

In [36]:
# Decode the scraped payload; each inner list of ['x']['data'] becomes a
# column once the frame is transposed.
nc_payload = json.loads(json_data)
nc_counties = pd.DataFrame(nc_payload['x']['data']).T

In [37]:
# Give the positional columns readable names, then replace missing values
# with the string '0'.
county_columns = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
nc_counties = nc_counties.rename(columns=county_columns)
nc_counties = nc_counties.fillna('0')

In [38]:
# Scale the rejection rate by 100. Convert to numbers first: the column can
# still hold text (for example a '0' fill value), and multiplying a string by
# 100 repeats the text instead of scaling the value.
nc_counties['rejected_pct'] = pd.to_numeric(nc_counties['rejected_pct']) * 100

In [39]:
# Tag every row with the state abbreviation.
nc_counties = nc_counties.assign(state='NC')

In [40]:
# Cast the three measure columns to float.
numeric_columns = ['mail_returned', 'mail_rejected', 'rejected_pct']
nc_counties[numeric_columns] = nc_counties[numeric_columns].astype(float)

In [49]:
# Write the counties to CSV, highest rejection rate first.
nc_ranked = nc_counties.sort_values(by='rejected_pct', ascending=False)
nc_ranked.to_csv('output/rejected/nc_counties.csv', index=False)

In [50]:
# Parse every HTML table on the page into a list of DataFrames.
# NOTE(review): `url` is whatever the most recently run scraping cell
# assigned; it is expected to be the North Carolina page here.
df_nc = pd.read_html(io=url)

#### Summary tables for mail ballots, by race and party

In [47]:
# Rejected mail ballots by party. Keep the renamed frame: previously the
# rename result was only displayed, so `nc_rejected_party` still carried the
# source column names.
# NOTE(review): the table is selected by position (11) in the parsed page.
nc_rejected_party = df_nc[11].rename(columns={
    'Party': 'party',
    'Rejected Ballots': 'rejected',
    'Freq. Distribution': 'electorate_share',
    'All Returned Ballots': 'ballots',
    'Rejection Rate': 'reject_rate',
})
nc_rejected_party

Unnamed: 0,party,rejected,electorate_share,ballots,reject_rate
0,Democrats,4067,51.5,411439,1.0
1,Republicans,1447,18.3,179782,0.8
2,Minor,36,0.5,4330,0.8
3,No Party Affiliation,2348,29.7,296311,0.8
4,TOTAL,7898,100.0,891862,0.9


In [48]:
# Rejected mail ballots by race/ethnicity. Keep the renamed frame: previously
# the rename result was only displayed, so `nc_rejected_race` still carried
# the source column names.
# NOTE(review): the table is selected by position (12) in the parsed page.
nc_rejected_race = df_nc[12].rename(columns={
    'Race/Ethnicity': 'race/ethnicity',
    'Rejected Ballots': 'rejected',
    'Freq. Distribution': 'electorate_share',
    'All Returned Ballots': 'ballots',
    'Rejection Rate': 'reject_rate',
})
nc_rejected_race

Unnamed: 0,race/ethnicity,rejected,electorate_share,ballots,reject_rate
0,Non-Hispanic White,3468,43.9,611241,0.6
1,Non-Hispanic Black,2572,32.6,136449,1.9
2,Hispanic,422,5.3,20670,2.0
3,Non-Hispanic Asian American,360,4.6,23103,1.6
4,Non-Hispanic Native American,100,1.3,3353,3.0
5,Other/Multiple/Unknown,976,12.4,97031,1.0
6,TOTAL,7898,100.0,891862,0.9


---

## Pennsylvania

#### Read data

In [None]:
# Parse every HTML table on the Pennsylvania page into a list of DataFrames.
pa_page = 'https://electproject.github.io/Early-Vote-2020G/PA.html'
df_pa = pd.read_html(pa_page)

#### Summary table for mail ballots

In [None]:
# Display the first table parsed from the Pennsylvania page.
df_pa[0]

#### Get detailed data for counties

In [59]:
# Fetch the Pennsylvania page with headless Chrome and pull out the JSON
# payload that the following cells turn into the county table.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/PA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Reuse `url` so the page being scraped is defined in exactly one place.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down; otherwise each run of this cell leaves a
    # headless Chrome process behind.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(html_table, 'html.parser')
# The payload used downstream is the fourth `application/json` script tag.
json_data = soup.find_all("script", type='application/json')[3].string

In [60]:
# Decode the scraped payload; each inner list of ['x']['data'] becomes a
# column once the frame is transposed.
pa_payload = json.loads(json_data)
pa_counties = pd.DataFrame(pa_payload['x']['data']).T

In [61]:
# Give the positional columns readable names, then replace missing values
# with the string '0'.
county_columns = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
pa_counties = pa_counties.rename(columns=county_columns)
pa_counties = pa_counties.fillna('0')

In [62]:
# Scale the rejection rate by 100. Convert to numbers first: the column can
# still hold text (for example a '0' fill value), and multiplying a string by
# 100 repeats the text instead of scaling the value.
pa_counties['rejected_pct'] = pd.to_numeric(pa_counties['rejected_pct']) * 100

In [63]:
# Tag every row with the state abbreviation.
pa_counties = pa_counties.assign(state='PA')

In [64]:
# Cast the three measure columns to float.
numeric_columns = ['mail_returned', 'mail_rejected', 'rejected_pct']
pa_counties[numeric_columns] = pa_counties[numeric_columns].astype(float)

In [65]:
# Write the counties to CSV, highest rejection rate first.
pa_ranked = pa_counties.sort_values(by='rejected_pct', ascending=False)
pa_ranked.to_csv('output/rejected/pa_counties.csv', index=False)

---

## Iowa

In [69]:
# Fetch the Iowa page with headless Chrome and pull out the JSON payload that
# the following cells turn into the county table.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/IA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Reuse `url` so the page being scraped is defined in exactly one place.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down; otherwise each run of this cell leaves a
    # headless Chrome process behind.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(html_table, 'html.parser')
# The payload used downstream is the fourth `application/json` script tag.
json_data = soup.find_all("script", type='application/json')[3].string

In [70]:
# Decode the scraped payload; each inner list of ['x']['data'] becomes a
# column once the frame is transposed.
ia_payload = json.loads(json_data)
ia_counties = pd.DataFrame(ia_payload['x']['data']).T

In [71]:
# Give the positional columns readable names, then replace missing values
# with the string '0'.
county_columns = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
ia_counties = ia_counties.rename(columns=county_columns)
ia_counties = ia_counties.fillna('0')

In [72]:
# Scale the rejection rate by 100. Convert to numbers first: the column can
# still hold text (for example a '0' fill value), and multiplying a string by
# 100 repeats the text instead of scaling the value.
ia_counties['rejected_pct'] = pd.to_numeric(ia_counties['rejected_pct']) * 100

In [73]:
# Tag every row with the state abbreviation.
ia_counties = ia_counties.assign(state='IA')

In [74]:
# Cast the three measure columns to float.
numeric_columns = ['mail_returned', 'mail_rejected', 'rejected_pct']
ia_counties[numeric_columns] = ia_counties[numeric_columns].astype(float)

In [75]:
# Write the counties to CSV, highest rejection rate first.
ia_ranked = ia_counties.sort_values(by='rejected_pct', ascending=False)
ia_ranked.to_csv('output/rejected/ia_counties.csv', index=False)

---

## Michigan

In [99]:
# Fetch the Michigan page with headless Chrome and pull out the JSON payload
# that the following cells turn into the county table.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/MI.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Reuse `url` so the page being scraped is defined in exactly one place.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down; otherwise each run of this cell leaves a
    # headless Chrome process behind.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(html_table, 'html.parser')
# The payload used downstream is the fourth `application/json` script tag.
json_data = soup.find_all("script", type='application/json')[3].string

In [100]:
# Decode the scraped payload; each inner list of ['x']['data'] becomes a
# column once the frame is transposed.
mi_payload = json.loads(json_data)
mi_counties = pd.DataFrame(mi_payload['x']['data']).T

In [101]:
# Give the positional columns readable names, then replace missing values
# with the string '0'.
county_columns = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
mi_counties = mi_counties.rename(columns=county_columns)
mi_counties = mi_counties.fillna('0')

In [102]:
# Scale the rejection rate by 100. Convert to numbers first: the column can
# still hold text (for example a '0' fill value), and multiplying a string by
# 100 repeats the text instead of scaling the value.
mi_counties['rejected_pct'] = pd.to_numeric(mi_counties['rejected_pct']) * 100

In [103]:
# Tag every row with the state abbreviation.
mi_counties = mi_counties.assign(state='MI')

In [104]:
# Cast the three measure columns to float.
numeric_columns = ['mail_returned', 'mail_rejected', 'rejected_pct']
mi_counties[numeric_columns] = mi_counties[numeric_columns].astype(float)

In [105]:
# Write the counties to CSV, highest rejection rate first.
mi_ranked = mi_counties.sort_values(by='rejected_pct', ascending=False)
mi_ranked.to_csv('output/rejected/mi_counties.csv', index=False)

---

## Georgia

In [125]:
# Fetch the Georgia page with headless Chrome and pull out the JSON payload
# that the following cells turn into the county table.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/GA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Reuse `url` so the page being scraped is defined in exactly one place.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down; otherwise each run of this cell leaves a
    # headless Chrome process behind.
    driver.quit()

# Name the parser explicitly so the result does not depend on which optional
# parsers happen to be installed.
soup = BeautifulSoup(html_table, 'html.parser')
# The payload used downstream is the sixth `application/json` script tag.
json_data = soup.find_all("script", type='application/json')[5].string

In [126]:
# Decode the scraped payload; each inner list of ['x']['data'] becomes a
# column once the frame is transposed.
ga_payload = json.loads(json_data)
ga_counties = pd.DataFrame(ga_payload['x']['data']).T

In [127]:
# Give the positional columns readable names, then replace missing values
# with the string '0'.
county_columns = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
ga_counties = ga_counties.rename(columns=county_columns)
ga_counties = ga_counties.fillna('0')

In [128]:
# Scale the rejection rate by 100. Convert to numbers first: the column can
# still hold text (for example a '0' fill value), and multiplying a string by
# 100 repeats the text instead of scaling the value.
ga_counties['rejected_pct'] = pd.to_numeric(ga_counties['rejected_pct']) * 100

In [129]:
# Tag every row with the state abbreviation.
ga_counties = ga_counties.assign(state='GA')

In [130]:
# Cast the three measure columns to float.
numeric_columns = ['mail_returned', 'mail_rejected', 'rejected_pct']
ga_counties[numeric_columns] = ga_counties[numeric_columns].astype(float)

In [131]:
# Write the counties to CSV, highest rejection rate first.
ga_ranked = ga_counties.sort_values(by='rejected_pct', ascending=False)
ga_ranked.to_csv('output/rejected/ga_counties.csv', index=False)

---

## Aggregate

In [132]:
# Stack the per-state county tables into one frame, in this fixed order.
state_frames = [ga_counties, mi_counties, ia_counties, pa_counties, nc_counties]
counties = pd.concat(state_frames)

In [140]:
# Write the combined table to CSV, highest rejection rate first.
all_ranked = counties.sort_values(by='rejected_pct', ascending=False)
all_ranked.to_csv('output/rejected/all_counties.csv', index=False)