# Early voting statistics in battleground states

In [1]:
import tabula 
import json
import os
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
from pandas.io.json import json_normalize
import bs4 as BeautifulSoup
from altair_saver import save
from altair import datum
import altair as alt
import altair_latimes as lat
import re
import urllib
from bs4 import BeautifulSoup
# Register and activate the Los Angeles Times Altair theme for charts in this notebook.
alt.themes.register("latimes", lat.theme)
alt.themes.enable("latimes")

# Raise the pandas display limits so wide/long scraped tables are not truncated.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 34000)
pd.set_option('display.max_colwidth', None)

### North Carolina

In [2]:
# Render the North Carolina page in headless Chrome and pull out one of its embedded
# JSON <script> payloads for the county table parsed in the next cell.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/NC.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Fetch the address held in `url` rather than repeating the literal.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down so repeated runs do not leave Chrome processes behind.
    driver.quit()

soup = BeautifulSoup(html_table)
# The payload is selected by position (6th JSON script tag); this breaks if the page
# adds or removes widgets.
json_data = soup.find_all("script", type='application/json')[5].string

In [3]:
# Parse the selected widget payload. The table is read from x -> data and transposed
# so that each entry of `data` becomes a column.
nc_payload = json.loads(json_data)
if 'data' not in nc_payload['x']:
    # The script tag is chosen by a fixed position in the scrape cell; when that
    # position points at a different widget, report what was actually found.
    raise KeyError(
        "'data' is missing from the selected NC script payload; keys found under 'x': "
        + ", ".join(sorted(map(str, nc_payload['x'])))
    )
nc_counties = pd.DataFrame(nc_payload['x']['data']).transpose()

KeyError: 'data'

In [None]:
# Give the positional columns readable names and fill missing cells with the string '0'.
nc_column_names = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
nc_counties = nc_counties.rename(columns=nc_column_names).fillna('0')

In [None]:
# Cast to float before scaling: missing cells were filled with the string '0', and
# multiplying a string by 100 repeats it instead of doing arithmetic.
nc_counties['rejected_pct'] = nc_counties['rejected_pct'].astype(float) * 100

In [None]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
nc_counties = nc_counties.assign(state='North Carolina', fips='37')

In [None]:
# Store the three measure columns as floats.
nc_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
nc_counties[nc_numeric_cols] = nc_counties[nc_numeric_cols].astype(float)

In [None]:
# Write the counties to CSV, highest rejection percentage first.
nc_counties_ranked = nc_counties.sort_values('rejected_pct', ascending=False)
nc_counties_ranked.to_csv('output/rejected/nc_counties.csv', index=False)

In [None]:
# Parse every HTML <table> on the page into a list of DataFrames.
# Relies on `url` still holding the North Carolina address set in the scrape cell.
df_nc = pd.read_html(io=url)

#### Summary tables for mail ballots, by race and party

In [None]:
# Table 11 is selected by position. Keep the renamed frame: the original discarded the
# result of rename(), so the variable still carried the raw column names.
nc_rejected_party = df_nc[11].rename(columns={
    'Party': 'party',
    'Rejected Ballots': 'rejected',
    'Freq. Distribution': 'electorate_share',
    'All Returned Ballots': 'ballots',
    'Rejection Rate': 'reject_rate',
})
nc_rejected_party

In [None]:
# Table 12 is selected by position. Keep the renamed frame: the original discarded the
# result of rename(), so the variable still carried the raw column names.
nc_rejected_race = df_nc[12].rename(columns={
    'Race/Ethnicity': 'race/ethnicity',
    'Rejected Ballots': 'rejected',
    'Freq. Distribution': 'electorate_share',
    'All Returned Ballots': 'ballots',
    'Rejection Rate': 'reject_rate',
})
nc_rejected_race

---

### Pennsylvania

#### Read data

In [None]:
# Parse every HTML <table> on the Pennsylvania page into a list of DataFrames.
df_pa = pd.read_html(io='https://electproject.github.io/Early-Vote-2020G/PA.html')

#### Summary table for all mail ballots

In [None]:
# Display the first table parsed from the Pennsylvania page.
df_pa[0]

#### Get detailed data for counties

In [None]:
# Render the Pennsylvania page in headless Chrome and pull out one of its embedded
# JSON <script> payloads for the county table parsed in the next cell.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/PA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Fetch the address held in `url` rather than repeating the literal.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down so repeated runs do not leave Chrome processes behind.
    driver.quit()

soup = BeautifulSoup(html_table)
# The payload is selected by position (4th JSON script tag); this breaks if the page
# adds or removes widgets.
json_data = soup.find_all("script", type='application/json')[3].string

In [None]:
# Read the table stored under x -> data and transpose it so each entry becomes a column.
pa_payload = json.loads(json_data)
pa_counties = pd.DataFrame(pa_payload['x']['data']).T

In [None]:
# Give the positional columns readable names and fill missing cells with the string '0'.
pa_column_names = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
pa_counties = pa_counties.rename(columns=pa_column_names).fillna('0')

In [None]:
# Cast to float before scaling: missing cells were filled with the string '0', and
# multiplying a string by 100 repeats it instead of doing arithmetic.
pa_counties['rejected_pct'] = pa_counties['rejected_pct'].astype(float) * 100

In [None]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
pa_counties = pa_counties.assign(state='Pennsylvania', fips='42')

In [None]:
# Store the three measure columns as floats.
pa_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
pa_counties[pa_numeric_cols] = pa_counties[pa_numeric_cols].astype(float)

In [None]:
# Write the counties to CSV, highest rejection percentage first.
pa_counties_ranked = pa_counties.sort_values('rejected_pct', ascending=False)
pa_counties_ranked.to_csv('output/rejected/pa_counties.csv', index=False)

---

### Iowa

In [None]:
# Render the Iowa page in headless Chrome and pull out one of its embedded JSON
# <script> payloads for the county table parsed further down.
from selenium import webdriver

ia_url = 'https://electproject.github.io/Early-Vote-2020G/IA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    driver.get(ia_url)
    html_table = driver.page_source
finally:
    # Always shut the browser down so repeated runs do not leave Chrome processes behind.
    driver.quit()

soup = BeautifulSoup(html_table)
# The payload is selected by position (4th JSON script tag); this breaks if the page
# adds or removes widgets.
json_data = soup.find_all("script", type='application/json')[3].string

#### Summary table for all mail ballots

In [None]:
# Parse every HTML <table> on the Iowa page into a list of DataFrames.
df_ia = pd.read_html(io=ia_url)

In [None]:
# Display the first table parsed from the Iowa page.
df_ia[0]

#### Get counties

In [None]:
# Read the table stored under x -> data and transpose it so each entry becomes a column.
ia_payload = json.loads(json_data)
ia_counties = pd.DataFrame(ia_payload['x']['data']).T

In [None]:
# Give the positional columns readable names and fill missing cells with the string '0'.
ia_column_names = {0: 'county', 1: 'mail_returned', 2: 'mail_rejected', 3: 'rejected_pct'}
ia_counties = ia_counties.rename(columns=ia_column_names).fillna('0')

In [None]:
# Cast to float before scaling: missing cells were filled with the string '0', and
# multiplying a string by 100 repeats it instead of doing arithmetic.
ia_counties['rejected_pct'] = ia_counties['rejected_pct'].astype(float) * 100

In [None]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
ia_counties = ia_counties.assign(state='Iowa', fips='19')

In [None]:
# Store the three measure columns as floats.
ia_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
ia_counties[ia_numeric_cols] = ia_counties[ia_numeric_cols].astype(float)

In [None]:
# Write the counties to CSV, highest rejection percentage first.
ia_counties_ranked = ia_counties.sort_values('rejected_pct', ascending=False)
ia_counties_ranked.to_csv('output/rejected/ia_counties.csv', index=False)

In [None]:
# Show the five Iowa counties with the highest rejection percentage.
ia_counties.sort_values(by='rejected_pct', ascending=False).head(5)

---

### Michigan

In [None]:
# Render the Michigan page in headless Chrome and pull out one of its embedded JSON
# <script> payloads for the county table parsed in the next cell.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/MI.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Fetch the address held in `url` rather than repeating the literal.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down so repeated runs do not leave Chrome processes behind.
    driver.quit()

soup = BeautifulSoup(html_table)
# The payload is selected by position (4th JSON script tag); this breaks if the page
# adds or removes widgets.
json_data = soup.find_all("script", type='application/json')[3].string

In [None]:
# Read the table stored under x -> data and transpose it so each entry becomes a column.
mi_payload = json.loads(json_data)
mi_counties = pd.DataFrame(mi_payload['x']['data']).T

In [None]:
# Give the positional columns readable names and fill missing cells with the string '0'.
# NOTE(review): columns 1 and 2 are mapped rejected-then-returned here, the reverse of
# the NC/PA/IA cells — presumably this page orders its table differently; verify.
mi_column_names = {0: 'county', 1: 'mail_rejected', 2: 'mail_returned', 3: 'rejected_pct'}
mi_counties = mi_counties.rename(columns=mi_column_names).fillna('0')

In [None]:
# Cast to float before scaling: missing cells were filled with the string '0', and
# multiplying a string by 100 repeats it instead of doing arithmetic.
mi_counties['rejected_pct'] = mi_counties['rejected_pct'].astype(float) * 100

In [None]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
mi_counties = mi_counties.assign(state='Michigan', fips='26')

In [None]:
# Store the three measure columns as floats.
mi_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
mi_counties[mi_numeric_cols] = mi_counties[mi_numeric_cols].astype(float)

In [None]:
# Write the counties to CSV, highest rejection percentage first.
mi_counties_ranked = mi_counties.sort_values('rejected_pct', ascending=False)
mi_counties_ranked.to_csv('output/rejected/mi_counties.csv', index=False)

In [None]:
# Show the five Michigan counties with the highest rejection percentage.
mi_counties.sort_values(by='rejected_pct', ascending=False).head(5)

#### Michigan manual

In [None]:
# Manually entered Michigan figure; later used as a floor for the state's summed
# mail_rejected total. NOTE(review): the source of this number is not recorded here.
mi_manual = 1856

---

### Georgia

In [4]:
# Render the Georgia page in headless Chrome and pull out one of its embedded JSON
# <script> payloads for the county table parsed further down.
from selenium import webdriver

url = 'https://electproject.github.io/Early-Vote-2020G/GA.html'

options = webdriver.ChromeOptions()
options.add_argument('headless')
# NOTE(review): the chromedriver path is specific to one machine.
driver = webdriver.Chrome('/Users/mhustiles/Desktop/chromedriver', options=options)
try:
    # Fetch the address held in `url` rather than repeating the literal.
    driver.get(url)
    html_table = driver.page_source
finally:
    # Always shut the browser down so repeated runs do not leave Chrome processes behind.
    driver.quit()

soup = BeautifulSoup(html_table)
# The payload is selected by position (6th JSON script tag); this breaks if the page
# adds or removes widgets.
json_data = soup.find_all("script", type='application/json')[5].string

In [5]:
# Parse every HTML <table> on the Georgia page into a list of DataFrames.
ga_tables = pd.read_html(io='https://electproject.github.io/Early-Vote-2020G/GA.html')

In [6]:
# Table 10 is the rejected-ballot breakdown by race/ethnicity shown below. It is picked
# by position, so the index must be revisited if the page adds or removes tables.
ga_rejection_race = ga_tables[10]

In [7]:
# Display the race/ethnicity rejection table.
ga_rejection_race

Unnamed: 0,Race/Ethnicity,Rejected Ballots,Freq. Distribution,Returned Ballots (All),Rejection Rate
0,Non-Hispanic White,649,33.1,699219,0.1
1,Non-Hispanic Black,908,46.3,400572,0.2
2,Hispanic,71,3.6,34747,0.2
3,Non-Hispanic Asian American,90,4.6,49375,0.2
4,Non-Hispanic Native American,3,0.2,1940,0.2
5,Other/Multiple/Unknown,239,12.2,134499,0.2
6,TOTAL,1960,100.0,1320352,0.1


In [8]:
# Recompute the rejection rate in percent from the raw counts; the page's own
# "Rejection Rate" column is rounded to one decimal place.
ga_rejected_ballots = ga_rejection_race['Rejected Ballots']
ga_returned_ballots = ga_rejection_race['Returned Ballots (All)']
ga_rejection_race['clean_rate'] = ga_rejected_ballots.div(ga_returned_ballots).mul(100)

In [9]:
# Read the table stored under x -> data and transpose it so each entry becomes a column.
ga_payload = json.loads(json_data)
ga_counties = pd.DataFrame(ga_payload['x']['data']).T

In [10]:
# Give the positional columns readable names and fill missing cells with the string '0'.
# Columns 1 and 2 are mapped rejected-then-returned for this page.
ga_column_names = {0: 'county', 1: 'mail_rejected', 2: 'mail_returned', 3: 'rejected_pct'}
ga_counties = ga_counties.rename(columns=ga_column_names).fillna('0')

In [11]:
# Cast to float before scaling: missing cells were filled with the string '0', and
# multiplying a string by 100 repeats it instead of doing arithmetic.
ga_counties['rejected_pct'] = ga_counties['rejected_pct'].astype(float) * 100

In [12]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
ga_counties = ga_counties.assign(state='Georgia', fips='13')

In [13]:
# Store the three measure columns as floats.
ga_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
ga_counties[ga_numeric_cols] = ga_counties[ga_numeric_cols].astype(float)

In [14]:
# Put the columns in returned-then-rejected order, matching the other states' frames.
ga_column_order = ['county', 'mail_returned', 'mail_rejected', 'rejected_pct', 'state', 'fips']
ga_counties = ga_counties.loc[:, ga_column_order]

In [15]:
# Write the counties to CSV, highest rejection percentage first.
ga_counties_ranked = ga_counties.sort_values('rejected_pct', ascending=False)
ga_counties_ranked.to_csv('output/rejected/ga_counties.csv', index=False)

In [16]:
# Show the five Georgia counties with the highest rejection percentage.
ga_counties.sort_values(by='rejected_pct', ascending=False).head(5)

Unnamed: 0,county,mail_returned,mail_rejected,rejected_pct,state,fips
29,CLAY,414.0,9.0,2.173913,Georgia,13
23,CHARLTON,703.0,8.0,1.13798,Georgia,13
127,STEWART,488.0,5.0,1.02459,Georgia,13
87,LEE,2509.0,23.0,0.9167,Georgia,13
139,TREUTLEN,586.0,5.0,0.853242,Georgia,13


---

### Nevada

In [None]:
### Download https://www.nvsos.gov/sos/home/showdocument?id=9060

In [None]:
# !curl -o nevada.pdf 'https://www.nvsos.gov/sos/home/showdocument?id=9060'

In [None]:
# Load the header-less CSV of the Nevada signature report and name its six columns.
nv_cure_columns = ['county', 'mail_rejected', 'rejected_pct', 'dem', 'rep', 'other']
nv_need_cure = pd.read_csv(
    'input/tabula-2020GeneralElectionSignatu.csv',
    header=None,
    names=nv_cure_columns,
)

In [None]:
# Strip formatting so the columns can be cast to float later on.
# Remove every comma and space (the original only removed the sequence ' ,', which left
# a plain thousands separator such as '1,234' in place), then turn a lone '-' into '0'.
nv_need_cure['mail_rejected'] = (
    nv_need_cure['mail_rejected']
    .str.replace(',', '', regex=False)
    .str.replace(' ', '', regex=False)
    .replace('-', '0')
)
# Drop the percent sign. A lone '-' is also mapped to '0', as for the count columns —
# presumably rows with no rejected ballots carry '-' here too; verify against the CSV.
nv_need_cure['rejected_pct'] = (
    nv_need_cure['rejected_pct']
    .str.replace('%', '', regex=False)
    .replace('-', '0')
)

In [None]:
# In the party columns, replace a cell that is exactly '-' with '0'.
nv_party_cols = ['dem', 'rep', 'other']
nv_need_cure[nv_party_cols] = nv_need_cure[nv_party_cols].replace('-', '0')

In [None]:
# Tag every row with the state name and its two-digit FIPS code (kept as a string).
nv_need_cure = nv_need_cure.assign(state='Nevada', fips='32')

In [None]:
# Load the header-less CSV of Nevada mail-ballot totals; columns keep integer labels.
nv_mail_totals = pd.read_csv(
    'input/tabula-2020GeneralElectionMailBal.csv',
    header=None,
)

In [None]:
# Keep only the county name (column 0) and column 11, used as the returned-ballot count.
nv_total_names = {0: 'county', 11: 'mail_returned'}
nv_mail_totals = nv_mail_totals[list(nv_total_names)].rename(columns=nv_total_names)

In [None]:
# Strip spaces, percent signs and thousands separators from the count column.
# regex=False makes every pattern a literal regardless of the pandas version.
nv_mail_totals['mail_returned'] = (
    nv_mail_totals['mail_returned']
    .str.replace(' ', '', regex=False)
    .str.replace('%', '', regex=False)
    .str.replace(',', '', regex=False)
)
# '*' is a regex metacharacter, so it must be matched literally; the original passed
# regex=None, whose meaning depends on the pandas version.
nv_mail_totals['county'] = nv_mail_totals['county'].str.replace('*', '', regex=False)

In [None]:
# Join returned-ballot totals to the signature report on county name (inner join).
nv_counties_parties = nv_mail_totals.merge(nv_need_cure, on='county')

In [None]:
# Drop the party breakdown so the frame has the same columns as the other states.
nv_counties = nv_counties_parties.drop(columns=['dem', 'rep', 'other'])

In [None]:
# Store the three measure columns as floats.
nv_numeric_cols = ['mail_returned', 'mail_rejected', 'rejected_pct']
nv_counties[nv_numeric_cols] = nv_counties[nv_numeric_cols].astype(float)

In [None]:
# Show the five Nevada counties with the highest rejection percentage.
nv_counties.sort_values(by='rejected_pct', ascending=False).head(5)

---

### Florida

#### Broward County

In [None]:
# Broward County elections site. Only the commented-out cells below read this value,
# but it does overwrite the `url` set by the earlier state sections.
url = 'https://www.browardsoe.org/'

In [None]:
# flraw = pd.read_html(url)

In [None]:
# fl_broward = pd.DataFrame(flraw[1]).drop([0, 5], axis=0)

In [None]:
# fl_broward = fl_broward.rename(columns={0:'cause', 1: 'count'})

In [None]:
# fl_broward['count'] = fl_broward['count'].astype(int)

#### How many rejected in Broward County? 

In [None]:
# fl_broward['count'].sum()

#### How many rejected in Miami-Dade? 

In [None]:
# Manually entered count for Miami-Dade. NOTE(review): source and date not recorded here.
fl_miami = 2583

In [None]:
# Manually entered count for the second Florida county, named "tampa" in the variable.
# NOTE(review): source and date not recorded here.
fl_tampa = 463

In [None]:
# Combined manual count for the two Florida counties.
florida_sum = sum((fl_miami, fl_tampa))

#### How many total from our two counties in Florida? 

In [None]:
# Display the combined two-county count.
florida_sum

In [None]:
# Parse every HTML <table> on the Florida page into a list of DataFrames.
fl_tables = pd.read_html(io='https://electproject.github.io/Early-Vote-2020G/FL.html')

In [None]:
# Third table on the page, picked by position.
# NOTE(review): presumably the returned-mail-ballot table — confirm the index still matches.
fl_mail = fl_tables[2]

In [None]:
# Single cell at row 4, column 1; it is used below as Florida's returned-mail total.
# NOTE(review): position is hard-coded — verify it is the total row.
fl_mail_sum = fl_mail.iat[4, 1]

In [None]:
# Empty frame with the same summary columns the state-level table uses.
florida_columns = ['state', 'mail_returned', 'mail_rejected', 'rejected_pct']
florida_state = pd.DataFrame(columns=florida_columns)

In [None]:
# Build the single Florida row directly; DataFrame.append was removed in pandas 2.0.
# The column layout comes from the empty frame above. Columns not supplied here stay
# NaN rather than '' so they remain numeric until the next cells fill them in.
florida_state = pd.DataFrame(
    [{'state': 'Florida*', 'mail_rejected': florida_sum}],
    columns=florida_state.columns,
)

In [None]:
# Fill in the returned-ballot figure taken from the scraped table.
florida_state = florida_state.assign(mail_returned=fl_mail_sum)

In [None]:
# Rejected share of returned ballots, in percent, to two decimals.
# Coerce both operands to numeric first: the frame was assembled from mixed values, and
# rounding an object-dtype result is not reliable.
fl_rejected = pd.to_numeric(florida_state['mail_rejected'])
fl_returned = pd.to_numeric(florida_state['mail_returned'])
florida_state['rejected_pct'] = (fl_rejected / fl_returned * 100).round(2)

In [None]:
# Display the Florida summary row.
florida_state.head()

---

## Aggregates

In [None]:
# Stack the per-state county frames into one table (original row indexes are kept).
county_frames = [ga_counties, mi_counties, ia_counties, pa_counties, nc_counties, nv_counties]
counties = pd.concat(county_frames)

In [None]:
# Write all counties to CSV, highest rejection percentage first.
counties_ranked = counties.sort_values('rejected_pct', ascending=False)
counties_ranked.to_csv('output/rejected/all_counties.csv', index=False)

#### Manual labor *sigh* needed for future join

In [None]:
# Upper-case county names so they match the upper-cased FIPS lookup used in the join.
counties = counties.assign(county=counties['county'].str.upper())

In [None]:
# Remove the text ' COUNTY' from county names.
county_names = counties['county'].str.replace(' COUNTY', '')
counties = counties.assign(county=county_names)

In [None]:
# Sum returned and rejected mail ballots per state.
state_totals = counties.groupby('state')[['mail_returned', 'mail_rejected']].sum()
states = state_totals.reset_index()

In [None]:
# Use the manual Michigan figure when the summed county total falls below it.
mi_below_manual = (states['state'] == 'Michigan') & (states['mail_rejected'] < mi_manual)
states.loc[mi_below_manual, 'mail_rejected'] = mi_manual

In [None]:
# Add the Florida summary row. DataFrame.append was removed in pandas 2.0, so use
# pd.concat, which gives the same stacked result with a fresh 0..n-1 index.
states = pd.concat([states, florida_state], ignore_index=True)

In [None]:
# Rejected share of returned ballots per state, in percent, to two decimals.
# Coerce to numeric first: stacking the Florida row can leave these columns as object
# dtype, and rounding an object-dtype result is not reliable.
states_rejected = pd.to_numeric(states['mail_rejected'])
states_returned = pd.to_numeric(states['mail_returned'])
states['rejected_pct'] = (states_rejected / states_returned * 100).round(2)

#### How many mail ballots were rejected across these states?

In [None]:
# Total rejected mail ballots across every row of the state table.
states['mail_rejected'].sum()

In [None]:
# Save the state-level summary.
states_csv_path = 'output/states_mail_rejected.csv'
states.to_csv(states_csv_path, index=False)

In [None]:
# Render the summary with thousands separators and a percent sign on the rate.
states_display_formats = {
    'mail_returned': "{:,.0f}",
    'mail_rejected': "{:,.0f}",
    'rejected_pct': '{:.2f}%',
}
states.style.format(states_display_formats)

In [None]:
# Display up to the first ten rows of the state summary.
states.head(10)

#### Add FIPS codes

In [None]:
# Load the county FIPS lookup, keeping `geoid` as text, and drop the stray index column.
# NOTE(review): the path is absolute and specific to one machine.
fips_counties = pd.read_csv(
    '/Users/mhustiles/data/data/fips_counties.csv',
    dtype={'geoid': str},
).drop(columns=['Unnamed: 0'])

In [None]:
# Upper-case the lookup's county names to match the voting table.
fips_counties = fips_counties.assign(county=fips_counties['county'].str.upper())

#### Merge after the manual labor under the hood

In [None]:
# Attach FIPS data by county and state. This is an inner join, so counties whose names
# do not match the lookup are dropped.
fips_merge = counties.merge(fips_counties, on=['county', 'state'])

In [None]:
# Preview the merged county/FIPS table.
fips_merge.head()

In [None]:
# Write the merged table to CSV, highest rejection percentage first.
fips_merge_ranked = fips_merge.sort_values('rejected_pct', ascending=False)
fips_merge_ranked.to_csv('output/rejected/fips_merge_all_counties.csv', index=False)

---

## Geography

In [None]:
# Distinct state FIPS codes present in the county table.
our_fips = counties['fips'].unique().tolist()

In [None]:
# Load county geometries from a local file.
# NOTE(review): the path is absolute and specific to one machine.
counties_geo = gpd.read_file('/Users/mhustiles/data/data/GIS/counties_lower48_small.json')

In [None]:
# Keep only geometries whose state FIPS appears in the voting data.
in_our_states = counties_geo['statefp'].isin(our_fips)
our_counties_geo = counties_geo[in_our_states]

In [None]:
# Check column dtypes before joining on `geoid`.
fips_merge.dtypes

#### Merge geo with our voting counties

In [None]:
# Join the voting figures onto the geometries by `geoid` (merge's default inner join, so
# only counties present on both sides are kept).
lat_counties_geo = our_counties_geo.merge(fips_merge, on='geoid')

In [None]:
# Flag counties whose rejection percentage exceeds the cutoff.
# NOTE(review): 1.42 is presumably the 2018 rejection rate in percent, judging by the
# column name — confirm and cite the source.
rate_cutoff = 1.42
lat_counties_geo['above_2018_rate'] = lat_counties_geo['rejected_pct'].gt(rate_cutoff)

### Plot our counties

In [None]:
# Save the joined geometries and voting figures as one GeoJSON file.
lat_counties_geo.to_file('output/gis/lat_counties_geo.geojson', driver='GeoJSON')

In [None]:
# Draw one quick map per state.
for state_fips in lat_counties_geo['statefp'].unique():
    state_counties = lat_counties_geo[lat_counties_geo['statefp'] == state_fips]
    state_counties.plot()

In [None]:
# Write one GeoJSON file per state, named after the state's FIPS code.
for state_fips in lat_counties_geo['statefp'].unique():
    state_counties = lat_counties_geo[lat_counties_geo['statefp'] == state_fips]
    state_counties.to_file('output/gis/' + state_fips + '.geojson', driver='GeoJSON')

---

In [18]:
# Load the Georgia statewide voter-level ballot file.
# NOTE(review): the path is absolute and specific to one machine.
ga_statewide = pd.read_csv(
    '/Users/mhustiles/Desktop/35209/STATEWIDE.csv',
    low_memory=False,
    encoding='Latin-1',
)

In [19]:
# Preview the first rows.
# NOTE(review): the saved output shows individual voters' names and addresses — clear
# or redact this cell's output before sharing or committing the notebook.
ga_statewide.head()

Unnamed: 0,County,Voter Registration #,Last Name,First Name,Middle Name,Suffix,Street #,Street Name,Apt/Unit,City,State,Zip Code,Mailing Street #,Mailing Street Name,Mailing Apt/Unit,Mailing City,Mailing State,Mailing Zip Code,Application Status,Ballot Status,Status Reason,Application Date,Ballot Issued Date,Ballot Return Date,Ballot Style,Ballot Assisted,Challenged/Provisional,ID Required,Municipal Precinct,County Precinct,CNG,SEN,HOUSE\t,JUD,Combo #,Vote Center ID,Ballot ID,Post #,Party
0,APPLING,4670838,HUGHES,MABLE,J,,623,RED DOT RD,APT 1,BAXLEY,GA,31513,623.0,RED DOT RD,APT 1,BAXLEY,GA,31513,A,A,,04/09/2020,09/18/2020,10/15/2020,MAILED,NO,NO,NO,,2,12,19,156,BRUN,104,,87.0,688.0,
1,APPLING,651532,HARPER,RANZE,N,,1602,DUPE HERNDON RD,,SURRENCY,GA,31563-2912,1602.0,DUPE HERNDON RD,,SURRENCY,GA,31563-2912,A,C,Mailed Ballot Surrendered to Vote In-Person,04/09/2020,09/18/2020,10/13/2020,MAILED,NO,NO,NO,,4B,12,19,178,BRUN,116,,32.0,547.0,
2,APPLING,650195,NEWKIRK,CLISTEN,,,133,POOR ROBIN RD,,BAXLEY,GA,31513-5100,,PO BOX 519,,BAXLEY,GA,31515-0051,A,C,Voter Turned in Ballot at The Polls,10/13/2020,10/13/2020,10/13/2020,IN PERSON,NO,NO,NO,,2,12,19,156,BRUN,104,1021.0,571.0,2591.0,
3,APPLING,4502220,ROBERTS,JENNIFER,GRACE,,670,GRAHAM ZOAR RD,,BAXLEY,GA,31513,670.0,GRAHAM ZOAR RD,,BAXLEY,GA,31513,A,A,,10/30/2020,10/30/2020,10/30/2020,IN PERSON,NO,NO,NO,GRAHA,5B,12,19,156,BRUN,121,1021.0,4588.0,6788.0,
4,APPLING,8131824,NAILS,VERNON,DEON,,3452,PINEY BLUFF RD,,BAXLEY,GA,31513,3452.0,PINEY BLUFF RD,,BAXLEY,GA,31513,A,A,,10/29/2020,10/29/2020,10/29/2020,IN PERSON,NO,NO,NO,,1B,12,19,156,BRUN,102,1021.0,4181.0,6373.0,


In [28]:
# Keep only the rows whose "Ballot Status" is 'C'.
has_status_c = ga_statewide['Ballot Status'] == 'C'
ga_status_c = ga_statewide[has_status_c]

In [29]:
# Count how often each "Status Reason" appears among status-'C' ballots. The reasons
# are free text, so near-duplicates (e.g. "VOTED IN PERSON" / "VOTE IN PERSON") are
# counted separately.
ga_status_c['Status Reason'].value_counts()

Mailed Ballot Surrendered to Vote In-Person                 65907
Ballot was Undelivered                                      48877
Voter Requested                                             48026
Voter Turned in Ballot at The Polls                         25835
VOTED IN PERSON                                             23140
VIP                                                          9135
Administrative Cancellation                                  7513
VOTE IN PERSON                                               5713
VIO                                                          5396
IN PERSON EARLY VOTING                                       3968
VOTED AT POLLS                                               3836
VOTING IN PERSON                                             3657
VOTED AIP                                                    2721
EARLY VOTING                                                 2507
IN PERSON                                                    2458
VOTED ON B