# Outstanding ballots in CA

In [1]:
import pandas as pd
import geopandas as gpd
from altair_saver import save
import altair as alt
import altair_latimes as lat
# Register the 'latimes' Altair theme and make it the active one.
alt.themes.register('latimes', lat.theme)
alt.themes.enable('latimes')

# Raise the pandas display limits so frames are rendered without truncation.
pd.set_option('display.max_columns', 50)
pd.set_option('display.max_rows', 34000)
pd.set_option('display.max_colwidth', None)

### What day is it? 

In [30]:
# Timestamp of this run (naive local time); used to stamp the exported file names.
now = pd.Timestamp.today()

### Define our metro area

In [2]:
# County names that make up the metro area examined in this notebook.
socal = [
    'Riverside',
    'Los Angeles',
    'Ventura',
    'San Bernardino',
    'Orange',
]

---

### Get latest results from Secretary of State

In [3]:
# Secretary of State page holding the county reporting-status table.
url = "https://electionresults.sos.ca.gov/returns/status"

In [4]:
# Parse every HTML table on the status page into a list of DataFrames.
tables = pd.read_html(io=url, parse_dates=True)

### Clean up results

In [5]:
# Keep the first table on the page, minus the rows labelled 58 and 59.
# NOTE(review): presumably those are non-county summary/footnote rows — confirm against the live page.
results_src = tables[0].drop(index=[58, 59])

In [6]:
# Swap the scraped column headers for short snake_case names.
results = results_src.rename(
    columns={
        'County Name': 'county',
        'TotalPrecincts': 'total_pcts',
        'PrecinctsPartiallyReporting *': 'precincts_partially',
        '% PartiallyReporting **': 'reporting_pct',
        'RegisteredVoters': 'voters',
        'Ballots Cast': 'ballots_cast',
        '% Turnout ***': 'turnout_pct',
        'FirstReportDate-Time': 'first_report',
        'Last Report Date-Time': 'last_report',
        'ReportType****': 'report_type',
    }
)

In [7]:
# Strip the literal percent sign from the reporting percentage strings.
results['reporting_pct'] = results['reporting_pct'].str.replace('%', '', regex=False)

In [8]:
# Strip the literal percent sign from the turnout percentage strings.
results['turnout_pct'] = results['turnout_pct'].str.replace('%', '', regex=False)

In [9]:
# Cast the precinct/voter/ballot counts and turnout_pct to integers.
results = results.astype(
    {
        'total_pcts': int,
        'precincts_partially': int,
        'voters': int,
        'ballots_cast': int,
        'turnout_pct': int,
    }
)

### Results in our counties

In [10]:
# Keep only the rows whose county is in the metro-area list.
socal_results = results.loc[results['county'].isin(socal)]

In [11]:
# Preview the first five metro-area rows.
socal_results.head(n=5)

Unnamed: 0,county,total_pcts,precincts_partially,reporting_pct,voters,ballots_cast,turnout_pct,first_report,last_report,report_type
18,Los Angeles,3383,3383,100.0,5813333,4153568,71,Nov 38:21 p.m.,Nov 94:18 p.m.,U
29,Orange,1795,1795,100.0,1772700,1507977,85,Nov 38:33 p.m.,Nov 95:30 p.m.,U
32,Riverside,864,864,100.0,1241552,622695,50,Nov 38:47 p.m.,Nov 95:44 p.m.,U
35,San Bernardino,2327,2327,100.0,1102687,628806,57,Nov 310:07 p.m.,Nov 94:04 p.m.,U
55,Ventura,695,695,100.0,500442,364308,72,Nov 38:31 p.m.,Nov 54:16 p.m.,U


### Turnout (so far) in SoCal

In [12]:
# Ballots cast as a percentage of registered voters across the metro-area counties, to two decimals.
round(
    socal_results['ballots_cast'].sum() / socal_results['voters'].sum() * 100,
    2,
)

69.77

---

### Download latest outstanding ballots report from Secretary of State

In [13]:
!wget 'https://elections.cdn.sos.ca.gov/statewide-elections/2020-general/unprocessed-ballots-report.pdf' -F -O input/unprocessed-ballots-report.pdf

--2020-11-10 09:13:25--  https://elections.cdn.sos.ca.gov/statewide-elections/2020-general/unprocessed-ballots-report.pdf
Resolving elections.cdn.sos.ca.gov (elections.cdn.sos.ca.gov)... 204.147.113.115
Connecting to elections.cdn.sos.ca.gov (elections.cdn.sos.ca.gov)|204.147.113.115|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 117783 (115K) [application/pdf]
Saving to: ‘input/unprocessed-ballots-report.pdf’


2020-11-10 09:13:26 (1.04 MB/s) - ‘input/unprocessed-ballots-report.pdf’ saved [117783/117783]



### Export text from PDF and read into dataframe

In [14]:
!PDFtoText -layout 'input/unprocessed-ballots-report.pdf'

In [15]:
# Read the text export as fixed-width columns: skip 6 leading and 7 trailing
# lines, infer column edges from the first 60 rows, treat no row as a header,
# then discard every column that contains a missing value.
src = pd.read_fwf(
    'input/unprocessed-ballots-report.txt',
    skiprows=6,
    skipfooter=7,
    infer_nrows=60,
    header=None,
).dropna(axis='columns')

### When was this updated? 

In [31]:
# Re-read the same text export, skipping the first 69 lines and the last line,
# to isolate the report's "updated" line.
# NOTE(review): the offsets are tied to the current report layout — verify if the PDF changes.
updated_date = pd.read_fwf(
    'input/unprocessed-ballots-report.txt',
    skiprows=69,
    skipfooter=1,
    header=None,
    infer_nrows=3,
)

In [32]:
# Discard columns 0, 1 and 4, then label the two remaining columns as date and time.
updated = (
    updated_date
    .drop(columns=[0, 1, 4])
    .rename(columns={2: 'date', 3: 'time'})
)

In [33]:
# Display the date and time parsed from the report.
updated

Unnamed: 0,date,time
0,11/09/20,5:00


---

### Clean up headers, parse columns

In [19]:
# Give the positional fixed-width columns descriptive names.
outstanding = src.rename(
    columns={
        0: 'county',
        1: 'mail_out',
        4: 'provisional_out',
        7: 'cond_prov_out',
        8: 'other_out',
        11: 'total_out',
        13: 'updated',
    }
)

In [20]:
# Remove the literal '**' marker from county names (plain-text match, not a regex).
outstanding['county'] = outstanding['county'].str.replace('**', '', regex=False)

In [21]:
# Count columns that were read in as strings containing thousands separators.
str_cols = [
    'mail_out',
    'provisional_out',
    'cond_prov_out',
    'other_out',
    'total_out',
]
# Drop every comma so the values can be converted to integers.
outstanding[str_cols] = outstanding[str_cols].replace(to_replace=',', value='', regex=True)

In [22]:
# Convert each of the cleaned count columns to integers.
outstanding = outstanding.astype(dict.fromkeys(str_cols, int))

In [23]:
# Parse each county's "updated" text into datetimes.
outstanding['updated'] = pd.to_datetime(outstanding['updated'])

### Outstanding in SoCal

In [29]:
# Show the outstanding-ballot rows for the metro-area counties.
outstanding.loc[outstanding['county'].isin(socal)]

Unnamed: 0,county,mail_out,provisional_out,cond_prov_out,other_out,total_out,updated
18,Los Angeles,512600,2000,87000,9200,610800,2020-11-06 16:28:00
29,Orange,7564,0,23686,1826,33076,2020-11-07 17:02:00
32,Riverside,272000,0,25000,0,297000,2020-11-07 17:08:00
35,San Bernardino,210000,19000,0,600,229600,2020-11-09 16:04:00
55,Ventura,25631,467,2520,4292,32910,2020-11-05 14:46:00


### Export to csv

In [26]:
# Write snapshots of both tables, stamped with the run time (month-day-year_hour:minute).
# NOTE(review): the ':' in the stamp is not a legal file-name character on Windows — confirm target platforms.
outstanding.to_csv(
    f'output/outstanding/counties_outstanding_ballots_{now:%m%d%Y_%H:%M}.csv',
    index=False,
)
results.to_csv(
    f'output/results/results_{now:%m%d%Y_%H:%M}.csv',
    index=False,
)

---

### Aggregate

In [27]:
# Sum the mail_out and total_out columns over every row of `outstanding`
# and reshape the result into a two-column (type, count) frame.
# The aggregation is named by the string 'sum' instead of passing the Python
# builtin: recent pandas emits a FutureWarning for the builtin and will stop
# translating it to the vectorised Series.sum.
totals = (
    outstanding
    .agg({'mail_out': 'sum', 'total_out': 'sum'})
    .reset_index(name='count')
    .rename(columns={'index': 'type'})
)

In [28]:
# Display the summed mail_out and total_out counts.
totals

Unnamed: 0,type,count
0,mail_out,2326430
1,total_out,2740845


---

## Geography tk