In [1]:
import geopandas as gp
import pandas as pd
import os
import maup
from warnings import simplefilter
simplefilter(action="ignore", category=pd.errors.PerformanceWarning)
simplefilter(action='ignore', category=FutureWarning)
wd = os.getcwd()

# Sonoma_County_Turnout_10_13_2022

## Background:
- We received a request for "CVAP not yet registered to vote in Sonoma County CA" from a user for "nonpartisan letter writing urging them to register to vote".
- RDH does not have information of individuals who are not registered to vote, but we can provide information regarding how many people are registered at various geographies.
- The user was also interested in turnout statistics from the 2020 general election, as well as other demographic statistics.
- We used data at the block level, as it is the most granular data we can work with, and then aggregated the data to the precinct level, since this is the geography which the user was interested in.

## Approach:
- Use RDH datasests of Disaggregated 2020 CVAP to the Block Level, 2020 Voter Turnout at the 2020 Block Level, L2 Voter File Aggregated to 2020 Census Blocks, and VEST 2020 Precinct Boundaries with Election Results (all for California).
- Query out data for Sonoma County and join together to have block level datasets.
- Aggregate block level data to the precinct level using [maup library](https://github.com/mggg/maup)
- Create unregistered voter estimate counts at the precinct level by subtracting the aggregated total registered voters from the aggregated Citizen Voting Age Population (CVAP). The percentage of unregistered voters estimate is 1 minus the total registered voters divided by the total CVAP population at the precinct level. 
- Please note the unregistered voter estimates are *estimates* based on the method described above.

## Links to datasets used:
- [2021 California L2 Voter File Aggregated to 2020 Census Blocks](https://redistrictingdatahub.org/dataset/2021-california-l2-voter-file-aggregated-to-2020-census-blocks/)
- [2020 CA L2 Voterfile Elections Turnout Statistics Aggregated to Census Blocks](https://redistrictingdatahub.org/dataset/2020-ca-l2-voterfile-elections-turnout-statistics-aggregated-to-census-blocks/)
- [California CVAP Data Disaggregated to the 2020 Block Level (2020)](https://redistrictingdatahub.org/dataset/california-cvap-data-disaggregated-to-the-2020-block-level-2020/)
- [VEST 2020 California precinct and election results](https://redistrictingdatahub.org/dataset/vest-2020-california-precinct-and-election-results/) via Voting and Election Science Team [(VEST)](https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/K7760H)

You can find the final file on the Redistricting Data Hub website here: https://redistrictingdatahub.org/dataset/sonoma-county-california-2020-voter-statistics-and-unregistered-voter-estimates-on-2020-precincts/

*Please note that in order to run this notebook you would need the files listed above downloaded to you working directory.*

Read in CVAP file and query out for Sonoma County (FIPS 097)

In [2]:
cvap = gp.read_file(os.path.join(os.path.join(wd,'ca_cvap_2020_2020_b'),'ca_cvap_2020_2020_b.shp'))
cvap['COUNTYFP20'] = cvap['COUNTYFP20'].apply(lambda x: str(x).zfill(3))
cvap_sc = cvap[cvap['COUNTYFP20']=='097']

Read in Voter File and query out for Sonoma County (FIPS 097)

In [3]:
vf = pd.read_csv(os.path.join(os.path.join(wd,'CA_L2_2020BlockAgg'),'CA_l2_2020block_agg_20210902.csv'))
vf['geoid20'] = vf['geoid20'].apply(lambda x: str(x).zfill(15))
vf['COUNTYFP20']=vf['geoid20'].apply(lambda x: str(x)[2:5])
vf_sc = vf[vf['COUNTYFP20'] == '097'].copy(deep=True)
vf_sc.rename(columns = {'geoid20':'GEOID20'},inplace=True)

Read in Turnout Data and query out for Sonoma County (FIPS 097)

In [4]:
to = pd.read_csv(os.path.join(os.path.join(wd,'CA_l2_turnout_2020blockAgg'),'CA_l2_turnout_stats_block20.csv'))
to['geoid20'] = to['geoid20'].apply(lambda x: str(x).split('.')[0].zfill(15))
to['COUNTYFP20']  =  to['geoid20'].apply(lambda x: x[2:5])
to_sc= to[to['COUNTYFP20']=='097'].copy(deep=True)
to_sc.rename(columns = {'geoid20':'GEOID20'},inplace=True)

Read in 2020 Precinct shapefile for California and query out for Sononma County (FIPS 097)

In [5]:
prec = gp.read_file(os.path.join(os.path.join(wd,'ca_2020_clean'),'ca_2020_clean.shp'))
prec_sc = prec[prec['FIPS_CODE'] == '6097']

Join and clean block level data

In [6]:
vf_sc.drop(columns = 'COUNTYFP20',inplace=True)
to_sc.drop(columns = ['COUNTYFP20','total_reg'],inplace=True)

joined_block = pd.merge(cvap_sc,vf_sc,on='GEOID20',how='outer',indicator=False)
joined_block = pd.merge(joined_block,to_sc,on='GEOID20',how='outer',indicator=False)
joined_block =  joined_block.fillna(0)
joined_block['UNREG_EST_COUNT']= round(joined_block['CVAP_TOT20'] - joined_block['total_reg'],2)
joined_block = joined_block.fillna(0)
display(joined_block[['GEOID20','CVAP_TOT20','total_reg','UNREG_EST_COUNT']])

Unnamed: 0,GEOID20,CVAP_TOT20,total_reg,UNREG_EST_COUNT
0,060971503053007,65.18,47.0,18.18
1,060971539023014,28.68,7.0,21.68
2,060971537103019,35.02,15.0,20.02
3,060971514041008,31.31,18.0,13.31
4,060971541002033,14.26,10.0,4.26
...,...,...,...,...
7558,060971537121015,182.49,86.0,96.49
7559,060971508003002,9.51,15.0,-5.49
7560,060971530054011,0.00,0.0,0.00
7561,060971543041168,0.00,0.0,0.00


Run maup to assign blocks to precincts and aggregate data

In [7]:
variables = list(joined_block.columns)
variables.remove('GEOID20')
variables.remove('COUNTYFP20')
variables.remove('geometry')
for i in variables:
    joined_block[i]= joined_block[i].astype(float)

joined_block = joined_block.to_crs(prec_sc.crs)
display(joined_block.head(1))
assignment = maup.assign(joined_block, prec_sc)

sonoma_precs = prec_sc.copy(deep=True)
sonoma_precs[variables]=joined_block[variables].groupby(assignment).sum()

Unnamed: 0,GEOID20,COUNTYFP20,CVAP_TOT20,CVAP_NHS20,CVAP_AIA20,CVAP_ASN20,CVAP_BLK20,CVAP_NHP20,CVAP_WHT20,CVAP_AIW20,...,pp20200303_voted_esa,pp20200303_reg_esa,pp20200303_pct_voted_esa,pp20200303_voted_oth,pp20200303_reg_oth,pp20200303_pct_voted_oth,pp20200303_voted_unk,pp20200303_reg_unk,pp20200303_pct_voted_unk,UNREG_EST_COUNT
0,60971503053007,97,65.18,38.83,0.0,0.77,19.09,0.0,27.41,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,1.0,18.18


Crete unregistered estimate and clean columns

In [8]:
sonoma_precs['UNREG_EST_PERCENT'] =round((1-(sonoma_precs['total_reg']/sonoma_precs['CVAP_TOT20']))*100,2)
sonoma_precs = sonoma_precs.fillna(0)
prec_sc_order = list(sonoma_precs.columns)
prec_sc_order.remove('UNREG_EST_PERCENT')
prec_sc_order.remove('total_reg')
prec_sc_order.remove('CVAP_TOT20')
prec_sc_order.remove('UNREG_EST_COUNT')
prec_sc_order.remove('g20201103_voted_all')
prec_sc_order.remove('COUNTY')
prec_sc_order.remove('SRPREC_KEY')
prec_sc_order.remove('geometry')
sonoma_precs['SRPREC_KEY'] = sonoma_precs['SRPREC_KEY'].apply(lambda x: str(x).zfill(9))
front_cols = ['COUNTY','SRPREC_KEY','CVAP_TOT20','total_reg','UNREG_EST_COUNT','UNREG_EST_PERCENT','g20201103_voted_all']

long_cols = front_cols + prec_sc_order
prec_long = sonoma_precs[long_cols]
prec_short= sonoma_precs[front_cols]
display(prec_long.head(1))
display(prec_short.head(1))

Unnamed: 0,COUNTY,SRPREC_KEY,CVAP_TOT20,total_reg,UNREG_EST_COUNT,UNREG_EST_PERCENT,g20201103_voted_all,CNTY_CODE,FIPS_CODE,SRPREC,...,pp20200303_pct_voted_aa,pp20200303_voted_esa,pp20200303_reg_esa,pp20200303_pct_voted_esa,pp20200303_voted_oth,pp20200303_reg_oth,pp20200303_pct_voted_oth,pp20200303_voted_unk,pp20200303_reg_unk,pp20200303_pct_voted_unk
18651,Sonoma,60975112,650.48,330.0,320.48,49.27,237.0,49,6097,5112,...,0.0,1.0,2.0,1.0,3.0,3.0,2.0,4.0,11.0,1.6667


Unnamed: 0,COUNTY,SRPREC_KEY,CVAP_TOT20,total_reg,UNREG_EST_COUNT,UNREG_EST_PERCENT,g20201103_voted_all
18651,Sonoma,60975112,650.48,330.0,320.48,49.27,237.0


Validate data

In [9]:
non_matching = []
for col in list(prec_long.columns):
    for b_col in list(joined_block.columns):
        if col ==  b_col:
            prec_sum = round(prec_long[col].sum())
            b_sum = round(joined_block[b_col].sum())
            if prec_sum!=b_sum:
                print('Totals do not match for ', col)
                print('Precinct sum: ', prec_sum)
                print('Block sum: ', b_sum)
                print('')
                non_matching.append(col)
if len(non_matching)!=0:
    print('These columns are mismatched: ',', '.join(non_matching))
else:
    print('Column totals are validated!')

Column totals are validated!


Extract data

In [10]:
prec_short.to_csv('sonoma_county_turnout_reg_short.csv',index=False)
prec_long.to_csv('sonoma_county_turnout_reg_long.csv',index=False)