In [1]:
import geopandas as gpd
import pandas as pd
import requests

In [2]:
# Root of the Clarity results site used below; the trailing '%s' is filled in
# with the path of the resource being requested.
CLARITY_BASE_URL = 'https://results.enr.clarityelections.com/PA/Cambria/116141/%s'

# Headers sent with every Clarity request (a desktop-browser User-Agent).
CLARITY_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36',
}

# Lookup table for the district's municipalities: the bare name parsed out of
# precinct/VTD labels ('muni_name') next to the name suffixed with its class
# ('formatted_muni_name'), which is what the map layer is filtered on.
HD72_MUNICIPALITIES = pd.DataFrame(
    [
        # City
        ('JOHNSTOWN', 'JOHNSTOWN CITY'),
        # Townships
        ('BLACKLICK', 'BLACKLICK TWP'),
        ('CAMBRIA', 'CAMBRIA TWP'),
        ('CONEMAUGH', 'CONEMAUGH TWP'),
        ('CROYLE', 'CROYLE TWP'),
        ('EAST TAYLOR', 'EAST TAYLOR TWP'),
        ('JACKSON', 'JACKSON TWP'),
        ('LOWER YODER', 'LOWER YODER TWP'),
        ('MIDDLE TAYLOR', 'MIDDLE TAYLOR TWP'),
        ('UPPER YODER', 'UPPER YODER TWP'),
        ('WEST TAYLOR', 'WEST TAYLOR TWP'),
        # Boroughs
        ('BROWNSTOWN', 'BROWNSTOWN BORO'),
        ('DAISYTOWN', 'DAISYTOWN BORO'),
        ('DALE', 'DALE BORO'),
        ('EAST CONEMAUGH', 'EAST CONEMAUGH BORO'),
        ('EBENSBURG', 'EBENSBURG BORO'),
        ('EHRENFELD', 'EHRENFELD BORO'),
        ('FRANKLIN', 'FRANKLIN BORO'),
        ('LORAIN', 'LORAIN BORO'),
        ('NANTY GLO', 'NANTY GLO BORO'),
        ('SOUTHMONT', 'SOUTHMONT BORO'),
        ('SUMMERHILL', 'SUMMERHILL BORO'),
        ('VINTONDALE', 'VINTONDALE BORO'),
        ('WESTMONT', 'WESTMONT BORO'),
    ],
    columns=['muni_name', 'formatted_muni_name'],
)

# Generate GeoJSON map

### Standardize township class, limit municipalities to those in the district

In [3]:
# Load the statewide municipality layer.
munis = gpd.read_file('./input/municipalities-pa.geojson')

# Fold the '1TWP' and '2TWP' classes into a single 'TWP' label so the
# composed name lines up with HD72_MUNICIPALITIES.formatted_muni_name.
is_township = munis['CLASS_OF_M'].isin(['1TWP', '2TWP'])
munis.loc[is_township, 'CLASS_OF_M'] = 'TWP'
munis['muni_name'] = munis['MUNICIPAL1'] + ' ' + munis['CLASS_OF_M']

# Keep only rows that carry county code '021' AND whose composed name is one
# of the district's municipalities, then write the subset out for the map.
in_county = munis['FIPS_COUNT'] == '021'
in_district = munis['muni_name'].isin(HD72_MUNICIPALITIES['formatted_muni_name'])
munis = munis[in_county & in_district]
munis.to_file('./output/municipalities-hd72.geojson', driver='GeoJSON')

# Generate election results by municipality

### Process 2020 presidential election, via DRA

In [4]:
# Load the raw DRA export (only the columns used below) and keep district 72
results_2020 = pd.read_csv(
    './input/results_2020_DRA.csv',
    usecols=['GEOID20', 'District', 'Total_2020_Pres', 'Dem_2020_Pres', 'Rep_2020_Pres'],
)
results_2020 = results_2020[results_2020['District'] == 72]

# The DRA rows carry no municipality, so pull each VTD's name in by GEOID20
vtd = gpd.read_file('./input/cambria_vtd/cambria_vtd.shp')
results_2020 = results_2020.merge(vtd[['GEOID20', 'NAME20']], on='GEOID20')

# Strip the ' DISTRICT ...' / ' WARD ...' tails so only the municipality remains
results_2020['muni_name'] = (
    results_2020['NAME20']
    .str.replace(r' DISTRICT.*', '', regex=True)
    .str.replace(r' WARD.*', '', regex=True)
)

# Add the vote counts up per municipality
results_2020 = results_2020.pivot_table(
    index='muni_name',
    values=['Total_2020_Pres', 'Dem_2020_Pres', 'Rep_2020_Pres'],
    aggfunc='sum',
).reset_index()

# Attach the formatted municipality name; this is an inner join, so any
# municipality missing from HD72_MUNICIPALITIES is dropped here
results_2020 = results_2020.merge(
    HD72_MUNICIPALITIES[['formatted_muni_name', 'muni_name']],
    on='muni_name',
)

# Shares of the total vote, rounded to one decimal; 'other' is whatever is
# left of the total after the Democratic and Republican counts
total_2020 = results_2020['Total_2020_Pres']
results_2020['DEM_pct'] = (100 * (results_2020['Dem_2020_Pres'] / total_2020)).round(1)
results_2020['REP_pct'] = (100 * (results_2020['Rep_2020_Pres'] / total_2020)).round(1)
results_2020['OTH_pct'] = (
    100 * ((total_2020 - results_2020['Rep_2020_Pres'] - results_2020['Dem_2020_Pres']) / total_2020)
).round(1)

# Keep the display columns, exposing the formatted name as 'muni_name'
results_2020 = results_2020[['formatted_muni_name', 'DEM_pct', 'REP_pct', 'OTH_pct']].rename(
    columns={'formatted_muni_name': 'muni_name'}
)
results_2020.head(1)

Unnamed: 0,muni_name,DEM_pct,REP_pct,OTH_pct
0,BLACKLICK TWP,24.6,74.7,0.8


### Process 2022 state House election, via Clarity

In [5]:
# Get current version and results.
# Each request gets a timeout so a stalled server cannot hang "Run All", and
# raise_for_status() makes an HTTP error fail here instead of surfacing later
# as a confusing JSON/KeyError problem.
version_response = requests.get(CLARITY_BASE_URL % 'current_ver.txt', headers=CLARITY_HEADERS, timeout=30)
version_response.raise_for_status()
# The version is interpolated into the next URL, so drop any surrounding
# whitespace/newline the text file may carry.
current_version = version_response.text.strip()

results_response = requests.get(CLARITY_BASE_URL % ('%s/json/ALL.json' % current_version), headers=CLARITY_HEADERS, timeout=30)
results_response.raise_for_status()
results_2022 = results_response.json()

# Convert precincts to workable results
def convert_precinct(precinct, contest_key='0008'):
    """Pull one contest's vote counts out of a single Clarity precinct entry.

    Parameters
    ----------
    precinct : dict
        One element of the ``Contests`` list. Three keys are read: ``'A'``
        (returned as the precinct name), ``'C'`` (list of contest keys) and
        ``'V'`` (list of per-choice vote lists, indexed in parallel with
        ``'C'``).
    contest_key : str, optional
        Key of the contest to extract. Defaults to ``'0008'``, the contest
        this notebook analyses.

    Returns
    -------
    list or None
        ``[name, first_choice_votes, second_choice_votes, total_votes]``, or
        ``None`` when the entry does not include the contest or when its
        ``'A'`` value is ``'-1'`` (presumably an aggregate, non-precinct
        row -- confirm against the feed).

    Notes
    -----
    Downstream code labels the first two counts ``dem_total`` and
    ``rep_total``; that ordering is an assumption about the feed and is not
    checked here.
    """
    contest_keys = precinct['C']
    if contest_key not in contest_keys or precinct['A'] == '-1':
        return None

    totals = precinct['V'][contest_keys.index(contest_key)]
    # Sum every choice rather than exactly the first three, so the total is
    # right for contests with two choices or with more than three.
    return [precinct['A'], totals[0], totals[1], sum(totals)]

# Run every entry through convert_precinct and discard the ones it skipped
# (it returns None for those)
results_2022 = [
    row
    for row in (convert_precinct(entry) for entry in results_2022['Contests'])
    if row is not None
]

# Convert to dataframe
results_2022 = pd.DataFrame(
    results_2022,
    columns=['precinct_name', 'dem_total', 'rep_total', 'precinct_total'],
)

# Derive the municipality from the precinct label: upper-case it, cut
# everything from ' TOWNSHIP' / ' BOROUGH' onward, and collapse any label
# containing JOHNSTOWN to the bare city name
results_2022['precinct_name'] = results_2022['precinct_name'].str.upper()
results_2022['muni_name'] = (
    results_2022['precinct_name']
    .str.replace(r' TOWNSHIP.*', '', regex=True)
    .str.replace(r' BOROUGH.*', '', regex=True)
)
is_johnstown = results_2022['muni_name'].str.contains('JOHNSTOWN')
results_2022.loc[is_johnstown, 'muni_name'] = 'JOHNSTOWN'

# Add the vote counts up per municipality
results_2022 = results_2022.pivot_table(
    index='muni_name',
    values=['precinct_total', 'dem_total', 'rep_total'],
    aggfunc='sum',
).reset_index()

# Attach the formatted municipality name; this is an inner join, so any
# municipality missing from HD72_MUNICIPALITIES is dropped here
results_2022 = results_2022.merge(
    HD72_MUNICIPALITIES[['formatted_muni_name', 'muni_name']],
    on='muni_name',
)

# Shares of the total vote, rounded to one decimal; 'other' is whatever is
# left of the total after the Democratic and Republican counts
total_2022 = results_2022['precinct_total']
results_2022['DEM_pct'] = (100 * (results_2022['dem_total'] / total_2022)).round(1)
results_2022['REP_pct'] = (100 * (results_2022['rep_total'] / total_2022)).round(1)
results_2022['OTH_pct'] = (
    100 * ((total_2022 - results_2022['rep_total'] - results_2022['dem_total']) / total_2022)
).round(1)

# Keep the display columns, exposing the formatted name as 'muni_name'
results_2022 = results_2022[['formatted_muni_name', 'DEM_pct', 'REP_pct', 'OTH_pct']].rename(
    columns={'formatted_muni_name': 'muni_name'}
)

results_2022.head(1)

Unnamed: 0,muni_name,DEM_pct,REP_pct,OTH_pct
0,BLACKLICK TWP,45.3,54.7,0.0
