# Analyze past Allegheny County primaries

In [1]:
import pandas as pd
import re

In [2]:
### Calculate relevant statistics, append year suffix to columns
def calculate_stats(df, year):
    """Sum registration and ballots per precinct and derive ratio columns.

    Rows sharing the same ``precinct`` value are summed on ``d_reg`` and
    ``d_cast``. Three columns are then added:

    * ``turnout`` -- ballots cast divided by registered voters.
    * ``ballots_pct_of_total`` -- the precinct's share of all ballots cast.
    * ``reg_pct_of_total`` -- the precinct's share of all registered voters.

    Every data column is suffixed with ``_<year>`` and the returned frame
    is indexed by ``precinct``.
    """
    by_precinct = df.groupby('precinct').agg({'d_reg': 'sum', 'd_cast': 'sum'})

    registered = by_precinct['d_reg']
    cast = by_precinct['d_cast']
    by_precinct = by_precinct.assign(
        turnout=cast / registered,
        ballots_pct_of_total=cast / cast.sum(),
        reg_pct_of_total=registered / registered.sum(),
    )

    # 'precinct' is the index at this point, so only data columns are renamed.
    return by_precinct.add_suffix(f'_{year}')

### Remove ward and district markings, but leave ward for Pittsburgh
def format_precinct_name(precinct):
    """Return the precinct name with district (and usually ward) detail removed.

    The abbreviations `` WD`` and `` WRD`` are first expanded to `` WARD``,
    then everything from `` DIST`` onward is dropped. Names starting with
    ``PITTSBURGH`` keep their ward portion; for all other names everything
    from `` WARD`` onward is dropped as well.
    """
    name = precinct
    for abbreviation in (' WD', ' WRD'):
        name = name.replace(abbreviation, ' WARD')
    name = re.sub(r' DIST.*', '', name)

    if not name.startswith('PITTSBURGH'):
        name = re.sub(r' WARD.*', '', name)
    return name

### Import voting data

##### Import P17

In [3]:
# Load the 2017 primary file, keep the precinct name and the Democratic
# registration / ballots-cast columns under short names, normalise the
# precinct names, then aggregate per precinct with a '17' column suffix.
p17 = (
    pd.read_csv('input/primary_2017.csv')
    .loc[:, ['Precinct Name', 'Democratic Reg Voters', 'Democratic Ballots Cast']]
    .rename(columns={
        'Precinct Name': 'precinct',
        'Democratic Reg Voters': 'd_reg',
        'Democratic Ballots Cast': 'd_cast',
    })
    .assign(precinct=lambda frame: frame['precinct'].apply(format_precinct_name))
    .pipe(calculate_stats, '17')
)
p17.head(1)

Unnamed: 0_level_0,d_reg_17,d_cast_17,turnout_17,ballots_pct_of_total_17,reg_pct_of_total_17
precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALEPPO,520,123,0.236538,0.001016,0.000967


##### Import P19

In [4]:
# Load the 2019 primary file, keep the precinct name and the Democratic
# registration / ballots-cast columns under short names, clean the
# ballots-cast strings into integers, normalise the precinct names, then
# aggregate per precinct with a '19' column suffix.
#
# NOTE(review): the pattern r'.\s' matches ANY character followed by a
# whitespace character, so a digit directly before whitespace would be
# removed too. Confirm against the raw file whether r'\.\s' or r'\s' was
# intended.
p19 = (
    pd.read_csv('input/primary_2019.csv')
    .loc[:, ['Precinct Name', 'Democratic Reg Voters', 'Democratic Ballots Cast']]
    .rename(columns={
        'Precinct Name': 'precinct',
        'Democratic Reg Voters': 'd_reg',
        'Democratic Ballots Cast': 'd_cast',
    })
    .assign(
        d_cast=lambda frame: frame['d_cast'].str.replace(r'.\s', '', regex=True).astype(int),
        precinct=lambda frame: frame['precinct'].apply(format_precinct_name),
    )
    .pipe(calculate_stats, '19')
)
p19.head(1)

Unnamed: 0_level_0,d_reg_19,d_cast_19,turnout_19,ballots_pct_of_total_19,reg_pct_of_total_19
precinct,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ALEPPO,527,98,0.185958,0.00083,0.000955


### Merge primaries; adjust precinct names

### Analyze primaries

##### PGH vs. non-PGH