## Convenient imports

In [1]:
%matplotlib ipympl

def mkfig(w=12, h=6, nrow=1, ncol=1, dpi=100, style='seaborn', **kwargs):
    """Apply a matplotlib style and create a figure via ``plt.subplots``.

    Parameters
    ----------
    w, h : figure width and height, passed on as ``figsize=(w, h)``.
    nrow, ncol : subplot grid shape.
    dpi : figure resolution.
    style : style name handed to ``plt.style.use``.
    **kwargs : forwarded unchanged to ``plt.subplots``.

    Returns
    -------
    Whatever ``plt.subplots`` returns.

    Raises
    ------
    OSError
        If ``style`` is rejected by matplotlib and is not one of the legacy
        ``'seaborn*'`` names handled below.
    """
    import matplotlib.pyplot as plt

    try:
        plt.style.use(style)
    except OSError:
        # matplotlib 3.6 renamed the bundled 'seaborn*' styles to
        # 'seaborn-v0_8*' and later releases dropped the old names, so retry
        # with the renamed style instead of failing on the default argument.
        is_legacy_seaborn = (
            isinstance(style, str)
            and style.startswith('seaborn')
            and not style.startswith('seaborn-v0_8')
        )
        if not is_legacy_seaborn:
            raise
        plt.style.use('seaborn-v0_8' + style[len('seaborn'):])

    return plt.subplots(
        nrow, ncol, figsize=(w, h), dpi=dpi,
        facecolor='lightgray', edgecolor='k', **kwargs)
import numpy as np
import scipy.signal as sig
import pandas as pd

# Notebook-wide pandas display settings: allow up to 50 columns and switch on
# the `expand_frame_repr` display option.
pd.set_option('display.max_columns', 50)
pd.set_option('display.expand_frame_repr', True)

## Load dataframe from csv

In [2]:
# Read the full results table from 'iowa_data.csv' (path is relative to the working directory).
df = pd.read_csv('iowa_data.csv')

## List columns

In [3]:
# Bare last expression: the notebook displays the column index of `df`.
df.columns

Index(['county', 'precinct', 'candidate', 'first_exp', 'final_exp', 'sde'], dtype='object')

## Get unique County/Precinct Pairs

In [4]:
# Distinct (county, precinct) pairs, sorted. The pairs are read straight from
# the two columns instead of via iterrows(), which would build a Series for
# every row just to pick two fields out of it.
county_precinct_pairs = sorted(set(zip(df.county, df.precinct)))
len(county_precinct_pairs)

1711

## Look at first county / precinct pair
### Create `pdf` or 'Precinct Dataframe'

In [5]:
# Pick the first pair of the sorted list and keep only the rows of `df`
# whose county and precinct both match it.
cty, pr = county_precinct_pairs[0]
pdf = df.loc[df.county.eq(cty) & df.precinct.eq(pr)]
pdf

Unnamed: 0,county,precinct,candidate,first_exp,final_exp,sde
0,Adair,1NW ADAIR,Bennet,0,0,0.0
1,Adair,1NW ADAIR,Biden,6,6,0.0784
2,Adair,1NW ADAIR,Bloomberg,0,0,0.0
3,Adair,1NW ADAIR,Buttigieg,8,8,0.0784
4,Adair,1NW ADAIR,Delaney,0,0,0.0
5,Adair,1NW ADAIR,Gabbard,0,0,0.0
6,Adair,1NW ADAIR,Klobuchar,8,8,0.1569
7,Adair,1NW ADAIR,Other,0,0,0.0
8,Adair,1NW ADAIR,Patrick,0,0,0.0
9,Adair,1NW ADAIR,Sanders,9,9,0.1569


## Make function to look for mismatch
Specifically, we're looking for precincts where the row with the maximum `final_exp` does not also have the maximum `sde`.

In [6]:
def print_if_mismatch(pdf, f=None):
    """Report whether the top-`final_exp` row of a precinct also has the top `sde`.

    Parameters
    ----------
    pdf : DataFrame with the rows of a single precinct; the columns
        `county`, `precinct`, `final_exp` and `sde` are read.
    f : optional writable text file. Every message that is printed is also
        written to it, one message per line.

    Messages
    --------
    "Looks OK"   : the row with the largest `final_exp` carries the largest `sde`.
    "COIN TOSS?" : the `sde` values differ, but the two rows have equal `final_exp`.
    "NOT OK!"    : the `sde` values differ and so do the `final_exp` values;
                   the whole frame is printed as well.

    Returns
    -------
    None
    """
    def print_and_log(s=''):
        s = f"{s}" # force to str
        print(s)
        if f:
            # print() ends the line; terminate it in the file too, otherwise
            # consecutive messages run together in the log.
            f.write(s + "\n")

    # Index labels of the rows holding the maxima (idxmax picks the first on ties).
    exp_max_idx = pdf.final_exp.idxmax()
    sde_max_idx = pdf.sde.idxmax()

    exp_max_exp_val = pdf.loc[exp_max_idx, 'final_exp']
    sde_max_exp_val = pdf.loc[sde_max_idx, 'final_exp']

    exp_max_sde_val = pdf.loc[exp_max_idx, 'sde']
    sde_max_sde_val = pdf.loc[sde_max_idx, 'sde']

    # Callers pass a single-precinct frame, so the first row's labels identify
    # it. Series.any() on a text column is not a reliable way to fetch a label:
    # depending on the pandas version it yields a bool rather than the string.
    county = pdf.county.iloc[0]
    precinct = pdf.precinct.iloc[0]

    if exp_max_sde_val != sde_max_sde_val:
        print_and_log()
        if exp_max_exp_val == sde_max_exp_val:
            print_and_log(f"\nCOIN TOSS? : county = {county}, precinct = {precinct}")
        else:
            print_and_log(f"\n>>>>> NOT OK! : county = {county}, precinct = {precinct}")
            print_and_log(pdf)
        print_and_log(f"exp_max_idx = {exp_max_idx}, sde_max_idx = {sde_max_idx}")
        print_and_log(pdf.loc[[exp_max_idx, sde_max_idx], :])
        print_and_log()
    else:
        print_and_log(f"Looks OK: county = {county}, precinct = {precinct}")

## Test on test pdf (precinct df)

In [7]:
# Try the check on the single-precinct frame `pdf`; no file is passed, so the message is only printed.
print_if_mismatch(pdf)

Looks OK: county = Adair, precinct = 1NW ADAIR


## Run check for mismatches across all county / precinct pairs

In [8]:
# Run the check for every precinct, echoing each message into 'iowa_not_ok.txt'.
with open('iowa_not_ok.txt', 'w') as f:
    # A single groupby pass hands over each (county, precinct) key together
    # with its rows, in sorted key order, instead of re-scanning the whole
    # table with a fresh boolean mask for every pair. Rows whose county or
    # precinct is missing are left out by groupby, so no empty frame reaches
    # print_if_mismatch.
    for (cty, pr), pdf in df.groupby(['county', 'precinct'], sort=True):
        print_if_mismatch(pdf, f)

Looks OK: county = Adair, precinct = 1NW ADAIR
Looks OK: county = Adair, precinct = 2NE STUART
Looks OK: county = Adair, precinct = 3SW FONTANELLE
Looks OK: county = Adair, precinct = 4SE ORIENT
Looks OK: county = Adair, precinct = 5GF GREENFIELD
Looks OK: county = Adams, precinct = Adams 1
Looks OK: county = Adams, precinct = Adams 2
Looks OK: county = Adams, precinct = Adams 3
Looks OK: county = Adams, precinct = Adams 4
Looks OK: county = Adams, precinct = Adams 5
Looks OK: county = Allamakee, precinct = Pct 01 - WL/HV
Looks OK: county = Allamakee, precinct = Pct 02 - FC/JF/LL/MK/UP
Looks OK: county = Allamakee, precinct = Pct 03 - PO/FK
Looks OK: county = Allamakee, precinct = Pct 04 - PV City
Looks OK: county = Allamakee, precinct = Pct 05 - LT/PC/WV City
Looks OK: county = Allamakee, precinct = Pct 07 - CN/LF/LS/LS City
Looks OK: county = Allamakee, precinct = Pct 08 - IA/UC/NA City
Looks OK: county = Allamakee, precinct = Pct 09 - Waukon 1
Looks OK: county = Allamakee, precinct 