In [1]:
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
import os
import fiona
from statistics import mean, median
from pandas import read_csv
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw' #To load KML files
import string
import xml.etree.ElementTree as et

pd.options.display.max_columns = 999

# VEST CT 2016

## VEST Documentation

### Sources

#### Election Results
Election results from the Connecticut Secretary of State (https://portal.ct.gov/SOTS/Election-Services/Election-Results/Election-Results).

#### Shapefiles
Precinct shapefile from the U.S. Census Bureau's 2020 Redistricting Data Program.

### Processing
 
District splits not reported separately were merged for Bethel 5, Bridgeport 129-3, 130-2, Durham 3, East Haven 3-3, Hartford 11, 12, 24, Naugatuck 3-3, New Britain 12, New Haven 9-2, 17, 21-1, Stamford 3, 5, 6-1, 8, 12, 20-1, 21, Torrington 6, 7, Waterbury 74-5.

The Stratford and Westport precincts were renumbered from municipal districts to state/federal districts.

The following additional modifications were made to match the 2016 precinct boundaries.

Ansonia: Adjust 1/2 to match shapefile  
Avon: Adjust 1/3 to match street list  
Berlin: Adjust 2/5 to match voter file  
Bethel: Adjust 1/4 to match voter file  
Bridgeport: Adjust 124-2/3/4, 126-1/2/5, 130-3/4 to match PDF  
Colchester: Adjust 1/3, 2/4 to match street list  
Coventry: Adjust 1/2 to match street list  
Danbury: Align wards 4/5, 6/7 with PDF  
Darien: Align 1/5, 1/6, 2/4 with shapefile  
East Hartford: Adjust 1/2, 5/6 to match street list  
East Haven: Add 1-3, 5-3 to match street list; Align 3, 3-3 with LD  
East Windsor: Align 1, 1-2 with LD  
Enfield: Adjust 258/458 to match PDF  
Fairfield: Split 3-32/3-34 by LD; Adjust 8/9 to match PDF  
Glastonbury: Adjust 4/5, 4/9, 7/9 to match PDF  
Greenwich: Split 10/10-1 by LD; Adjust 1/2/3 to match PDF  
Guilford: Adjust 1/3, 2/3 to match descriptions  
Haddam: Adjust 1/2 to match street list  
Hamden: Adjust 1/9, 5/6 to match PDF  
Killingly: Adjust 2/4, 3/4 to match PDF  
Ledyard: Adjust 1/2 to match street list  
Litchfield: Adjust 2/4 to match voter file  
Manchester: Merge 5/9; Adjust 3/5 to match street list  
Middletown: Adjust 1/12 to match GIS  
Milford: Align 117/119, 119-1/3 with LD and PDF  
New Britain: Adjust 12/14 to match voter file  
New Haven: Split 11-1/11-3 and align VTDs with voter file  
New Milford: Align all VTDs with voter file  
Newtown: Adjust 1/2 to match street list  
Norwich: Adjust 4/5 to match voter file  
Plainfield: Adjust 1/3, 2/4 to match voter file  
Plymouth: Split 1/2 to match registrar description  
Ridgefield: Adjust 1/2, 1/3 to match voter file  
Rocky Hill: Adjust 2/3 to match voter file  
Seymour: Adjust 1/3, 2/3 to match voter file  
Shelton: Adjust 1/4, 2/3 to match PDF, voter file  
Simsbury: Adjust all VTDs to reverse 2017 redistricting  
South Windsor: Adjust 3/5, 4/5 to match PDF  
Southbury: Align all VTDs with shapefile, street list  
Southington: Adjust 3/6, 5/8, 8/9 to match PDF  
Stafford: Adjust 1/2 to match PDF  
Stamford: Merge 1/23 and adjust 6/7 to reverse 2019 redistricting  
Stonington: Align all VTDs with shapefile  
Stratford: Split 20-1/13, 80-1/21 by LD; Align VTDs with voter file  
Vernon: Adjust 1/2, 2/3 to match PDF  
Wallingford: Adjust 2/3, 2/4, 7/8 to match voter file  
Waterbury: Align VTDs with PDF, street list, voter file  
West Haven: Adjust 1/7, 4/5/6, 8/9/10 to match voter file  
Weston: Adjust 1/2 to match voter file  
Windham: Adjust 4/6 to match street list  
Windsor: Adjust 1/3 to match street list  
Wolcott: Adjust 1/2, 1/3 to match voter file  

### Races

G16PREDCLI - Hillary Clinton (Democratic Party)  
G16PRERTRU - Donald J. Trump (Republican Party)  
G16PRELJOH - Gary Johnson (Libertarian Party)  
G16PREGSTE - Jill Stein (Green Party)  
G16PREOWRI - Write-in Votes  

G16USSDBLU - Richard Blumenthal (Democratic Party and Working Families Party (fusion candidate))  
G16USSRCAR - Dan Carter (Republican Party)  
G16USSLLIO - Richard Lion (Libertarian Party)  
G16USSGRUS - Jeffery Russell (Green Party)  
G16USSOWRI - Write-in Votes  

## Election Result Processing

### Load in VEST file

In [2]:
vest_ct_16 = gp.read_file("./raw-from-source/VEST/ct_2016/ct_2016.shp")

In [3]:
# Vote-count columns: every column of the VEST file whose name contains "G16".
data_columns = [col for col in vest_ct_16.columns if "G16" in col]

In [4]:
print(vest_ct_16.shape)

(743, 14)


In [5]:
# Load the Secretary of State voting-district report. skiprows drops the first
# two lines of the file and index_col=0 uses the first column as the index (it
# is turned back into a regular column by reset_index in the next cell).
# NOTE(review): presumably the two skipped lines are a report title/preamble — confirm.
election_results = pd.read_csv("./raw-from-source/Election_Results/ct-2016-statewide-voting-district-report.csv",skiprows=[0,1],index_col=0)

In [6]:
election_results.reset_index(inplace=True,drop=False)

In [7]:
# Keep only the two statewide contests that VEST reports. The explicit .copy()
# makes the filtered frame independent of the full results table, so adding the
# "cand_detailed" column in the next cell does not raise SettingWithCopyWarning.
election_results = election_results[election_results["OfficeName"].isin(['Presidential Electors for', 'United States Senator'])].copy()

In [8]:
election_results["cand_detailed"] = election_results["OfficeName"]+"-"+election_results["CandidateName"]

In [9]:
election_results["OfficeName"].unique()

array(['Presidential Electors for', 'United States Senator'], dtype=object)

In [10]:
election_results["cand_detailed"].unique()

array(['Presidential Electors for-Clinton and KaineMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Electors for-Trump and PenceMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Electors for-Johnson and WeldMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Electors for-Stein and BarakaMachine/Polling Place/EDRAbsenteeTotal',
       'United States Senator-Richard BlumenthalMachine/Polling Place/EDRAbsenteeTotal',
       'United States Senator-Dan CarterMachine/Polling Place/EDRAbsenteeTotal',
       'United States Senator-Richard LionMachine/Polling Place/EDRAbsenteeTotal',
       'United States Senator-Jeffery RussellMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Electors for-McMullin and JohnsonMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Electors for-Cooper and MeyerMachine/Polling Place/EDRAbsenteeTotal',
       'United States Senator-John M. TraceskiMachine/Polling Place/EDRAbsenteeTotal',
       'Presidential Ele

In [11]:
# Maps each "OfficeName-CandidateName" label to the VEST column code it is
# counted under. Every candidate other than the four named per contest is
# pooled into that contest's write-in column (G16PREOWRI / G16USSOWRI).
# NOTE(review): the "Machine/Polling Place/EDRAbsenteeTotal" suffix looks like
# header text fused into CandidateName by the source CSV — confirm.
cand_detailed_change_dict = {      
       'United States Senator-Richard BlumenthalMachine/Polling Place/EDRAbsenteeTotal':"G16USSDBLU",
       'United States Senator-Dan CarterMachine/Polling Place/EDRAbsenteeTotal':"G16USSRCAR",
       'United States Senator-Richard LionMachine/Polling Place/EDRAbsenteeTotal':"G16USSLLIO",
       'United States Senator-Jeffery RussellMachine/Polling Place/EDRAbsenteeTotal':"G16USSGRUS",
       'United States Senator-Andrew RuleMachine/Polling Place/EDRAbsenteeTotal':"G16USSOWRI",
       'United States Senator-John M. TraceskiMachine/Polling Place/EDRAbsenteeTotal':"G16USSOWRI",
       'Presidential Electors for-Clinton and KaineMachine/Polling Place/EDRAbsenteeTotal':"G16PREDCLI",
       'Presidential Electors for-Trump and PenceMachine/Polling Place/EDRAbsenteeTotal':"G16PRERTRU",
       'Presidential Electors for-Johnson and WeldMachine/Polling Place/EDRAbsenteeTotal':"G16PRELJOH",
       'Presidential Electors for-Stein and BarakaMachine/Polling Place/EDRAbsenteeTotal':"G16PREGSTE",
       'Presidential Electors for-McMullin and JohnsonMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Cooper and MeyerMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Castle and BradleyMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Kotlikoff and LeamerMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-La Riva and PuryearMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Evans and FitchMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Wu and WuMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Buchanan and WashingtonMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Basiago and KinnisonMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-De La Fuente and SteinbergMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Hoefling and SchulinMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Deame and LongMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Blumenthal and BlairMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Smith and WhiteMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Schoenke and Mitchell Jr.Machine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Cummings and RomanoffMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Skewes and LacyMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Klojzy Jr. and LeMayMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Maldonado and TerranovaMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI",
       'Presidential Electors for-Fox and KushnerMachine/Polling Place/EDRAbsenteeTotal':"G16PREOWRI"  
}

In [12]:
# Recode each label to its VEST column code. A label with no entry in the
# dictionary maps to NaN and is then restored to its original text by fillna,
# so unmapped candidates stay visible instead of being dropped.
election_results["cand_detailed"] = election_results["cand_detailed"].map(cand_detailed_change_dict).fillna(election_results["cand_detailed"])

In [13]:
election_results

Unnamed: 0,ElectionName,TownName,ElectionCategory,Election_Date,TownClerk,OfficeName,CandidateName,PartyName,Polling_Place_Name,Machine_Count,Absentee_Count,Final_Count,cand_detailed
0,"November Presidential Election November 08, 2016",Andover,State Election,November 08' 2016,,Presidential Electors for,Clinton and KaineMachine/Polling Place/EDRAbse...,Democratic Party,DISTRICT 1-Andover Town Hall,853,43,896,G16PREDCLI
1,"November Presidential Election November 08, 2016",Andover,State Election,November 08' 2016,,Presidential Electors for,Trump and PenceMachine/Polling Place/EDRAbsent...,Republican Party,DISTRICT 1-Andover Town Hall,852,39,891,G16PRERTRU
2,"November Presidential Election November 08, 2016",Andover,State Election,November 08' 2016,,Presidential Electors for,Johnson and WeldMachine/Polling Place/EDRAbsen...,Libertarian Party,DISTRICT 1-Andover Town Hall,75,4,79,G16PRELJOH
3,"November Presidential Election November 08, 2016",Andover,State Election,November 08' 2016,,Presidential Electors for,Stein and BarakaMachine/Polling Place/EDRAbsen...,Green Party,DISTRICT 1-Andover Town Hall,42,0,42,G16PREGSTE
4,"November Presidential Election November 08, 2016",Andover,State Election,November 08' 2016,,United States Senator,Richard BlumenthalMachine/Polling Place/EDRAbs...,Democratic Party,DISTRICT 1-Andover Town Hall,947,44,991,G16USSDBLU
...,...,...,...,...,...,...,...,...,...,...,...,...,...
16334,"November Presidential Election November 08, 2016",Woodstock,State Election,November 08' 2016,Judy Walberg,United States Senator,Richard BlumenthalMachine/Polling Place/EDRAbs...,Democratic Party,DISTRICT 1-Woodstock Town Hall,1966,200,2166,G16USSDBLU
16335,"November Presidential Election November 08, 2016",Woodstock,State Election,November 08' 2016,Judy Walberg,United States Senator,Dan CarterMachine/Polling Place/EDRAbsenteeTotal,Republican Party,DISTRICT 1-Woodstock Town Hall,1702,144,1846,G16USSRCAR
16336,"November Presidential Election November 08, 2016",Woodstock,State Election,November 08' 2016,Judy Walberg,United States Senator,Richard BlumenthalMachine/Polling Place/EDRAbs...,Working Families Party,DISTRICT 1-Woodstock Town Hall,221,14,235,G16USSDBLU
16337,"November Presidential Election November 08, 2016",Woodstock,State Election,November 08' 2016,Judy Walberg,United States Senator,Richard LionMachine/Polling Place/EDRAbsenteeT...,Libertarian Party,DISTRICT 1-Woodstock Town Hall,63,9,72,G16USSLLIO


In [14]:
# One row per (polling place, town) and one column per VEST code. Several source
# rows can share a code (a fusion candidate's separate party lines, individual
# write-in candidates), so their counts are summed.
# aggfunc is the string "sum" rather than the Python builtin: pandas 2.1+ emits a
# FutureWarning for the builtin and recommends the string to keep this behaviour.
pivoted_results = pd.pivot_table(election_results,index=["Polling_Place_Name","TownName"],values=["Final_Count"],aggfunc="sum",columns="cand_detailed")

In [15]:
pivoted_results.reset_index(inplace=True,drop=False)

In [16]:
# Drop the outer column level left by the pivot so the candidate codes become
# the column labels. All labels are assigned explicitly in the next cell, so no
# further renaming is needed here (the former rename of a column labelled 1
# matched no column and had no effect).
pivoted_results.columns = pivoted_results.columns.droplevel(0)

In [17]:
# Positional rename of all twelve columns: the first two are the former index
# levels (polling place, town); the remaining ten are assumed to be the
# candidate codes in the alphabetical order the pivot emits them.
# NOTE(review): this silently mislabels data if the pivot's column order or
# count ever changes — confirm against the pivot output before relying on it.
pivoted_results.columns = ['Precinct', 'Town', 'G16PREDCLI', 'G16PREGSTE', 'G16PRELJOH', 'G16PREOWRI',
       'G16PRERTRU', 'G16USSDBLU', 'G16USSGRUS', 'G16USSLLIO', 'G16USSOWRI',
       'G16USSRCAR']

In [18]:
pivoted_results = pivoted_results.fillna(0)

In [19]:
pivoted_results

Unnamed: 0,Precinct,Town,G16PREDCLI,G16PREGSTE,G16PRELJOH,G16PREOWRI,G16PRERTRU,G16USSDBLU,G16USSGRUS,G16USSLLIO,G16USSOWRI,G16USSRCAR
0,DISTRICT 1-1 Town Hall,Plainfield,563.0,24.0,85.0,3.0,866.0,868.0,29.0,38.0,0.0,561.0
1,DISTRICT 1-1-1a Town Hall,Plainfield,458.0,20.0,47.0,3.0,698.0,689.0,9.0,25.0,0.0,470.0
2,DISTRICT 1-1-Bd Of Ed Central Office - Cafeteria,Killingly,472.0,34.0,60.0,2.0,669.0,721.0,22.0,31.0,0.0,417.0
3,DISTRICT 1-1-Cromwell High School,Cromwell,3808.0,111.0,289.0,20.0,3486.0,4685.0,64.0,93.0,0.0,2696.0
4,DISTRICT 1-1-Cross Street School - A,Naugatuck,160.0,7.0,11.0,1.0,194.0,200.0,4.0,1.0,0.0,152.0
...,...,...,...,...,...,...,...,...,...,...,...,...
738,DISTRICT 9-Wesley School - District 9,Middletown,1156.0,37.0,60.0,3.0,745.0,1334.0,23.0,22.0,0.0,606.0
739,DISTRICT 9-West Woods School,Hamden,1447.0,29.0,42.0,1.0,1206.0,1633.0,30.0,21.0,0.0,979.0
740,DISTRICT 9-Y W C A,Hartford,608.0,12.0,7.0,0.0,55.0,553.0,14.0,5.0,0.0,42.0
741,DISTRICT 90-1-Bunnell High School 120 21,Stratford,823.0,23.0,37.0,4.0,964.0,1130.0,12.0,11.0,0.0,680.0


In [20]:
def statewide_totals_check(partner_df,source_df,column_list):
    """Print a statewide comparison of vote totals between two result tables.

    For every column in column_list the column sums of both frames are compared
    and either the matching total or the discrepancy is printed.

    Args:
      partner_df: DataFrame whose totals are reported under the "VEST" label
      source_df: DataFrame whose totals are reported under the "SOURCES" label
      column_list: List of vote-count column names present in both DataFrames

    Returns:
      None; the comparison is only written to standard output
    """
    print("***Statewide Totals Check***")
    for race in column_list:
        partner_total = partner_df[race].sum()
        source_total = source_df[race].sum()
        gap = partner_total - source_total
        if gap == 0:
            print(race + " is equal", "\tVEST / RDH: " + str(partner_total))
            continue
        # Totals disagree: report the signed gap followed by both totals.
        print(race+" has a difference of "+str(gap)+" votes")
        print("\tVEST: "+str(partner_total)+" votes")
        print("\tSOURCES: "+str(source_total)+" votes")

In [21]:
# The VEST file goes first: the function prints its first argument's totals
# under the "VEST" label and its second argument's under "SOURCES".
statewide_totals_check(vest_ct_16,pivoted_results,data_columns)

***Statewide Totals Check***
G16PREDCLI is equal 	VEST / RDH: 897572.0
G16PRERTRU is equal 	VEST / RDH: 673215.0
G16PRELJOH is equal 	VEST / RDH: 48676.0
G16PREGSTE is equal 	VEST / RDH: 22841.0
G16PREOWRI is equal 	VEST / RDH: 2616.0
G16USSDBLU is equal 	VEST / RDH: 1008714.0
G16USSRCAR is equal 	VEST / RDH: 552621.0
G16USSLLIO is equal 	VEST / RDH: 18190.0
G16USSGRUS is equal 	VEST / RDH: 16713.0
G16USSOWRI is equal 	VEST / RDH: 38.0


In [22]:
def county_totals_check(partner_df,source_df,column_list,county_col,full_print=False):
    """Compares the totals of two election result dataframes at the county level

    For each race the per-county sums of both frames are computed once and
    compared. A county that appears in only one of the frames is treated as
    having 0 votes in the other, so it is reported as a difference instead of
    producing NaN values or a lookup error.

    Args:
      partner_df: DataFrame whose totals are reported under the "VEST" label
      source_df: DataFrame whose totals are reported under the "SOURCES" label
      column_list: List of races that there are votes for
      county_col: String of the column name that contains county information
      full_print: Boolean specifying whether to also print the counties whose
        totals match (with their vote total)

    Returns:
      Nothing, only prints out an analysis
    """

    print("***Countywide Totals Check***")
    print("")
    diff_counties=[]
    for race in column_list:
        # Select the race column before summing: only that column is aggregated,
        # so non-numeric columns (names, geometry) are never passed to sum().
        partner_totals = partner_df.groupby(county_col)[race].sum()
        source_totals = source_df.groupby(county_col)[race].sum()

        # Put both sides on the same set of counties, filling absentees with 0.
        all_counties = partner_totals.index.union(source_totals.index)
        partner_totals = partner_totals.reindex(all_counties, fill_value=0)
        source_totals = source_totals.reindex(all_counties, fill_value=0)

        diff = partner_totals - source_totals
        differing = diff[diff != 0]
        matching = diff[diff == 0]

        # Remember every county that differs in at least one race.
        for county in differing.index.tolist():
            if county not in diff_counties:
                diff_counties.append(county)

        if len(differing) != 0:
            print(race + " contains differences in these counties:")
            for county in differing.index.tolist():
                print("\t"+county+" has a difference of "+str(differing.loc[county])+" votes")
                print("\t\tVEST: "+str(partner_totals.loc[county])+" votes")
                print("\t\tSOURCES: "+str(source_totals.loc[county])+" votes")
        else:
            print(race + " is equal across all counties")

        if (full_print):
            for county in matching.index.tolist():
                print("\t"+county + ": "+ str(partner_totals.loc[county])+" votes")
    if (len(diff_counties)>0):
        print()
        print(diff_counties)

In [23]:
vest_ct_16

Unnamed: 0,STATEFP20,COUNTYFP20,NAME20,G16PREDCLI,G16PRERTRU,G16PRELJOH,G16PREGSTE,G16PREOWRI,G16USSDBLU,G16USSRCAR,G16USSLLIO,G16USSGRUS,G16USSOWRI,geometry
0,09,011,Montville 004-00,1175,1572,120,52,0,1659,1111,35,37,0,"POLYGON ((-72.22762 41.51870, -72.22757 41.518..."
1,09,005,Barkhamsted 001-00,905,1157,94,37,7,1103,1007,43,20,0,"POLYGON ((-73.05268 41.89076, -73.05217 41.892..."
2,09,005,Bethlehem 001-00,801,1405,59,47,5,998,1196,32,39,0,"POLYGON ((-73.26145 41.66464, -73.25791 41.664..."
3,09,005,Bridgewater 001-00,518,571,33,17,2,631,467,16,0,0,"POLYGON ((-73.40474 41.53702, -73.40457 41.537..."
4,09,005,Canaan 001-00,357,203,23,9,0,383,172,12,11,0,"POLYGON ((-73.37304 41.96030, -73.37296 41.960..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
738,09,009,Waterbury 071-02,445,419,19,13,0,538,292,13,6,0,"POLYGON ((-73.04755 41.51520, -73.04857 41.515..."
739,09,009,Waterbury 075-04,1102,491,23,17,0,1142,327,19,20,0,"POLYGON ((-73.04852 41.54376, -73.04838 41.543..."
740,09,009,Waterbury 071-03,1328,1449,69,15,0,1634,1064,38,23,0,"POLYGON ((-73.06130 41.52992, -73.06116 41.529..."
741,09,009,New Haven 022-01,691,19,7,14,5,653,28,3,12,0,"POLYGON ((-72.93264 41.31274, -72.93213 41.312..."


In [24]:
ct_crosswalk = pd.read_csv("./raw-from-source/Crosswalk/Connecticut_Towns_-_Crosswalk_with_Tax_Codes_and_FIPS_Codes.csv")

In [25]:
# Any missing cell in the crosswalk is replaced by the placeholder "0011"; when
# the FIPS text is later sliced with [1:4] this placeholder yields county "011".
# NOTE(review): confirm which town(s) lack a FIPS code and that 011 is the
# correct county for them.
ct_crosswalk = ct_crosswalk.fillna("0011")
# Cast to text so the county digits can be taken by character position.
ct_crosswalk["FIPS Code"] = ct_crosswalk["FIPS Code"].astype(str)



In [26]:
ct_crosswalk


Unnamed: 0,Tax Code,Town Name,FIPS Code
0,1,Andover,901301080.0
1,2,Ansonia,900901220.0
2,3,Ashford,901501430.0
3,4,Avon,900302060.0
4,5,Barkhamsted,900502760.0
...,...,...,...
164,165,Windsor Locks,900387070.0
165,166,Wolcott,900987560.0
166,167,Woodbridge,900987700.0
167,168,Woodbury,900587910.0


In [27]:
# Town name -> 3-character county code. The FIPS codes display as floats
# (e.g. 901301080.0 for Andover), i.e. without the leading "0" of the state
# code, so characters 1-3 of the text are the county code ("013").
ct_crosswalk_dict = dict(zip(ct_crosswalk["Town Name"],ct_crosswalk["FIPS Code"].str[1:4]))

In [28]:
ct_crosswalk_dict

{'Andover': '013',
 'Ansonia': '009',
 'Ashford': '015',
 'Avon': '003',
 'Barkhamsted': '005',
 'Beacon Falls': '009',
 'Berlin': '003',
 'Bethany': '009',
 'Bethel': '001',
 'Bethlehem': '005',
 'Bloomfield': '003',
 'Bolton': '013',
 'Bozrah': '011',
 'Branford': '009',
 'Bridgeport': '001',
 'Bridgewater': '005',
 'Bristol': '003',
 'Brookfield': '001',
 'Brooklyn': '015',
 'Burlington': '003',
 'Canaan': '005',
 'Canterbury': '015',
 'Canton': '003',
 'Chaplin': '015',
 'Cheshire': '009',
 'Chester': '007',
 'Clinton': '007',
 'Colchester': '011',
 'Colebrook': '005',
 'Columbia': '013',
 'Cornwall': '005',
 'Coventry': '013',
 'Cromwell': '007',
 'Danbury': '001',
 'Darien': '001',
 'Deep River': '007',
 'Derby': '009',
 'Durham': '007',
 'Eastford': '015',
 'East Granby': '003',
 'East Haddam': '007',
 'East Hampton': '007',
 'East Hartford': '003',
 'East Haven': '009',
 'East Lyme': '011',
 'Easton': '001',
 'East Windsor': '003',
 'Ellington': '013',
 'Enfield': '003',
 'Esse

In [29]:
# Attach a county code to every precinct through its town. A town missing from
# the crosswalk keeps its town name in this column (via fillna), which makes it
# easy to spot in the unique() listing of the next cell.
pivoted_results["COUNTYFP20"] =  pivoted_results["Town"].map(ct_crosswalk_dict).fillna(pivoted_results["Town"])

In [30]:
pivoted_results["COUNTYFP20"].unique()

array(['015', '007', '009', '003', '001', '013', '011', '005'],
      dtype=object)

In [31]:
county_totals_check(vest_ct_16,pivoted_results,data_columns,"COUNTYFP20",full_print=False)

***Countywide Totals Check***

G16PREDCLI is equal across all counties
G16PRERTRU is equal across all counties
G16PRELJOH is equal across all counties
G16PREGSTE is equal across all counties
G16PREOWRI is equal across all counties
G16USSDBLU is equal across all counties
G16USSRCAR is equal across all counties
G16USSLLIO is equal across all counties
G16USSGRUS is equal across all counties
G16USSOWRI is equal across all counties


## Precinct-by-Precinct

In [32]:
print(vest_ct_16["NAME20"].value_counts(dropna=False))
print(pivoted_results["Precinct"].value_counts(dropna=False))

Monroe 001-00         1
Colebrook 001-00      1
Stonington 001-00     1
New Milford 001-00    1
Norwalk 140-01        1
                     ..
Greenwich 012-00      1
Vernon 003-00         1
Danbury 003-07        1
Litchfield 004-00     1
Meriden 013-00        1
Name: NAME20, Length: 743, dtype: int64
DISTRICT 1-Town Hall                            13
DISTRICT 31-Assembly Hall                        1
DISTRICT 3-5-Head O Meadow School Cafetorium     1
DISTRICT 7-33-Holland Hill School                1
DISTRICT 8-Central Middle School                 1
                                                ..
DISTRICT 2-Shepardson Community Center 002       1
DISTRICT 3-Coe Park 2                            1
DISTRICT 3-Grace Lutheran Church                 1
DISTRICT 4-1-Killingly High School               1
DISTRICT 19-Bulkeley High School                 1
Name: Precinct, Length: 731, dtype: int64


In [33]:
pivoted_results["NAME20"] = pivoted_results["Town"]+"-"+pivoted_results["Precinct"]
print(pivoted_results["NAME20"].value_counts(dropna=False))

Enfield-DISTRICT 258-Enfield Street School                    1
Old Saybrook-DISTRICT 2-Old Saybrook High School Gymnasium    1
Colchester-DISTRICT 3-Bacon Academy                           1
Trumbull-DISTRICT 2-23-St. Joseph High School 123             1
Easton-DISTRICT 1-Samuel Staples School                       1
                                                             ..
Stafford-DISTRICT 3-West Stafford Fire Department             1
Wilton-DISTRICT 2-Cider Mill School - District 2              1
Plainfield-DISTRICT 1-1 Town Hall                             1
Fairfield-DISTRICT 6-33-McKinley School                       1
Fairfield-DISTRICT 9-32-Sherman School                        1
Name: NAME20, Length: 743, dtype: int64


In [34]:
# First join attempt on the constructed "Town-Precinct" key. validate="1:1"
# makes pandas raise if the key is duplicated on either side, and
# indicator=True adds the "_merge" column used below.
join_attempt_one = pd.merge(vest_ct_16,pivoted_results,on="NAME20",how="outer",validate="1:1",indicator=True)
# NOTE: this is not the last expression of the cell, so the counts are computed
# but not displayed.
join_attempt_one["_merge"].value_counts()

# Export the rows that failed to match on each side.
# NOTE(review): presumably these files were used to build
# elections_vest_id_changes.csv by hand — confirm.
join_attempt_one[join_attempt_one["_merge"]=="left_only"].to_csv("./vest.csv")
join_attempt_one[join_attempt_one["_merge"]=="right_only"].to_csv("./elections.csv")

In [35]:
elections_vest_id_changes = pd.read_csv("./elections_vest_id_changes.csv")

In [36]:
elections_vest_id_changes_dict = dict(zip(elections_vest_id_changes["election_ID"],elections_vest_id_changes["vest_ID"]))

In [37]:
# Replace each election-result ID with the VEST ID it is paired with in the
# crosswalk; IDs without an entry map to NaN and are restored unchanged by fillna.
pivoted_results["NAME20"] = pivoted_results["NAME20"].map(elections_vest_id_changes_dict).fillna(pivoted_results["NAME20"])

In [38]:
join_attempt_two = pd.merge(vest_ct_16,pivoted_results,on="NAME20",how="outer",validate="1:1",indicator=True)
join_attempt_two["_merge"].value_counts()

#join_attempt_two[join_attempt_two["_merge"]=="left_only"].to_csv("./vest.csv")
#join_attempt_two[join_attempt_two["_merge"]=="right_only"].to_csv("./elections.csv")

both          743
left_only       0
right_only      0
Name: _merge, dtype: int64

In [39]:
def precinct_votes_check(merged_df,column_list,vest_on_left,name_col,print_level=0):
    """Report precinct-level vote differences in a merged pair of result tables.

    The merged frame is expected to hold, for every race in column_list, a
    "<race>_x" column (left table) and a "<race>_y" column (right table). Rows
    are visited in name_col order; every difference larger than print_level is
    printed, followed by summary statistics. The check stops at the first
    missing (None/NaN) value it meets.

    Args:
      merged_df: DataFrame with one set of election results joined to another
      column_list: List of races that there are votes for
      vest_on_left: Boolean, True when the "_x" columns hold the VEST data;
        only affects the (V)/(S) labels in the printed lines
      name_col: String of the column name used to identify precincts
      print_level: Integer; differences must exceed this value to be printed

    Returns:
      Nothing, only prints out an analysis
    """
    ordered = merged_df.sort_values(by=[name_col],inplace=False)

    # The two sides only differ in how they are tagged in the output.
    if (vest_on_left):
        left_tag = "(V)"
        right_fmt = " (S){:.>5}"
    else:
        left_tag = "(S)"
        right_fmt = " (V){:.>5}"

    n_matching = 0
    n_different = 0
    differing_names = []
    all_diffs = []
    largest = 0
    for _, record in ordered.iterrows():
        row_differs = False
        for race in column_list:
            left_val = record[race + "_x"]
            right_val = record[race + "_y"]
            # Missing data makes the comparison meaningless: flag it and stop.
            if left_val is None or right_val is None or np.isnan(right_val) or np.isnan(left_val):
                print("FIX NaN value at: ", record[name_col])
                return
            gap = abs(left_val - right_val)
            if gap > 0:
                row_differs = True
                all_diffs.append(abs(gap))
                if gap > largest:
                    largest = gap
            if gap > print_level:
                print(race, "{:.>72}".format(record[name_col]), left_tag,"{:.>5}".format(int(left_val)),right_fmt.format(int(right_val)),"(D):{:>5}".format(int(left_val-right_val)))
        if row_differs:
            n_different += 1
            differing_names.append(record[name_col])
        else:
            n_matching += 1

    # Summary section.
    print("")
    print("There are ", len(ordered.index)," total rows")
    print(n_different," of these rows have election result differences")
    print(n_matching," of these rows are the same")
    print("")
    print("The max difference between any one shared column in a row is: ", largest)
    if all_diffs:
        print("The average difference is: ", str(sum(all_diffs)/len(all_diffs)))
    big_diff_count = sum(1 for gap in all_diffs if gap > 10)
    print("There are ", str(big_diff_count), "precinct results with a difference greater than 10")
    print("")
    print("All precincts containing differences:")
    print(sorted(differing_names))

In [40]:
precinct_votes_check(join_attempt_two[join_attempt_two["_merge"]=="both"],data_columns,True,"NAME20",print_level=0)


There are  743  total rows
0  of these rows have election result differences
743  of these rows are the same

The max difference between any one shared column in a row is:  0
There are  0 precinct results with a difference greater than 10

All precincts containing differences:
[]
