In [1]:
import geopandas as gp
import pandas as pd
import os
import xml.etree.ElementTree as et
import numpy as np
import pdv_functions as pdv

# 1. Load General Election Data

This data had to be downloaded county-by-county in XML format. The below code parses the XML and grabs the necessary data, adds it to a list, gives it the appropriate column names, and converts the data into a dataframe.

In [2]:
loaded_counties = os.listdir("./raw-from-source/general/")
z=[]
for locale in loaded_counties:
    if locale.endswith('.xml'):
        file_string = "./raw-from-source/general/"+locale
        xtree = et.parse(file_string)
        xroot = xtree.getroot()
        store_list = []
        county_area = xroot.findall(".//Region")
        for i in county_area:
            county = i.text
        contests = xroot.findall(".//Contest")
        for i in contests:
            contest = i.attrib.get('text')
            lower = i.findall("./Choice")
            for j in lower:
                choice = j.attrib.get('text')
                lower_2 = j.findall("./VoteType")
                for k in lower_2:
                    voting_method = k.attrib.get('name')
                    lower_3 = k.findall("./Precinct")
                    for l in lower_3:
                        precinct_name = l.attrib.get('name')
                        num_votes = l.attrib.get('votes')
                        z.append([county,contest,choice,voting_method,precinct_name,num_votes])
dfcols = ['county','contest','choice','voting_method','precinct','num_votes']
df_general = pd.DataFrame(z,columns=dfcols)
df_general["election"] = "general"
df_general["precinct"] = df_general["precinct"].str.strip()

In [3]:
ga_22_election_combined = pd.concat([df_general])


# Join Election Data Together


In [4]:
#Sanity check that there are the right number of counties
print(len(ga_22_election_combined["county"].unique()))

# Clean the contest name
ga_22_election_combined["contest"] = ga_22_election_combined["contest"].str.strip()

159


In [5]:
ga_22_election_combined

Unnamed: 0,county,contest,choice,voting_method,precinct,num_votes,election
0,Monroe,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Bentons,32,general
1,Monroe,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Brantleys,6,general
2,Monroe,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Burgays,47,general
3,Monroe,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Cabaniss,5,general
4,Monroe,US Senate,Herschel Junior Walker (Rep),Absentee by Mail Votes,Cox,6,general
...,...,...,...,...,...,...,...
474283,Taliaferro,Statewide Referendum Question B,No,Advance Voting Votes,Crawfordville,91,general
474284,Taliaferro,Statewide Referendum Question B,No,Election Day Votes,Sharon,13,general
474285,Taliaferro,Statewide Referendum Question B,No,Election Day Votes,Crawfordville,49,general
474286,Taliaferro,Statewide Referendum Question B,No,Provisional Votes,Sharon,0,general


In [6]:
ga_22_election_combined["precinct"] = ga_22_election_combined["precinct"]

# Clean Contests


In [7]:
all_contests = list(ga_22_election_combined["contest"].unique())
all_contests.sort()

In [8]:
keep_these = [
    'Attorney General',
 'Attorney General/ Fiscal General',
    'Commissioner Of Agriculture',
 'Commissioner Of Insurance',
 'Commissioner Of Labor',
 'Commissioner of Agriculture',
 'Commissioner of Agriculture/ Comisionado de Agricultura',
 'Commissioner of Insurance',
 'Commissioner of Insurance/ Comisionado de Seguros',
 'Commissioner of Labor',
 'Commissioner of Labor/ Comisionado de Trabajo',
     'Constitutional Amendment #1',
 'Constitutional Amendment #2',
    'Governor',
 'Governor/Gobernador',
    'Lieutenant Governor',
 'Lieutenant Governor/ Vicegobernador',
    'Secretary of State',
 'Secretary of State/ Secretario de Estado',
    'State House - District 128',
 'State House - District 133',
 'State House - District 145',
 'State House - District 149',
 'State House - District 150',
 'State House - District 155',
 'State House - District 156',
 'State House - District 157',
 'State House - District 158',
 'State House - District 159',
 'State House - District 160',
 'State House - District 161',
 'State House - District 167',
 'State House - District 168',
 'State House - District 178',
 'State House Dist 1',
 'State House Dist 100',
 'State House Dist 11',
 'State House Dist 116',
 'State House Dist 12',
 'State House Dist 13',
 'State House Dist 135',
 'State House Dist 136',
 'State House Dist 137',
 'State House Dist 138',
 'State House Dist 139',
 'State House Dist 14',
 'State House Dist 140',
 'State House Dist 141',
 'State House Dist 145',
 'State House Dist 15',
 'State House Dist 150',
 'State House Dist 151',
 'State House Dist 16',
 'State House Dist 17',
 'State House Dist 18',
 'State House Dist 19',
 'State House Dist 2',
 'State House Dist 20',
 'State House Dist 21',
 'State House Dist 22',
 'State House Dist 23',
 'State House Dist 24',
 'State House Dist 25',
 'State House Dist 26',
 'State House Dist 27',
 'State House Dist 28',
 'State House Dist 3',
 'State House Dist 4',
 'State House Dist 44',
 'State House Dist 46',
 'State House Dist 47',
 'State House Dist 48',
 'State House Dist 49',
 'State House Dist 5',
 'State House Dist 50',
 'State House Dist 51',
 'State House Dist 52',
 'State House Dist 53',
 'State House Dist 54',
 'State House Dist 55',
 'State House Dist 56',
 'State House Dist 57',
 'State House Dist 58',
 'State House Dist 59',
 'State House Dist 6',
 'State House Dist 60',
 'State House Dist 61',
 'State House Dist 62',
 'State House Dist 63',
 'State House Dist 64',
 'State House Dist 65',
 'State House Dist 66',
 'State House Dist 67',
 'State House Dist 68',
 'State House Dist 69',
 'State House Dist 7',
 'State House Dist 70',
 'State House Dist 71',
 'State House Dist 72',
 'State House Dist 73',
 'State House Dist 74',
 'State House Dist 75',
 'State House Dist 76',
 'State House Dist 77',
 'State House Dist 78',
 'State House Dist 79',
 'State House Dist 8',
 'State House Dist 9',
 'State House of Representatives - District 10',
 'State House of Representatives - District 100',
 'State House of Representatives - District 100/Para Representante Estatal ante la Asamblea General, Distrito 100',
 'State House of Representatives - District 101/Para Representante Estatal ante la Asamblea General, Distrito 101',
 'State House of Representatives - District 102/Para Representante Estatal ante la Asamblea General, Distrito 102',
 'State House of Representatives - District 103',
 'State House of Representatives - District 103/Para Representante Estatal ante la Asamblea General, Distrito 103',
 'State House of Representatives - District 104',
 'State House of Representatives - District 104/Para Representante Estatal ante la Asamblea General, Distrito 104',
 'State House of Representatives - District 105/Para Representante Estatal ante la Asamblea General, Distrito 105',
 'State House of Representatives - District 106/Para Representante Estatal ante la Asamblea General, Distrito 106',
 'State House of Representatives - District 107/Para Representante Estatal ante la Asamblea General, Distrito 107',
 'State House of Representatives - District 108/Para Representante Estatal ante la Asamblea General, Distrito 108',
 'State House of Representatives - District 109/Para Representante Estatal ante la Asamblea General, Distrito 109',
 'State House of Representatives - District 110/Para Representante Estatal ante la Asamblea General, Distrito 110',
 'State House of Representatives - District 111',
 'State House of Representatives - District 111/Para Representante Estatal ante la Asamblea General, Distrito 111',
 'State House of Representatives - District 112',
 'State House of Representatives - District 113',
 'State House of Representatives - District 114',
 'State House of Representatives - District 115',
 'State House of Representatives - District 116',
 'State House of Representatives - District 117',
 'State House of Representatives - District 118',
 'State House of Representatives - District 119',
 'State House of Representatives - District 120',
 'State House of Representatives - District 121',
 'State House of Representatives - District 122',
 'State House of Representatives - District 123',
 'State House of Representatives - District 124',
 'State House of Representatives - District 125',
 'State House of Representatives - District 126',
 'State House of Representatives - District 127',
 'State House of Representatives - District 128',
 'State House of Representatives - District 129',
 'State House of Representatives - District 130',
 'State House of Representatives - District 131',
 'State House of Representatives - District 132',
 'State House of Representatives - District 133',
 'State House of Representatives - District 134',
 'State House of Representatives - District 135',
 'State House of Representatives - District 136',
 'State House of Representatives - District 137',
 'State House of Representatives - District 142',
 'State House of Representatives - District 143',
 'State House of Representatives - District 144',
 'State House of Representatives - District 145',
 'State House of Representatives - District 146',
 'State House of Representatives - District 147',
 'State House of Representatives - District 148',
 'State House of Representatives - District 149',
 'State House of Representatives - District 150',
 'State House of Representatives - District 151',
 'State House of Representatives - District 152',
 'State House of Representatives - District 153',
 'State House of Representatives - District 154',
 'State House of Representatives - District 155',
 'State House of Representatives - District 156',
 'State House of Representatives - District 157',
 'State House of Representatives - District 159',
 'State House of Representatives - District 160',
 'State House of Representatives - District 161',
 'State House of Representatives - District 162',
 'State House of Representatives - District 163',
 'State House of Representatives - District 164',
 'State House of Representatives - District 165',
 'State House of Representatives - District 166',
 'State House of Representatives - District 167',
 'State House of Representatives - District 169',
 'State House of Representatives - District 170',
 'State House of Representatives - District 171',
 'State House of Representatives - District 172',
 'State House of Representatives - District 173',
 'State House of Representatives - District 174',
 'State House of Representatives - District 175',
 'State House of Representatives - District 176',
 'State House of Representatives - District 177',
 'State House of Representatives - District 178',
 'State House of Representatives - District 179',
 'State House of Representatives - District 180',
 'State House of Representatives - District 22',
 'State House of Representatives - District 27',
 'State House of Representatives - District 28',
 'State House of Representatives - District 29',
 'State House of Representatives - District 30',
 'State House of Representatives - District 30/Para Representante Estatal ante la Asamblea General, Distrito 30',
 'State House of Representatives - District 31',
 'State House of Representatives - District 32',
 'State House of Representatives - District 33',
 'State House of Representatives - District 34',
 'State House of Representatives - District 35',
 'State House of Representatives - District 36',
 'State House of Representatives - District 37',
 'State House of Representatives - District 38',
 'State House of Representatives - District 39',
 'State House of Representatives - District 40',
 'State House of Representatives - District 41',
 'State House of Representatives - District 42',
 'State House of Representatives - District 43',
 'State House of Representatives - District 44',
 'State House of Representatives - District 45',
 'State House of Representatives - District 46',
 'State House of Representatives - District 48/Para Representante Estatal ante la Asamblea General, Distrito 48',
 'State House of Representatives - District 52',
 'State House of Representatives - District 74',
 'State House of Representatives - District 78',
 'State House of Representatives - District 8',
 'State House of Representatives - District 80',
 'State House of Representatives - District 81',
 'State House of Representatives - District 82',
 'State House of Representatives - District 83',
 'State House of Representatives - District 84',
 'State House of Representatives - District 85',
 'State House of Representatives - District 86',
 'State House of Representatives - District 87',
 'State House of Representatives - District 88',
 'State House of Representatives - District 88/Para Representante Estatal ante la Asamblea General, Distrito 88',
 'State House of Representatives - District 89',
 'State House of Representatives - District 9',
 'State House of Representatives - District 90',
 'State House of Representatives - District 91',
 'State House of Representatives - District 92',
 'State House of Representatives - District 93',
 'State House of Representatives - District 94',
 'State House of Representatives - District 94/Para Representante Estatal ante la Asamblea General, Distrito 94',
 'State House of Representatives - District 95',
 'State House of Representatives - District 95/Para Representante Estatal ante la Asamblea General, Distrito 95',
 'State House of Representatives - District 96/Para Representante Estatal ante la Asamblea General, Distrito 96',
 'State House of Representatives - District 97/Para Representante Estatal ante la Asamblea General, Distrito 97',
 'State House of Representatives - District 98/Para Representante Estatal ante la Asamblea General, Distrito 98',
 'State House of Representatives - District 99/Para Representante Estatal ante la Asamblea General, Distrito 99',
 'State School Superintendent',
 'State School Superintendent/ Superintendente de las Escuelas del Estado',
 'State Senate - District 1',
 'State Senate - District 10',
 'State Senate - District 11',
 'State Senate - District 12',
 'State Senate - District 13',
 'State Senate - District 16',
 'State Senate - District 17',
 'State Senate - District 18',
 'State Senate - District 19',
 'State Senate - District 2',
 'State Senate - District 20',
 'State Senate - District 22',
 'State Senate - District 23',
 'State Senate - District 24',
 'State Senate - District 25',
 'State Senate - District 26',
 'State Senate - District 29',
 'State Senate - District 3',
 'State Senate - District 32',
 'State Senate - District 33',
 'State Senate - District 37',
 'State Senate - District 38',
 'State Senate - District 4',
 'State Senate - District 40',
 'State Senate - District 40/ Senador Estatal del Distrito 40',
 'State Senate - District 41',
 'State Senate - District 41/ Senador Estatal del Distrito 41',
 'State Senate - District 42',
 'State Senate - District 43',
 'State Senate - District 44',
 'State Senate - District 45',
 'State Senate - District 45/ Senador Estatal del Distrito 45',
 'State Senate - District 46',
 'State Senate - District 46/ Senador Estatal del Distrito 46',
 'State Senate - District 47',
 'State Senate - District 48/ Senador Estatal del Distrito 48',
 'State Senate - District 49',
 'State Senate - District 5/ Senador Estatal del Distrito 5',
 'State Senate - District 50',
 'State Senate - District 51',
 'State Senate - District 55',
 'State Senate - District 55/ Senador Estatal del Distrito 55',
 'State Senate - District 56',
 'State Senate - District 6',
 'State Senate - District 7/ Senador Estatal del Distrito 7',
 'State Senate - District 8',
 'State Senate - District 9/ Senador Estatal del Distrito 9',
 'State Senate Dist 12',
 'State Senate Dist 14',
 'State Senate Dist 15',
 'State Senate Dist 16',
 'State Senate Dist 18',
 'State Senate Dist 21',
 'State Senate Dist 27',
 'State Senate Dist 28',
 'State Senate Dist 29',
 'State Senate Dist 30',
 'State Senate Dist 31',
 'State Senate Dist 32',
 'State Senate Dist 34',
 'State Senate Dist 35',
 'State Senate Dist 36',
 'State Senate Dist 37',
 'State Senate Dist 38',
 'State Senate Dist 39',
 'State Senate Dist 44',
 'State Senate Dist 48',
 'State Senate Dist 51',
 'State Senate Dist 52',
 'State Senate Dist 53',
 'State Senate Dist 54',
 'State Senate Dist 56',
 'State Senate Dist 6',
 'Statewide Referendum A',
 'Statewide Referendum B',
 'Statewide Referendum Question 1',
 'Statewide Referendum Question 2',
 'Statewide Referendum Question A',
 'Statewide Referendum Question B',
    'US House Dist 11',
 'US House Dist 13',
 'US House Dist 14',
 'US House Dist 2',
 'US House Dist 3',
 'US House Dist 5',
 'US House Dist 6',
 'US House Dist 7',
 'US House Dist 9',
 'US House of Representatives - District 1',
 'US House of Representatives - District 10',
 'US House of Representatives - District 11',
 'US House of Representatives - District 12',
 'US House of Representatives - District 13',
 'US House of Representatives - District 14',
 'US House of Representatives - District 2',
 'US House of Representatives - District 3',
 'US House of Representatives - District 4',
 'US House of Representatives - District 5',
 'US House of Representatives - District 6',
 'US House of Representatives - District 6/ Representante de EE.UU., Distrito del Congreso 6',
 'US House of Representatives - District 7/ Representante de EE.UU., Distrito del Congreso 7',
 'US House of Representatives - District 8',
 'US House of Representatives - District 9',
 'US House of Representatives - District 9/ Representante de EE.UU., Distrito del Congreso 9',
 'US Senate',
 'US Senate/ Senado de los EE.UU.',
    'Proposed Constitutional Amendment 1',
 'Proposed Constitutional Amendment 1/ Enmiendas Constitucionales Propuestas 1',
 'Proposed Constitutional Amendment 2',
 'Proposed Constitutional Amendment 2/ Enmiendas Constitucionales Propuestas 2',
 'Proposed Statewide Referendum 1/ Referéndum estatal propuesto 1',
 'Proposed Statewide Referendum 2/ Referéndum estatal propuesto 2',
]

In [9]:
remaining = list(set(ga_22_election_combined["contest"].unique()) - set(keep_these))
remaining.sort()
remaining

['1% Educational Sales Tax',
 '1% Special Sales Tax',
 'Alcohol Question',
 'Alcohol Referendum',
 'Altamaha Conservation District Soil and Water Supervisor',
 'Altamaha Soil and Water District Supervisor',
 'Annexation - City of Stockbridge',
 'Appendix Two - Code of Ethics and Prohibited Practices',
 'Arcade Fire Advisory Board Post 3',
 'Arcade Fire Advisory Board Post 4',
 'Arcade Fire Advisory Board Post 5',
 'Article IV - Proposed Charter Amendment',
 'Article V - Proposed Charter Amendment',
 'Article VI - Proposed Charter Amendment',
 'Article VII - Chapter 4 Charter Amendment',
 'Article VII - Chapter 5 Charter Amendment',
 'Article VIII - Charter Amendment',
 'Ashburn City Council',
 'Atlanta - Sale of Package Alcoholic Beverages on Sundays',
 'Atlanta Sunday Alcohol Sales Extension',
 'BOE D1',
 'BOE D4',
 'Ball Ground City Council Post 1',
 'Ball Ground City Council Post 2',
 'Ball Ground City Council Post 3',
 'Bartow County Distrilled Spirits',
 'Bartow County Senior Scho

In [10]:
keep_these_dict = {i:i.split("/")[0] for i in keep_these if "/" in i }
keep_these_dict

{'Attorney General/ Fiscal General': 'Attorney General',
 'Commissioner of Agriculture/ Comisionado de Agricultura': 'Commissioner of Agriculture',
 'Commissioner of Insurance/ Comisionado de Seguros': 'Commissioner of Insurance',
 'Commissioner of Labor/ Comisionado de Trabajo': 'Commissioner of Labor',
 'Governor/Gobernador': 'Governor',
 'Lieutenant Governor/ Vicegobernador': 'Lieutenant Governor',
 'Secretary of State/ Secretario de Estado': 'Secretary of State',
 'State House of Representatives - District 100/Para Representante Estatal ante la Asamblea General, Distrito 100': 'State House of Representatives - District 100',
 'State House of Representatives - District 101/Para Representante Estatal ante la Asamblea General, Distrito 101': 'State House of Representatives - District 101',
 'State House of Representatives - District 102/Para Representante Estatal ante la Asamblea General, Distrito 102': 'State House of Representatives - District 102',
 'State House of Representatives 

In [11]:
ga_22_election_combined["contest"] = ga_22_election_combined["contest"].map(keep_these_dict).fillna(ga_22_election_combined["contest"])
ga_22_election_statewide = ga_22_election_combined[~ga_22_election_combined["contest"].isin(remaining)]
ga_22_election_statewide["contest"] = ga_22_election_statewide["contest"].str.title()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["contest"] = ga_22_election_statewide["contest"].str.title()


In [12]:
holder = list(ga_22_election_statewide["contest"].unique())
holder.sort()
holder

['Attorney General',
 'Commissioner Of Agriculture',
 'Commissioner Of Insurance',
 'Commissioner Of Labor',
 'Constitutional Amendment #1',
 'Constitutional Amendment #2',
 'Governor',
 'Lieutenant Governor',
 'Proposed Constitutional Amendment 1',
 'Proposed Constitutional Amendment 2',
 'Proposed Statewide Referendum 1',
 'Proposed Statewide Referendum 2',
 'Secretary Of State',
 'State House - District 128',
 'State House - District 133',
 'State House - District 145',
 'State House - District 149',
 'State House - District 150',
 'State House - District 155',
 'State House - District 156',
 'State House - District 157',
 'State House - District 158',
 'State House - District 159',
 'State House - District 160',
 'State House - District 161',
 'State House - District 167',
 'State House - District 168',
 'State House - District 178',
 'State House Dist 1',
 'State House Dist 100',
 'State House Dist 11',
 'State House Dist 116',
 'State House Dist 12',
 'State House Dist 13',
 'Sta

In [13]:
# Cast the votes to an integer
ga_22_election_statewide["num_votes"] = ga_22_election_statewide["num_votes"].astype(int)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["num_votes"] = ga_22_election_statewide["num_votes"].astype(int)


# Add a FIPS column

In [14]:
fips_file = pd.read_csv("./raw-from-source/FIPS/US_FIPS_Codes.csv")
fips_file = fips_file[fips_file["State"]=="Georgia"]
fips_file["FIPS County"] = fips_file["FIPS County"].astype(str)
fips_file["FIPS County"] = fips_file["FIPS County"].str.zfill(3)
fips_file["County Name"] = fips_file["County Name"].replace("De Kalb","DeKalb")
fips_dict = dict(zip(fips_file['County Name'], fips_file['FIPS County']))
ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['county'].map(fips_dict).fillna(ga_22_election_statewide['county'])
ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['COUNTYFP'].astype(str)
ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['COUNTYFP'].str.zfill(3)

# Print statements to check the county FIPs we've added
print(ga_22_election_statewide['COUNTYFP'].unique())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['county'].map(fips_dict).fillna(ga_22_election_statewide['county'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['COUNTYFP'].astype(str)


['207' '263' '251' '239' '103' '131' '157' '159' '125' '105' '245' '257'
 '267' '211' '261' '275' '155' '129' '133' '161' '273' '255' '213' '247'
 '281' '153' '137' '139' '149' '277' '253' '221' '215' '271' '249' '135'
 '145' '123' '127' '151' '141' '243' '269' '217' '315' '297' '063' '053'
 '173' '191' '001' '171' '061' '067' '299' '303' '293' '307' '043' '057'
 '071' '199' '167' '169' '189' '073' '059' '047' '309' '295' '023' '287'
 '311' '187' '177' '055' '079' '085' '049' '179' '197' '313' '289' '019'
 '317' '285' '183' '195' '075' '051' '045' '077' '193' '175' '291' '321'
 '025' '015' '087' '083' '021' '031' '017' '027' '007' '181' '185' '319'
 '029' '011' '301' '003' '035' '201' '205' '039' '005' '305' '037' '013'
 '069' '065' '009' '033' '229' '283' '143' '081' '115' '209' '121' '091'
 '147' '279' '223' '219' '227' '117' '093' '095' '119' '233' '225' '241'
 '111' '101' '099' '109' '235' '259' '231' '089' '113' '163' '165' '107'
 '097' '237' '265']


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide['COUNTYFP'] = ga_22_election_statewide['COUNTYFP'].str.zfill(3)


# Clean Candidate Names

## Pivot the data


In [15]:
# Define a UNIQUE_ID column
ga_22_election_statewide["UNIQUE_ID"]=ga_22_election_statewide["COUNTYFP"]+"-"+ga_22_election_statewide["precinct"]

# Add in the vote type (Yes or No) for each contest
ga_22_election_statewide["choice"]=ga_22_election_statewide["choice"]+ga_22_election_statewide["contest"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["UNIQUE_ID"]=ga_22_election_statewide["COUNTYFP"]+"-"+ga_22_election_statewide["precinct"]
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["choice"]=ga_22_election_statewide["choice"]+ga_22_election_statewide["contest"]


In [16]:
clean_choice = {'David Raudabaugh (Ind)Commissioner Of Agriculture':'David Raudabaugh (Lib)Commissioner Of Agriculture',
               'Andrew Clyde (I) (Rep)Us House Of Representatives - District 9': 'Andrew Clyde (I) (Rep)Us House Dist 9',
 'Antonio Daza (Dem)Us House Of Representatives - District 11': 'Antonio Daza (Dem)Us House Dist 11',
 'Austin Scott (I)(Rep)Us House Of Representatives - District 8': 'Austin Scott (I) (Rep)Us House Of Representatives - District 8',
 'Barry Fleming(I) (Rep)State House Of Representatives - District 125': 'Barry Fleming (I) (Rep)State House Of Representatives - District 125',
 'Barry Loudermilk (I) (Rep)Us House Of Representatives - District 11': 'Barry Loudermilk (I) (Rep)Us House Dist 11',
 'Ben Watson (I)(Rep)State Senate - District 1': 'Ben Watson (I) (Rep)State Senate - District 1',
 'Beth Camp (I) (Rep)State House Of Representatives - District 135': 'Beth Camp (I) (Rep)State House Dist 135',
 'Bill Hitchens (I) (Rep)State House Of Representatives - District 161': 'Bill Hitchens (I) (Rep)State House - District 161',
 'Bob Christian (Dem)Us House Of Representatives - District 6': 'Bob Christian (Dem)Us House Dist 6',
 'Brent Cox (Rep)State House Of Representatives - District 28': 'Brent Cox (Rep)State House Dist 28',
 'Bruce Bennington (Rep)State House Of Representatives - District 116': 'Bruce Bennington (Rep)State House Dist 116',
 'Buddy DeLoach (I)(Rep)State House - District 167': 'Buddy DeLoach (I) (Rep)State House - District 167',
 'Buddy Deloach (I) (Rep)State House - District 167': 'Buddy DeLoach (I) (Rep)State House - District 167',
 'Buddy Deloach (I) (Rep)State House Of Representatives - District 167': 'Buddy DeLoach (I) (Rep)State House - District 167',
 'Caesar Gonzales (Rep)Us House Of Representatives - District 13': 'Caesar Gonzales (Rep)Us House Dist 13',
 'Chase Oliver (Lib)Us Senate': 'Chase Oliver (L)Us Senate',
 'Chris Benton (Dem)State Senate Dist 18': 'Chris Benton (Dem)State Senate - District 18',
 'Chris West (Rep)Us House Of Representatives - District 2': 'Chris West (Rep)Us House Dist 2',
 'Christian Zimm (Rep)Us House Of Representatives - District 5': 'Christian Zimm (Rep)Us House Dist 5',
 'Claudia Wood (Dem)State House Of Representatives - District 28': 'Claudia Wood (Dem)State House Dist 28',
 'Danny Mathis (I) (Rep)State House Of Representatives - District 149': 'Danny Mathis (I) (Rep)State House - District 149',
 'Danny Mathis (I)(Rep)State House Of Representatives - District 149': 'Danny Mathis (I) (Rep)State House - District 149',
 "Dar'shun Kendrick (I) (Dem)State House Of Representatives - District 95": "Dar'Shun Kendrick (I) (Dem)State House Of Representatives - District 95",
 'Darrius Butler (Democrat)Us House Of Representatives - District 8': 'Darrius Butler (Dem)Us House Of Representatives - District 8',
 'David Clark (I) (Rep)State House Of Representatives - District 100': 'David Clark (I) (Rep)State House Dist 100',
 'David Jenkins (I) (Rep)State House Of Representatives - District 136': 'David Jenkins (I) (Rep)State House Dist 136',
 'David Scott (I) (Dem)Us House Of Representatives - District 13': 'David Scott (I) (Dem)Us House Dist 13',
 'Debbie G. Buckner (I) (Dem)State House Of Representatives - District 137': 'Debbie G. Buckner (I) (Dem)State House Dist 137',
 'Demetrius Douglas (I) (Dem)State House Of Representatives - District 78': 'Demetrius Douglas (I) (Dem)State House Dist 78',
 'Don L. Parsons (I) (Rep)State House Of Representatives - District 44': 'Don L. Parsons (I) (Rep)State House Dist 44',
 'Drew Ferguson (I) (Rep)Us House Of Representatives - District 3': 'Drew Ferguson (I) (Rep)Us House Dist 3',
 'Earl L. ""Buddy"" Carter (I)(Rep)Us House Of Representatives - District 1': 'Earl L. ""Buddy"" Carter (I) (Rep)Us House Of Representatives - District 1',
 'Ed Setzler (Rep)State Senate Dist 37': 'Ed Setzler (Rep)State Senate - District 37',
 'El-Mahdi Holly (I) (Dem)State House Of Representatives - District 116': 'El-Mahdi Holly (I) (Dem)State House Dist 116',
 "Elizabeth 'Liz' Johnson (Dem)Us House Of Representatives - District 12": 'Elizabeth ""Liz"" Johnson (Dem)Us House Of Representatives - District 12',
 'Ellen Wright (Dem)State Senate Dist 29': 'Ellen Wright (Dem)State Senate - District 29',
 'Emily Anderson (Lib)Commissioner Of Labor': 'Emily Anderson (L)Commissioner Of Labor',
 'Fred Glass (Rep)State Senate Dist 6': 'Fred Glass (Rep)State Senate - District 6',
 'Freddie Powell Sims (I) (Dem)State Senate Dist 12': 'Freddie Powell Sims (I) (Dem)State Senate - District 12',
 'Gail Davenport (I) (Dem)State Senate Dist 44': 'Gail Davenport (I) (Dem)State Senate - District 44',
 'Henry C. ""Hank"" Johnson Jr (I) (Dem)Us House Of Representatives - District 4': 'Henry C ""Hank"" Johnson Jr (I) (Dem)Us House Of Representatives - District 4',
 'Hoganne Harrison Walton (Dem)State House Of Representatives - District 133': 'Hoganne Harrison Walton (Dem)State House - District 133',
 'Horacena Tate (I) (Dem)State Senate Dist 38': 'Horacena Tate (I) (Dem)State Senate - District 38',
 'Jason Esteves (Dem)State Senate Dist 6': 'Jason Esteves (Dem)State Senate - District 6',
 'John Albers (I) (Rep)State Senate Dist 56': 'John Albers (I) (Rep)State Senate - District 56',
 'John Carson (I) (Rep)State House Of Representatives - District 46': 'John Carson (I) (Rep)State House Dist 46',
 'John F. Kennedy (I) (Rep)State Senate Dist 18': 'John F. Kennedy (I) (Rep)State Senate - District 18',
 'Jon G. Burns (I) (Rep)State House Of Representatives - District 159': 'Jon G. Burns (I) (Rep)State House - District 159',
 'Jordan Ridley (Rep)State House Of Representatives - District 22': 'Jordan Ridley (Rep)State House Dist 22',
 'Josh Uddin (Dem)State Senate Dist 48': 'Josh Uddin (Dem)State Senate - District 48',
 'Joyce Barlow (Dem)State House Of Representatives - District 151': 'Joyce Barlow (Dem)State House Dist 151',
 'June Krise (Dem)State House Of Representatives - District 8': 'June Krise (Dem)State House Dist 8',
 'Justin Rickett (Rep)State House Of Representatives - District 137': 'Justin Rickett (Rep)State House Dist 137',
 'Karen Mathiak (I) (Rep)State House Of Representatives - District 74': 'Karen Mathiak (I) (Rep)State House Dist 74',
 'Kay Kirkpatrick (I) (Rep)State Senate Dist 32': 'Kay Kirkpatrick (I) (Rep)State Senate - District 32',
 'Kenneth Vance (Rep)State House Of Representatives - District 133': 'Kenneth Vance (Rep)State House - District 133',
 'Larry Walker (I)(Rep)State Senate - District 20': 'Larry Walker (I) (Rep)State Senate - District 20',
 'Lee Hawkins (I) (Rep)State House Of Representatives - District 27': 'Lee Hawkins (I) (Rep)State House Dist 27',
 'Leesa Hagan (I) (Rep)State House Of Representatives - District 156': 'Leesa Hagan (I) (Rep)State House - District 156',
 'Lehman Franklin (Rep)State House Of Representatives - District 160': 'Lehman Franklin (Rep)State House - District 160',
 'Lethia J. Kittrell (Dem)State House Of Representatives - District 156': 'Lethia J. Kittrell (Dem)State House - District 156',
 'Louisa Shell Jackson (Dem)State House Of Representatives - District 100': 'Louisa Shell Jackson (Dem)State House Dist 100',
 'Lucy McBath (I) (Dem)Us House Of Representatives - District 7': 'Lucy McBath (I) (Dem)Us House Dist 7',
 'Mack Jackson(I) (Dem)State House Of Representatives - District 128': 'Mack Jackson (I) (Dem)State House - District 128',
 'Marcus Flowers (Dem)Us House Of Representatives - District 14': 'Marcus Flowers (Dem)Us House Dist 14',
 'Margo Barbee (Dem)State House Of Representatives - District 161': 'Margo Barbee (Dem)State House - District 161',
 'Marjorie Taylor Greene (I) (Rep)Us House Of Representatives - District 14': 'Marjorie Taylor Greene (I) (Rep)Us House Dist 14',
 'Mark Gonsalves (Rep)Us House Of Representatives - District 7': 'Mark Gonsalves (Rep)Us House Dist 7',
 'Martin Cowen (Lib)Attorney General': 'Martin Cowen (L)Attorney General',
 'Marty Harbin (I) (Rep)State Senate Dist 16': 'Marty Harbin (I) (Rep)State Senate - District 16',
 'Mary Robichaux (I) (Dem)State House Of Representatives - District 48': 'Mary Robichaux (I) (Dem)State House Dist 48',
 'Matt Hatchett (I) (Rep)State House Of Representatives - District 155': 'Matt Hatchett (I) (Rep)State House - District 155',
 'Max Burns(I) (Rep)State Senate - District 23': 'Max Burns (I) (Rep)State Senate - District 23',
 'Michael ""Mike"" Ford (Dem)Us House Of Representatives - District 9': 'Michael ""Mike"" Ford (Dem)Us House Dist 9',
 'Micheal Garza (Dem)State House Of Representatives - District 46': 'Micheal Garza (Dem)State House Dist 46',
 'Mike Cheokas (I) (Rep)State House Of Representatives - District 151': 'Mike Cheokas (I) (Rep)State House Dist 151',
 'Nikema Williams (I) (Dem)Us House Of Representatives - District 5': 'Nikema Williams (I) (Dem)Us House Dist 5',
 'NoProposed Constitutional Amendment 1': 'NoConstitutional Amendment #1',
 'NoProposed Constitutional Amendment 2': 'NoConstitutional Amendment #2',
 'NoStatewide Referendum A': 'NoProposed Statewide Referendum 1',
 'NoStatewide Referendum B': 'NoProposed Statewide Referendum 2',
 'NoStatewide Referendum Question 1': 'NoProposed Statewide Referendum 1',
 'NoStatewide Referendum Question 2': 'NoProposed Statewide Referendum 2',
 'NoStatewide Referendum Question A': 'NoProposed Statewide Referendum 1',
 'NoStatewide Referendum Question B': 'NoProposed Statewide Referendum 2',
 'Patrick Thompson (Dem)State Senate Dist 56': 'Patrick Thompson (Dem)State Senate - District 56',
 'Patty Bentley (I) (Dem)State House Dist 150': 'Patty Bentley (I) (Dem)State House - District 150',
 'Patty Bentley (I) (Dem)State House Of Representatives - District 150': 'Patty Bentley (I) (Dem)State House - District 150',
 'Penny Houston (Rep)State House Of Representatives - District 170': 'Penny Houston (I) (Rep)State House Of Representatives - District 170',
 'Pingke Dubignon (Dem)State Senate Dist 16': 'Pingke Dubignon (Dem)State Senate - District 16',
 'Randy Robertson (I) (Rep)State Senate Dist 29': 'Randy Robertson (I) (Rep)State Senate - District 29',
 'Rich McCormick (Rep)Us House Of Representatives - District 6': 'Rich McCormick (Rep)Us House Dist 6',
 'Rick W. Allen(I) (Rep)Us House Of Representatives - District 12': 'Rick W. Allen (I) (Rep)Us House Of Representatives - District 12',
 'Robert Dickey (I) (Rep)State House Dist 145': 'Robert Dickey (I) (Rep)State House - District 145',
 'Robert Dickey (I) (Rep)State House Of Representatives - District 145': 'Robert Dickey (I) (Rep)State House - District 145',
 'Ryan Graham (Lib)Lieutenant Governor': 'Ryan Graham (L)Lieutenant Governor',
 'Sanford Bishop (I) (Dem)Us House Of Representatives - District 2': 'Sanford Bishop (I) (Dem)Us House Dist 2',
 'Scott Hilton (Rep)State House Of Representatives - District 48': 'Scott Hilton (Rep)State House Dist 48',
 'Shane Hazel (Lib)Governor': 'Shane Hazel (L)Governor',
 'Shawn Still (Rep)State Senate Dist 48': 'Shawn Still (Rep)State Senate - District 48',
 'Shea Roberts (I) (Dem)State House Of Representatives - District 52': 'Shea Roberts (I) (Dem)State House Dist 52',
 'Stacee Lashone Hill (Dem)State House Of Representatives - District 22': 'Stacee Lashone Hill (Dem)State House Dist 22',
 'Stan Gunter (I) (Rep)State House Of Representatives - District 8': 'Stan Gunter (I) (Rep)State House Dist 8',
 'Steve Gooch (I) (Rep)State Senate Dist 51': 'Steve Gooch (I) (Rep)State Senate - District 51',
 'Steven Meeks (I) (Rep)State House Of Representatives - District 178': 'Steven Meeks (I) (Rep)State House - District 178',
 'Sylvia L. Bennett (Dem)State Senate Dist 32': 'Sylvia L. Bennett (Dem)State Senate - District 32',
 'Ted Metz (Lib)Secretary Of State': 'Ted Metz (L)Secretary Of State',
 'Val Almonord (Dem)Us House Of Representatives - District 3': 'Val Almonord (Dem)Us House Dist 3',
 'Vanessa Parker (Dem)State Senate Dist 37': 'Vanessa Parker (Dem)State Senate - District 37',
 'Wendy Ahrenkiel (Rep)State House Of Representatives - District 52': 'Wendy  Ahrenkiel (Rep)State House Dist 52',
 'Will Wade (I) (Rep)State House Of Representatives - District 9': 'Will Wade (I) (Rep)State House Dist 9',
 'William ""Bill"" Werkheiser (I) (Rep)State House Of Representatives - District 157': 'William ""Bill"" Werkheiser (I) (Rep)State House - District 157',
 'William ""Will"" Boddie, Jr. (Dem)Commissioner Of Labor': 'William ""Will"" Boddie, Jr (Dem)Commissioner Of Labor',
 'William Harris (Dem)State House Of Representatives - District 74': 'William Harris (Dem)State House Dist 74',
 'Willie Mae Oyogoa (Dem)State House Of Representatives - District 44': 'Willie Mae Oyogoa (Dem)State House Dist 44',
 'YesConstitutional Amendment #1': 'Yes / SíProposed Constitutional Amendment 1',
 'YesConstitutional Amendment #2': 'Yes / SíProposed Constitutional Amendment 2',
 'YesProposed Constitutional Amendment 1': 'Yes / SíProposed Constitutional Amendment 1',
 'YesProposed Constitutional Amendment 2': 'Yes / SíProposed Constitutional Amendment 2',
 'YesStatewide Referendum A': 'Yes / SíProposed Statewide Referendum 1',
 'YesStatewide Referendum B': 'Yes / SíProposed Statewide Referendum 2',
 'YesStatewide Referendum Question 1': 'Yes / SíProposed Statewide Referendum 1',
 'YesStatewide Referendum Question 2': 'Yes / SíProposed Statewide Referendum 2',
 'YesStatewide Referendum Question A': 'Yes / SíProposed Statewide Referendum 1',
 'YesStatewide Referendum Question B': 'Yes / SíProposed Statewide Referendum 2',
 'David Raudabaugh (L)Commissioner Of Agriculture':'David Raudabaugh (Lib)Commissioner Of Agriculture',
 'Darrius Butler(Dem)Us House Of Representatives - District 8':'Darrius Butler (Dem)Us House Of Representatives - District 8',
'Tabitha Johnson- Green (Dem)Us House Of Representatives - District 10': 'Tabitha Johnson-Green (Dem)Us House Of Representatives - District 10'}

In [17]:
ga_22_election_statewide["choice"] = ga_22_election_statewide["choice"].str.strip()
ga_22_election_statewide["choice"] = ga_22_election_statewide["choice"].map(clean_choice).fillna(ga_22_election_statewide["choice"])
# Perform the pivot, clean the resulting dataframe
ga_22_election_statewide_pivot =pd.pivot_table(ga_22_election_statewide,index=["UNIQUE_ID","county","COUNTYFP","precinct"],columns=["choice"],values=['num_votes'],aggfunc=sum)
ga_22_election_statewide_pivot = ga_22_election_statewide_pivot.fillna(0)
ga_22_election_statewide_pivot.columns = ga_22_election_statewide_pivot.columns.droplevel(0)
ga_22_election_statewide_pivot.reset_index(inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["choice"] = ga_22_election_statewide["choice"].str.strip()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  ga_22_election_statewide["choice"] = ga_22_election_statewide["choice"].map(clean_choice).fillna(ga_22_election_statewide["choice"])


In [18]:
def get_race(race_string):
    race_string = race_string.title()
    race_string = race_string.replace("(Vote For 1)","")
    if "U.S. House" in race_string or 'Us House' in race_string:
        return "CON"
    elif "State House" in race_string:
        return "SL"
    elif "State Senate" in race_string:
        return "SU"
    elif "President" in race_string:
        return "PRE"
    elif "US Senate" in race_string or "Us Senate" in race_string:
        return "USS"
    elif "Public Service" in race_string:
        return "PSC"
    elif "Attorney General" in race_string:
        return "ATG"
    elif "Auditor General" in race_string:
        return "AUD"
    elif "Treasurer" in race_string:
        return "TRE"
    elif "Superintendent" in race_string:
        return "SUP"
    elif "Secretary Of State" in race_string:
        return "SOS"
    elif "Lieutenant Governor" in race_string:
        return "LTG"
    elif "Governor" in race_string:
        return "GOV"
    elif "Commissioner Of Labor" in race_string:
        return "LAB"
    elif "Commissioner Of Agriculture" in race_string:
        return "AGR"
    elif "Commissioner Of Insurance" in race_string:
        return "INS"
    elif "Amendment" in race_string:
        if "1" in race_string:
            return "A01"
        elif "2" in race_string:
            return "A02"
        else:
            print("No race for:", race_string)
            raise ValueError
    elif "Referendum" in race_string:
        if "1" in race_string or "A" in race_string:
            return "RFA"
        elif "2" in race_string or "B" in race_string:
            return "RFB"
        else:
            print("No race for:", race_string)
            raise ValueError
    else:
        print("No race for:", race_string)
        raise ValueError
        
def get_election_type(race_string):
    if "(runoff)" in race_string:
        return "R"
    else:
        return "G"
        
def get_party(race_string):
    if "(Rep)" in race_string:
        return "R"
    elif "(Dem)" in race_string or "(Democrat)" in race_string:
        return "D"
    elif "(Lib)" in race_string or "(L)" in race_string:
        return "L"
    elif race_string[0:3]=="Yes":
        return "YES"
    elif race_string[0:2]=="No":
        return "NO"
    else:
        print(race_string)
        return ""
           
def get_name(name_string):
    if " (" in name_string and "Amendment" not in name_string and "Referendum" not in name_string:
        #print(name_string)
        name_string = name_string.split(" (")[0]
        name_string = name_string.replace("'","")
        likely_last = name_string.split(" ")[-1]
        proposed_last = likely_last[:3]
        if proposed_last in ['II', 'III', 'Jr', 'Jr.', 'Sr.', 'JR.', "JR", "IV"]:
            likely_last = name_string.split(" ")[-2]
            proposed_last = likely_last[:3]
        #print(proposed_last.upper())
        return proposed_last.upper()
    else:
        return ""
#     name_string = name_string.split("-:-")[1]
#     name_string = name_string.replace(" (I)","")
#     name_string = name_string.replace("'","")
#     likely_last = name_string.split(" ")[-1]
#     proposed_last = likely_last[:3]
#     if proposed_last in ['II', 'III', 'Jr', 'Jr.', 'Sr.', 'JR.', "JR", "IV"]:
#         likely_last = name_string.split(" ")[-2]
#         proposed_last = likely_last[:3]
#     return proposed_last.upper()

def get_district(race_string, fill_level):
    race_string = race_string.replace(" (Vote For 1)","")
    if "Dist " in race_string:
        break_word = "Dist "
    elif "District " in race_string:
        break_word = "District "
    temp = race_string.split(break_word)[1]
    return temp.zfill(fill_level)

def column_rename_function(name_string):
    election_type = get_election_type(name_string)
    year = "22"
    party = get_party(name_string)
    race = get_race(name_string)
    district = ""
    if race in ["CON", "SU"]:
        district = get_district(name_string, 2)
        year = ""
    elif race in ["SL"]:
        district = get_district(name_string, 3)
        year = ""
    name = get_name(name_string)
    new_col_name = election_type + year + race + district + party + name
    if len(new_col_name) > 10:
        print(name_string)
        print(new_col_name)
    return new_col_name

# Make a dictionary that points to the new column names and checks for duplicates
race_columns = [i for i in ga_22_election_statewide_pivot.columns if i not in ['UNIQUE_ID', 'county', 'COUNTYFP', 'precinct']]

race_updates_dict = {}
race_updates_reversed = {}
clean_dups = {}
new_names = []
for val in race_columns:
    new_name = column_rename_function(val)
    race_updates_dict[val] = new_name
    if new_name not in new_names:
        new_names.append(new_name)
        race_updates_reversed[new_name] = val
    else:
        print("Duplicate", new_name)
        print(race_updates_reversed[new_name])
        print(val)
        clean_dups[val] = race_updates_reversed[new_name]

In [19]:
ga_22_election_statewide_pivot.rename(columns = race_updates_dict, inplace = True)
pd.DataFrame(race_updates_dict.items()).to_csv("./field_names.csv", index = False)
ga_22_election_statewide_pivot.reset_index(inplace = True, drop = True)

for col in race_updates_dict.values():
    ga_22_election_statewide_pivot[col] = ga_22_election_statewide_pivot[col].astype(int)
    
partial_col_names = list(race_updates_dict.values())
partial_col_names.sort()

ga_22_election_statewide_pivot = ga_22_election_statewide_pivot[["UNIQUE_ID", "county", "COUNTYFP", "precinct"]+partial_col_names]


In [20]:
precinct_names = list(ga_22_election_statewide_pivot["precinct"].unique())
precinct_names.sort()

# Check Statewide Totals


In [21]:
general_st_totals = pd.read_csv("./raw-from-source/statewide_checks/summary 6.csv")

In [22]:
combined_st_totals = pd.concat([general_st_totals])
combined_st_totals["cleaner"] = combined_st_totals["choice name"] + combined_st_totals["contest name"]


In [23]:
race_columns = [i for i in list(combined_st_totals["cleaner"].unique()) if "Judicial" not in i]

race_updates_dict = {}
race_updates_reversed = {}
clean_dups = {}
new_names = []
for val in race_columns:
    val_og = val
    if "/" in val:
        val = val.split("/")[0]
    new_name = column_rename_function(val)
    race_updates_dict[val_og] = new_name
    if new_name not in new_names:
        new_names.append(new_name)
        race_updates_reversed[new_name] = val
    else:
        print("Duplicate", new_name)
        print(race_updates_reversed[new_name])
        print(val)
        clean_dups[val] = race_updates_reversed[new_name]

In [24]:
combined_st_totals["cleaner"] = combined_st_totals["cleaner"].map(race_updates_dict).fillna(combined_st_totals["cleaner"])


In [25]:
for val in race_updates_dict.values():
    official = combined_st_totals.loc[combined_st_totals["cleaner"]==val,"total votes"].values[0]
    rdh = sum(ga_22_election_statewide_pivot[val])
    if official!= rdh:
        print(val)
        print("\tOfficial", official)
        print("\tRDH", rdh)

G22A01YES
	Official 3375437
	RDH 3381576
G22A01NO
	Official 439514
	RDH 440440
G22A02YES
	Official 3532212
	RDH 3538623
G22A02NO
	Official 313308
	RDH 314067
G22RFBYES
	Official 2885541
	RDH 2891000
G22RFBNO
	Official 888336
	RDH 889932


# Check Countywide Totals


In [26]:
loaded_counties = os.listdir("./raw-from-source/county_checks/")
z=[]
for locale in loaded_counties:
    if locale.endswith('.xml'):
        file_string = "./raw-from-source/county_checks/"+locale
        xtree = et.parse(file_string)
        xroot = xtree.getroot()
        county_area = xroot.findall(".//Region")
        for i in county_area:
            county = i.text
        contests = xroot.findall(".//Contest")
        for i in contests:
            contest = i.attrib.get('text')
            lower = i.findall("./Choice")
            for j in lower:
                choice = j.attrib.get('text')
                lower_2 = j.findall("./VoteType")
                for k in lower_2:
                    voting_method = k.attrib.get('name')
                    lower_3 = k.findall("./County")
                    for l in lower_3:
                        precinct_name = l.attrib.get('name')
                        num_votes = l.attrib.get('votes')
                        if locale == "detail 2.xml":
                            elec_type = "general"
                        else:
                            elec_type = "runoff"
                        z.append([county,contest,choice,voting_method,precinct_name,num_votes, elec_type])
dfcols = ['county','contest','choice','voting_method','precinct','num_votes',"type"]
df_county = pd.DataFrame(z,columns=dfcols)

In [27]:
df_county["choice"] = np.where(df_county["type"]=="runoff", df_county["choice"] + "(runoff)", df_county["choice"])
df_county["num_votes"] = df_county["num_votes"].astype(int)
df_county["pivot"] = df_county["choice"] + df_county["contest"]


In [28]:
ga_22_election_countywide_pivot =pd.pivot_table(df_county,index=["precinct"],columns=["pivot"],values=['num_votes'],aggfunc=sum)
ga_22_election_countywide_pivot = ga_22_election_countywide_pivot.fillna(0)
ga_22_election_countywide_pivot.columns = ga_22_election_countywide_pivot.columns.droplevel(0)
ga_22_election_countywide_pivot.reset_index(inplace = True)

In [29]:
race_columns = [i for i in list(ga_22_election_countywide_pivot.columns) if "Judicial" not in i and i != "precinct"]

race_updates_dict = {}
race_updates_reversed = {}
clean_dups = {}
new_names = []
for val in race_columns:
    val_og = val
    if "/" in val:
        val = val.split("/")[0]
    new_name = column_rename_function(val)
    race_updates_dict[val_og] = new_name
    if new_name not in new_names:
        new_names.append(new_name)
        race_updates_reversed[new_name] = val
    else:
        print("Duplicate", new_name)
        print(race_updates_reversed[new_name])
        print(val)
        clean_dups[val] = race_updates_reversed[new_name]

In [30]:
ga_22_election_countywide_pivot.rename(columns = race_updates_dict, inplace = True)


In [31]:
ga_22_election_countywide_pivot['COUNTYFP'] = ga_22_election_countywide_pivot['precinct'].map(fips_dict).fillna(ga_22_election_countywide_pivot['precinct'])
ga_22_election_countywide_pivot['COUNTYFP'] = ga_22_election_countywide_pivot['COUNTYFP'].astype(str)
ga_22_election_countywide_pivot['COUNTYFP'] = ga_22_election_countywide_pivot['COUNTYFP'].str.zfill(3)

# Print statements to check the county FIPs we've added
print(ga_22_election_countywide_pivot['COUNTYFP'].unique())

['001' '003' '005' '007' '009' '011' '013' '015' '017' '019' '021' '023'
 '025' '027' '029' '031' '033' '035' '037' '039' '043' '045' '047' '049'
 '051' '053' '055' '057' '059' '061' '063' '065' '067' '069' '071' '073'
 '075' '077' '079' '081' '083' '085' '089' '087' '091' '093' '095' '097'
 '099' '101' '103' '105' '107' '109' '111' '113' '115' '117' '119' '121'
 '123' '125' '127' '129' '131' '133' '135' '137' '139' '141' '143' '145'
 '147' '149' '151' '153' '155' '157' '159' '161' '163' '165' '167' '169'
 '171' '173' '175' '177' '179' '181' '183' '185' '187' '193' '195' '197'
 '189' '191' '199' '201' '205' '207' '209' '211' '213' '215' '217' '219'
 '221' '223' '225' '227' '229' '231' '233' '235' '237' '239' '241' '243'
 '245' '247' '249' '251' '253' '255' '257' '259' '261' '263' '265' '267'
 '269' '271' '273' '275' '277' '279' '281' '283' '285' '287' '289' '291'
 '293' '295' '297' '299' '301' '303' '305' '307' '309' '311' '313' '315'
 '317' '319' '321']


In [32]:
pdv.county_totals_check(ga_22_election_countywide_pivot, "County", ga_22_election_statewide_pivot, "RDH", partial_col_names, "COUNTYFP", full_print=False, method='county')


***Countywide Totals Check***



  holder_1 = partner_df.groupby(county_col).sum()
  holder_2 = source_df.groupby(county_col).sum()


107 contains differences in these races:
	G22A01NO has a difference of -926.0 vote(s)
		County: 0.0 vote(s)
		RDH: 926 vote(s)
	G22A01YES has a difference of -6139.0 vote(s)
		County: 0.0 vote(s)
		RDH: 6139 vote(s)
	G22A02NO has a difference of -759.0 vote(s)
		County: 0.0 vote(s)
		RDH: 759 vote(s)
	G22A02YES has a difference of -6411.0 vote(s)
		County: 0.0 vote(s)
		RDH: 6411 vote(s)
	G22RFBNO has a difference of -1596.0 vote(s)
		County: 0.0 vote(s)
		RDH: 1596 vote(s)
	G22RFBYES has a difference of -5459.0 vote(s)
		County: 0.0 vote(s)
		RDH: 5459 vote(s)

['107']
Counties that match:

['001', '003', '005', '007', '009', '011', '013', '015', '017', '019', '021', '023', '025', '027', '029', '031', '033', '035', '037', '039', '043', '045', '047', '049', '051', '053', '055', '057', '059', '061', '063', '065', '067', '069', '071', '073', '075', '077', '079', '081', '083', '085', '087', '089', '091', '093', '095', '097', '099', '101', '103', '105', '109', '111', '113', '115', '117', '

In [33]:
ga_22_election_statewide_pivot.columns

Index(['UNIQUE_ID', 'county', 'COUNTYFP', 'precinct', 'G22A01NO', 'G22A01YES',
       'G22A02NO', 'G22A02YES', 'G22AGRDHEM', 'G22AGRLRAU',
       ...
       'GSU49RECH', 'GSU50DWIL', 'GSU50RHAT', 'GSU51RGOO', 'GSU52RHUF',
       'GSU53RMOO', 'GSU54RPAY', 'GSU55DBUT', 'GSU56DTHO', 'GSU56RALB'],
      dtype='object', name='choice', length=413)

In [34]:
race_cols = list(ga_22_election_statewide_pivot.columns[3:])
race_cols.sort()

In [35]:
race_cols.remove("precinct")

In [36]:
ga_22_election_statewide_pivot = ga_22_election_statewide_pivot[["UNIQUE_ID", "COUNTYFP", "county", "precinct"] + race_cols]


In [37]:
[i for i in ga_22_election_statewide_pivot.columns if len(i)>10]


[]

In [38]:
ga_22_election_statewide_pivot.to_csv("./ga_2022_gen_prec/ga_2022_gen_prec.csv", index = False)


In [39]:
ga_22_election_statewide_pivot

choice,UNIQUE_ID,COUNTYFP,county,precinct,G22A01NO,G22A01YES,G22A02NO,G22A02YES,G22AGRDHEM,G22AGRLRAU,...,GSU49RECH,GSU50DWIL,GSU50RHAT,GSU51RGOO,GSU52RHUF,GSU53RMOO,GSU54RPAY,GSU55DBUT,GSU56DTHO,GSU56RALB
0,001-1B,001,Appling,1B,83,690,84,694,77,8,...,0,0,0,0,0,0,0,0,0,0
1,001-1C,001,Appling,1C,64,532,55,548,43,5,...,0,0,0,0,0,0,0,0,0,0
2,001-2,001,Appling,2,163,782,111,859,608,19,...,0,0,0,0,0,0,0,0,0,0
3,001-3A1,001,Appling,3A1,77,434,62,454,17,2,...,0,0,0,0,0,0,0,0,0,0
4,001-3C,001,Appling,3C,110,778,88,811,180,10,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2717,321-Scooterville,321,Worth,Scooterville,56,314,62,309,19,3,...,0,0,0,0,0,0,0,0,0,0
2718,321-Sumner,321,Worth,Sumner,69,454,56,471,73,6,...,0,0,0,0,0,0,0,0,0,0
2719,321-Sylvester,321,Worth,Sylvester,197,1163,147,1240,856,17,...,0,0,0,0,0,0,0,0,0,0
2720,321-Sylvester East,321,Worth,Sylvester East,97,874,102,876,137,12,...,0,0,0,0,0,0,0,0,0,0


In [40]:
ga_22_election_statewide_pivot.shape

(2722, 413)