In [2]:
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
import os
import fiona
from statistics import mean, median
from pandas import read_csv
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw' #To load KML files
import string
import xml.etree.ElementTree as et

# Utah 2016

## VEST Documentation

### Data Sources

Election results from OpenElections.

Shapefile from the Utah Geographic Reference Center (https://gis.utah.gov/data/political/voter-precincts/).


### Election Processing

The shapefile from the UGRC is of subprecincts, breaking down precincts in cases of district splits. In some cases, results are reported at the subprecinct level, in most cases, they weren't, so merging was done where necessary.


### Candidates

G16PRERTRU - Donald J. Trump (Republican Party) 
G16PREDCLI - Hillary Rodham Clinton (Democratic Party)
G16PRELJOH - Gary Johnson (Libertarian Party)
G16PREISTE - Jill Stein (Unaffiliated)
G16PREIMCM - Evan McMullen (Unaffiliated)
G16PREOOTH - Other candidates on ballot

G16USSRLEE - Mike Lee (Republican Party)
G16USSDSNO - Misty K. Snow (Democratic Party)
G16USSAFON - Stoney Fonua (Independent American Party)
G16USSIBAR - Bill Barron (Unaffiliated)

G16GOVRHER - Gary R. Herbert (Republican Party)
G16GOVDWEI - Mike Weinholtz (Democratic Party)
G16GOVLKAM - Brian E. Kamerath (Libertarian Party)
G16GOVASCH - Superdell Schanze (Independent American Party)

G16ATGRREY - Sean D. Reyes (Republican Party)
G16ATGDHAR - Jon V. Harper (Democratic Party)
G16ATGLMCC - W. Andrew McCullogh (Libertarian Party)
G16ATGAISB - Michael W. IsBell (Independent American Party)

G16AUDRDOU - John Rougall (Republican Party)
G16AUDDMIT - Mike Mitchell (Democratic Party)
G16AUDAGRE - Jared Green (Independent American Party)

G16TRERDAM - David Damschen (Republican Party)
G16TREDHAN - Neil A. Hansen (Democratic Party)
G16TRECPRO - Richard Proctor (Constitution Party)


In [3]:
#Load UT election results at the precinct level
ut_election_results = pd.read_csv('raw-from-source/20161108__ut__general__precinct.csv')

#Load the final VEST file
ut_vest = gp.read_file("raw-from-source/ut_2016/ut_2016.shp")

#Load the UT 2016 VEST file
ut_vest_2016 = gp.read_file("raw-from-source/ut_2016/ut_2016.shp")

#Load UT election results at the county level
ut_vest_county = pd.read_csv('raw-from-source/20161108__ut__general__county.csv')

#Load in UGRS shapefile
ut_geo = gp.read_file('raw-from-source/Utah_Vista_Ballot_Areas/VistaBallotAreas.shp')

In [4]:
file_list = os.listdir("./raw-from-source/counties")
li = []
for file in file_list:
    if "general" in file:
        #print(file)
        file_ref = "./raw-from-source/counties/"+file
        file_prev = pd.read_csv(file_ref)
        if 'precincts' in file_prev.columns:
            file_prev['precinct'] = file_prev['precincts']
     
        file_prev = file_prev[['county', 'precinct', 'office', 'district', 'party', 'candidate','votes']]
    print(file_prev.shape)
    li.append(file_prev)
county_df = pd.concat(li, axis=0, ignore_index=True)
display(county_df)

(1017, 7)
(10488, 7)
(624, 7)
(432, 7)
(11297, 7)
(853, 7)
(646, 7)
(496, 7)
(216, 7)
(396, 7)
(6004, 7)
(720, 7)
(351, 7)
(4013, 7)
(1408, 7)
(608, 7)
(936, 7)
(1668, 7)
(3702, 7)
(648, 7)
(1908, 7)
(1310, 7)
(185, 7)
(700, 7)
(1224, 7)
(1224, 7)
(108, 7)
(238, 7)
(2275, 7)
(42686, 7)


Unnamed: 0,county,precinct,office,district,party,candidate,votes
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0
...,...,...,...,...,...,...,...
98376,Salt Lake,SJD023,State House,52.0,Republican,John Knotwell,386.0
98377,Salt Lake,SJD025,State House,52.0,Republican,John Knotwell,377.0
98378,Salt Lake,SJD029,State House,52.0,Republican,John Knotwell,429.0
98379,Salt Lake,SWT001,State House,52.0,Republican,John Knotwell,266.0


In [5]:
list_of_races = ['President', 'U.S. Senate', 'Governor', 'Attorney General', 'State Auditor', 'State Treasurer']
clean_df = county_df[county_df['office'].isin(list_of_races)]

In [6]:
clean_df['candidate_office'] = clean_df['candidate'] + '/' + clean_df['office']
display(clean_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  clean_df['candidate_office'] = clean_df['candidate'] + '/' + clean_df['office']


Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President
...,...,...,...,...,...,...,...,...
96109,Salt Lake,WVC076,State Treasurer,,Republican,David Damschen,52.0,David Damschen/State Treasurer
96110,Salt Lake,WVC077,State Treasurer,,Republican,David Damschen,106.0,David Damschen/State Treasurer
96111,Salt Lake,WVC078,State Treasurer,,Republican,David Damschen,133.0,David Damschen/State Treasurer
96112,Salt Lake,WVC079,State Treasurer,,Republican,David Damschen,107.0,David Damschen/State Treasurer


In [7]:
cand_list = clean_df.candidate.unique()
cand_list = [x for x in cand_list if pd.isnull(x) == False and x != 'nan']

cand_office_list = clean_df.candidate_office.unique()
cand_office_list = [x for x in cand_office_list if pd.isnull(x) == False and x != 'nan']

for cand in cand_list:
    #print(cand)
    pass


print('number of rows in original dataframe is:', len(clean_df))

null_precincts = (clean_df[clean_df['candidate_office'].isnull()]) #precincts where candidate is NaN
non_null_df = clean_df.drop(null_precincts.index)
print(len(null_precincts), len(non_null_df))
party_precincts = null_precincts[null_precincts['party'].isin(cand_list)]
party_precincts['candidate_office'] = party_precincts['party'] + '/' + party_precincts['office']
cand_office_df = pd.concat([non_null_df, party_precincts])
display(cand_office_df)
nan_precincts = null_precincts.drop(party_precincts.index)

print(len(cand_office_df), len(nan_precincts))
registered_voters = (nan_precincts[nan_precincts['party'].isnull()])
non_nan = nan_precincts.drop(registered_voters.index)
display(cand_office_df)

number of rows in original dataframe is: 84594
42 84552


Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President
...,...,...,...,...,...,...,...,...
53387,Wayne,Hanksville,President,,Jill Stein,,0.0,Jill Stein/President
53388,Wayne,Hanksville,President,,Alyson Kennedy,,0.0,Alyson Kennedy/President
53394,Wayne,Hanksville,President,,Monica Moorehead,,0.0,Monica Moorehead/President
53395,Wayne,Hanksville,President,,Evan McMullin,,20.0,Evan McMullin/President


84594 0


Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President
...,...,...,...,...,...,...,...,...
53387,Wayne,Hanksville,President,,Jill Stein,,0.0,Jill Stein/President
53388,Wayne,Hanksville,President,,Alyson Kennedy,,0.0,Alyson Kennedy/President
53394,Wayne,Hanksville,President,,Monica Moorehead,,0.0,Monica Moorehead/President
53395,Wayne,Hanksville,President,,Evan McMullin,,20.0,Evan McMullin/President


In [11]:
cand_dict = {
    
    'Donald Trump/President': 'G16PRERTRU', 
    'Republican/President': 'G16PRERTRU', 

    'Hillary Clinton/President': 'G16PREDCLI', 
    'Democratic/President': 'G16PREDCLI', 

    'Gary Johnson/President': 'G16PRELJOH',
    'Libertarian/President': 'G16PRELJOH', 
    
    'Jill Stein/President': 'G16PREISTE', 
    
    'Evan McMullin/President': 'G16PREIMCM', 


    'Rocky De La Fuente/President': 'G16PREOOTH', 
    'Alyson Kennedy/President': 'G16PREOOTH', 
    'Rocky Giordani/President': 'G16PREOOTH', 
    'Darrell Castle/President': 'G16PREOOTH', 
    'Monica Moorehead/President': 'G16PREOOTH', 
    'Write In/President': 'G16PREOOTH', 
    'Fuente/President': 'G16PREOOTH', 
    'Giordani/President': 'G16PREOOTH', 
    'Stephen Park/President': 'G16PREOOTH', 
    'Mike Smith/President': 'G16PREOOTH', 
    'Laurence Kotlikoff/President': 'G16PREOOTH', 
    'Hoefling/President': 'G16PREOOTH', 
    'Limbaugh/President': 'G16PREOOTH', 
    'Dustin Baird/President': 'G16PREOOTH',
    'Andrew Basiago/President': 'G16PREOOTH', 
    'Emidio Soltyisk/President': 'G16PREOOTH', 
    'Tony Valdivia/President': 'G16PREOOTH', 
    'Fox/President': 'G16PREOOTH',
    'Sheila Samm Tittle/President': 'G16PREOOTH', 
    'Buchnan/President': 'G16PREOOTH', 
    'Marshall Schoenke/President': 'G16PREOOTH', 
    'Janet Reid/President': 'G16PREOOTH', 
    'Burton/President': 'G16PREOOTH', 
    'Write-In/President': 'G16PREOOTH',
    'Giordani,Rocky/President': 'G16PREOOTH',
    'Roque De La Fuente/President': 'G16PREOOTH', 
    'Darrell L. Castle/President': 'G16PREOOTH', 
    'Tony Valdiva/President': 'G16PREOOTH',
    'Write-ins/President': 'G16PREOOTH',
    "'Rocky' De La Fuente/President": 'G16PREOOTH', 
    'Giordani/Anderson/President': 'G16PREOOTH', 
    'Independent American/President': 'G16PREOOTH', 
    'Constitution/President': 'G16PREOOTH', 
    'Stephen Paul Parks/President': 'G16PREOOTH', 
    'Tom Hoefling/President': 'G16PREOOTH', 
    'Emidio Soltysik/President': 'G16PREOOTH', 
    'Jamin Burton/President': 'G16PREOOTH', 
    'Robert Buchanan/President': 'G16PREOOTH', 
    'Cherunda Fox/President': 'G16PREOOTH', 
    'David Limbaugh/President': 'G16PREOOTH', 
    
    
    
    'Mike Lee/U.S. Senate': 'G16USSRLEE',
    'Republican/U.S. Senate': 'G16USSRLEE', 
    
    'Misty Snow/U.S. Senate': 'G16USSDSNO', 
    'Misty K. Snow/U.S. Senate': 'G16USSDSNO',
    'Democratic/U.S. Senate': 'G16USSDSNO', 

    'Stoney Fonua/U.S. Senate': 'G16USSAFON', 
    'Independent American/U.S. Senate': 'G16USSAFON', 

    'Bill Barron/U.S. Senate': 'G16USSIBAR', 

    
    
    'Gary Herbert/Governor': 'G16GOVRHER', 
    'Gary R. Herbert/Governor': 'G16GOVRHER', 
    'Gray Herbert/Governor': 'G16GOVRHER', 
    'Republican/Governor': 'G16GOVRHER', 
    'Herbert/Governor': 'G16GOVRHER', 

    'Mike Weinholtz/Governor': 'G16GOVDWEI', 
    'Democratic/Governor': 'G16GOVDWEI', 

    'Brian Kamerath/Governor': 'G16GOVLKAM', 
    'Brian E. Kamerath/Governor': 'G16GOVLKAM',
    'Libertarian/Governor': 'G16GOVLKAM', 

    'Dell Schanze/Governor': 'G16GOVASCH', 
    'Superdell Schanze/Governor': 'G16GOVASCH', 
    'Independent American/Governor': 'G16GOVASCH', 
    
    'L.S. Brown/Governor': None, 
    'Write In/Governor': None, 
    'Write-In/Governor': None, 
    'Write-ins/Governor': None, 

    
    
    'Sean Reyes/Attorney General': 'G16ATGRREY', 
    'Sean D. Reyes/Attorney General': 'G16ATGRREY',
    'Republican/Attorney General': 'G16ATGRREY', 

    'Jon V. Harper/Attorney General': 'G16ATGDHAR', 
    'Jon Harper/Attorney General': 'G16ATGDHAR', 
    'Democratic/Attorney General': 'G16ATGDHAR', 

    'W. Andrew McCullough/Attorney General': 'G16ATGLMCC', 
    'Libertarian/Attorney General': 'G16ATGLMCC', 
    
    'Michael Isbell/Attorney General': 'G16ATGAISB', 
    'Michael W. Isbell/Attorney General': 'G16ATGAISB', 
    'Independent American/Attorney General': 'G16ATGAISB', 

    
    
    'John Dougall/State Auditor': 'G16AUDRDOU', 
    'Republican/State Auditor': 'G16AUDRDOU', 
    
    'Mike Mitchell/State Auditor': 'G16AUDDMIT', 
    'Democratic/State Auditor': 'G16AUDDMIT', 

    'Jared Green/State Auditor': 'G16AUDAGRE', 
    'Independent American/State Auditor': 'G16AUDAGRE', 
    
    'Green/State Auditor': None, 

    
    
    'David Damschen/State Treasurer': 'G16TRERDAM', 
    'Davad Damschen/State Treasurer': 'G16TRERDAM',
    'Republican/State Treasurer': 'G16TRERDAM',
    
    'Neil A. Hansen/State Treasurer': 'G16TREDHAN',
    'Neil Hansen/State Treasurer': 'G16TREDHAN', 
    'Hansen/State Treasurer': 'G16TREDHAN',
    'Democratic/State Treasurer': 'G16TREDHAN', 

    'Richard Proctor/State Treasurer': 'G16TRECPRO', 
    'Constitution/State Treasurer': 'G16TRECPRO'
}

other_dict = {
    
    'Collin Simonsen/U.S. House': None, 
    'Doug Owens/U.S. House': None, 
    'Mia Love/U.S. House': None, 
    'Chris Stewart/U.S. House': None, 
    'Charlene Albarran/U.S. House': None, 
    'Paul McCollaum/U.S. House': None,
    'Rob Bishop/U.S. House': None, 
    'Craig Bowden/U.S. House': None, 
    'Chadwick Fairbanks/U.S. House': None, 
    'Peter Clemens/U.S. House': None,
    'Stephen Tryon/U.S. House': None, 
    'Jason Chaffetz/U.S. House': None,
    'Chadwick H. Fairbanks/U.S. House': None, 
    'Peter C. Clemens/U.S. House': None, 
    'Republican/U.S. House': None, 
    'Democratic/U.S. House': None, 
    'Constitution/U.S. House': None, 

    
    'Ralph Okerlund/State Senate': None,
    'Buxton/State Senate': None,
    'Alan Yorgason/State Senate': None,
    'Hartwick/State Senate': None, 
    'Todd Weiler/State Senate': None,
    'Deidre Henderson/State Senate': None, 
    'Andrew Apsley/State Senate': None, 
    'Jake Anderegg/State Senate': None,
    'Write-In/State Senate': None, 
    'Dan Hemmert/State Senate': None, 
    'Joe Buchman/State Senate': None, 
    'Curt Crosby/State Senate': None,
    'Curt Bramble/State Senate': None, 
    'Jason Christensen/State Senate': None, 
    'David Hinkins/State Senate': None,
    'Heidi Redd/State Senate': None,
    'Deana Froerer/State Senate': None, 
    'Allen Christensen/State Senate': None, 
    'Gregg Buxton/State Senate': None,
    'Lyle W. Hillyard/State Senate': None,
    'Dorothy Engelman/State Senate': None, 
    'Lyle Hillyard/State Senate': None, 
    'Republican/State Senate': None, 
    'Don Ipson/State Senate': None, 
    'Luz Escamilla/State Senate': None, 
    'Fred Johnson/State Senate': None, 
    'Wayne Harper/State Senate': None, 
    'Jim Dexter/State Senate': None, 
    'Celina Milner/State Senate': None, 
    'Brian Shiozawa/State Senate': None, 
    'Ash Anderson/State Senate': None, 
    'Dan Paget/State Senate': None, 
    'Lincoln Fillmore/State Senate': None, 
    'David (Dave) Lifferth/State Senate': None, 
    'Steve Hartwick/State Senate': None, 
 
    
    'Derrin Owens/State House': None,  
    'Carl Albrecht/State House': None, 
    'William Groff/State House': None,
    'Kelly Miles/State House': None, 
    'Amy Morgan/State House': None, 
    'Mike Schultz/State House': None, 
    'Bob Buckles/State House': None, 
    'Paul Ray/State House': None, 
    'Karianne Lisonbee/State House': None, 
    'Rich Miller/State House': None, 
    'Brad Wilson/State House': None, 
    'Steve Handy/State House': None, 
    'Tiffany Kopp/State House': None, 
    'Brent Zimmerman/State House': None, 
    'Christine Stenquist/State House': None, 
    'Stewart Barlow/State House': None, 
    'Timothy Hawkes/State House': None, 
    'Kurt Weiland/State House': None, 
    'Raymond Ward/State House': None, 
    'Becky Edwards/State House': None, 
    'Jon Marsh/State House': None, 
    'Kirk Pearson/State House': None, 
    'Merrill Nelson/State House': None, 
    'Cindee Beard/State House': None,  
    'Mike Noel/State House': None, 
    'Ty Markham/State House': None, 
    'Jefferson Moss/State House': None, 
    'Aaron Davis/State House': None, 
    'Donna Gibbons/State House': None, 
    'Cory Maloy/State House': None, 
    'Mike Kennedy/State House': None, 
    'Keven Stratton/State House': None, 
    'Kay Christofferson/State House': None, 
    'Brian Greene/State House': None, 
    'Val Peterson/State House': None, 
    'Rachel Nelson/State House': None, 
    'Bradley Daw/State House': None, 
    'B. Swallow-Fenton/State House': None, 
    'Tommy Williams/State House': None, 
    'Keith Grover/State House': None, 
    'Dean Sanpei/State House': None, 
    'Nathan Smith Jones/State House': None,
    'Norm Thuston/State House': None, 
    'Francis Gibson/State House': None, 
    'Mike McKell/State House': None, 
    'Marc Roberts/State House': None, 
    'Brad King/State House': None, 
    'Christine Watkins/State House': None,
    'Floyd Handley/State House': None, 
    'Justin Fawson/State House': None, 
    'Matthew Frandsen/State House': None, 
    'Gage Froerer/State House': None,
    'Jeremy Peterson/State House': None, 
    'Kathie Darby/State House': None, 
    'Dixon Pitcher/State House': None, 
    'Jesus Garcia/State House': None, 
    'Derryck Gordon/State House': None, 
    'Lee Perry/State House': None, 
    'Angela Urrea/State House': None,
    'Sherry Phipps/State House': None,
    'Scott Sandall/State House': None,
    'Val K. Potter/State House': None,
    'Edward Redd/State House': None, 
    'David L. Clark/State House': None, 
    'R. Curt Webb/State House': None, 
    'Chuck Goode/State House': None, 
    'Brad Last/State House': None, 
    'John Westwood/State House': None, 
    'Cole Capener/State House': None, 
    'Logan Wilde/State House': None,   
    'Wayne Stevens/State House': None, 
    'Scott Chew/State House': None, 
    'Brian King/State House': None, 
    'Rudi Kohler/State House': None, 
    'Tim Quinn/State House': None, 
    'Jon Stanard/State House': None, 
    'V Lowry Snow/State House': None, 
    'Walt Brooks/State House': None, 
    'Republican/State House': None, 
    'Democratic/State House': None, 
    'Rick Pollock/State House': None, 
    'Douglas Sagers/State House': None, 
    'Susan Duckworth/State House': None, 
    'Jamie White/State House': None, 
    'Scott Hawkins/State House': None, 
    'Sandra Hollins/State House': None, 
    'Rebecca Chavez Houck/State House': None, 
    'Joel Briscoe/State House': None, 
    'Angela Romero/State House': None, 
    'Frank Bedolla/State House': None, 
    'Mike Winder/State House': None, 
    'Sophia Dicaro/State House': None, 
    'Elizabeth Weight/State House': None, 
    'Suzanne Harrison/State House': None, 
    'Lavar Christensen/State House': None, 
    'Craig Hall/State House': None, 
    'Peter Tomala/State House': None, 
    'Karen Kwan/State House': None, 
    'Macade Jensen/State House': None, 
    'Patricia Phaklides/State House': None, 
    'Chelsea Travis/State House': None, 
    'Mark Wheatley/State House': None, 
    'Patrice Arent/State House': None, 
    'Kris Kimball/State House': None, 
    'Carol Spackman/State House': None, 
    'Charles Henderson/State House': None, 
    'Eric Hutchings/State House': None, 
    'Jim Dunnigan/State House': None, 
    'Paul Schulte/State House': None, 
    'Lynn Hemingway/State House': None, 
    'Joseph Breault/State House': None, 
    'Chad Harrington/State House': None, 
    'Dan McCay/State House': None, 
    'Kim Coleman/State House': None, 
    'Edgar Harwood/State House': None, 
    'Adam Gardiner/State House': None, 
    'Christine Passey/State House': None, 
    'Bruce Cutler/State House': None, 
    'Steve Eliason/State House': None, 
    'Nikki Cunard/State House': None, 
    'Marie Poulson/State House': None, 
    'Lee Anne Walker/State House': None, 
    'Ken Ivory/State House': None, 
    'John Rendell/State House': None, 
    'Robert Spendlove/State House': None, 
    'Zach Robinson/State House': None, 
    'Susan Pulsipher/State House': None, 
    'Patty Rich/State House': None, 
    'Greg Hughes/State House': None, 
    'Kyle Waters/State House': None, 
    'Gordon Jones/State House': None, 
    'Write-In/State House': None, 
    'Garr Smith/State House': None, 
    'John Knotwell/State House': None
}

In [12]:
display(cand_office_df)

Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President
...,...,...,...,...,...,...,...,...
53387,Wayne,Hanksville,President,,Jill Stein,,0.0,Jill Stein/President
53388,Wayne,Hanksville,President,,Alyson Kennedy,,0.0,Alyson Kennedy/President
53394,Wayne,Hanksville,President,,Monica Moorehead,,0.0,Monica Moorehead/President
53395,Wayne,Hanksville,President,,Evan McMullin,,20.0,Evan McMullin/President


In [14]:
cand_office_df['CandidateID'] = cand_office_df['candidate_office'].map(cand_dict)

In [15]:
display(cand_office_df)

Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office,CandidateID
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President,G16PREOOTH
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President,G16PREISTE
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President,G16PREOOTH
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President,G16PRELJOH
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President,G16PREOOTH
...,...,...,...,...,...,...,...,...,...
53387,Wayne,Hanksville,President,,Jill Stein,,0.0,Jill Stein/President,G16PREISTE
53388,Wayne,Hanksville,President,,Alyson Kennedy,,0.0,Alyson Kennedy/President,G16PREOOTH
53394,Wayne,Hanksville,President,,Monica Moorehead,,0.0,Monica Moorehead/President,G16PREOOTH
53395,Wayne,Hanksville,President,,Evan McMullin,,20.0,Evan McMullin/President,G16PREIMCM


In [16]:
race_df = cand_office_df[~cand_office_df['CandidateID'].isnull()]

In [17]:
display(race_df)

Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office,CandidateID
0,San Pete,FAIRVIEW MUNICIPAL,President,,,Rocky De La Fuente,0.0,Rocky De La Fuente/President,G16PREOOTH
1,San Pete,FAIRVIEW MUNICIPAL,President,,,Jill Stein,2.0,Jill Stein/President,G16PREISTE
2,San Pete,FAIRVIEW MUNICIPAL,President,,,Alyson Kennedy,0.0,Alyson Kennedy/President,G16PREOOTH
3,San Pete,FAIRVIEW MUNICIPAL,President,,Libertarian,Gary Johnson,12.0,Gary Johnson/President,G16PRELJOH
4,San Pete,FAIRVIEW MUNICIPAL,President,,Independent American,Rocky Giordani,0.0,Rocky Giordani/President,G16PREOOTH
...,...,...,...,...,...,...,...,...,...
53387,Wayne,Hanksville,President,,Jill Stein,,0.0,Jill Stein/President,G16PREISTE
53388,Wayne,Hanksville,President,,Alyson Kennedy,,0.0,Alyson Kennedy/President,G16PREOOTH
53394,Wayne,Hanksville,President,,Monica Moorehead,,0.0,Monica Moorehead/President,G16PREOOTH
53395,Wayne,Hanksville,President,,Evan McMullin,,20.0,Evan McMullin/President,G16PREIMCM


In [22]:
dropped_votes = [
    'L.S. Brown/Governor',
    'Write In/Governor',
    'Write-In/Governor',
    'Write-ins/Governor',
    'Green/State Auditor'
]
dropped_df = cand_office_df[cand_office_df['candidate_office'].isin(dropped_votes)]
print('Length of total dataframe: ', len(cand_office_df))
print('Length of cleaned df plus candidates not recorded by VEST: ', (len(race_df) + len(dropped_df)))

Length of total dataframe:  84594
Length of cleaned df plus candidates not recorded by VEST:  84594


In [43]:
#county cast
county_to_num = {
    'Beaver': 1, 
    'Box Elder': 2, 
    'Cache': 3, 
    'Carbon': 4, 
    'Daggett': 5, 
    'Davis': 6, 
    'Duchesne': 7,
    'Emery': 8, 
    'Garfield': 9, 
    'Grand': 10, 
    'Iron': 11, 
    'Juab': 12, 
    'Kane': 13, 
    'Millard': 14, 
    'Morgan': 15, 
    'Piute': 16, 
    'Rich': 17, 
    'Salt Lake': 18,
    'San Juan': 19, 
    'Sanpete': 20, 
    'San Pete': 20, 
    'Sevier': 21, 
    'Summit': 22, 
    'Tooele': 23,
    'Uintah': 24, 
    'Utah': 25,
    'Wasatch': 26,
    'Washington': 27,
    'Wayne': 28,
    'Weber': 29
}

Unnamed: 0,county,precinct,office,district,party,candidate,votes,candidate_office,CandidateID,CountyID,mergeid


In [47]:
cand_office_df['CountyID'] = cand_office_df['county'].map(county_to_num).astype(int)

In [48]:
cand_office_df['mergeid'] = cand_office_df['CountyID'].astype(str)+'-'+cand_office_df['precinct']

In [49]:
pivoted_df = pd.pivot_table(cand_office_df,values=["votes"],index=["mergeid"],columns=["CandidateID"],aggfunc=sum)
pivoted_df=pivoted_df.fillna(0)
pivoted_df.reset_index(drop=False,inplace=True)
pivoted_df.columns = pivoted_df.columns.droplevel(0)

pivoted_df.columns = [
    'joinid', 'G16ATGAISB', 'G16ATGDHAR', 'G16ATGLMCC',
    'G16ATGRREY', 'G16AUDAGRE', 'G16AUDDMIT', 'G16AUDRDOU',
    'G16GOVASCH', 'G16GOVDWEI', 'G16GOVLKAM', 'G16GOVRHER',
    'G16PREDCLI', 'G16PREIMCM', 'G16PREISTE', 'G16PRELJOH',
    'G16PREOOTH', 'G16PRERTRU', 'G16TRECPRO', 'G16TREDHAN',
    'G16TRERDAM', 'G16USSAFON', 'G16USSDSNO', 'G16USSIBAR', 
    'G16USSRLEE']

display(pivoted_df)



Unnamed: 0,joinid,G16ATGAISB,G16ATGDHAR,G16ATGLMCC,G16ATGRREY,G16AUDAGRE,G16AUDDMIT,G16AUDRDOU,G16GOVASCH,G16GOVDWEI,G16GOVLKAM,G16GOVRHER,G16PREDCLI,G16PREIMCM,G16PREISTE,G16PRELJOH,G16PREOOTH,G16PRERTRU,G16TRECPRO,G16TREDHAN,G16TRERDAM,G16USSAFON,G16USSDSNO,G16USSIBAR,G16USSRLEE
0,1-Beaver 1,17.0,48.0,18.0,507.0,27.0,69.0,481.0,13.0,53.0,9.0,525.0,46.0,83.0,1.0,6.0,6.0,469.0,36.0,80.0,454.0,12.0,48.0,3.0,530.0
1,1-Beaver 2,14.0,48.0,25.0,363.0,21.0,72.0,345.0,4.0,51.0,7.0,395.0,38.0,74.0,4.0,4.0,8.0,334.0,19.0,90.0,329.0,8.0,48.0,4.0,390.0
2,1-Beaver 3,11.0,31.0,6.0,289.0,14.0,44.0,269.0,10.0,22.0,6.0,302.0,22.0,49.0,0.0,3.0,4.0,268.0,18.0,52.0,254.0,8.0,30.0,2.0,300.0
3,1-Beaver 4,3.0,16.0,9.0,100.0,12.0,23.0,92.0,0.0,15.0,4.0,114.0,10.0,23.0,0.0,3.0,0.0,101.0,14.0,19.0,94.0,3.0,7.0,4.0,116.0
4,1-Greenville,0.0,12.0,0.0,74.0,0.0,16.0,67.0,1.0,14.0,2.0,72.0,10.0,4.0,2.0,0.0,0.0,76.0,5.0,20.0,57.0,0.0,13.0,0.0,77.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2220,9-Henrieville,1.0,11.0,10.0,94.0,9.0,20.0,87.0,0.0,16.0,5.0,96.0,15.0,27.0,0.0,2.0,0.0,75.0,15.0,21.0,81.0,4.0,13.0,0.0,101.0
2221,9-Pang1,14.0,45.0,18.0,430.0,28.0,62.0,407.0,7.0,41.0,7.0,468.0,50.0,67.0,1.0,11.0,5.0,391.0,22.0,68.0,409.0,4.0,49.0,2.0,464.0
2222,9-Pang2,4.0,33.0,9.0,343.0,10.0,48.0,323.0,7.0,37.0,6.0,348.0,35.0,53.0,0.0,7.0,4.0,302.0,23.0,41.0,320.0,5.0,31.0,1.0,357.0
2223,9-Ticaboo,0.0,4.0,7.0,12.0,4.0,5.0,14.0,2.0,3.0,2.0,17.0,3.0,5.0,0.0,0.0,2.0,14.0,6.0,4.0,13.0,0.0,7.0,2.0,15.0


In [50]:
ut_vest['joinid'] = ut_vest['CountyID'].astype(str)+'-'+ut_vest['precinct']

In [51]:
display(ut_vest)

Unnamed: 0,CountyID,PrecinctID,county,precinct,G16PRERTRU,G16PREDCLI,G16PRELJOH,G16PREISTE,G16PREIMCM,G16PREOOTH,G16USSRLEE,G16USSDSNO,G16USSAFON,G16USSIBAR,G16GOVRHER,G16GOVDWEI,G16GOVLKAM,G16GOVASCH,G16ATGRREY,G16ATGDHAR,G16ATGLMCC,G16ATGAISB,G16AUDRDOU,G16AUDDMIT,G16AUDAGRE,G16TRERDAM,G16TREDHAN,G16TRECPRO,geometry,joinid
0,1,BV01,Beaver,Beaver 1,469,46,6,1,83,3,530,48,12,3,525,53,9,13,507,48,18,17,481,69,27,454,80,36,"POLYGON ((358362.560 4238423.840, 358372.350 4...",1-Beaver 1
1,1,BV02,Beaver,Beaver 2,334,38,4,4,74,4,390,48,8,4,395,51,7,4,363,48,25,14,345,72,21,329,90,19,"POLYGON ((355866.750 4240506.080, 356860.230 4...",1-Beaver 2
2,1,BV03,Beaver,Beaver 3,268,22,3,0,49,2,300,30,8,2,302,22,6,10,289,31,6,11,269,44,14,254,52,18,"POLYGON ((358902.400 4261055.000, 358909.700 4...",1-Beaver 3
3,1,BV04,Beaver,Beaver 4,101,10,3,0,23,0,116,7,3,4,114,15,4,0,100,16,9,3,92,23,12,94,19,14,"POLYGON ((367866.500 4262536.100, 367842.500 4...",1-Beaver 4
4,1,GR01,Beaver,Greenville,76,10,0,2,4,0,77,13,0,0,72,14,2,1,74,12,0,0,67,16,0,57,20,5,"POLYGON ((348977.460 4223848.220, 348973.090 4...",1-Greenville
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2291,7,RO81,Duchesne,RO81,216,9,2,1,18,0,223,18,3,0,207,18,16,5,215,10,15,3,216,16,8,198,14,28,"POLYGON ((580836.220 4464060.720, 580837.220 4...",7-RO81
2292,7,RO82,Duchesne,RO82,75,15,2,0,13,1,87,13,3,1,80,18,4,3,80,16,6,0,72,24,4,70,21,10,"POLYGON ((586967.910 4461063.280, 586317.330 4...",7-RO82
2293,7,RO91,Duchesne,RO91,0,0,0,0,2,1,3,0,0,0,3,0,0,0,3,0,0,0,3,0,0,2,0,1,"POLYGON ((583252.560 4462118.640, 583274.150 4...",7-RO91
2294,7,TA11,Duchesne,TA11,197,12,3,0,17,0,203,13,3,7,195,25,2,6,193,17,4,6,175,30,11,173,26,17,"POLYGON ((533169.000 4511919.160, 533169.000 4...",7-TA11


In [53]:
join_attempt_one = pd.merge(ut_vest,pivoted_df,how="outer",on="joinid",indicator=True)
print(join_attempt_one["_merge"].value_counts())

right_only = join_attempt_one[join_attempt_one["_merge"]=="right_only"]
left_only = join_attempt_one[join_attempt_one["_merge"]=="left_only"]
merged = join_attempt_one[join_attempt_one["_merge"]=="both"]

left_only.to_csv("./only_vest.csv")
right_only.to_csv("./only_source.csv")
merged.to_csv("./merged_votes.csv")

both          2219
left_only       77
right_only       6
Name: _merge, dtype: int64
