In [2]:
import maup # mggg's library for proration, see documentation here: https://github.com/mggg/maup
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
import os
import fiona
from statistics import mean, median
from pandas import read_csv
# Register KML as a read/write ('rw') driver in fiona so KML files can be loaded.
# This goes through the directly imported `fiona` module instead of geopandas'
# internal `gp.io.file.fiona` attribute, which newer geopandas releases import
# lazily and which is therefore not guaranteed to be set at this point.
fiona.drvsupport.supported_drivers['KML'] = 'rw'

# VEST Georgia Validation

## Examining and Loading Files

### VEST

#### Copy in VEST GA 16 file

In [3]:
# Read VEST's 2016 Georgia precinct shapefile into a GeoDataFrame.
vest_ga = gp.read_file(
    "./raw-from-source/VEST/ga_2016/ga_2016.shp"
)

In [4]:
# Plain-text preview of the first five VEST rows (identifiers plus vote columns).
print(vest_ga.head(n=5))

  DISTRICT CTYSOSID PRECINCT_I PRECINCT_N CTYNAME CTYNUMBER CTYNUMBER2 FIPS2  \
0  121JC15  121JC15       JC15       JC15  FULTON        60        060   121   
1  12108N1  12108N1       08N1       08N1  FULTON        60        060   121   
2  12108N2  12108N2       08N2       08N2  FULTON        60        060   121   
3  121JC14  121JC14       JC14       JC14  FULTON        60        060   121   
4  121JC05  121JC05       JC05       JC05  FULTON        60        060   121   

   G16PRERTRU  G16PREDCLI  G16PRELJOH  G16USSRISA  G16USSDBAR  G16USSLBUC  \
0         202         294          29         206         257          38   
1         313         232          32         445         133           6   
2         402         500          22         517         353          27   
3        1000         708          60        1199         497          39   
4         651         736          81         767         561          75   

   G16PSCRECH  G16PSCLHOS                               

#### VEST Data Sources (from documentation):

Election results from the Georgia Secretary of State Elections Division  (https://sos.ga.gov/index.php/Elections/current_and_past_elections_results)  
Precinct shapefile primarily from the Georgia General Assembly Reapportionment Office (http://www.legis.ga.gov/Joint/reapportionment/en-US/default.aspx)  

#### VEST Processing (from documentation):

Fulton County uses shapefiles and maps sourced from the county GIS program. Precincts CH01/CH04B, CP07A/CP07D, SS29A/SS29B, UC031/UC035 were merged to match how voters were registered in the 2016 voter file.

The following precincts were split by congressional district to match the 2016 election results: Avondale High, Glennwood, and Wadsworth in DeKalb County; Cates D in Gwinnett County.

Cloudland and Teloga precincts in Chattooga County were split along the ridgeline that marks the boundary between them, using the USGS Topographic Contour shapefile.

Three of the four VTDs in Chattahoochee County are comprised of Fort Benning. However, the county only has one polling location for all voters, including residents of Fort Benning that vote within the county. The four Chattahoochee County VTDs have therefore been merged in the shapefile.

#### VEST Field Names (from documentation):

G16PRERTRU - Donald J. Trump (Republican Party)  
G16PREDCLI - Hillary Clinton (Democratic Party)  
G16PRELJOH - Gary Johnson (Libertarian Party)  
  
G16USSRISA - Johnny Isakson (Republican Party)  
G16USSDBAR - Jim Barksdale (Democratic Party)  
G16USSLBUC - Allen Buckley (Libertarian Party)  
  
G16PSCRECH - Tim Echols (Republican Party)  
G16PSCLHOS - Eric Hoskins (Libertarian Party)  

### Election and Precinct Files

In [39]:
# Load MEDSL's 2016 precinct-level presidential returns (tab-separated) and keep
# only the Georgia rows.
#Note: Some difficulties in using the link provided by VEST (need to do work on each county)
#Try this:
#medsl_election = pd.read_csv("./raw-from-source/MEDSL/2016-precinct-president.csv")
# NOTE(review): the recorded output of this cell shows a truncated warning,
# presumably pandas' DtypeWarning about mixed-type columns. low_memory=False
# makes pandas infer each column's dtype from the whole file instead of
# chunk by chunk, which is the remedy that warning recommends -- confirm.
medsl_election = pd.read_csv(
    "./raw-from-source/MEDSL/2016-precinct-president.tab",
    sep="\t",
    low_memory=False,
)
# .copy() makes the Georgia subset an independent frame rather than a slice
# that still refers back to the full table.
medsl_election = medsl_election[medsl_election.state == "Georgia"].copy()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [40]:
# List the column names of the Georgia-only MEDSL frame to decide which to keep.
print(medsl_election.columns)

Index(['year', 'stage', 'special', 'state', 'state_postal', 'state_fips',
       'state_icpsr', 'county_name', 'county_fips', 'county_ansi',
       'county_lat', 'county_long', 'jurisdiction', 'precinct', 'candidate',
       'candidate_normalized', 'office', 'district', 'writein', 'party',
       'mode', 'votes', 'candidate_opensecrets', 'candidate_wikidata',
       'candidate_party', 'candidate_last', 'candidate_first',
       'candidate_middle', 'candidate_full', 'candidate_suffix',
       'candidate_nickname', 'candidate_fec', 'candidate_fec_name',
       'candidate_google', 'candidate_govtrack', 'candidate_icpsr',
       'candidate_maplight'],
      dtype='object')


In [43]:

# Narrow the Georgia returns to the identifying, candidate and vote columns.
# .copy() yields an independent frame, so the column assignments made in later
# cells (county_fips, prec_county) modify it directly instead of triggering
# pandas' SettingWithCopyWarning on a slice of the original table.
medsl_election = medsl_election[[
    "county_name", "county_fips", "jurisdiction", "precinct", "candidate",
    "candidate_normalized", "office", "district", "writein", "party",
    "mode", "votes", "candidate_fec", "candidate_fec_name",
]].copy()
print(medsl_election.head())


           county_name  county_fips jurisdiction precinct        candidate  \
573155  Appling County      13001.0      Appling        2  Hillary Clinton   
573156  Appling County      13001.0      Appling        2  Hillary Clinton   
573157  Appling County      13001.0      Appling        2  Hillary Clinton   
573158  Appling County      13001.0      Appling        2  Hillary Clinton   
573159  Appling County      13001.0      Appling        2     Gary Johnson   

       candidate_normalized        office   district  writein        party  \
573155              clinton  US President  statewide    False   democratic   
573156              clinton  US President  statewide    False   democratic   
573157              clinton  US President  statewide    False   democratic   
573158              clinton  US President  statewide    False   democratic   
573159              johnson  US President  statewide    False  libertarian   

                     mode  votes candidate_fec  \
573155      

In [30]:
# county_fips is displayed as a float (e.g. 13001.0); cast to int first so the
# string form has no trailing ".0", then to str so it can be concatenated.
medsl_election['county_fips'] = medsl_election['county_fips'].astype(int).astype(str)

In [31]:
# Build a combined key: the precinct value immediately followed by the county
# FIPS string, with no separator (e.g. "2" and "13001" give "213001").
medsl_election['prec_county'] = medsl_election['precinct'].add(medsl_election['county_fips'])

In [32]:
# Inspect the combined precinct + county FIPS keys.
print(medsl_election['prec_county'])

573155          213001
573156          213001
573157          213001
573158          213001
573159          213001
              ...     
605996    Warwick13321
605997    Warwick13321
605998    Warwick13321
605999    Warwick13321
606000    Warwick13321
Name: prec_county, Length: 32846, dtype: object


In [10]:
# Read the 2016 Georgia VTD (precinct) shapefile from the raw source directory.
precinct_shapes_ga = gp.read_file(
    "./raw-from-source/GA_shapefile/vtd2016-shape/VTD2016-Shape.shp"
)

In [11]:
# Print the VTD GeoDataFrame (pandas truncates the display to the first and last rows).
print(precinct_shapes_ga)

           ID       AREA  DATA  POPULATION DISTRICT CTYSOSID PRECINCT_I  \
0     6094217   1.864348   690        1013   073131   073131        131   
1     6094266   1.636596   714        3858   073064   073064        064   
2     6094297   5.598643   711        1920   073061   073061        061   
3     6094341   0.872412   713        1770   073063   073063        063   
4     6094377   3.783726   691        3131   073132   073132        132   
...       ...        ...   ...         ...      ...      ...        ...   
2692  5876204  36.596876  1892        2983     1473     1473          3   
2693  5876331  40.082724  1087        2424   105197   105197        197   
2694  5956950  39.503473  2565        4211   281990   281990        990   
2695  5957097  94.810515  2564        2839   281833   281833        833   
2696  5957241  58.307267  2766        1735    13707    13707         07   

                PRECINCT_N CTYNUMBER COUNTY    CTYNAME  FIPS1 FIPS2  \
0     JOURNEY COMM. CHURCH  

In [6]:
# Show (rows, columns) for the VTD shapefile and then for the VEST file.
# This seems to be the correct shapefile given the same number of rows.
print(precinct_shapes_ga.shape, vest_ga.shape, sep="\n")

NameError: name 'precinct_shapes_ga' is not defined

In [17]:
# Compare each VTD geometry with the VEST geometry at the same index and print
# the distinct outcomes (True and/or False).
# geom_equals_exact with tolerance 0.5 * 10**-4 is the equivalent of the
# deprecated geom_almost_equals(decimal=4).
# NOTE(review): the comparison is aligned on index, so an all-False result may
# reflect a different row order or CRS rather than different shapes -- confirm
# by matching rows on a shared key before comparing.
print(precinct_shapes_ga.geom_equals_exact(vest_ga, tolerance=0.5 * 10**-4).unique())


[False]
