In [None]:
import maup # mggg's library for proration, see documentation here: https://github.com/mggg/maup
import pandas as pd # standard python data library
import geopandas as gp # the geo-version of pandas
import numpy as np 
import os
import fiona
from statistics import mean, median
from pandas import read_csv
gp.io.file.fiona.drvsupport.supported_drivers['KML'] = 'rw' #To load KML files

# VEST Florida Validation

In [None]:
# Load the VEST 2016 Florida precinct shapefile that this notebook validates.
vest_fl_16 = gp.read_file("./raw-from-source/VEST/fl_2016/fl_2016.shp")
# Draw the geometries as a quick visual check that the file loaded.
vest_fl_16.plot()
# Last expression of the cell: displays the file's coordinate reference system.
vest_fl_16.crs

## Documentation

### Races

G16PRERTru - Donald J. Trump (Republican Party)  
G16PREDCli - Hillary Clinton (Democratic Party)  
G16PRELJoh - Gary Johnson (Libertarian Party)  
G16PRECCas - Darrell L. Castle (Constitution Party)  
G16PREGSte - Jill Stein (Green Party)  
G16PREIDeL - Roque De La Fuente (Reform Party)  
G16PREOth - Write-in Votes  

G16USSRRub - Marco Rubio (Republican Party)  
G16USSDMur - Patrick Murphy (Democratic Party)  
G16USSLSta - Paul Stanton (Libertarian Party)  
G16USSOth - Independent and Write-in Votes  

### Election Source

Election results from Florida Division of Elections (http://dos.myflorida.com/elections/data-statistics/elections-data/precinct-level-election-results/)  

### Precinct Source

The starting point was the Orlando Sentinel's precinct map for the 2016 presidential primary (http://interactive.orlandosentinel.com/elections/2016/presidential-primary/results/dem.html).  

Hillsborough, Lake, Miami-Dade, Orange, Palm Beach, and Pinellas Counties were updated directly with shapefiles downloaded from county Supervisor of Elections websites.  

Brevard, Marion, and Putnam Counties were updated with KML files from the respective county Supervisor of Elections websites, from their election result map pages.  

Columbia, DeSoto, Leon, Polk, and Osceola Counties updates received from Supervisor of Elections websites through personal contact.  

Highlands, Santa Rosa, Sarasota, and Volusia Counties had some precinct mergers, which were determined based on visual inspection of PDF precinct maps from the county websites.  

Charlotte, Hendry, Holmes, Jackson, Jefferson, Levy, Madison, and Walton Counties are from VTDs released by the Census Bureau's Redistricting Data Program.  

Hernando 99, Collier 450, and Palm Beach 8001/8002 are UOCAVA precincts with no real geography and votes were not redistributed.  

Precinct names were adjusted to align with the formatting used in the voter registration file (e.g., some precincts have leading spaces).  

In [None]:
#This data is from the FL Department of State, and can only be downloaded county by county
# os.listdir returns every entry in the folder and does not guarantee any particular order.
all_files = os.listdir("./raw-from-source/Election_Results/precinctlevelelectionresults2016gen")

In [None]:
# Sanity check before combining: print each file's name followed by its
# (rows, columns) shape so the column counts can be compared by eye.
results_dir = "./raw-from-source/Election_Results/precinctlevelelectionresults2016gen/"
for file_name in all_files:
    print(file_name)
    preview = pd.read_csv(results_dir + file_name, sep="\t", engine='python',
                          index_col=None, header=None, error_bad_lines=False)
    print(preview.shape)

#All the files have 19 columns, so they should be good to combine

In [None]:
# Read every results file (tab-separated, read without a header row) and stack
# them into one dataframe with a fresh 0..n-1 index.
results_dir = "./raw-from-source/Election_Results/precinctlevelelectionresults2016gen/"
li = [
    pd.read_csv(results_dir + file_name, sep="\t", engine='python', index_col=None,
                header=None, error_bad_lines=False)
    for file_name in all_files
]
frame = pd.concat(li, axis=0, ignore_index=True)
print(frame.shape)

In [None]:
# The files were read with header=None, so the header line of a file that has one
# ends up as an ordinary data row.  Find that row by its "CountyCode" label instead
# of a hard-coded row number, so the cell does not depend on the order in which
# os.listdir happened to return the files.
header_rows = frame.index[frame.eq("CountyCode").any(axis=1)]
if len(header_rows) == 0:
    # Also reached when the cell is run a second time: the header has already
    # been promoted and removed, so fail loudly instead of corrupting the columns.
    raise ValueError("No row containing 'CountyCode' found; has this cell already been run?")
frame.columns = frame.loc[header_rows[0]]
# Remove every copy of the header line so no header text is left among the data.
frame.drop(header_rows, inplace=True)
frame.reset_index(inplace=True, drop=True)
print(frame["CountyCode"].unique())
print(frame.shape)

In [None]:
#Filter down to the relevant races
# Contest names are upper-cased first, so only upper-case spellings can ever match.
frame["ContestName"] = frame["ContestName"].str.upper()
office_List = ['PRESIDENT OF THE UNITED STATES', 'UNITED STATES SENATOR']
# Tally rows that are not candidates.
bad_name_List = ['OverVotes', 'UnderVotes', 'Times Blank Voted', 'Times Over Voted']
is_relevant = frame["ContestName"].isin(office_List) & ~frame["CanName"].isin(bad_name_List)
# .copy() gives filtered_frame its own data, so the column assignments in this and
# later cells are not made on a view of `frame` (no SettingWithCopyWarning).
filtered_frame = frame[is_relevant].copy()

# Map ticket-style and write-in spellings onto one name per candidate.
cand_name_dict = {"Trump / Pence":"Donald J. Trump","Clinton / Kaine":"Hillary R. Clinton",
                 "Johnson / Weld":"Gary Johnson","Stein / Baraka":"Jill Stein",
                 "Write-in":"WriteIn","De La Fuente / Steinberg":"Roque De La Fuente",
                 "Castle / Bradley":"Darrell L. Castle",'WriteInVotes':'WriteIn','WriteinVotes':'WriteIn'}

# Names missing from the dictionary map to NaN; fillna restores the original name.
filtered_frame["CanName"] = filtered_frame["CanName"].map(cand_name_dict).fillna(filtered_frame["CanName"])
print(filtered_frame["CountyCode"].unique())

In [None]:
#Left-pad the precinct value with zeros so it is at least 5 characters long
filtered_frame["modified_pre"]=filtered_frame["Precinct"].astype(str).str.zfill(5) 


#Make a column with the 3 letter county code and the precincts
filtered_frame["Pct_std"]=filtered_frame["CountyCode"]+filtered_frame["modified_pre"]
print(filtered_frame.shape)

In [None]:
# Distinct 3-character prefixes of Pct_std (the part that comes from CountyCode).
print(filtered_frame["Pct_std"].str[0:3].unique())

In [None]:
# Build the pivot key: upper-cased candidate name followed directly by the
# upper-cased contest name.
for text_col in ("CanName", "ContestName"):
    filtered_frame[text_col] = filtered_frame[text_col].str.upper()
filtered_frame["pivot_col"] = filtered_frame["CanName"] + filtered_frame["ContestName"]

In [None]:
# One row per precinct key, one column per candidate+contest key, holding the
# summed CanVotes.  The outer "CanVotes" column level is dropped and Pct_std is
# turned back into an ordinary column.
pivoted_2016 = (
    pd.pivot_table(filtered_frame, values=["CanVotes"], index=["Pct_std"],
                   columns=["pivot_col"], aggfunc=sum)
    .droplevel(0, axis=1)
    .reset_index(drop=False)
)
print(len(pivoted_2016.columns))
print(pivoted_2016.columns)

In [None]:
# Distinct 3-character prefixes of Pct_std that remain after pivoting.
pivoted_2016['Pct_std'].str[0:3].unique()

In [None]:
# Rename the pivoted columns by POSITION to the VEST-style field names.
# NOTE(review): this depends on the column order printed by the previous cell;
# re-check that order if the input files or candidate names change.
pivoted_2016.columns=(['countypct',"sen_wi_1",
                       "sen_wi_2",
                       "G16PRECCas",
                       "G16PRERTru",
                       "G16PRELJon",
                        "G16PREDCli",
                       "G16PREGSte",
                       "G16USSRRub",
                       "G16USSDMur",
                       "G16USSLSta",
                       "G16PREIDeL",
                       "sen_wi_3",
                       "sen_wi_4",  
                       "G16PREoth",
                       "sen_wi_5"])
print(pivoted_2016.columns)
print(pivoted_2016.shape)

# Combine the five sen_wi_* columns into G16USSOth.  The pivot leaves NaN where a
# precinct has no row for a candidate, and adding with "+" would turn the whole
# total into NaN (later replaced by 0).  sum(axis=1) skips missing values, so the
# votes that are present are kept.
senate_other_cols = ["sen_wi_1","sen_wi_2","sen_wi_3","sen_wi_4","sen_wi_5"]
pivoted_2016["G16USSOth"] = pivoted_2016[senate_other_cols].sum(axis=1)
pivoted_2016.drop(senate_other_cols, axis=1, inplace=True)

In [None]:
# Replace every missing value in the table with 0.
pivoted_2016 = pivoted_2016.fillna(0)
# Total of the G16PREDCli column over all rows of the source results.
print(sum(pivoted_2016["G16PREDCli"]))
# The first three characters of countypct are used as the county key.
pivoted_2016["county"]=pivoted_2016["countypct"].str[0:3]

In [None]:
# Total of the same column in the VEST file, to compare with the source total.
print(sum(vest_fl_16["G16PREDCli"]))

In [None]:
# Per-county sums of the VEST columns (last expression, so the table is displayed).
vest_fl_16.groupby(["county"]).sum()

In [None]:
# Show up to 70 rows so the whole per-county series is printed.
pd.set_option('display.max_rows', 70)
# Per-county G16PREDCli totals: source results minus VEST.
source_by_county = pivoted_2016.groupby(["county"])["G16PREDCli"].sum()
vest_by_county = vest_fl_16.groupby(["county"])["G16PREDCli"].sum()
display(source_by_county - vest_by_county)

In [None]:
# Print the first rows of both tables to compare their layout.
print(vest_fl_16.head())
print(pivoted_2016.head())

In [None]:
# Column names of the source results table.
print(pivoted_2016.columns)

In [None]:
# Build the merge key on the VEST side: county code followed by the precinct
# value left-padded with zeros to 5 characters.
vest_fl_16["countypct"] = vest_fl_16["county"]+vest_fl_16["pct"].str.zfill(5)
print(vest_fl_16.head())

In [None]:
# Precinct keys in the source results that are renamed to another key
# (old key -> new key); every key not listed here is left unchanged.
source_precinct_election_changes_dict = {
    "WAS00014": "WAS00017",
    "WAS00016": "WAS00019",
    "HAR00017": "HAR00009",
    "HAR00015": "HAR00007",
    "PUT00031": "PUT00035",
    "HAR00018": "HAR00010",
}

pivoted_2016["countypct"] = pivoted_2016["countypct"].replace(source_precinct_election_changes_dict)



In [None]:
#Deal with 0 votes precincts
col_list = ['G16PRECCas', 'G16PRERTru', 'G16PRELJon', 'G16PREDCli',
       'G16PREGSte', 'G16USSRRub', 'G16USSDMur', 'G16USSLSta', 'G16PREIDeL',
       'G16PREoth', 'G16USSOth']

# VEST precincts in which every one of the vote columns is zero.
vest_empty = vest_fl_16[(vest_fl_16[col_list] == 0).all(axis=1)]

#Create the zero vote precincts in the source election results
# Only precincts that are not already in the source table are added.  This keeps
# the merge key unique and makes the cell safe to run more than once.
missing_empty = vest_empty[~vest_empty["countypct"].isin(pivoted_2016["countypct"])]

new_precincts = pd.DataFrame({"countypct": missing_empty["countypct"],
                              "county": missing_empty["countypct"].str[0:3]})
for i in col_list:
    new_precincts[i] = 0
# Same columns, in the same order, as the table the rows are added to.
new_precincts = new_precincts.reindex(columns=pivoted_2016.columns)

print(new_precincts.head())

# pd.concat is used instead of DataFrame.append, which newer pandas no longer has.
pivoted_2016 = pd.concat([pivoted_2016, new_precincts], ignore_index=True)
print(pivoted_2016.head())

In [None]:
# Outer-join VEST (left) and the source results (right) on the precinct key.
# indicator=True adds a _merge column telling which side(s) each row came from.
check_election = pd.merge(vest_fl_16,pivoted_2016,how="outer",on="countypct",indicator=True)

print(check_election["_merge"].value_counts())
# Write the rows found only in VEST and the rows found only in the source to CSV.
check_election[check_election["_merge"]=="left_only"].to_csv("./vest_election.csv")
check_election[check_election["_merge"]=="right_only"].to_csv("./source_election.csv")

In [None]:
# NOTE: this cell held free-text notes, which are not valid Python and made the
# cell raise a SyntaxError.  The notes are kept below as comments.
#
# merge these precincts:
# UNI0001A
# UNI0001B
# UNI0002A
# UNI0002B
# UNI0003A
# UNI0003B
# UNI0004A
# UNI0004C
# UNI0005A
# UNI0005C
#
# call them this:
# UNI1A & 1B
# UNI2A & 2B
# UNI3A & 3B
# UNI4A & 4C
# UNI5A & 5C
#
# TODO: election_precinct_merger is not defined in the visible part of this
# notebook, so the calls stay commented out until the helper exists.
# election_precinct_merger(["UNI0001A","UNI0001B"],"UNI1A & 1B")
# election_precinct_merger(["UNI0002A","UNI0002B"],"UNI2A & 2B")
# election_precinct_merger(["UNI0003A","UNI0003B"],"UNI3A & 3B")
# election_precinct_merger(["UNI0004A","UNI0004C"],"UNI4A & 4C")
# election_precinct_merger(["UNI0005A","UNI0005C"],"UNI5A & 5C")

In [None]:
# Rows whose precinct key was found in both VEST and the source results.
print(check_election[check_election["_merge"]=="both"])

In [None]:
# Compare the G16USSOth column totals: VEST first, then the source results.
print(sum(vest_fl_16["G16USSOth"]))
print(sum(pivoted_2016["G16USSOth"]))

In [None]:
# Column names of both tables, to see which names they share.
print(pivoted_2016.columns)
print(vest_fl_16.columns)

In [None]:
def validater_row (df, column_List, big_diff_threshold=10):
    """Compare paired vote columns row by row and print a summary report.

    For every name in ``column_List`` the dataframe must contain a ``<name>_x``
    and a ``<name>_y`` column (the suffixes pandas adds when two merged frames
    share a column name) as well as a ``countypct`` column identifying the row.

    Parameters
    ----------
    df : pandas.DataFrame
        Merged table to check.
    column_List : iterable of str
        Base names of the column pairs to compare.
    big_diff_threshold : number, default 10
        Differences strictly greater than this value are counted as "big" in
        the summary.

    Returns
    -------
    None
        Everything is reported with ``print``: each differing row/column, every
        new maximum difference, and a final summary.

    Notes
    -----
    When either value of a pair is missing the difference is NaN.  Such a row is
    reported and counted as different, but the NaN is left out of the average,
    the maximum and the big-difference count so those figures stay meaningful.
    """
    matching_rows = 0
    different_rows = 0
    diff_list = []
    diff_values = []
    max_diff = 0

    # iterrows builds each row once, instead of one df.iloc[j] lookup per cell.
    for _, row in df.iterrows():
        precinct = row['countypct']
        same = True
        for i in column_List:
            left_value = row[i + "_x"]
            right_value = row[i + "_y"]
            diff = abs(left_value - right_value)
            if diff == 0:
                continue
            same = False
            print(precinct)
            print(i)
            if np.isnan(diff):
                print("NaN value at diff is: ", precinct)
                print(left_value)
                print(right_value)
                continue
            diff_values.append(diff)
            if diff > max_diff:
                max_diff = diff
                print("New max diff is: ", str(max_diff))
                print(precinct)
        if same:
            matching_rows += 1
        else:
            different_rows += 1
            diff_list.append(precinct)

    print("There are ", len(df.index)," total rows")
    print(different_rows," of these rows have election result differences")
    print(matching_rows," of these rows are the same")
    print("")
    print("The max difference between any one shared column in a row is: ", max_diff)
    if(len(diff_values)!=0):
        print("The average difference is: ", str(sum(diff_values)/len(diff_values)))
    count_big_diff = len([d for d in diff_values if d > big_diff_threshold])
    # The message reports the threshold that is actually applied.
    print("There are ", str(count_big_diff), "precinct results with a difference greater than", big_diff_threshold)
    diff_list.sort()
    print(diff_list)

# Vote columns compared between VEST (suffix _x) and the source results (suffix _y).
col_list = ['G16PRECCas', 'G16PRERTru', 'G16PRELJon', 'G16PREDCli',
       'G16PREGSte', 'G16USSRRub', 'G16USSDMur', 'G16USSLSta', 'G16PREIDeL']

#Took out Senate other and president other

# Check only the rows whose precinct key is present in both tables.
validater_row(check_election[check_election["_merge"]=="both"],col_list)

In [None]:
# Hard-coded list of precinct keys.
# NOTE(review): presumably pasted from the list printed by validater_row above -- confirm.
diff_list = ['HAR00005', 'HAR00008', 'HAR00011', 'HAR00012', 'PAL01173', 'PAL01189', 'PAL01247', 'PAL02081', 'PAL02083', 'PAL02097', 'PAL02116', 'PAL02126', 'PAL04129', 'PAL05003', 'PAL05007', 'PAL05018', 'PAL05055', 'PAL05112', 'PAL05113', 'PAL05115', 'PAL05117', 'PAL05119', 'PAL06207', 'PAL07149', 'PUT00014', 'PUT00015', 'PUT00016', 'PUT00018', 'PUT00019', 'PUT00020', 'PUT00021', 'PUT00022', 'PUT00023', 'PUT00024', 'PUT00025', 'PUT00026', 'PUT00027', 'PUT00028', 'PUT00030', 'PUT00032', 'PUT00033', 'PUT00036', 'PUT00037', 'SAR00429', 'SAR00501', 'WAL00430', 'WAL00440', 'WAL00540', 'WAS00015']

In [None]:
# Precinct keys that exist in VEST but have no match in the source results.
print(check_election[check_election["_merge"]=="left_only"]["countypct"])

In [None]:
# Allow up to 500 columns to be shown so the wide merged row is not truncated.
pd.set_option('display.max_columns', 500)
# Show the merged row(s) for precinct key WAL00420.
display(check_election[check_election["countypct"]=='WAL00420'])

In [None]:
# Number of distinct county codes in the VEST file.
print(len(vest_fl_16["county"].unique()))

### Shapefiles

The starting point was the Orlando Sentinel's precinct map for the 2016 presidential primary (http://interactive.orlandosentinel.com/elections/2016/presidential-primary/results/dem.html).  

Hillsborough, Lake, Miami-Dade, Orange, Palm Beach, and Pinellas Counties were updated directly with shapefiles downloaded from county Supervisor of Elections websites.  

Brevard, Marion, and Putnam Counties were updated with KML files from the respective county Supervisor of Elections websites, from their election result map pages.  

Columbia, DeSoto, Leon, Polk, and Osceola Counties updates received from Supervisor of Elections websites through personal contact.  

Highlands, Santa Rosa, Sarasota, and Volusia Counties had some precinct mergers, which were determined based on visual inspection of PDF precinct maps from the county websites.  

Charlotte, Hendry, Holmes, Jackson, Jefferson, Levy, Madison, and Walton Counties are from VTDs released by the Census Bureau's Redistricting Data Program.  

Hernando 99, Collier 450, and Palm Beach 8001/8002 are UOCAVA precincts with no real geography and votes were not redistributed.  

Precinct names were adjusted to align with the formatting used in the voter registration file (e.g., some precincts have leading spaces).  

The counties below had shapefiles, but the shapefiles could not be loaded:  
DAD, HIG, LAK, LEE, PAL, PIN, SUM

Multiple files were provided for these counties: BAY, CLL, SUM, SUW, VOL

For some of these files, I exported an XLS file to CSV to make it easier to load. (This note was left unfinished in the original; the second alternative was never written down.)

In [None]:
# Block-assignment ("PctBlock") files, one per county.  Apart from GAD, which is
# used as read, each table is reduced to the columns County, Precinct and Block.
# Every path is relative to the notebook folder; LEE and PIN used absolute paths
# under one user's home directory, which only worked on that machine.
records_dir = "./raw-from-source/Records_Request/Precinct and Polling Place Files - 2012-present - last checked 7-23-2020/"
block_cols = ['County','Precinct','Block']

# GAD: columns are used exactly as they appear in the CSV.
GAD_pct = pd.read_csv(records_dir + "GAD/deleted_columns_PctBlock.csv", error_bad_lines=False)
#print(GAD_pct.head())

# GIL / HAM: no header row; keep the first three columns.
# .copy() makes each selection an independent frame before it is modified.
GIL_pct = pd.read_csv(records_dir + "GIL/GIL20121106v6_PctBlock.txt", sep="\t", header=None)
GIL_pct = GIL_pct[[0,1,2]].copy()
GIL_pct.columns = block_cols
#print(GIL_pct.head())

HAM_pct = pd.read_csv(records_dir + "HAM/HAM20140707_PctBlocka8ac771f-a98a-4718-bb62-f6dceab1369c.txt", sep="\t", header=None)
HAM_pct = HAM_pct[[0,1,2]].copy()
HAM_pct.columns = block_cols
#print(HAM_pct.head())

# JAC: the County column is overwritten with the county code.
JAC_pct = pd.read_csv(records_dir + "JAC/cleaned_PctBlock.csv")
JAC_pct.columns = block_cols
JAC_pct["County"] = "JAC"
#print(JAC_pct.head())

MAD_pct = pd.read_csv(records_dir + "MAD/clean_PctBlock.csv")
MAD_pct.columns = block_cols
#print(MAD_pct.head())

# MRN / TAY: no header row; the three columns are named directly.
MRN_pct = pd.read_csv(records_dir + "MRN/MRN20160510PctBlocka03accb3-5931-4025-b0b3-ee84b1d78dce.txt", sep="\t", header=None)
MRN_pct.columns = block_cols
#print(MRN_pct.head())

TAY_pct = pd.read_csv(records_dir + "TAY/TAY20120806_PctBlock0b8117f6-c1de-4b3f-8c25-1e78e2013cd7.txt", sep="\t", header=None)
TAY_pct.columns = block_cols
#print(TAY_pct.head())

# DAD / HIG / LAK / SUM: header row present; select the three named columns.
DAD_pct = pd.read_csv(records_dir + "DAD/DAD20150817_PctBlock8708ddd9-66e2-42f4-8249-d4aa549f0d92.txt", sep="\t")
DAD_pct = DAD_pct[["COUNTY CODE","PRECINCT NUMBER","CENSUS BLOCK"]].copy()
DAD_pct.columns = block_cols
#print(DAD_pct.head())

HIG_pct = pd.read_csv(records_dir + "HIG/HIG20160801_PctBlock2b83ba25-1180-477a-b954-b31f1605fd92.txt", sep="\t")
HIG_pct = HIG_pct[["County Code","Precinct Number","Census Block"]].copy()
HIG_pct.columns = block_cols
HIG_pct["County"] = "HIG"
#print(HIG_pct.head())

LAK_pct = pd.read_csv(records_dir + "LAK/LAK20160420_PctBlock73e1ca5e-e9f7-48e1-b380-98b8401ad0f0.txt", sep="\t")
LAK_pct = LAK_pct[["County Code","Precinct Number","Census Block"]].copy()
LAK_pct.columns = block_cols
LAK_pct["County"] = "LAK"
#print(LAK_pct.head())

# LEE: no header row; columns 1 and 2 hold precinct and block.
LEE_pct = pd.read_csv(records_dir + "LEE/LEE20180813_PctBlock495f9750-05d2-4aa6-9b20-72a6a1fcef02.txt", sep="\t", header=None)
LEE_pct = LEE_pct[[1,2]].copy()
LEE_pct.columns = ['Precinct','Block']
LEE_pct["County"] = "LEE"
LEE_pct = LEE_pct[block_cols]
#print(LEE_pct.head())

# PIN: comma-separated with the block column first and the precinct column second.
PIN_pct = pd.read_csv(records_dir + "PIN/PIN20160503PCTBLOCK6239b364-d072-49e8-a23f-5162bcb5e8f7.TXT", sep=",")
PIN_pct["County"] = "PIN"
PIN_pct.columns = ['Block','Precinct','County']
PIN_pct = PIN_pct[block_cols]
#print(PIN_pct.head())

SUM_pct = pd.read_csv(records_dir + "SUM/SUM20150414_PctBlock5aacc620-010f-48b7-b03e-074bc85a3d2a.txt", sep='\t')
SUM_pct = SUM_pct[['County Code','PrecinctNu','CensusBloc']].copy()
SUM_pct.columns = block_cols
SUM_pct["County"] = "SUM"
#print(SUM_pct.head())

In [None]:
# Stack all block-assignment tables into one frame.
block_file_list = [GAD_pct,GIL_pct,HAM_pct,JAC_pct,MAD_pct,MRN_pct,TAY_pct,DAD_pct,HIG_pct,LAK_pct,LEE_pct,PIN_pct,SUM_pct]
block_counties = pd.concat(block_file_list)
# Precinct identifier: county code followed by the precinct value as text (no zero padding here).
block_counties["unique_ID"]=block_counties["County"]+block_counties["Precinct"].astype(str)


In [None]:
#Shapefiles built out of block assignments
# Load the 2010 census block boundaries that the block-assignment tables are joined to.
fl_2010_census_blocks = gp.read_file("./raw-from-source/Census_Blocks/fl_2010_b_bound/fl_2010_b_bound.shp")

In [None]:
# Give the block id the same column name as in the block-assignment tables.
fl_2010_census_blocks.rename(columns={"GEOID": "Block"},inplace=True)
# Ask for int64 explicitly: plain `int` is only 32 bits on some platforms, which
# is too small for long block identifiers.
fl_2010_census_blocks["Block"] = fl_2010_census_blocks["Block"].astype("int64")
fl_2010_census_blocks = fl_2010_census_blocks[["Block","geometry"]]
# Left join: every block-assignment row is kept, with its block geometry attached.
block_counties_merge = pd.merge(block_counties,fl_2010_census_blocks,on="Block",how="left")
# pd.merge returns a plain DataFrame here (the left table is one), so name the
# geometry column and pass the CRS explicitly; otherwise the later reprojection
# relies on the CRS surviving the merge.
block_counties_merge = gp.GeoDataFrame(block_counties_merge, geometry="geometry",
                                       crs=fl_2010_census_blocks.crs)
# Union the blocks of each precinct into a single geometry.
census_block_precincts = block_counties_merge.dissolve(by="unique_ID")
census_block_precincts.reset_index(inplace=True,drop=True)

In [None]:
#county_list = list(vest_fl_16["county"].unique())
#others=["SUM","PIN","LAK","LEE","HIG","DAD","TAY","MRN","MAD","JAC","HAM","GIL","GAD"]
#county_sources_list = list(set(county_list) - set(others))
#for i in county_sources_list:
    #print(i+"_pct="+i+"_pct.to_crs(vest_fl_16.crs)")

In [None]:
# Precinct shapefiles, one per county, all read from the records-request folder.
# CLA, BRE, OSC and WAS are dissolved on their precinct field right after
# loading, so each precinct ends up as a single row.
pct_map_dir = "./raw-from-source/Records_Request/Precinct and Polling Place Files - 2012-present - last checked 7-23-2020/"

ALA_pct = gp.read_file(pct_map_dir + "ALA/ALA20121106v6_PctMap/ALA20121106v6_PctMap.shp")
BAK_pct = gp.read_file(pct_map_dir + "BAK/BAK20121106v6_PctMap/BAK20121106v6_PctMap.shp")
# NOTE(review): BAY_pct is read from the BAK file, which looks like a copy-paste
# slip.  The correct BAY file is not identifiable from this notebook -- confirm.
BAY_pct = gp.read_file(pct_map_dir + "BAK/BAK20121106v6_PctMap/BAK20121106v6_PctMap.shp")
BRA_pct = gp.read_file(pct_map_dir + "BRA/BRA20121106v6_PctMap/BRA20121106v5_PctMap.shp")
BRE_pct = gp.read_file(pct_map_dir + "BRE/Post RD Precincts/Precincts_2016.shp").dissolve(by="EPrecinct").reset_index()
BRO_pct = gp.read_file(pct_map_dir + "BRO/PRECINCTS/PRECINCTS_region.shp")
CAL_pct = gp.read_file(pct_map_dir + "CAL/CAL20121106v6_PctMap/CAL20121106v6_PctMap.shp")
CHA_pct = gp.read_file(pct_map_dir + "CHA/CHA20121106v6_PctMap/CHA20121106v5_PctMap.shp")
CIT_pct = gp.read_file(pct_map_dir + "CIT/CIT20170531_PctMap/PRECINCT05312017.shp")
CLA_pct = gp.read_file(pct_map_dir + "CLA/CLA20160503_PctMapcc84b5a6-3ade-4dd9-acd8-4c8157bcd343/CLA20160503_PctMap.shp").dissolve(by="PRECINCT").reset_index()
CLL_pct = gp.read_file(pct_map_dir + "CLL/Shape Files/PRECINCT12021.shp")
CLM_pct = gp.read_file(pct_map_dir + "CLM/CLM20121106v6_PctMap/CLM20121106v6_PctMap.shp")
DES_pct = gp.read_file(pct_map_dir + "DES/DES20121106v6_PctMap/DES20121106v5_PctMap.shp")
DIX_pct = gp.read_file(pct_map_dir + "DIX/DIX20121106v6_PctMap/DIX20121106v6_PctMap.shp")
DUV_pct = gp.read_file(pct_map_dir + "DUV/DUV20120615_PctMap_region38f2bdfc-7be4-466e-ab20-98b2ad332567/DUV20120615_PctMap_region.shp")
ESC_pct = gp.read_file(pct_map_dir + "ESC/Precinct_Map_2018/PRECINCT_MAP_022018.shp")
FLA_pct = gp.read_file(pct_map_dir + "FLA/FLA20121106v6_PctMap/FLA20121106v6_PctMap.shp")
FRA_pct = gp.read_file(pct_map_dir + "FRA/FRA20121106v6_PctMap/FRA20121106v6_PctMap.shp")
GLA_pct = gp.read_file(pct_map_dir + "GLA/GLA20121106v6_PctMap/GLA20121106v6_PctMap.shp")
GUL_pct = gp.read_file(pct_map_dir + "GUL/GUL20121106v6_PctMap/GUL20121106v6_PctMap.shp")
HAR_pct = gp.read_file(pct_map_dir + "HAR/HAR20121106v6_PctMap/HAR20121106v6_PctMap.shp")
HEN_pct = gp.read_file(pct_map_dir + "HEN/HEN20121106v6_PctMaped1303a5-dc65-41db-ac5f-a2bc3e07721a/HEN20121106v5_PctMap.shp")
HER_pct = gp.read_file(pct_map_dir + "HER/HER20121106v6_PctMap/HER20121106v6_PctMap.shp")
HIL_pct = gp.read_file(pct_map_dir + "HIL/HIL20170620_PctMap9febfa30-eb46-4b2b-98d8-efb4bf232874/PctMap.shp")
HOL_pct = gp.read_file(pct_map_dir + "HOL/HOL20121106v6_PctMap/HOL20121106v6_PctMap.shp")
IND_pct = gp.read_file(pct_map_dir + "IND/IND20121106v6_PctMap/IND20121106v6_PctMap.shp")
JEF_pct = gp.read_file(pct_map_dir + "JEF/JEF20121106v6_PctMap/JEF20121106v6_PctMap.shp")
LAF_pct = gp.read_file(pct_map_dir + "LAF/LAF20121106v6_PctMap/LAF20121106v6_PctMap.shp")
LEO_pct = gp.read_file(pct_map_dir + "LEO/LEO20160608_PctMapf23ef83b-963f-4d2a-a377-6832ef66b253/LEO02160608_PctMap.shp")
LEV_pct = gp.read_file(pct_map_dir + "LEV/LEV20121106v6_PctMap/LEV20121106v6_PctMap.shp")
LIB_pct = gp.read_file(pct_map_dir + "LIB/LIB20121106v6_PctMap/LIB20121106v5_PctMap.shp")
MAN_pct = gp.read_file(pct_map_dir + "MAN/MAN20121106v6_PctMap/MAN20121106v6_PctMap.shp")
MON_pct = gp.read_file(pct_map_dir + "MON/MON20121106v6_PctMap/MON20121106v6_PctMap.shp")
MRT_pct = gp.read_file(pct_map_dir + "MRT/MRT20121106v6_PctMap/MRT20121106v6_PctMap.shp")
NAS_pct = gp.read_file(pct_map_dir + "NAS/NAS20121106v6_PctMap/NAS20121106v5_PctMap.shp")
OKA_pct = gp.read_file(pct_map_dir + "OKA/OKA20121106_PCTv6da58ba66-dc93-4a57-b652-54e0178dd8c0/OKA20121106v6_PctMap/OKA20121106v5_PctMap.shp")
OKE_pct = gp.read_file(pct_map_dir + "OKE/OKE20120628_PctMap515d73e7-c7f9-43ac-83df-e27de5e49af7/2012 Precincts.shp")
ORA_pct = gp.read_file(pct_map_dir + "ORA/ORA20160506_PctMapc9f3a643-0427-4f76-83e6-015be0bd518c/ORA20160506_PctMap_region.shp")
OSC_pct = gp.read_file(pct_map_dir + "OSC/OSC20160614_PctMapee50a15b-49e5-40e9-b975-fc25e040399a/2016_06 Osceola Precincts.shp").dissolve(by="PCT2014").reset_index()
PAL_pct = gp.read_file(pct_map_dir + "PAL/SHAPEFILES 2012_region/SHAPEFILES 2012_region.shp")
PAS_pct = gp.read_file(pct_map_dir + "PAS/ESRI Shapefiles/Pasco.shp")
POL_pct = gp.read_file(pct_map_dir + "POL/POL20160511_PctMapbb417d62-028d-4b78-adbf-344f474213b2/2016 Polk Precincts.shp")
PUT_pct = gp.read_file(pct_map_dir + "PUT/PUT20160614PctMap98164384-dc27-48c1-8ae9-dfa69c6afdc0/PUT20160614PctMap.shp")
SAN_pct = gp.read_file(pct_map_dir + "SAN/SAN20121106v6_PctMap/SAN20121106v6_PctMap.shp")
SAR_pct = gp.read_file(pct_map_dir + "SAR/SAR20140109v6_PctMapf831e47b-44f3-4954-a8e8-052b3a084322/SarasotaCounty_Pcts_051613_region.shp")
SEM_pct = gp.read_file(pct_map_dir + "SEM/SEM20121106v6_PctMap/SEM20121106v6_PctMap.shp")
STJ_pct = gp.read_file(pct_map_dir + "STJ/STJ20121106v6_PctMap7902e8be-3a36-4698-9e89-0b6a34bc20c1/STJ20121106v6_PctMap.shp")
STL_pct = gp.read_file(pct_map_dir + "STL/STL20121106v6_PctMap/STL20121106v6_PctMap.shp")
SUW_pct = gp.read_file(pct_map_dir + "SUW/SUW20121106v6_PctMap/SUW20121106v6_PctMap.shp")
UNI_pct = gp.read_file(pct_map_dir + "UNI/UNI20121106v6_PctMap/UNI20121106v6_PctMap.shp")
VOL_pct = gp.read_file(pct_map_dir + "VOL/VOL20160721_PctMap05578db2-b193-44b2-988d-0ba4f6c63ead/VOL20160721_PctMap.shp")
WAK_pct = gp.read_file(pct_map_dir + "WAK/WAK20121106v6_PctMap/WAK20121106v6_PctMap.shp")
WAL_pct = gp.read_file(pct_map_dir + "WAL/WAL20121106v6_PctMap/WAL20121106v6_PctMap.shp")
WAS_pct = gp.read_file(pct_map_dir + "WAS/WAS20121106v6_PctMap/WAS20121106v6_PctMap.shp").dissolve(by="pct_std").reset_index()

In [None]:
# Tag every county frame with its county code and reproject it to the CRS of the
# census blocks.  One loop replaces 108 copy-pasted statements; both steps modify
# the frames in place, so the *_pct variables keep pointing at the updated data.
county_frames = {
    'ALA': ALA_pct, 'BAK': BAK_pct, 'BAY': BAY_pct, 'BRA': BRA_pct, 'BRE': BRE_pct, 'BRO': BRO_pct,
    'CAL': CAL_pct, 'CHA': CHA_pct, 'CIT': CIT_pct, 'CLA': CLA_pct, 'CLL': CLL_pct, 'CLM': CLM_pct,
    'DES': DES_pct, 'DIX': DIX_pct, 'DUV': DUV_pct, 'ESC': ESC_pct, 'FLA': FLA_pct, 'FRA': FRA_pct,
    'GLA': GLA_pct, 'GUL': GUL_pct, 'HAR': HAR_pct, 'HEN': HEN_pct, 'HER': HER_pct, 'HIL': HIL_pct,
    'HOL': HOL_pct, 'IND': IND_pct, 'JEF': JEF_pct, 'LAF': LAF_pct, 'LEO': LEO_pct, 'LEV': LEV_pct,
    'LIB': LIB_pct, 'MAN': MAN_pct, 'MON': MON_pct, 'MRT': MRT_pct, 'NAS': NAS_pct, 'OKA': OKA_pct,
    'OKE': OKE_pct, 'ORA': ORA_pct, 'OSC': OSC_pct, 'PAL': PAL_pct, 'PAS': PAS_pct, 'POL': POL_pct,
    'PUT': PUT_pct, 'SAN': SAN_pct, 'SAR': SAR_pct, 'SEM': SEM_pct, 'STJ': STJ_pct, 'STL': STL_pct,
    'SUW': SUW_pct, 'UNI': UNI_pct, 'VOL': VOL_pct, 'WAK': WAK_pct, 'WAL': WAL_pct, 'WAS': WAS_pct,
}
target_crs = fl_2010_census_blocks.crs
for county_code, county_frame in county_frames.items():
    county_frame['new_county'] = county_code
    county_frame.to_crs(target_crs, inplace=True)

In [None]:
# Print the first rows of the BRE frame.
print(BRE_pct.head())

In [None]:
# All county frames that were loaded from shapefiles, collected for the clean-up loop.
fl_2016_shapefiles = [ALA_pct,BAK_pct,BAY_pct,BRA_pct,BRE_pct,BRO_pct,CAL_pct,CHA_pct,CIT_pct,CLA_pct,CLL_pct,CLM_pct,
DES_pct,DIX_pct,DUV_pct,ESC_pct,FLA_pct,FRA_pct,GLA_pct,GUL_pct,HAR_pct,
HEN_pct,HER_pct,HIL_pct,HOL_pct,IND_pct,JEF_pct,LAF_pct,LEO_pct,LEV_pct,LIB_pct,MAN_pct,MON_pct,MRT_pct,NAS_pct,
OKA_pct,OKE_pct,ORA_pct,OSC_pct,PAL_pct,PAS_pct,POL_pct,PUT_pct,SAN_pct,SAR_pct,
SEM_pct,STJ_pct,STL_pct,SUW_pct,UNI_pct,VOL_pct,WAK_pct,WAL_pct,WAS_pct]

In [None]:
# The counties name their precinct field differently; map every known spelling
# onto "Precinct".
precinct_column_aliases = {"PRECINCT":"Precinct","PCT":"Precinct","DISTRICT":"Precinct",
        "PrecMay201":"Precinct","PCT2014":"Precinct",'PRECINCTID':"Precinct"}

cleaned_fl_list = []
for i in fl_2016_shapefiles:
    # In place, as before, so the original county frame also carries the new name.
    i.rename(columns=precinct_column_aliases, inplace=True)
    # Keep the three shared columns.  rename() returns a new frame here, which
    # avoids renaming in place on a slice of the county frame.
    cleaned = i[["Precinct","geometry","new_county"]].rename(columns={"new_county":"County"})
    cleaned_fl_list.append(cleaned)

    

In [None]:
# Convert the BRE precinct values to integers.
for i in cleaned_fl_list:
    # Element-wise test: true only for a non-empty frame whose rows are all BRE.
    if len(i) > 0 and (i["County"] == "BRE").all():
        # Assign the column itself: `.loc[:, col] = ...` may write into the
        # existing column and keep its old dtype, leaving the values unconverted.
        i["Precinct"] = i["Precinct"].astype(int)

In [None]:
# Keep only the Precinct, geometry and County columns of the block-built precincts.
census_block_precincts=census_block_precincts[["Precinct","geometry","County"]]
print(census_block_precincts.head())

In [None]:
# Stack the cleaned per-county shapefile frames into one GeoDataFrame ...
source_shapes = gp.GeoDataFrame(pd.concat(cleaned_fl_list))
# ... then add the precincts that were built from census blocks.
two_sources = [source_shapes,census_block_precincts]
source_shapes_final = gp.GeoDataFrame(pd.concat(two_sources))
#source_shapes_final = gp.GeoDataFrame(pd.concat(source_shapes,census_block_precincts))
# Plot the combined geometries as a visual check.
source_shapes_final.plot()

In [None]:
# Print the rows of the combined layer whose county code is "BRE".
print(source_shapes_final.loc[source_shapes_final["County"] == "BRE"])

In [None]:
# Bring the combined source layer into the CRS of the VEST file.
source_shapes_final = source_shapes_final.to_crs(crs=vest_fl_16.crs)

In [None]:
# Build the "countypct" key: county code followed by the precinct identifier
# rendered as text and left-padded with zeros to five characters.
# The column is reassigned directly rather than through `.loc[:, col] = ...`,
# which sets values into the existing column and does not reliably switch its
# dtype to string before the `.str` accessor is used.
source_shapes_final["Precinct"] = source_shapes_final["Precinct"].astype(str).str.zfill(5)
source_shapes_final["countypct"] = source_shapes_final["County"] + source_shapes_final["Precinct"]

In [None]:
# Outer-join the source shapes with `pivoted_2016` on "countypct"; the
# "_merge" indicator records which side(s) each key was found on.
shapes_and_elections = pd.merge(
    left=source_shapes_final,
    right=pivoted_2016,
    how="outer",
    on="countypct",
    indicator=True,
)
print(shapes_and_elections["_merge"].value_counts())

# Write the rows found on only one side to CSV for manual review.
for merge_side, csv_path in (
    ("left_only", "./shapefile_only.csv"),
    ("right_only", "./elections_only.csv"),
):
    shapes_and_elections[shapes_and_elections["_merge"] == merge_side].to_csv(csv_path)

# Rows present on both sides continue on to the comparison with VEST.
both_shapes_and_elections = shapes_and_elections[shapes_and_elections["_merge"] == "both"]

In [None]:
# Outer-join the matched source rows with the VEST file on "countypct";
# "final_ind" records which side(s) each key was found on.
final_validation = pd.merge(
    left=both_shapes_and_elections,
    right=vest_fl_16,
    how="outer",
    on="countypct",
    indicator="final_ind",
)
print(final_validation["final_ind"].value_counts())

# Keep the rows found in both inputs.
both = final_validation.loc[final_validation["final_ind"] == "both"]

In [None]:
# Compare, row by row, the source geometry ("geometry_x") with the VEST
# geometry ("geometry_y") for every key found in both inputs.
is_matched = final_validation["final_ind"]=="both"
source_geoms = gp.GeoDataFrame(final_validation[is_matched],geometry="geometry_x")
vest_geoms = gp.GeoDataFrame(final_validation[is_matched],geometry="geometry_y")
# `geom_almost_equals` is deprecated in favour of `geom_equals_exact`;
# decimal=0 corresponds to tolerance = 0.5 * 10**0 = 0.5.
vals = source_geoms.geom_equals_exact(vest_geoms,tolerance=0.5)

In [None]:
# Tally how many compared rows are flagged True versus False in `vals`.
match_counts = vals.value_counts()
print(match_counts)

In [None]:
# Show two randomly chosen rows whose source and VEST geometries differ.
# `sample(...).index[0]` is an index *label*, so the row must be fetched with
# `.loc`. `both` is a filtered frame that keeps the labels of
# `final_validation`, so positional `.iloc[label]` could display an unrelated
# row or raise IndexError.
mismatched = both[~vals]
if mismatched.empty:
    # Nothing to sample from; `sample(n=1)` would raise on an empty frame.
    print("No geometry differences to display.")
else:
    for _ in range(2):
        test = mismatched.sample(axis=0,n=1).index[0]
        display(both.loc[test,"geometry_x"])
        display(both.loc[test,"geometry_y"])

In [None]:
# Plot the areas covered by only one of the two geometry sets.
# `shp_difference` must be assigned here: with the assignment commented out,
# the plot call raises NameError on a fresh kernel.
# Only the active geometry column of each frame is passed to the overlay.
# NOTE(review): a full-layer overlay may be slow; confirm the runtime is acceptable.
shp_difference = gp.overlay(
    source_geoms[["geometry_x"]],
    vest_geoms[["geometry_y"]],
    how='symmetric_difference',
)
shp_difference.plot()