In [1]:
import geopandas as gp
import pandas as pd
import os
import numpy as np
import re
import pdv_functions as pdv


# Florida 2022 Election Results Processing

Precinct-Level Data from [Florida Division of Elections](https://dos.myflorida.com/elections/data-statistics/elections-data/precinct-level-election-results/)

County-Level Data to Run Checks from [Florida Department of State Election Archive](https://results.elections.myflorida.com/Index.asp?ElectionDate=11/8/2022&DATAMODE=)

In [2]:
#This Data is from the FL department of state, and can only be downloaded county by county
all_files = os.listdir("./raw-from-source/2022-gen-outputofficial/")

li = []
for i in all_files:
    if i not in [".DS_Store","DAD_PctResults20221108_Recount.txt"]:
        ref = "./raw-from-source/2022-gen-outputofficial/"
        file_ref = ref+i
        file_prev = pd.read_csv(file_ref,sep="\t",engine='python',index_col=None, header=None, dtype = str)
        li.append(file_prev)
frame = pd.concat(li, axis=0, ignore_index=True)
print(frame.shape)

In [4]:
# Rename the columns in the file

col_rename_dict = {0: 'County Code (Three-character abbreviation)',
 1: 'County Name',
 2: 'Election Number',
 3: 'Election Date',
 4: 'Election Name',
 5: 'Unique Precinct Identifier',
 6: 'Precinct Polling Location',
 7: 'Total Registered Voters',
 8: 'Total Registered Republicans',
 9: 'Total Registered Democrats',
 10: 'Total Registered All Other Parties',
 11: 'Contest Name',
 12: 'District',
 13: 'Contest Code (Florida’s 6 digit contest codes)',
 14: 'Candidate/Retention/IssueName/WriteInsCast/OverVotes/UnderVotes',
 15: 'Candidate Party (abbreviation)',
 16: 'Candidate Florida Voter Registration System ID Number',
 17: 'DOE Assigned Candidate Number or Retention/Issue Number',
 18: 'Vote Total'}

## Compare the Dade Recount Data for State Leg 106:

Note: The totals seemed much too large here, so I just used the original

In [6]:
# dade_og = pd.read_csv("./raw-from-source/2022-gen-outputofficial/DAD_PctResults20221108.txt",sep="\t",engine='python',index_col=None, header=None)
# recount = pd.read_csv("./raw-from-source/2022-gen-outputofficial/DAD_PctResults20221108_Recount.txt",sep="\t",engine='python',index_col=None, header=None)
# frame = frame[~((frame[0]=="DAD") & (frame[12]==" District 106"))]
# frame = pd.concat([frame, recount])

In [7]:
# Rename the columns and clean the dataframe
frame.rename(columns = col_rename_dict, inplace = True)
frame['Vote Total'] = frame['Vote Total'].astype(int)

# Set the Unique Precinct Identifier to the Precinct Polling Location, where the Unique Identifier is "N/A"
frame["Unique Precinct Identifier"] = np.where(frame["Unique Precinct Identifier"].isna(), frame["Precinct Polling Location"], frame["Unique Precinct Identifier"])

In [13]:
# Confirm there are no more "N/A" unique precinct identifiers
frame[frame["Unique Precinct Identifier"].isna()]

Unnamed: 0,County Code (Three-character abbreviation),County Name,Election Number,Election Date,Election Name,Unique Precinct Identifier,Precinct Polling Location,Total Registered Voters,Total Registered Republicans,Total Registered Democrats,Total Registered All Other Parties,Contest Name,District,Contest Code (Florida’s 6 digit contest codes),Candidate/Retention/IssueName/WriteInsCast/OverVotes/UnderVotes,Candidate Party (abbreviation),Candidate Florida Voter Registration System ID Number,DOE Assigned Candidate Number or Retention/Issue Number,Vote Total


## Filter Down to Relevant Races

In [16]:
races_list = ['United States Senator', 'Representative in Congress',
       'Governor and Lieutenant Governor', 'Attorney General',
       'Chief Financial Officer', 'Commissioner of Agriculture',
       'State Senator', 'State Representative',
 'Retention of Charles T. Canady','Retention of John D. Couriel','Retention of Jamie Grosshans',
 'Retention of Jorge Labarga','Retention of Ricky Polston',
 'Amendment No. 1: Limitation on Assessment of Real Property Used for Residential Purposes',
 'Amendment No. 2: Abolishing the Constitution Revision Commission',
 'Amendment No. 3: Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce']

In [17]:
#Filter to the presidential results
frame =frame[frame['Contest Name'].isin(races_list)]

#Filter out the OverVotes and UnderVotes
filtered_frame = frame[~frame['Candidate/Retention/IssueName/WriteInsCast/OverVotes/UnderVotes'].isin(['OverVotes', 'UnderVotes'])]

#Clean up the precinct column
filtered_frame['Precinct Polling Location'] = filtered_frame['Precinct Polling Location'].astype(str)

# #Deal with Dade County - note I did this for FL 2020 to join with the shapefile
# filtered_frame["mod_col"]=filtered_frame.apply(lambda row:int(float(row['Precinct Polling Location'].split("PRECINCT ")[1])) if row['County Code (Three-character abbreviation)']=="DAD" else row['Unique Precinct Identifier'], axis=1)

#Make the precinct column at least 4 digits
filtered_frame["mod_col"] = filtered_frame["Unique Precinct Identifier"].astype(str).str.zfill(4)
filtered_frame["modified_pre"]=filtered_frame["mod_col"].str.zfill(4)

#Make a column with the 3 letter county code and the precincts
filtered_frame["pct_std"]=filtered_frame['County Code (Three-character abbreviation)']+"-"+filtered_frame["modified_pre"]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame['Precinct Polling Location'] = filtered_frame['Precinct Polling Location'].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame["mod_col"] = filtered_frame["Unique Precinct Identifier"].astype(str).str.zfill(4)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filte

In [18]:
# Cast the contest name and code columns to string to make the pivot col
filtered_frame["Contest Name"] = filtered_frame["Contest Name"].astype(str)
filtered_frame["Contest Code (Florida’s 6 digit contest codes)"] = filtered_frame["Candidate/Retention/IssueName/WriteInsCast/OverVotes/UnderVotes"].astype(str)

# Fill in the blank candidate party with "WRI"
filtered_frame['Candidate Party (abbreviation)'] = filtered_frame['Candidate Party (abbreviation)'].map({' ':'WRI'}).fillna(filtered_frame['Candidate Party (abbreviation)'])

# Create the pivot column with the contest name and the candidate
filtered_frame["pivot_col"] = filtered_frame["Contest Name"] + "-:-" + filtered_frame["Contest Code (Florida’s 6 digit contest codes)"] + "-:-" + "PARTY:" +filtered_frame['Candidate Party (abbreviation)']

# Where it exists, add in the district to the pivot column
filtered_frame["pivot_col"] = np.where(filtered_frame["District"]!=" ",filtered_frame["pivot_col"] + "-:-" + filtered_frame["District"], filtered_frame["pivot_col"])

# Removing all of the specific party registration data, as it looks like the state doesn't have that
for val in ['Total Registered Republicans','Total Registered Democrats', 'Total Registered All Other Parties']:
    print(filtered_frame[val].unique())
    
filtered_frame.drop(['Election Number', 'Election Date', 'Total Registered Republicans','Total Registered Democrats', 'Total Registered All Other Parties','Candidate Florida Voter Registration System ID Number','DOE Assigned Candidate Number or Retention/Issue Number'], axis = 1, inplace = True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame["Contest Name"] = filtered_frame["Contest Name"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_frame["Contest Code (Florida’s 6 digit contest codes)"] = filtered_frame["Candidate/Retention/IssueName/WriteInsCast/OverVotes/UnderVotes"].astype(str)


In [25]:
#Pivot the data so that each row has all the results from that precinct
pivoted_2022 = pd.pivot_table(filtered_frame, values=['Vote Total'], index=["pct_std",'County Code (Three-character abbreviation)','County Name',"Unique Precinct Identifier","Precinct Polling Location"],columns=['pivot_col'],aggfunc=sum)

#Clean up the indices
pivoted_2022.reset_index(inplace=True,drop=False)

pivoted_2022.columns = [' '.join(col).strip() for col in pivoted_2022.columns.values]

pivoted_2022.columns = pivoted_2022.columns.str.replace("Vote Total ","")

pivoted_2022 = pivoted_2022.fillna(0)

# Rename Columns

In [27]:
keep_names = ['pct_std', 'County Code (Three-character abbreviation)', 'County Name',
       'Unique Precinct Identifier', 'Precinct Polling Location']

In [28]:
def get_race(contest):
    if "President" in contest:
        level = "PRE"
    elif ("Representative in Congress" in contest or "State Senator" in contest or "State Representative" in contest):
        contest_info = get_level_dist(contest)
        level = contest_info[0]+contest_info[1]        
    else:
        print(contest)
        raise ValueError
    return level

In [30]:
def get_election_type(contest):
    return "G"


def get_race(contest):
    mod_level = ""
    level = contest.split("-:-")[0]
    level_change_dict = {
        'Attorney General':'ATG',
        'Court of Appeals Judge':'CAJ',
        'Governor':'GOV',
        'State Controller':'CNT',
        'STATE QUESTION':'SQ',
        'Secretary of State':'SOS',
        'State Representative':'SL',
        'State Senator':'SU',
        'President':'PRE',
        'United States Senator':'USS',
        'Amendment':'A',
        'State Treasurer':'TRE',
        'Retention of':'SCJ',
        'Representative in Congress':'CON',
        'Chief Financial Officer':'CFO',
        'Commissioner of Agriculture':'COA'}
    for val in level_change_dict.keys():
        if val in level:
            mod_level = level_change_dict[val]
            break
    if mod_level == "":
        print("NO CONTEST", contest)
    if mod_level == 'A':
        mod_level += level.split(":")[0].split(" ")[-1]
        
    return mod_level

def get_party(contest):
    if "Amendment" in contest:
        return ""
    elif "Retention of" in contest:
        return contest.split("-:-")[1][0].upper()
    elif "PARTY:DEM" in contest:
        return "D"
    elif "PARTY:REP" in contest:
        return "R"
    elif "PARTY:LPF" in contest:
        return "L"
    ## Reform -> F
    elif "PARTY:REF" in contest:
        return "O"
    elif "PARTY:PSL" in contest:
        return "S"
    elif "PARTY:GRE" in contest:
        return "G"
    elif "PARTY:CPF" in contest:
        return "C"
    elif "PARTY:WRI" in contest:
        return "O"
    elif "PARTY:NPA" or "PARTY:NOP" in contest:
        return "N"
    else:
        print(contest)
        return ValueError
    
def get_name(contest):
    if "No for Rejection" in contest:
        return "NO"
    elif "Yes for Approval" in contest:
        return "YES"
    
    if "Retention of" in contest:
        candidate = contest.split("-:-")[0]
        return candidate.upper().split(" ")[-1][0:3]
    else:
        candidate = contest.split("-:-")[1]
        if candidate == "None Of These Candidates":
            return "WRI"
        candidate = candidate.upper()
        if "Governor" in contest:
            likely_last = candidate.split(" ")[0]
        else:
            likely_last = candidate.split(" ")[-1]
        if likely_last in ["JR","III","II","SR"]:
            return candidate.split(" ")[-2][0:3]
        else:
            return likely_last[0:3]
    
def get_district(contest):
    district_string = contest.split("-:-")[3]
    district_string = district_string.strip()
    likely = district_string.split(" ")[1]
    if "Representative in Congress" in contest or 'State Senator' in contest:
        level = 2
    elif 'State Representative' in contest:
        level = 3
    return likely.zfill(level)

contest_name_change_dict = {}

duplicate_value_list = []
for contest in pivoted_2022.columns:
    if contest not in keep_names:
        # Add in a condition about the 20
#         print("E-type", get_election_type(contest))
#         print("Race", get_race(contest))
#         print("Party", get_party(contest))
#         print("Name", get_name(contest))
        if "Representative in Congress" in contest or 'State Representative' in contest or 'State Senator' in contest:
            value = get_election_type(contest) + get_race(contest) + get_district(contest) + get_party(contest) + get_name(contest)
        else:
            value = get_election_type(contest) + "22" + get_race(contest) + get_party(contest) + get_name(contest)
        if value in contest_name_change_dict.values():
            duplicate_value_list.append(value)
        print(len(value))
        contest_name_change_dict[contest] = value 

if len(duplicate_value_list) > 0:
    print("DUPLICATE VALUES")
    for val in [(k,v) for k, v in contest_name_change_dict.items() if v in duplicate_value_list]:
        print(val)

7
8
7
8
7
8
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
10
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
9
10
10
10
10
10
10


In [32]:
# Clean up edge cases
contest_name_change_dict['Representative in Congress-:-Corinna Balderramos Robinson-:-PARTY:DEM-:- District 21'] = 'GCON21DBAL'
contest_name_change_dict['State Representative-:-Christian De La Torre-:-PARTY:REP-:- District 46']= 'GSL046RDEL'
contest_name_change_dict['State Representative-:-Karen Gonzalez Pittman-:-PARTY:REP-:- District 65']= 'GSL065RGON'
contest_name_change_dict['State Representative-:-LaVon Bracy Davis-:-PARTY:DEM-:- District 40']= 'GSL040DBRA'
contest_name_change_dict['State Representative-:-Maura Cruz Lanz-:-PARTY:REP-:- District 64']='GSL064RCRU'
contest_name_change_dict['State Representative-:-Yvonne Hayes Hinson-:-PARTY:DEM-:- District 21']='GSL021DHAY'

# Create a sorted dictionary to sort the columns of the final dataframe
sorted_contest_name_change_dict = dict(sorted(contest_name_change_dict.items(), key=lambda x:x[1]))

In [35]:
# Export the candidate name change dictionary for the README
holder = pd.DataFrame(sorted_contest_name_change_dict.values(),sorted_contest_name_change_dict.keys())
holder.to_csv("./field_names.csv", index = True)

In [36]:
# Rename the dataframe using this dictionary
pivoted_2022.rename(columns = contest_name_change_dict, inplace = True)

# Define a reversed version of the dictionary
contest_name_change_dict_rev = {v:k for k,v in contest_name_change_dict.items()}

## Add a COUNTYFP column

In [40]:
FIPS_dict = {'Alachua': '001',
 'Baker': '003',
 'Bay': '005',
 'Bradford': '007',
 'Brevard': '009',
 'Broward': '011',
 'Calhoun': '013',
 'Charlotte': '015',
 'Citrus': '017',
 'Clay': '019',
 'Collier': '021',
 'Columbia': '023',
 'Desoto': '027',
 'Dixie': '029',
 'Duval': '031',
 'Escambia': '033',
 'Flagler': '035',
 'Franklin': '037',
 'Gadsden': '039',
 'Gilchrist': '041',
 'Glades': '043',
 'Gulf': '045',
 'Hamilton': '047',
 'Hardee': '049',
 'Hendry': '051',
 'Hernando': '053',
 'Highlands': '055',
 'Hillsborough': '057',
 'Holmes': '059',
 'Indian River': '061',
 'Jackson': '063',
 'Jefferson': '065',
 'Lafayette': '067',
 'Lake': '069',
 'Lee': '071',
 'Leon': '073',
 'Levy': '075',
 'Liberty': '077',
 'Madison': '079',
 'Manatee': '081',
 'Marion': '083',
 'Martin': '085',
 'Miami-Dade': '086',
 'Monroe': '087',
 'Nassau': '089',
 'Okaloosa': '091',
 'Okeechobee': '093',
 'Orange': '095',
 'Osceola': '097',
 'Palm Beach': '099',
 'Pasco': '101',
 'Pinellas': '103',
 'Polk': '105',
 'Putnam': '107',
 'St. Johns': '109',
 'St. Lucie': '111',
 'Santa Rosa': '113',
 'Sarasota': '115',
 'Seminole': '117',
 'Sumter': '119',
 'Suwannee': '121',
 'Taylor': '123',
 'Union': '125',
 'Volusia': '127',
 'Wakulla': '129',
 'Walton': '131',
 'Washington': '133'}

In [41]:
pivoted_2022["COUNTYFP"] = pivoted_2022["County Name"].map(FIPS_dict).fillna(pivoted_2022["County Name"])
print(pivoted_2022["COUNTYFP"].unique())

## Additional Cleaning

In [43]:
pivoted_2022.rename(columns = {"pct_std":"UNIQUE_ID"}, inplace = True)
pivoted_2022[pivoted_2022["UNIQUE_ID"].str.contains("nan")]
for col in list(contest_name_change_dict.values()):
    pivoted_2022[col] = pivoted_2022[col].astype(int)

## Election Totals Checks

In [49]:
alt_source = pd.read_csv("./raw-from-source/11082022Election.txt", encoding = "latin-1", sep="\t")

interested_races = ['United States Senator', 'United States Representative',
       'Governor', 'Attorney General', 'Chief Financial Officer',
       'Commissioner of Agriculture',  'State Senator',
       'State Representative',
       'Shall Justice Ricky Polston be retained in Office?',
       'Shall Justice Charles T. Canady be retained in Office?',
       'Shall Justice John D. Couriel be retained in Office?',
       'Shall Justice Jamie Grosshans be retained in Office?',
       'Shall Justice Jorge Labarga be retained in Office?',
      
       'Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce',
       'Limitation on Assessment of Real Property Used for Residential Purposes',
       'Abolishing the Constitution Revision Commission']

alt_source = alt_source[alt_source["OfficeDesc"].isin(interested_races)]

# Add in the District Number where relevant
alt_source["OfficeDesc_Detailed"] = alt_source.apply(lambda x: x["OfficeDesc"] + " "+str(int(x["Juris1num"])) if not pd.isna(x["Juris1num"]) else x["OfficeDesc"], axis = 1)

In [56]:
# Combine the office, candidate first name and candidate last name, clean where all of that info isn't available
alt_source["cand_col"] = alt_source["OfficeDesc_Detailed"] + "-:-" + alt_source["CanNameLast"] + ", " + alt_source["CanNameFirst"] + "-:-" + alt_source["PartyCode"]
alt_source.loc[alt_source["CanNameFirst"].isna(),"cand_col"] = alt_source["OfficeDesc_Detailed"] + "-:-" + alt_source["CanNameLast"]  + "-:-" + alt_source["PartyCode"]

In [59]:
pivoted_alt = pd.pivot_table(alt_source, index = "CountyName", columns = "cand_col", values = "CanVotes", aggfunc = sum)

pivoted_alt.reset_index(inplace = True, drop = False)

pivoted_alt = pivoted_alt.fillna(0)

In [63]:
def get_election_type(contest):
    return "G"


def get_race(contest):
    mod_level = ""
    level = contest.split("-:-")[0]
    level_change_dict = {
        'Attorney General':'ATG',
        'Court of Appeals Judge':'CAJ',
        'Lieutenant Governor':'LTG',
        'Governor':'GOV',
        'State Controller':'CNT',
        'STATE QUESTION':'SQ',
        'Secretary of State':'SOS',
        'State Representative':'SL',
        'State Senator':'SU',
        'President':'PRE',
        'United States Senator':'USS',
        'Amendment':'A',
        'State Treasurer':'TRE',
        'retained in Office':'SCJ',
        'United States Representative':'CON',
        'Chief Financial Officer':'CFO',
        'Commissioner of Agriculture':'COA'}
    for val in level_change_dict.keys():
        if val in level:
            mod_level = level_change_dict[val]
            break
    if mod_level == "":
        print("NO CONTEST", contest)
    if mod_level == 'A':
        mod_level += level.split(":")[0].split(" ")[-1]
        
    return mod_level

def get_party(contest):
    contest = contest.split("-:-")[2]
    if "Amendment" in contest:
        return ""
    elif "Retention of" in contest:
        return contest.split("-:-")[1][0].upper()
    elif "DEM" in contest:
        return "D"
    elif "REP" in contest:
        return "R"
    elif "LPF" in contest:
        return "L"
    ## Reform -> F
    elif "REF" in contest:
        return "O"
    elif "PSL" in contest:
        return "S"
    elif "GRE" in contest:
        return "G"
    elif "CPF" in contest:
        return "C"
    elif "WRI" in contest:
        return "O"
    elif "NPA" or "NOP" in contest:
        return "N"
    else:
        print(contest)
        return ValueError
    
def get_name(contest):
    if "No for Rejection" in contest:
        return "NO"
    elif "Yes for Approval" in contest:
        return "YES"
    
    if "Retention of" in contest:
        candidate = contest.split("-:-")[0]
        return candidate.upper().split(" ")[-1][0:3]
    else:
        candidate = contest.split("-:-")[1]
        if candidate == "None Of These Candidates":
            return "WRI"
        candidate = candidate.upper()
#         candidate = candidate.replace(" ", "")
        if "Governor" in contest:
            likely_last = candidate.split(" ")[1]
        else:
            likely_last = candidate.split(" ")[0]
        if likely_last in ["JR","III","II","SR"]:
            return candidate.split(" ")[-2][0:3]
        else:
            return likely_last[0:3]
    
def get_district(contest):
    district_string = contest.split("-:-")[0]
    district_string = district_string.strip()
    likely = district_string.split(" ")[-1]
    if 'State Representative' in contest:
        level = 3
    else:
        level = 2

    return likely.zfill(level)

contest_name_change_dict_second = {}

duplicate_value_list = []
for contest in pivoted_alt.columns:
    if contest != "CountyName":
        print(contest)
        print("E-type", get_election_type(contest))
        print("Race", get_race(contest))
        print("Party", get_party(contest))
        print("Name", get_name(contest))
        if "United States Representative" in contest or 'State Representative' in contest or 'State Senator' in contest:
            value = get_election_type(contest) + get_race(contest) + get_district(contest) + get_party(contest) + get_name(contest)
        else:
            value = get_election_type(contest) + "22" + get_race(contest) + get_party(contest) + get_name(contest)
        if value in contest_name_change_dict_second.values():
            duplicate_value_list.append(value)
        print(len(value))
        contest_name_change_dict_second[contest] = value 

if len(duplicate_value_list) > 0:
    print("DUPLICATE VALUES")
    for val in [(k,v) for k, v in contest_name_change_dict_second.items() if v in duplicate_value_list]:
        print(val)

Abolishing the Constitution Revision Commission-:-No for Rejection-:-NOP
E-type G
NO CONTEST Abolishing the Constitution Revision Commission-:-No for Rejection-:-NOP
Race 
Party N
Name NO
NO CONTEST Abolishing the Constitution Revision Commission-:-No for Rejection-:-NOP
6
Abolishing the Constitution Revision Commission-:-Yes for Approval-:-NOP
E-type G
NO CONTEST Abolishing the Constitution Revision Commission-:-Yes for Approval-:-NOP
Race 
Party N
Name YES
NO CONTEST Abolishing the Constitution Revision Commission-:-Yes for Approval-:-NOP
7
Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce-:-No for Rejection-:-NOP
E-type G
NO CONTEST Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce-:-No for Rejection-:-NOP
Race 
Party N
Name NO
NO CONTEST Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce-:-No for Rejection-:-NOP
6
Additional Homestead Property Tax Exemptio

In [64]:
# Deal with edge cases
contest_name_change_dict_second['Abolishing the Constitution Revision Commission-:-No for Rejection-:-NOP']='G22A2NO'
contest_name_change_dict_second['Abolishing the Constitution Revision Commission-:-Yes for Approval-:-NOP']='G22A2YES'
contest_name_change_dict_second['Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce-:-No for Rejection-:-NOP']= 'G22A3NO'
contest_name_change_dict_second['Additional Homestead Property Tax Exemption for Specified Critical Public Services Workforce-:-Yes for Approval-:-NOP']= 'G22A3YES'
contest_name_change_dict_second['Limitation on Assessment of Real Property Used for Residential Purposes-:-No for Rejection-:-NOP']= 'G22A1NO'
contest_name_change_dict_second['Limitation on Assessment of Real Property Used for Residential Purposes-:-Yes for Approval-:-NOP']= 'G22A1YES'
contest_name_change_dict_second['Shall Justice Charles T. Canady be retained in Office?-:-No-:-NOP']= 'G22SCJNCAN'
contest_name_change_dict_second['Shall Justice Charles T. Canady be retained in Office?-:-Yes-:-NOP']= 'G22SCJYCAN'
contest_name_change_dict_second['Shall Justice Jamie Grosshans be retained in Office?-:-No-:-NOP']= 'G22SCJNGRO'
contest_name_change_dict_second['Shall Justice Jamie Grosshans be retained in Office?-:-Yes-:-NOP']= 'G22SCJYGRO'
contest_name_change_dict_second['Shall Justice John D. Couriel be retained in Office?-:-No-:-NOP']= 'G22SCJNCOU'
contest_name_change_dict_second['Shall Justice John D. Couriel be retained in Office?-:-Yes-:-NOP']= 'G22SCJYCOU'
contest_name_change_dict_second['Shall Justice Jorge Labarga be retained in Office?-:-No-:-NOP']= 'G22SCJNLAB'
contest_name_change_dict_second['Shall Justice Jorge Labarga be retained in Office?-:-Yes-:-NOP']= 'G22SCJYLAB'
contest_name_change_dict_second['Shall Justice Ricky Polston be retained in Office?-:-No-:-NOP']= 'G22SCJNPOL'
contest_name_change_dict_second['Shall Justice Ricky Polston be retained in Office?-:-Yes-:-NOP']= 'G22SCJYPOL'
contest_name_change_dict_second['State Representative 46-:-De La Torre, Christian-:-REP'] = 'GSL046RDEL' 

In [66]:
pivoted_alt.rename(columns = contest_name_change_dict_second, inplace = True)
contest_name_change_dict_second_rev = {v:k for k,v in contest_name_change_dict_second.items()}

# Compare Statewide Totals

In [69]:
{i:contest_name_change_dict_second_rev[i] for i in list(pivoted_alt.columns) if i not in list(pivoted_2022.columns) and i !="CountyName"}

{'GSL015OSTE': 'State Representative 15-:-Steckloff, Jerry-:-WRI',
 'GSL016OHAR': 'State Representative 16-:-Hartley, Richard-:-WRI',
 'GSL016OMOO': 'State Representative 16-:-Moore, Harley-:-WRI',
 'GSL003OMAD': 'State Representative 3-:-Maddox, Sandra-:-WRI',
 'GSL030OBAK': 'State Representative 30-:-Baker, Vic-:-WRI',
 'GSU33OVAL': 'State Senator 33-:-Valenta, Robert-:-WRI',
 'GSU05OCOO': 'State Senator 5-:-Cooper, Patrick-:-WRI',
 'GCON12OSMI': 'United States Representative 12-:-Smith, Charles-:-WRI',
 'GCON13OCUR': 'United States Representative 13-:-Curnow, Jacob-:-WRI',
 'GCON13OYOU': 'United States Representative 13-:-Young, Dwight-:-WRI',
 'GCON16OHAR': 'United States Representative 16-:-Hartman, Ralph-:-WRI',
 'GCON18OSER': 'United States Representative 18-:-Serratore, Leonard-:-WRI',
 'GCON19OPOS': 'United States Representative 19-:-Post, Patrick-:-WRI',
 'GCON28OSCH': 'United States Representative 28-:-Schaffer, Jeremiah-:-WRI',
 'GCON04OKON': 'United States Representative 4

In [70]:
ignore_list = ['UNIQUE_ID',
 'County Code (Three-character abbreviation)',
 'County Name','COUNTYFP','pct_std', 'County Code (Three-character abbreviation)', 'County Name',
       'Unique Precinct Identifier', 'Precinct Polling Location']

In [71]:
{i:contest_name_change_dict_rev[i] for i in list(pivoted_2022.columns) if i not in list(pivoted_alt.columns) and i not in ignore_list}

{'G22CFOOWRI': 'Chief Financial Officer-:-WriteinVotes-:-PARTY:WRI',
 'G22GOVOWRI': 'Governor and Lieutenant Governor-:-WriteinVotes-:-PARTY:WRI',
 'GCON12OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 12',
 'GCON13OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 13',
 'GCON16OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 16',
 'GCON18OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 18',
 'GCON19OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 19',
 'GCON28OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 28',
 'GCON04OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 4',
 'GCON07OWRI': 'Representative in Congress-:-WriteinVotes-:-PARTY:WRI-:- District 7',
 'GSL015OWRI': 'State Representative-:-WriteinVotes-:-PARTY:WRI-:- District 15',
 'GSL016OWRI': 'State Representative-:-WriteinVotes-:-PARTY:WRI-:- District 16

In [72]:
# Define a list of columns that appear in both dataframes
shared_cols = [i for i in list(pivoted_2022.columns) if i in list(pivoted_alt.columns)]

In [73]:
unshared_cols = [i for i in list(pivoted_2022.columns) if i not in list(pivoted_alt.columns)]

In [76]:
pdv.statewide_totals_check(pivoted_2022, "Precinct-Level", pivoted_alt, "County-Level", shared_cols)

***Statewide Totals Check***
G22A1NO has a difference of -1.0 votes
	Precinct-Level: 2997124 votes
	County-Level: 2997125.0 votes
G22A1YES has a difference of -1.0 votes
	Precinct-Level: 4015948 votes
	County-Level: 4015949.0 votes
G22A2NO is equal 	 both dataframes 3206717
G22A2YES has a difference of -1.0 votes
	Precinct-Level: 3744872 votes
	County-Level: 3744873.0 votes
G22A3NO has a difference of 1.0 votes
	Precinct-Level: 2968687 votes
	County-Level: 2968686.0 votes
G22A3YES has a difference of -4.0 votes
	Precinct-Level: 4215538 votes
	County-Level: 4215542.0 votes
G22ATGDAYA has a difference of 1.0 votes
	Precinct-Level: 3025944 votes
	County-Level: 3025943.0 votes
G22ATGRMOO is equal 	 both dataframes 4651279
G22CFODHAT is equal 	 both dataframes 3085697
G22CFORPAT is equal 	 both dataframes 4528811
G22COADBLE is equal 	 both dataframes 3095786
G22COARSIM has a difference of -1.0 votes
	Precinct-Level: 4510643 votes
	County-Level: 4510644.0 votes
G22GOVDCRI is equal 	 both dat

In [77]:
# Minor renaming for the county-level check
pivoted_alt.rename(columns = {"CountyName":"County Name"}, inplace = True)

In [78]:
pdv.county_totals_check(pivoted_2022, "Precinct-Level", pivoted_alt, "County-Level", shared_cols, "County Name", method = "county")

***Countywide Totals Check***

Miami-Dade contains differences in these races:
	GSL106RBAS has a difference of 2.0 vote(s)
		Precinct-Level: 26416 vote(s)
		County-Level: 26414.0 vote(s)
	GSL106DLEO has a difference of 2.0 vote(s)
		Precinct-Level: 26176 vote(s)
		County-Level: 26174.0 vote(s)


  holder_1 = partner_df.groupby(county_col).sum()


Seminole contains differences in these races:
	G22A1NO has a difference of -1.0 vote(s)
		Precinct-Level: 79274 vote(s)
		County-Level: 79275.0 vote(s)
	G22A1YES has a difference of -1.0 vote(s)
		Precinct-Level: 86117 vote(s)
		County-Level: 86118.0 vote(s)
	G22A2YES has a difference of -1.0 vote(s)
		Precinct-Level: 80874 vote(s)
		County-Level: 80875.0 vote(s)
	G22A3NO has a difference of 1.0 vote(s)
		Precinct-Level: 65155 vote(s)
		County-Level: 65154.0 vote(s)
	G22A3YES has a difference of -4.0 vote(s)
		Precinct-Level: 104623 vote(s)
		County-Level: 104627.0 vote(s)
	G22ATGDAYA has a difference of 1.0 vote(s)
		Precinct-Level: 76193 vote(s)
		County-Level: 76192.0 vote(s)
	G22COARSIM has a difference of -1.0 vote(s)
		Precinct-Level: 99282 vote(s)
		County-Level: 99283.0 vote(s)
	G22GOVRDES has a difference of -1.0 vote(s)
		Precinct-Level: 102190 vote(s)
		County-Level: 102191.0 vote(s)
	GCON07RMIL has a difference of -1.0 vote(s)
		Precinct-Level: 98275 vote(s)
		County-Level:

# See If We Can Deduce the OWRI candidates

The county-level data source did not have a catch-all "Other/Write-In" category for these votes, but it did have votes for particular candidates in these races. This is an attempt to see whether these votes are equal. It turns out they are not

In [101]:
check_write_ins = []

pivoted_alt["G22USSOWRI"] = pivoted_alt['G22USSOEKP'] + pivoted_alt['G22USSOGRA'] + pivoted_alt['G22USSOKNE'] + pivoted_alt['G22USSOQUI']
pivoted_alt['GSL016OWRI'] = pivoted_alt['GSL016OHAR'] + pivoted_alt['GSL016OMOO']

for name in ['GSL015OSTE',
 'GSL003OMAD',
 'GSL030OBAK',
 'GSU33OVAL',
 'GSU05OCOO',
 'GCON12OSMI',
 'GCON13OCUR',
 'GCON13OYOU',
 'GCON16OHAR',
 'GCON18OSER',
 'GCON19OPOS',
 'GCON28OSCH',
 'GCON04OKON',
 'GCON07OPOM']:
    check_write_ins.append(name[:-4]+"OWRI")
    pivoted_alt[name[:-4]+"OWRI"] = pivoted_alt[name]

In [80]:
pdv.statewide_totals_check(pivoted_2022, "Precinct-Level", pivoted_alt, "County-Level", check_write_ins)

***Statewide Totals Check***
GSL015OWRI has a difference of 4019.0 votes
	Precinct-Level: 4752 votes
	County-Level: 733.0 votes
GSL003OWRI has a difference of 1970.0 votes
	Precinct-Level: 2173 votes
	County-Level: 203.0 votes
GSL030OWRI has a difference of 3466.0 votes
	Precinct-Level: 3591 votes
	County-Level: 125.0 votes
GSU33OWRI has a difference of 5548.0 votes
	Precinct-Level: 7241 votes
	County-Level: 1693.0 votes
GSU05OWRI has a difference of 450.0 votes
	Precinct-Level: 450 votes
	County-Level: 0.0 votes
GCON12OWRI has a difference of 437.0 votes
	Precinct-Level: 441 votes
	County-Level: 4.0 votes
GCON13OWRI has a difference of 320.0 votes
	Precinct-Level: 337 votes
	County-Level: 17.0 votes
GCON13OWRI has a difference of 320.0 votes
	Precinct-Level: 337 votes
	County-Level: 17.0 votes
GCON16OWRI has a difference of 337.0 votes
	Precinct-Level: 358 votes
	County-Level: 21.0 votes
GCON18OWRI has a difference of 1152.0 votes
	Precinct-Level: 1310 votes
	County-Level: 158.0 votes

In [81]:
pdv.county_totals_check(pivoted_2022, "Precinct-Level", pivoted_alt, "County-Level", check_write_ins, "County Name", method = "county")

***Countywide Totals Check***

Brevard contains differences in these races:
	GSL030OWRI has a difference of 1090.0 vote(s)
		Precinct-Level: 1206 vote(s)
		County-Level: 116.0 vote(s)
Citrus contains differences in these races:
	GCON12OWRI has a difference of 69.0 vote(s)
		Precinct-Level: 70 vote(s)
		County-Level: 1.0 vote(s)
Clay contains differences in these races:
	GCON04OWRI has a difference of 97.0 vote(s)
		Precinct-Level: 101 vote(s)
		County-Level: 4.0 vote(s)
Collier contains differences in these races:
	GCON18OWRI has a difference of 5.0 vote(s)
		Precinct-Level: 5 vote(s)
		County-Level: 0.0 vote(s)
	GCON19OWRI has a difference of 68.0 vote(s)
		Precinct-Level: 73 vote(s)
		County-Level: 5.0 vote(s)
Miami-Dade contains differences in these races:
	GCON28OWRI has a difference of 266.0 vote(s)
		Precinct-Level: 288 vote(s)
		County-Level: 22.0 vote(s)
Desoto contains differences in these races:
	GCON18OWRI has a difference of 18.0 vote(s)
		Precinct-Level: 21 vote(s)
		Count

  holder_1 = partner_df.groupby(county_col).sum()


# Finalize File Order and Export

In [83]:
pivoted_2022.rename(columns = {'Unique Precinct Identifier':'PREC_ID', 
                               'County Name':'CNTY_NAME',
                              'County Code (Three-character abbreviation)':'CNTY_CODE',
                              'Precinct Polling Location':'POLL_LOC'}, inplace = True)

In [85]:
pivoted_2022 = pivoted_2022[['UNIQUE_ID', 'COUNTYFP', 'CNTY_CODE', 'CNTY_NAME', 'PREC_ID',
       'POLL_LOC']+list(sorted_contest_name_change_dict.values())]

In [87]:
pivoted_2022["UNIQUE_ID"] = np.where(pivoted_2022["UNIQUE_ID"]=="WAS-0001", pivoted_2022["UNIQUE_ID"]+"-"+pivoted_2022["POLL_LOC"],pivoted_2022["UNIQUE_ID"])

In [97]:
pivoted_2022.loc[pivoted_2022["POLL_LOC"]=="nan","POLL_LOC"] = "No Location Provided"

In [99]:
if not os.path.exists("./fl_2022_gen_prec/"):
    os.mkdir("./fl_2022_gen_prec/")

pivoted_2022.to_csv("./fl_2022_gen_prec/fl_2022_gen_prec.csv", index = False)