In [1]:
import pandas as pd
import geopandas as gp
import numpy as np

# Georgia 2018 to 2020 Precinct Crosswalks

Using VAP mod (total population minus adult incarcerated population) to generate ratios where needed


## Background:
- Received a request for Georgia precinct crosswalks for any years we had available from 2018 onwards
- As of November 16, 2022, we only have Georgia precinct data available for 2018 and 2020, so these are the two years we used

## Approach:
- Download block-level disaggregated 2018 and 2020 Georgia precinct elections data
- Groupby 2018 precinct and track the different 2020 precinct assignments for all the blocks assigned to a given 2018 precinct.
- In cases where there are multiple 2020 precinct assignments for blocks with the same 2018 precinct assignment, create a ratio using VAP mod (total population minus adult incarcerated population)
- Remove instances where VAP mod ratio is zero.
- Export to the user's desired format of a three-column dataframe with the following columns:
    - Precinct ID 2018
    - Precinct ID 2020
    - Population ratio
- Note: Precinct ID 2018 column was cleaned

## Links to datasets used:
[2020 General Election Results Disaggregated to 2020 Census Blocks](https://redistrictingdatahub.org/dataset/georgia-2020-general-election-results-disaggregated-to-the-2020-block/)

[2018 General Election Results Disaggregated to 2020 Census Blocks](https://redistrictingdatahub.org/dataset/georgia-2018-general-election-results-disaggregated-to-the-2020-block/)

[VEST 2018 Precinct Boundaries and Election Results](https://redistrictingdatahub.org/dataset/vest-2018-georgia-precinct-and-election-results/)  
    - Note, this was used to clean the PRECINCTID column in the 2018 disaggregated file


For a full 'raw-from-source' file, contact info@redistrictingdatahub.org


In [2]:
# Locations of the raw inputs, relative to this notebook
VEST_18_PATH = "./raw-from-source/ga_vest_18/ga_vest_18.shp"
BLOCKS_20_PATH = "./raw-from-source/ga_2020_gen_2020_blocks/ga_2020_gen_2020_blocks.shp"
BLOCKS_18_PATH = "./raw-from-source/ga_2018_gen_2020_blocks/ga_2018_gen_2020_blocks.shp"

# VEST 2018 precinct file (used further down to clean the 2018 precinct IDs)
vest_ga_18 = gp.read_file(VEST_18_PATH)

# Block-level disaggregated election results for 2020 and 2018
ga_20_block = gp.read_file(BLOCKS_20_PATH)
ga_18_block = gp.read_file(BLOCKS_18_PATH)

In [3]:
# Find the "DISTRICT" values that appear on more than one row of VEST's 2018 file
district_counts = vest_ga_18["DISTRICT"].value_counts()
dup_ids_list = district_counts[district_counts > 1].index
is_dup_district = vest_ga_18["DISTRICT"].isin(dup_ids_list)

# Inspect the duplicated rows (author's observation: PRECINCT_N is unique across these)
vest_ga_18[is_dup_district]

# Define a new UNIQUE_ID column, where the value is "DISTRICT-PRECINCT_N" for
# rows whose DISTRICT is duplicated and plain DISTRICT for every other row
district_with_name = vest_ga_18["DISTRICT"] + "-" + vest_ga_18["PRECINCT_N"]
vest_ga_18["UNIQUE_ID"] = np.where(is_dup_district, district_with_name, vest_ga_18["DISTRICT"])

# Value counts for the new UNIQUE_ID column (NaN included)
vest_ga_18["UNIQUE_ID"].value_counts(dropna=False)

# OTHER_ID mimics the PRECINCTID format of the disaggregated file:
# "<DISTRICT> - <DISTRICT> - <row index>"
row_label = vest_ga_18.index.astype(str)
vest_ga_18["OTHER_ID"] = vest_ga_18["DISTRICT"] + " - " + vest_ga_18["DISTRICT"] + " - " + row_label

In [4]:
# OTHER_ID values that have no matching PRECINCTID in the 2018 block-level file.
# The block-file IDs are collected into a set once, rather than rebuilding and
# linearly scanning a list for every candidate ID.
block_precinct_ids = set(ga_18_block["PRECINCTID"].unique())
[other_id for other_id in vest_ga_18["OTHER_ID"].unique() if other_id not in block_precinct_ids]

['215FTBEN3 - 215FTBEN3 - 1482',
 '215FTBEN2 - 215FTBEN2 - 1581',
 '215FTBEN4 - 215FTBEN4 - 1582',
 '215FTBEN1 - 215FTBEN1 - 1584',
 '215ZZZZZZ - 215ZZZZZZ - 1589',
 '051XFTPU - 051XFTPU - 1712',
 '029FTSTEW - 029FTSTEW - 1795']

In [5]:
# The way the column was defined above doesn't exactly match the block file's
# PRECINCTID for the seven IDs listed by the previous cell: the replacement
# values below use the literal text "None" as the middle component instead of
# a repeat of the DISTRICT value.
other_id_updates = {
    '215FTBEN3 - 215FTBEN3 - 1482': '215FTBEN3 - None - 1482',
    '215FTBEN2 - 215FTBEN2 - 1581': '215FTBEN2 - None - 1581',
    '215FTBEN4 - 215FTBEN4 - 1582': '215FTBEN4 - None - 1582',
    '215FTBEN1 - 215FTBEN1 - 1584': '215FTBEN1 - None - 1584',
    '215ZZZZZZ - 215ZZZZZZ - 1589': '215ZZZZZZ - None - 1589',
    '051XFTPU - 051XFTPU - 1712': '051XFTPU - None - 1712',
    '029FTSTEW - 029FTSTEW - 1795': '029FTSTEW - None - 1795',
}

# Swap in the corrected value where one is listed; keep the existing value otherwise
patched_other_id = vest_ga_18["OTHER_ID"].map(other_id_updates)
vest_ga_18["OTHER_ID"] = patched_other_id.where(patched_other_id.notna(), vest_ga_18["OTHER_ID"])

# Lookup from the block-file style ID (OTHER_ID) to the cleaned ID (UNIQUE_ID)
id_update_dict = {
    other_id: unique_id
    for other_id, unique_id in zip(vest_ga_18["OTHER_ID"], vest_ga_18["UNIQUE_ID"])
}

In [None]:
# Join the 2018 and 2020 block-level files on the 2020 block GEOID.
# Columns present in both files get the suffixes "_x" (2018) and "_y" (2020);
# `indicator=True` adds a "_merge" column recording which side(s) each block came from.
combined = pd.merge(ga_18_block, ga_20_block, how="outer", on="GEOID20", indicator=True)

# Create two different aggregations, both keyed by 2018 precinct
by_2018_precinct = combined.groupby(["PRECINCTID_x"])

# Sets: for every column, the distinct values seen within each 2018 precinct
# (PRECINCTID_y therefore becomes the set of 2020 precincts its blocks map to)
agg_test_set = by_2018_precinct.agg(set).reset_index()

# Sums: per-2018-precinct totals of the numeric columns.
# `numeric_only=True` skips text/geometry/categorical columns, which recent
# pandas versions no longer drop silently, and calling `.sum()` directly avoids
# the deprecated practice of passing the Python builtin `sum` to `agg`.
# Assumes VAP_MOD_x is stored as a numeric column - TODO confirm against the shapefile schema.
agg_test_sum_x = by_2018_precinct.sum(numeric_only=True).reset_index()

In [None]:
# Flag instances where there is more than one 2020 precinct associated with a 2018 precinct.
# `Series.apply` takes only the function here; the stray positional `True, False`
# arguments in the earlier version were being bound to unrelated parameters.
is_split = agg_test_set["PRECINCTID_y"].apply(lambda later_set: len(later_set) > 1)
agg_test_set["2020_prec_split"] = is_split

# Inspection helpers for the unsplit and split precincts. NOTE: in a default
# Jupyter setup only the last expression of a cell is rendered, so move either
# line to the end of its own cell to actually see it.
agg_test_set[~is_split][["PRECINCTID_x", "PRECINCTID_y"]]

agg_test_set[is_split][["PRECINCTID_x", "PRECINCTID_y"]]

# Split the dataframe into 2018 precincts that match perfectly into 2020 precincts and those that don't.
# `.copy()` makes both frames independent of agg_test_set, so adding columns to
# them later does not raise SettingWithCopyWarning.
splits = agg_test_set[is_split].copy()
no_splits = agg_test_set[~is_split].copy()

In [None]:
def create_ratios(prec_early, prec_later_set, precision = 3):
    '''
    Split one 2018 precinct's VAP mod across the 2020 precincts its blocks map to.

    Reads two notebook-level dataframes:
      - `combined`: block-level merge with PRECINCTID_x, PRECINCTID_y and VAP_MOD_x columns
      - `agg_test_sum_x`: per-PRECINCTID_x sums, including VAP_MOD_x

    Parameters
    ----------
    prec_early : value of PRECINCTID_x identifying the 2018 precinct
    prec_later_set : collection of PRECINCTID_y values (2020 precincts) to split across
    precision : number of decimal places each ratio is rounded to (default 3)

    Returns
    -------
    dict mapping each 2020 precinct ID to its share of the 2018 precinct's
    VAP mod. 2020 precincts whose share of VAP mod is zero are left out.

    Raises
    ------
    AssertionError : if the aggregated denominator does not match the sum of
        the block-level values for this precinct.
    IndexError : if `prec_early` has no row in `agg_test_sum_x`.
    '''

    # The denominator is the total VAP_MOD for that 2018 precinct
    denom = agg_test_sum_x.loc[agg_test_sum_x["PRECINCTID_x"]==prec_early,"VAP_MOD_x"].values[0]

    # Pull this precinct's blocks once instead of re-scanning `combined` for every later precinct
    early_blocks = combined.loc[combined["PRECINCTID_x"]==prec_early, ["PRECINCTID_y", "VAP_MOD_x"]]

    # Confirm that the denominator equals the equivalent value in the combined dataframe.
    # The two totals are produced by different summation routines, so compare with a
    # tolerance rather than exact equality (exact `==` fails spuriously on float data).
    in_later_set = early_blocks["PRECINCTID_y"].isin(prec_later_set)
    denom_check = early_blocks.loc[in_later_set, "VAP_MOD_x"].to_numpy().sum()

    assert np.isclose(denom, denom_check), (
        f"VAP_MOD_x mismatch for 2018 precinct {prec_early!r}: "
        f"aggregated total {denom} vs block-level total {denom_check}"
    )

    # VAP mod of this 2018 precinct's blocks, totalled per later precinct
    vap_by_later = early_blocks.groupby("PRECINCTID_y")["VAP_MOD_x"].sum()

    # Returning a dictionary
    ratio_dict = {}

    # Iterate over the later precincts that it is split by
    for val in prec_later_set:

        # Get the VAP mod for the blocks that are mapped to that later precinct
        # (0 when no block of this 2018 precinct maps to it)
        num = vap_by_later.get(val, 0)

        # If the VAP mod is not zero, create a ratio, rounded to `precision` decimal places
        if num != 0:
            ratio_dict[val] = round(num/denom, precision)

    # Return the dictionary
    return ratio_dict

In [None]:
# `splits` / `no_splits` were produced by filtering agg_test_set; take explicit
# copies before adding a column so the assignment cannot trigger
# SettingWithCopyWarning.
no_splits = no_splits.copy()
splits = splits.copy()

# When there aren't splits, just create a ratio of 1 (faster than calling the function).
# Each PRECINCTID_y set holds a single 2020 precinct here, which becomes the key.
no_splits["crosswalk_dict"] = [
    {next(iter(later_set)): 1} for later_set in no_splits["PRECINCTID_y"]
]

# Where there are splits, call create_ratios for each (2018 precinct, 2020 precinct set) pair.
# A plain list comprehension also behaves sensibly when `splits` is empty,
# unlike DataFrame.apply(axis=1).
splits["crosswalk_dict"] = [
    create_ratios(prec_early, later_set)
    for prec_early, later_set in zip(splits["PRECINCTID_x"], splits["PRECINCTID_y"])
]

# Combine the two together
recombined = pd.concat([splits, no_splits])

In [None]:
# Give the two columns of interest their presentation names, renumber the rows,
# then discard the rows whose 2018 precinct ID is the placeholder string "N/A"
recombined = (
    recombined
    .rename(columns={
        "PRECINCTID_x": "2018 Precinct ID",
        "crosswalk_dict": "Ratios of 2020 Precincts",
    })
    .reset_index(drop=True)
    .loc[lambda frame: frame["2018 Precinct ID"] != "N/A"]
)

In [None]:
# Preview each 2018 precinct alongside its {2020 precinct: ratio} dictionary
recombined[["2018 Precinct ID", "Ratios of 2020 Precincts"]]

In [None]:
# Flatten the ratio dictionaries: one output row per (2018 precinct, 2020 precinct) pair
new_df_dict = {
    "Precinct ID 2018": [],
    "Precinct ID 2020": [],
    "Population ratio": [],
}

precinct_ratio_pairs = zip(recombined["2018 Precinct ID"], recombined["Ratios of 2020 Precincts"])
for prec_2018, ratios_2020 in precinct_ratio_pairs:
    for prec_2020, ratio in ratios_2020.items():
        new_df_dict["Precinct ID 2018"].append(prec_2018)
        new_df_dict["Precinct ID 2020"].append(prec_2020)
        new_df_dict["Population ratio"].append(ratio)

df = pd.DataFrame(new_df_dict)

# Translate the block-file style 2018 IDs to the cleaned IDs; anything the
# lookup does not cover is labelled "NO VALUE" so it can be spotted below
cleaned_ids = df["Precinct ID 2018"].map(id_update_dict)
df["Precinct ID 2018"] = cleaned_ids.fillna("NO VALUE")

# Show any rows that failed to map
df[df["Precinct ID 2018"].eq("NO VALUE")]

In [None]:
# Take a look
# NOTE: in a default Jupyter setup only the last expression of a cell is
# rendered, so this bare `df` produces no output here.
df

# Sanity check: each 2018 precinct's ratios should add up to about 1, so the
# total should be close to 2658 (the number of precincts, per the original author).
# Ratios are rounded and zero-ratio pairs are dropped upstream, so small
# deviations from that figure are expected.
sum(df["Population ratio"])

In [None]:
# Export the three-column crosswalk to csv next to the notebook.
# index=False keeps the dataframe's row index out of the file.
df.to_csv("./Georgia_2018_2020_Precinct_Crosswalk.csv", index = False)