In [1]:
import os
import io
import pandas as pd
pd.set_option("display.max_columns", None)
import geopandas as gp
import boto3
import maup
maup.progress.enabled = True
from s3_paths import *
from shapely.geometry import MultiPolygon, Polygon
from shapely.ops import unary_union
import warnings
warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)


#Function to adjust Census VAP to account for incarcerated pop by subtracting and then assign 0 to any negative values
def modify_P0040001_P0050003(block_gdf):
    block_gdf['P0040001-P0050003'] = block_gdf['P0040001'] - block_gdf['P0050003']
    print("Error raised iff the number of incarcerated blocks with non-zero population does NOT equal the number of modified vap blocks.")
    assert (len(block_gdf[block_gdf['P0050003']!=0]) - len(block_gdf[block_gdf['P0040001']!=block_gdf['P0040001-P0050003']])) == 0
    block_gdf.loc[block_gdf['P0040001-P0050003']<0, 'P0040001-P0050003'] = 0

    return block_gdf


#Functions for running maup
def check_valid_rows(block_gdf, precinct_gdf):
    prec_geom = precinct_gdf.geometry
    valid_rows = precinct_gdf[~(prec_geom.isna() | prec_geom.is_empty)]
    print('valid precinct rows: ', valid_rows.shape)

    block_geom = block_gdf.geometry
    block_valid_rows = block_gdf[~(block_geom.isna() | block_geom.is_empty)]
    print('valid block rows: ', block_valid_rows.shape)
    

def fix_buffer(gdf):
    """
    return (GeoDataFrame) with the 'buffer(0) trick' applied
    :gdf: (GeoDataFrame) object
    Can be useful when trying to mitigate 'self-intersection' issues
    """
    buffered = gdf.buffer(0)
    gdf.drop(columns=["geometry"])
    # gdf['geometry'] = gdf.apply(lambda x: x.geometry.buffer(0), axis=1)
    gdf["geometry"] = buffered
    return gdf


def maup_assignment_series(block_gdf, precinct_gdf):
    block_gdf = block_gdf.to_crs(precinct_gdf.crs)
    print('block_gdf shape: ', block_gdf.shape, '\n precinct_gdf shape: ', precinct_gdf.shape)

    block_gdf["maup_assignment"] = maup.assign(
        fix_buffer(block_gdf), fix_buffer(precinct_gdf)).fillna(0) #In case of OH, na are 0 pop
    print(block_gdf[block_gdf['maup_assignment'].isna()].shape[0], " null assignments in maup series")
    return block_gdf


def maup_pre_vote_setup(block_gdf, precinct_gdf, PRECID_block):
    block_gdf = block_gdf.to_crs(precinct_gdf.crs)
    bgdf = maup_assignment_series(block_gdf, precinct_gdf)
    #assert bgdf['maup_assignment'].isna().any() == False  #-- Cut out for OH because all but two are 0 pop
    bgdf[PRECID_block] = bgdf['maup_assignment'].map(lambda idx: str(precinct_gdf.loc[idx]["UNIQUE_ID"]))
    
    return bgdf

    
def maup_assign_labels(block_gdf, precinct_gdf, precinctid_input, distid_input):
    #Assign precinct IDs to block file using assign series
    block_gdf[precinctid_input] = block_gdf["maup_assignment"].map(
        lambda idx: str(precinct_gdf.loc[idx]["UNIQUE_ID"])
    )
    if "CONG_DIST" in precinct_gdf.columns:
        block_gdf[distid_input] = block_gdf["maup_assignment"].map(
            lambda idx: str(precinct_gdf.loc[idx]["CONG_DIST"])
        )
    if "SLDL_DIST" in precinct_gdf.columns:
        block_gdf[distid_input] = block_gdf["maup_assignment"].map(
            lambda idx: str(precinct_gdf.loc[idx]["SLDL_DIST"])
        )
    if "SLDU_DIST" in precinct_gdf.columns:
        block_gdf[distid_input] = block_gdf["maup_assignment"].map(
            lambda idx: str(precinct_gdf.loc[idx]["SLDU_DIST"])
        )
    print("Label assignment complete")

    return block_gdf


def election_cols(gdf):
    election_cols = list(gdf.columns[gdf.columns.str.startswith('G')])
    return election_cols 


def assign_votes(variables, election_columns, precinct_gdf, block_gdf):
    precinct_gdf[variables] = block_gdf[variables].groupby(block_gdf["maup_assignment"]).sum()
    print(variables, ' added to precinct_gdf based on maup assignment')
    bl_to_prec_weights = block_gdf[variables] / block_gdf["maup_assignment"].map(precinct_gdf[variables])
    block_votes = block_gdf.copy(deep=True)
    block_votes[election_columns] = maup.prorate(
        block_gdf["maup_assignment"], precinct_gdf[election_columns], bl_to_prec_weights
    )
    return block_votes


def run_maup(block_gdf, precinct_gdf, precinctid_input, pop_variable):
    #Assign identifiers
    block_gdf = maup_assignment_series(block_gdf, precinct_gdf)
    block_gdf = maup_assign_labels(block_gdf, precinct_gdf, precinctid_input)

    #Assign votes
    election_columns = election_cols(precinct_gdf)
    block_votes = assign_votes(pop_variable, election_columns, precinct_gdf, block_gdf)
    
    return block_votes   


def column_total_check(election_columns, block_gdf, precinct_gdf):
    mismatch_list = []
    for val in election_columns:
        vote_dif = block_gdf[val].sum()-precinct_gdf[val].sum()
        if (abs(vote_dif) <=1e-1):
            #print(val+": EQUAL", ' - total: ', 'block:', str(block_gdf[val].sum()), 'prec:', str(precinct_gdf[val].sum()), 'diff:', block_gdf[val].sum()-precinct_gdf[val].sum())
            continue
        else:
            mismatch_list.append(val)
            print(val+": DIFFERENCE OF " + str(vote_dif)+ " VOTES", ' - block total: ', str(block_gdf[val].sum()), ', precinct total: ', str(precinct_gdf[val].sum()))  
    print("Mismatch list: ", mismatch_list)
            
def precinct_sum_check(prec_gdf, block_gdf, blk_prec_id):
    prec_gpbyprec = prec_gdf.groupby(['UNIQUE_ID']).sum()
    blk_gpbyprec = block_gdf.groupby([blk_prec_id]).sum()
    prec_w_difs_list = []
    
    prec_gpbyprec.sort_values(by='UNIQUE_ID',inplace=True)
    prec_gpbyprec.reset_index(inplace = True)
    blk_gpbyprec.sort_values(by= blk_prec_id, inplace=True)
    blk_gpbyprec.reset_index(inplace = True)
    
    for i in range(0, prec_gdf['UNIQUE_ID'].nunique()):
        for col in election_cols(prec_gdf):
            prec_diff = round(blk_gpbyprec[col].iloc[i], 1) - round(prec_gpbyprec[col].iloc[i], 1)
            if (prec_diff!=0):
                prec_w_difs_list.append(prec_gpbyprec.index[i])
                print("prec id: ", blk_gpbyprec.index[i], prec_gpbyprec.index[i], col, "block: ", round(blk_gpbyprec[col].iloc[i],1), 
                      "prec: ", round(prec_gpbyprec[col].iloc[i],1), "diff: ", prec_diff)
    print("precs to check: ", set(prec_w_difs_list))
    print("Precinct check complete!")



In [2]:
##Step 0: Load in relevant gdfs 
s3_client = boto3.client('s3')
census_block_gdf = gp.read_file(f'zip+{census_block}')[['GEOID20', 'COUNTYFP20', 'NAME20','P0010001', 'P0020001', 'P0040001', 'P0050003', 'geometry']]
census_block_gdf = modify_P0040001_P0050003(census_block_gdf)
oh_blocks = census_block_gdf.copy()

Error raised iff the number of incarcerated blocks with non-zero population does NOT equal the number of modified vap blocks.


In [3]:
#Step 0: Load in relevant gdfs 
pber_st = gp.read_file("../../pber_collection/OH/oh_2022_gen_prec_shp/oh_2022_gen_prec_st.shp")

In [4]:
bprec = maup_pre_vote_setup(oh_blocks, pber_st, "PREC")
bprec_votes = assign_votes("P0040001-P0050003", election_cols(pber_st), pber_st, bprec)

block_gdf shape:  (276428, 9) 
 precinct_gdf shape:  (8941, 26)


100%|██████████| 8941/8941 [00:50<00:00, 176.09it/s]
100%|██████████| 8941/8941 [01:49<00:00, 81.62it/s] 

  geometries = geometries[geometries.area > area_cutoff]

  return assign_to_max(intersections(sources, targets, area_cutoff=0).area)


0  null assignments in maup series
P0040001-P0050003  added to precinct_gdf based on maup assignment


In [5]:
election_col_list = ['G22ATGDCRO','G22ATGRYOS',
 'G22AUDDSAP',
 'G22AUDRFAB',
 'G22CJUDBRU',
 'G22CJURKEN',
 'G22GOVDWHA',
 'G22GOVRDEW',
 'G22JUSDJAM',
 'G22JUSDZAY',
 'G22JUSRDEW',
 'G22JUSRFIS',
 'G22SOSDCLA',
 'G22SOSOMAR',
 'G22SOSRLAR',
 'G22TREDSCH',
 'G22TRERSPR',
 'G22USSDRYA',
 'G22USSRVAN']
column_total_check(election_col_list, bprec_votes, pber_st)

Mismatch list:  []


In [6]:
precinct_sum_check(pber_st[pber_st.columns[pber_st.columns!="geometry"]], bprec_votes[bprec_votes.columns[bprec_votes.columns!="geometry"]], "PREC")

precs to check:  set()
Precinct check complete!


In [7]:
bprec_votes["VAP_MOD"] = bprec_votes["P0040001-P0050003"]
bprec_votes["PRECINCTID"] = bprec_votes["PREC"]
bprec_votes_export = bprec_votes[['GEOID20', 'NAME20','COUNTYFP20', 'PRECINCTID', 'VAP_MOD',
       'G22ATGDCRO', 'G22ATGRYOS', 'G22AUDDSAP', 'G22AUDRFAB', 'G22CJUDBRU',
       'G22CJURKEN', 'G22GOVDWHA', 'G22GOVRDEW', 'G22JUSDJAM', 'G22JUSDZAY',
       'G22JUSRDEW', 'G22JUSRFIS', 'G22SOSDCLA', 'G22SOSOMAR', 'G22SOSRLAR',
       'G22TREDSCH', 'G22TRERSPR', 'G22USSDRYA', 'G22USSRVAN','geometry']].round(2).fillna(0)

In [8]:
bprec_votes_export.to_file("./oh_2022_gen_2020_blocks/oh_2022_gen_2020_blocks.shp")

In [9]:
load_check = gp.read_file("./oh_2022_gen_2020_blocks/oh_2022_gen_2020_blocks.shp")

In [None]:
load_check.isna().any()