I've downloaded several years of ACS data from IPUMS NHGIS. Because the ACS is a 5-year average of a sample survey, the smallest geographic unit it is reported at is the block group, not the block.

However, blocks nest nicely into block groups, so I should be able to disaggregate from block groups to blocks using maup and prorating by total population, which seems like a reasonable assumption to make.

* `ADK5E001` = `pop`: Total population
* `ADK5E001` = `TotPop`: Total population (again)
* `ADK5E004` = `BlackPop`: Total Black population
* `ADK5E012` = `HispPop`: Total Hispanic Population

In [10]:
import warnings

import geopandas
import maup
import numpy as np
import pandas as pd

# Turn on maup's progress bars for the long-running operations below
maup.progress.enabled = True

def blck_grp_merge_vap(vap_f_name, out_f_name):
    """Write a block group level shapefile with population and VAP columns.

    The block group geometries and the population csv are read from fixed
    paths; only the VAP table and the output location are passed in.

    Args:
        vap_f_name: str, file name of the zipped csv with VAP by race data
            (downloaded from Census). Must contain the columns ``GEONAME``,
            ``geoid``, ``lnnumber`` and ``CVAP_EST``.
        out_f_name: str, path where the shapefile is written, must be .shp.

    Returns:
        None. The merged GeoDataFrame is written to ``out_f_name``.

    Raises:
        KeyError: if an expected column, or one of the ``lnnumber`` values
            1, 5, 13, is missing from the input tables.
    """
    # read in the block group shapefile, stays the same every year
    blck_grp = geopandas.read_file("zip://C:/Users/madie/OneDrive/data/ipums/VA_blck_grp_2019.zip")
    # keep only the useful cols
    blck_grp = blck_grp[["GEOID", "GISJOIN", "geometry"]].copy()

    # read in population data csv
    # NOTE(review): this path is fixed to the 2015 file even when the function
    # is run for other years -- confirm that is intended.
    data = pd.read_csv("C:/Users/madie/OneDrive/data/ipums/VA_blck_grp_2015_pop.zip")
    # keep only the total, Black and Hispanic population columns and give them
    # more intelligible names
    data = data[["GISJOIN", "ADK5E001", "ADK5E004", "ADK5E012"]].rename(
        columns={"ADK5E001": "pop", "ADK5E004": "BlackPop", "ADK5E012": "HispPop"}
    )
    # total population is carried under two names: "pop" and "TotPop"
    data.insert(2, "TotPop", data["pop"])
    # merge the population data into the block group shapefile (inner join, so
    # block groups without a population row are dropped)
    blck_grp = blck_grp.merge(data, on='GISJOIN')

    # read in csv file with vap data
    vap = pd.read_csv(vap_f_name, encoding="latin1")
    # GEONAME is comma separated and ends with the state name; only the state
    # is needed, so take the last component instead of assuming a fixed number
    # of commas
    vap['state'] = vap["GEONAME"].str.rsplit(pat=",", n=1).str[-1].str.strip()
    # filter to only include virginia block groups
    vap = vap.loc[vap['state'] == "Virginia"]
    # pivot with geoid as row and lnnumber (race) as col
    df_vap = vap.pivot(index="geoid", columns="lnnumber", values="CVAP_EST")
    df_vap.columns.name = None
    df_vap = df_vap.reset_index()
    # keep only geoid, total (1), Black (5), Hispanic (13); explicit selection
    # raises a KeyError if one of these line numbers is absent
    df_vap = df_vap[["geoid", 1, 5, 13]].rename(
        columns={1: "VAP", 5: "BlackVAP", 13: "HISPVAP"}
    )
    # reformat geoid to match that in the shapefile: keep the part after "US"
    df_vap["GEOID"] = df_vap["geoid"].str.split(pat="US", n=1).str[1]
    df_vap = df_vap.drop(columns=["geoid"])

    # merge in VAP (inner join on GEOID)
    blck_grp = blck_grp.merge(df_vap, on='GEOID')
    # write to shp file
    blck_grp.to_file(out_f_name)

In [11]:
# Build the block group shapefile (population + VAP) for one year at a time;
# edit the two paths below and re-run this cell for each year.
# 2015: done, 2017: done, 2019: done
vap_f_name = "C:/Users/madie/OneDrive/data/census/VA_blockgroup_2015-2019_vap.zip" # change for year
out_f_name = "C:/Users/madie/OneDrive/data/blck_grp/VA_blck_grp_2019_pop_vap.shp" # change for year
blck_grp_merge_vap(vap_f_name, out_f_name)

Now I need to disaggregate from the block group level down to the block level. 

In [12]:
# turn off annoying geoseries isna warnings: ignore any UserWarning whose
# message starts with 'GeoSeries.isna' (applies to the whole session)
warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)

def disaggregate(blck_grp_f_name, out_f_name):
    """Disaggregate block group data down to blocks, prorating by population.

    Each block receives a share of its block group's totals equal to the
    block's share of the population of all blocks assigned to that block
    group. The blocks shapefile is read from a fixed path.

    Args:
        blck_grp_f_name: str, file name of the zipped block group level shp
            file. Must contain the columns listed in ``pop_cols`` below.
        out_f_name: str, path where the final block level shp file is
            written, must end in .shp.

    Returns:
        None. The block level GeoDataFrame is written to ``out_f_name``.
    """
    # read in files
    blck_grp = geopandas.read_file(blck_grp_f_name)
    blocks = geopandas.read_file("zip://C:/Users/madie/OneDrive/data/ipums/VA_block_2010_pop.zip")
    # remove unnecessary columns; the block level "pop" column is kept because
    # it supplies the prorating weights
    blocks = blocks.drop(columns=["vap", "TotPop", "BlackPop", "HispPop", "VAP_1", "BlackVAP", "HispVAP"])

    # change crs of shp files to be projected (using VA north)
    blocks = blocks.to_crs(epsg=2283)
    blck_grp = blck_grp.to_crs(epsg=2283)
    # remove any bowties (little imperfections in the polygons)
    blocks.geometry = blocks.buffer(0)
    blck_grp.geometry = blck_grp.buffer(0)
    # reset indices so the assignment values line up with blck_grp's index
    blocks = blocks.reset_index(drop=True)
    blck_grp = blck_grp.reset_index(drop=True)

    # assign blocks to block groups
    pop_cols = ["pop", "TotPop", "BlackPop", "HispPop", "VAP", "BlackVAP", "HISPVAP"]
    assignment = maup.assign(blocks, blck_grp)

    # Prorate according to each block's share of the population of the blocks
    # in its block group. The denominator must come from the block data itself
    # so that the weights sum to 1 within every block group; dividing by the
    # block group's own "pop" column (a different data source) would not
    # preserve the block group totals.
    block_pop_by_grp = blocks["pop"].groupby(assignment).sum()
    weights = blocks["pop"] / assignment.map(block_pop_by_grp)
    # 0/0 (block groups whose blocks all have zero population) and unassigned
    # blocks give NaN; those blocks receive nothing
    weights = weights.fillna(0)
    prorated = maup.prorate(assignment, blck_grp[pop_cols], weights)

    # Add the prorated population totals as columns on the `blocks` GeoDataFrame
    # (this replaces the block level "pop" column with the prorated value):
    blocks[pop_cols] = prorated
    blocks.to_file(out_f_name)

In [13]:
# Disaggregate the 2017 block group file down to blocks.
# NOTE(review): the input is a .zip, presumably the .shp output of
# blck_grp_merge_vap zipped by hand -- confirm.
blck_grp_f_name = "zip://C:/Users/madie/OneDrive/data/blck_grp/VA_blck_grp_2017_pop_vap.zip"
out_f_name = "C:/Users/madie/OneDrive/data/ipums/VA_block_2017_data/VA_block_2017_data.shp"
disaggregate(blck_grp_f_name, out_f_name)

100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [02:10<00:00, 40.92it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [06:57<00:00, 12.75it/s]


In [14]:
# Disaggregate the 2019 block group file down to blocks.
# NOTE(review): the input is a .zip, presumably the .shp output of
# blck_grp_merge_vap zipped by hand -- confirm.
blck_grp_f_name = "zip://C:/Users/madie/OneDrive/data/blck_grp/VA_blck_grp_2019_pop_vap.zip"
out_f_name = "C:/Users/madie/OneDrive/data/ipums/VA_block_2019_data/VA_block_2019_data.shp"
disaggregate(blck_grp_f_name, out_f_name)

100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [02:16<00:00, 38.93it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [07:39<00:00, 11.58it/s]
