I've downloaded a bunch of yearly ACS data from the IPUMS NHGIS, but since this is only a 5 year average of a sample survey, the smallest geographic unit reported at is the block group, not the block. 

However, blocks nest nicely into block groups, so I should be able to disaggregate from block groups to blocks using maup and prorating by total population, which seems like a reasonable assumption to make.

* `ADK5E001` = `pop`: Total population
* `ADK5E001` = `TotPop`: Total population (again)
* `ADK5E004` = `BlackPop`: Total Black population
* `ADK5E012` = `HispPop`: Total Hispanic Population

In [None]:
import geopandas
import pandas as pd

# read in blocks shapefile
blck_grp = geopandas.read_file("zip://C:/Users/madie/OneDrive/data/ipums/VA_blck_grp_2019.zip")
# keep only the useful cols
blck_grp = blck_grp[["GEOID", "GISJOIN", "geometry"]].copy()
# read in population data csv
data = pd.read_csv("C:/Users/madie/OneDrive/data/ipums/VA_blck_grp_2015_pop.zip")
# keep only the relevant columns for total Black and Hispanic population (and vap)
data = data[["GISJOIN", "ADK5E001", "ADK5E001", "ADK5E004", "ADK5E012"]].copy()
# rename these cols to something more intelligible
data.columns = ["GISJOIN", "pop", "TotPop", "BlackPop", "HispPop"]
# merge the population data into the blocks shapefile
blck_grp = blck_grp.merge(data, on='GISJOIN')
blck_grp

Ok, so that's looking good. All that's missing is the total VAP and VAP by race. The census is required by law to publish this specific table every year, but it rather annoyingly is not available from IPUMS, so I had to download it from the census directly. 

In [None]:
#read in csv file
vap = pd.read_csv("C:/Users/madie/OneDrive/data/census/VA_blockgroup_2011-2015_vap.zip", encoding="latin1")
# split up GEONAME columns on commas into 4 different things
vap[["blck_grp", "tract", "county", "state"]] = vap["GEONAME"].str.split(pat=",", expand=True)
# remove leading and trailing spaces from state col
vap['state'] = vap['state'].str.strip()
# filter to only incude virginia block groups
vap = vap.loc[vap['state'] == "Virginia"]
# group by unique identifier then by racial group
vap = vap.set_index(['geoid', "lnnumber"])
vap = vap[["CVAP_EST"]]
# "pivot" with geoid as row and lnnumber (race) as col
df_vap = vap.unstack()
# remove top level col name
df_vap = df_vap.droplevel(None, axis=1)
df_vap.columns.name = None
df_vap = df_vap.reset_index()
# filter to only include geoid, total, Black, Hispanic
df_vap = df_vap.filter(items=["geoid", 1, 5, 13])
# rename cols
df_vap.columns = ["geoid", "VAP", "BlackVAP", "HISPVAP"]
# reformat geoid to match that in other table
df_vap[["prefix", "GEOID"]] = df_vap["geoid"].str.split(pat="US", expand=True)
df_vap = df_vap.drop(columns=["prefix", "geoid"])
df_vap

In [None]:
# merge in VAP
blck_grp = blck_grp.merge(df_vap, on='GEOID')
blck_grp

In [None]:
blck_grp.to_file("C:/Users/madie/OneDrive/data/blck_grp/VA_blck_grp_2015_pop_vap.shp")

Now I need to disaggregate from the block group level down to the block level. 

In [16]:
import geopandas
import numpy as np
import maup
import warnings

blck_grp = geopandas.read_file("C:/Users/madie/OneDrive/data/blck_grp/VA_blck_grp_2015_pop_vap.shp")
blocks = geopandas.read_file("zip://C:/Users/madie/OneDrive/data/ipums/VA_block_2010_pop.zip")

# remove unnecessary colmns
blocks = blocks.drop(columns=["vap", "TotPop", "BlackPop", "HispPop", "VAP_1", "BlackVAP", "HispVAP"])

# turn off annoying geoseries isna warnings
warnings.filterwarnings('ignore', 'GeoSeries.isna', UserWarning)

# change crs of shp files to be projected (using VA north)
blocks = blocks.to_crs(epsg=2283)
blck_grp = blck_grp.to_crs(epsg=2283)

# remove any bowties (little imperfections in the polygons)
blocks.geometry = blocks.buffer(0)
blck_grp.geometry = blck_grp.buffer(0)

blocks = blocks.reset_index(drop = True)
blck_grp = blck_grp.reset_index(drop = True)

# assign blocks to block groups
pop_cols = ["pop", "TotPop", "BlackPop", "HispPop", "VAP", "BlackVAP", "HISPVAP"]
with maup.progress():
    assignment = maup.assign(blocks, blck_grp)

# We prorate the population totals according to each block's share of the overall
# block group population:
with maup.progress():
    denom = assignment.map(blck_grp["pop"]) #, na_action='ignore')

weights = blocks["pop"] / denom
with maup.progress():
    prorated = maup.prorate(assignment, blck_grp[pop_cols], weights)

# Add the prorated vote totals as columns on the `blocks` GeoDataFrame:
blocks[pop_cols] = prorated
# We'll call .round(2) to round the values for display purposes.
blocks[pop_cols].round(2)

100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [01:04<00:00, 82.88it/s]
100%|██████████████████████████████████████████████████████████████████████████████| 5321/5321 [03:32<00:00, 25.09it/s]


Unnamed: 0,pop,TotPop,BlackPop,HispPop,VAP,BlackVAP,HISPVAP
0,97.0,97.0,7.51,0.98,81.00,3.42,0.00
1,6.0,6.0,0.46,0.06,5.01,0.21,0.00
2,228.0,228.0,17.66,2.29,190.38,8.03,0.00
3,17.0,17.0,1.32,0.17,14.20,0.60,0.00
4,0.0,0.0,0.00,0.00,0.00,0.00,0.00
...,...,...,...,...,...,...,...
281225,0.0,0.0,0.00,0.00,0.00,0.00,0.00
281226,4.0,4.0,0.07,0.25,3.62,0.05,0.14
281227,0.0,0.0,0.00,0.00,0.00,0.00,0.00
281228,4.0,4.0,0.00,0.05,3.10,0.01,0.05


In [17]:
blocks.to_file("C:/Users/madie/OneDrive/data/ipums/VA_block_2015_data/VA_block_2015_data.shp")