# 2020 -> 2010 Crosswalk

Author: Mo Al Elew

**What notebook does/produces:**

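Produces 2022 American Community Survey (ACS) racial demographics data, published by 2020 block group, interpolated to 2010 block groups. The result is exported as a CSV of weighted counts and percentages and as a GeoJSON with the same data joined to 2010 block group geometries.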

**Issue/problem being solved:**

Citizens sets rate relativities by 2010 block group, but the most recent demographic data is released using 2020 geographies, so it has to be converted to 2010 block groups before the two can be compared.

**Strategy to solve:**

1. Read in 2022 ACS data by block group
2. Read in crosswalk weights for 2020 block groups to 2010 block groups 
3. Merge on the weights to the ACS data
4. Weight the population counts by the crosswalk population weight (`count * wt_pop`) and round to whole numbers
5. Group by 2010 block group and sum the weighted counts
6. Calculate percentages
7. Merge weighted demographics and TIGER GIS data

**Sources:**

- data.census.gov
- https://www.nhgis.org/geographic-crosswalks 
- https://www2.census.gov/geo/tiger/TIGER2010/


In [1]:
import zipfile

import geopandas as gpd
import numpy as np
import pandas as pd
import geopandas as gpd

# Constants

In [2]:
# --- Inputs ---
# Block-group demographics CSV (read below with geo_id kept as a string).
BG_2020_DATA = "./outputs/block_group_demographics.csv"

# NHGIS 2020 -> 2010 block-group crosswalk: the zip archive and the CSV member inside it.
CW_DATA_FP = "./inputs/crosswalk/nhgis_bg2020_bg2010_26.zip"
CW_DATA_FILE_NAME = "nhgis_bg2020_bg2010_26.csv"

# Crosswalk columns kept for the merge: both block-group ids plus the weight columns.
CW_COL_ID_AND_WEIGHT = ["bg2020ge", "bg2010ge", "wt_pop", "parea"]

# TIGER column names -> column names used in the exported map.
GIS_COL_RENAMES = {"GEOID10": "geo_id", "NAMELSAD10": "geo_name"}

# Column order of the exported map: ids, total population, per-group counts,
# per-group shares, then geometry.
_GROUPS = ("white", "black", "aian", "asian", "other", "latin")
GIS_COL = (
    ["geo_id", "geo_name", "tot_pop_wt"]
    + [f"{group}_tot_wt" for group in _GROUPS]
    + [f"{group}_pct_wt" for group in _GROUPS]
    + ["geometry"]
)

# Identifier columns carried alongside the weighted counts.
INDEX_COLS = ["geo_id", "geo_name", "bg2010ge"]

# --- Outputs ---
DATA_EXPORT_FP = "./outputs/bg_demographics_2020_2010.csv"
MAP_EXPORT_FP = "./outputs/bg_demographics_2020_2010.geojson"

# Read data

## CW weights

In [3]:
def read_data_file(zip_fp, data_file_name, **read_csv_options):
    """Read one CSV member of a zip archive into a DataFrame.

    Parameters
    ----------
    zip_fp : path to the zip archive.
    data_file_name : name of the CSV member inside the archive.
    **read_csv_options : forwarded unchanged to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame parsed from the archive member.
    """
    # Both handles are closed as soon as the CSV has been parsed.
    with zipfile.ZipFile(zip_fp) as archive, archive.open(data_file_name) as member:
        frame = pd.read_csv(member, **read_csv_options)
    return frame


# Keep both block-group ids as strings so they can be joined to the string geo_id columns later.
cw_id_dtypes = {"bg2020ge": str, "bg2010ge": str}
DF_CW_BG_TRACTS = read_data_file(CW_DATA_FP, CW_DATA_FILE_NAME, dtype=cw_id_dtypes)
DF_CW_BG_TRACTS

Unnamed: 0,bg2020gj,bg2020ge,bg2010gj,bg2010ge,wt_pop,wt_adult,wt_hh,wt_hu,parea
0,G26000100001001,260010001001,G26000100001001,260010001001,1.000000,1.00000,1.000000,1.000000,1.000000
1,G26000100001002,260010001002,G26000109701001,260019701001,0.029412,0.02358,0.014981,0.012366,0.017712
2,G26000100001002,260010001002,G26000100001002,260010001002,0.970588,0.97642,0.985019,0.987634,0.982288
3,G26000109701001,260019701001,G26000109701001,260019701001,1.000000,1.00000,1.000000,1.000000,1.000000
4,G26000109701002,260019701002,G26000109701002,260019701002,1.000000,1.00000,1.000000,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...
12104,G26016503807002,261653807002,G26016503807002,261653807002,1.000000,1.00000,1.000000,1.000000,1.000000
12105,G26016503807003,261653807003,G26016503807003,261653807003,1.000000,1.00000,1.000000,1.000000,1.000000
12106,G26016503808001,261653808001,G26016503808001,261653808001,1.000000,1.00000,1.000000,1.000000,1.000000
12107,G26016503808002,261653808002,G26016503808002,261653808002,1.000000,1.00000,1.000000,1.000000,1.000000


## Demographic data

In [4]:
# Columns kept from the demographics file: the two identifiers plus one count per group.
# NOTE(review): despite the names, GIS_COLS holds no geometry and GDF_BG_2020 is a
# plain pandas DataFrame; the similarly named GIS_COL constant is the map export layout.
GIS_COLS = ["geo_id", "geo_name"] + [
    f"{group}_tot" for group in ("white", "black", "aian", "asian", "other", "latin")
]

# geo_id is read as a string so it can be sliced and joined to the crosswalk ids.
bg_2020_all_cols = pd.read_csv(BG_2020_DATA, dtype={"geo_id": str})
GDF_BG_2020 = bg_2020_all_cols.loc[:, GIS_COLS].copy()
GDF_BG_2020

Unnamed: 0,geo_id,geo_name,white_tot,black_tot,aian_tot,asian_tot,other_tot,latin_tot
0,1500000US260010001001,Block Group 1; Census Tract 1; Alcona County; ...,591,8,0,0,17,16
1,1500000US260010001002,Block Group 2; Census Tract 1; Alcona County; ...,988,0,0,0,0,20
2,1500000US260019701001,Block Group 1; Census Tract 9701; Alcona Count...,950,0,0,18,5,14
3,1500000US260019701002,Block Group 2; Census Tract 9701; Alcona Count...,1284,9,11,2,0,51
4,1500000US260019704001,Block Group 1; Census Tract 9704; Alcona Count...,1277,2,4,0,4,19
...,...,...,...,...,...,...,...,...
8381,1500000US261653807002,Block Group 2; Census Tract 3807; Wexford Coun...,1330,29,0,0,0,25
8382,1500000US261653807003,Block Group 3; Census Tract 3807; Wexford Coun...,427,22,0,0,0,0
8383,1500000US261653808001,Block Group 1; Census Tract 3808; Wexford Coun...,524,0,0,0,0,25
8384,1500000US261653808002,Block Group 2; Census Tract 3808; Wexford Coun...,533,13,0,58,0,0


# Merge CW weights

In [5]:
# Work on a copy holding only the id and weight columns of the crosswalk.
df_cw_weight = DF_CW_BG_TRACTS.loc[:, CW_COL_ID_AND_WEIGHT].copy()

# Length of a crosswalk 2020 block-group id, taken from the first row;
# used in the next cell to trim the ACS geo_id down to the same width.
first_bg_2020_id = df_cw_weight["bg2020ge"].iloc[0]
bg_2020_ge_len = len(first_bg_2020_id)
df_cw_weight

Unnamed: 0,bg2020ge,bg2010ge,wt_pop,parea
0,260010001001,260010001001,1.000000,1.000000
1,260010001002,260019701001,0.029412,0.017712
2,260010001002,260010001002,0.970588,0.982288
3,260019701001,260019701001,1.000000,1.000000
4,260019701002,260019701002,1.000000,1.000000
...,...,...,...,...
12104,261653807002,261653807002,1.000000,1.000000
12105,261653807003,261653807003,1.000000,1.000000
12106,261653808001,261653808001,1.000000,1.000000
12107,261653808002,261653808002,1.000000,1.000000


In [6]:
# Keep only the trailing bg_2020_ge_len characters of the ACS geo_id
# (e.g. "1500000US260010001001" -> "260010001001") so it matches bg2020ge.
GDF_BG_2020["geo_id"] = GDF_BG_2020["geo_id"].str[-bg_2020_ge_len:]

In [7]:
# Attach every crosswalk row to its 2020 block group. validate="1:m" makes pandas
# raise if geo_id is not unique on the ACS side.
df_race_bg_cw_merged = pd.merge(
    GDF_BG_2020,
    df_cw_weight,
    how="outer",
    left_on="geo_id",
    right_on="bg2020ge",
    validate="1:m",
)
# Every ACS block group must have found at least one crosswalk weight.
# NOTE(review): the outer join also keeps crosswalk rows with no ACS match; those
# rows get null counts and are not checked here - confirm that is intended.
assert df_race_bg_cw_merged["wt_pop"].notna().all()

# Clean data

## Column names

In [8]:
# Raw ACS count columns end in "_tot". Matching the suffix (rather than any
# column containing "tot") keeps this cell idempotent: once the "_tot_wt"
# columns are added to this frame further down, re-running the cell still
# returns only the six unweighted count columns.
count_col_names = [
    col for col in df_race_bg_cw_merged.columns if col.endswith("_tot")
]
count_col_names

['white_tot', 'black_tot', 'aian_tot', 'asian_tot', 'other_tot', 'latin_tot']

In [9]:
# Name of the weighted counterpart of each raw count column, e.g. white_tot -> white_tot_wt.
count_wt_col_names = [f"{raw_name}_wt" for raw_name in count_col_names]
count_wt_col_names

['white_tot_wt',
 'black_tot_wt',
 'aian_tot_wt',
 'asian_tot_wt',
 'other_tot_wt',
 'latin_tot_wt']

In [10]:
# Name of the share column for each weighted count, e.g. white_tot_wt -> white_pct_wt.
pct_wt_col_names = []
for weighted_name in count_wt_col_names:
    pct_wt_col_names.append(weighted_name.replace("tot", "pct"))
pct_wt_col_names

['white_pct_wt',
 'black_pct_wt',
 'aian_pct_wt',
 'asian_pct_wt',
 'other_pct_wt',
 'latin_pct_wt']

## Weight total

In [11]:
# Scale every count column by the row's population weight, then round to whole numbers.
# NOTE(review): rounding happens per 2020 -> 2010 fragment, before the fragments are
# summed in the next section - confirm this is preferred over rounding after the sum.
weighted_counts = (
    df_race_bg_cw_merged[count_col_names]
    .mul(df_race_bg_cw_merged["wt_pop"], axis=0)
    .round()
)
df_race_bg_cw_merged[count_wt_col_names] = weighted_counts.to_numpy()

# Keep only the identifiers and the weighted counts for aggregation.
df_race_wt = df_race_bg_cw_merged.loc[:, INDEX_COLS + count_wt_col_names].copy()
df_race_wt

Unnamed: 0,geo_id,geo_name,bg2010ge,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt
0,260010001001,Block Group 1; Census Tract 1; Alcona County; ...,260010001001,591.0,8.0,0.0,0.0,17.0,16.0
1,260010001002,Block Group 2; Census Tract 1; Alcona County; ...,260019701001,29.0,0.0,0.0,0.0,0.0,1.0
2,260010001002,Block Group 2; Census Tract 1; Alcona County; ...,260010001002,959.0,0.0,0.0,0.0,0.0,19.0
3,260019701001,Block Group 1; Census Tract 9701; Alcona Count...,260019701001,950.0,0.0,0.0,18.0,5.0,14.0
4,260019701002,Block Group 2; Census Tract 9701; Alcona Count...,260019701002,1284.0,9.0,11.0,2.0,0.0,51.0
...,...,...,...,...,...,...,...,...,...
12104,261653807002,Block Group 2; Census Tract 3807; Wexford Coun...,261653807002,1330.0,29.0,0.0,0.0,0.0,25.0
12105,261653807003,Block Group 3; Census Tract 3807; Wexford Coun...,261653807003,427.0,22.0,0.0,0.0,0.0,0.0
12106,261653808001,Block Group 1; Census Tract 3808; Wexford Coun...,261653808001,524.0,0.0,0.0,0.0,0.0,25.0
12107,261653808002,Block Group 2; Census Tract 3808; Wexford Coun...,261653808002,533.0,13.0,0.0,58.0,0.0,0.0


## Group by 2010 block group

In [12]:
# Add up the weighted fragments that land in the same 2010 block group.
weighted_by_2010_bg = df_race_wt.groupby("bg2010ge")[count_wt_col_names].sum()

# Total population is defined as the sum of the weighted group counts.
df_bg_race_wt = weighted_by_2010_bg.assign(
    tot_pop_wt=weighted_by_2010_bg.sum(axis=1)
)
df_bg_race_wt

Unnamed: 0_level_0,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,tot_pop_wt
bg2010ge,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
260010001001,591.0,8.0,0.0,0.0,17.0,16.0,632.0
260010001002,959.0,0.0,0.0,0.0,0.0,19.0,978.0
260019701001,979.0,0.0,0.0,18.0,5.0,15.0,1017.0
260019701002,1284.0,9.0,11.0,2.0,0.0,51.0,1357.0
260019704001,1277.0,2.0,4.0,0.0,4.0,19.0,1306.0
...,...,...,...,...,...,...,...
261653807002,1330.0,29.0,0.0,0.0,0.0,25.0,1384.0
261653807003,427.0,22.0,0.0,0.0,0.0,0.0,449.0
261653808001,524.0,0.0,0.0,0.0,0.0,25.0,549.0
261653808002,533.0,13.0,0.0,58.0,0.0,0.0,604.0


## Calculate percentages

In [13]:
# Block groups with a total population of zero would divide by zero. Swap their
# divisor for a placeholder of one so the rows are retained (their counts are all
# zero, so their shares come out as zero).
tot_pop_divisor = df_bg_race_wt["tot_pop_wt"].replace({0: 1})

# Share of each group within the block group, rounded to three decimals.
group_shares = (
    df_bg_race_wt[count_wt_col_names].div(tot_pop_divisor, axis=0).round(3)
)
df_bg_race_wt[pct_wt_col_names] = group_shares.to_numpy()
df_bg_race_wt

Unnamed: 0_level_0,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,tot_pop_wt,white_pct_wt,black_pct_wt,aian_pct_wt,asian_pct_wt,other_pct_wt,latin_pct_wt
bg2010ge,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
260010001001,591.0,8.0,0.0,0.0,17.0,16.0,632.0,0.935,0.013,0.000,0.000,0.027,0.025
260010001002,959.0,0.0,0.0,0.0,0.0,19.0,978.0,0.981,0.000,0.000,0.000,0.000,0.019
260019701001,979.0,0.0,0.0,18.0,5.0,15.0,1017.0,0.963,0.000,0.000,0.018,0.005,0.015
260019701002,1284.0,9.0,11.0,2.0,0.0,51.0,1357.0,0.946,0.007,0.008,0.001,0.000,0.038
260019704001,1277.0,2.0,4.0,0.0,4.0,19.0,1306.0,0.978,0.002,0.003,0.000,0.003,0.015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
261653807002,1330.0,29.0,0.0,0.0,0.0,25.0,1384.0,0.961,0.021,0.000,0.000,0.000,0.018
261653807003,427.0,22.0,0.0,0.0,0.0,0.0,449.0,0.951,0.049,0.000,0.000,0.000,0.000
261653808001,524.0,0.0,0.0,0.0,0.0,25.0,549.0,0.954,0.000,0.000,0.000,0.000,0.046
261653808002,533.0,13.0,0.0,58.0,0.0,0.0,604.0,0.882,0.022,0.000,0.096,0.000,0.000


In [14]:
# Sanity check: the *_pct_wt columns hold fractions rounded to 3 decimals (e.g. 0.935),
# not 0-100 percentages, so an out-of-range share is anything above 1.
# Every share column is checked; the expected result is an empty frame.
pct_cols = [col for col in df_bg_race_wt.columns if col.endswith("_pct_wt")]
df_bg_race_wt[(df_bg_race_wt[pct_cols] > 1).any(axis=1)]

Unnamed: 0_level_0,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,tot_pop_wt,white_pct_wt,black_pct_wt,aian_pct_wt,asian_pct_wt,other_pct_wt,latin_pct_wt
bg2010ge,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1


# Merge demographics and GIS data

In [15]:
# 2010 TIGER block-group shapefile archive.
# NOTE(review): this path is the only input not declared in the constants cell.
tiger_bg_2010_fp = "./inputs/tiger_files/tl_2010_26_bg10.zip"
GDF_BG_GIS = gpd.read_file(tiger_bg_2010_fp)

# Only the id, the display name and the geometry are needed for the export.
tiger_keep_cols = ["GEOID10", "NAMELSAD10", "geometry"]
gdf_bg_gis = GDF_BG_GIS[tiger_keep_cols]

In [16]:
# Left-join the weighted demographics onto the 2010 geometries (GEOID10 against the
# bg2010ge index of df_bg_race_wt), rename to the export names and fix the column order.
gdf_2020_to_2010_cw = (
    gdf_bg_gis.merge(
        df_bg_race_wt,
        how="left",
        left_on="GEOID10",
        right_on="bg2010ge",
        validate="1:1",
    )
    .rename(columns=GIS_COL_RENAMES)[GIS_COL]
    .copy()
)
# Every 2010 block-group geometry must have received demographic data.
assert gdf_2020_to_2010_cw["tot_pop_wt"].notna().all()
gdf_2020_to_2010_cw

Unnamed: 0,geo_id,geo_name,tot_pop_wt,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,white_pct_wt,black_pct_wt,aian_pct_wt,asian_pct_wt,other_pct_wt,latin_pct_wt,geometry
0,260690002003,Block Group 3,1629.0,1581.0,0.0,0.0,0.0,5.0,43.0,0.971,0.000,0.000,0.000,0.003,0.026,"POLYGON ((-83.59161 44.27922, -83.59314 44.279..."
1,260690005001,Block Group 1,485.0,437.0,0.0,0.0,9.0,0.0,39.0,0.901,0.000,0.000,0.019,0.000,0.080,"POLYGON ((-83.50198 44.27641, -83.50254 44.276..."
2,260690005003,Block Group 3,687.0,669.0,0.0,0.0,18.0,0.0,0.0,0.974,0.000,0.000,0.026,0.000,0.000,"POLYGON ((-83.49249 44.28678, -83.49282 44.287..."
3,260690007001,Block Group 1,1524.0,1487.0,0.0,9.0,0.0,2.0,26.0,0.976,0.000,0.006,0.000,0.001,0.017,"POLYGON ((-83.83290 44.36319, -83.82857 44.363..."
4,260690007002,Block Group 2,1040.0,1036.0,0.0,4.0,0.0,0.0,0.0,0.996,0.000,0.004,0.000,0.000,0.000,"POLYGON ((-83.73661 44.27771, -83.74075 44.277..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8200,261614121002,Block Group 2,897.0,860.0,23.0,0.0,0.0,0.0,14.0,0.959,0.026,0.000,0.000,0.000,0.016,"POLYGON ((-83.58200 42.25373, -83.58204 42.253..."
8201,261614121004,Block Group 4,406.0,231.0,157.0,0.0,0.0,0.0,18.0,0.569,0.387,0.000,0.000,0.000,0.044,"POLYGON ((-83.54410 42.26245, -83.54373 42.262..."
8202,261614119001,Block Group 1,182.0,135.0,16.0,2.0,0.0,0.0,29.0,0.742,0.088,0.011,0.000,0.000,0.159,"POLYGON ((-83.54429 42.25153, -83.54499 42.251..."
8203,261614119002,Block Group 2,508.0,378.0,43.0,6.0,0.0,0.0,81.0,0.744,0.085,0.012,0.000,0.000,0.159,"POLYGON ((-83.56358 42.24824, -83.56588 42.247..."


# Export

In [17]:
# Write the 2010 block-group table; the bg2010ge index is written as the first column.
df_bg_race_wt.to_csv(path_or_buf=DATA_EXPORT_FP)
df_bg_race_wt

Unnamed: 0_level_0,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,tot_pop_wt,white_pct_wt,black_pct_wt,aian_pct_wt,asian_pct_wt,other_pct_wt,latin_pct_wt
bg2010ge,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
260010001001,591.0,8.0,0.0,0.0,17.0,16.0,632.0,0.935,0.013,0.000,0.000,0.027,0.025
260010001002,959.0,0.0,0.0,0.0,0.0,19.0,978.0,0.981,0.000,0.000,0.000,0.000,0.019
260019701001,979.0,0.0,0.0,18.0,5.0,15.0,1017.0,0.963,0.000,0.000,0.018,0.005,0.015
260019701002,1284.0,9.0,11.0,2.0,0.0,51.0,1357.0,0.946,0.007,0.008,0.001,0.000,0.038
260019704001,1277.0,2.0,4.0,0.0,4.0,19.0,1306.0,0.978,0.002,0.003,0.000,0.003,0.015
...,...,...,...,...,...,...,...,...,...,...,...,...,...
261653807002,1330.0,29.0,0.0,0.0,0.0,25.0,1384.0,0.961,0.021,0.000,0.000,0.000,0.018
261653807003,427.0,22.0,0.0,0.0,0.0,0.0,449.0,0.951,0.049,0.000,0.000,0.000,0.000
261653808001,524.0,0.0,0.0,0.0,0.0,25.0,549.0,0.954,0.000,0.000,0.000,0.000,0.046
261653808002,533.0,13.0,0.0,58.0,0.0,0.0,604.0,0.882,0.022,0.000,0.096,0.000,0.000


In [18]:
# Write the joined demographics and geometries as GeoJSON, without the row index.
gdf_2020_to_2010_cw.to_file(filename=MAP_EXPORT_FP, driver="GeoJSON", index=False)
gdf_2020_to_2010_cw

Unnamed: 0,geo_id,geo_name,tot_pop_wt,white_tot_wt,black_tot_wt,aian_tot_wt,asian_tot_wt,other_tot_wt,latin_tot_wt,white_pct_wt,black_pct_wt,aian_pct_wt,asian_pct_wt,other_pct_wt,latin_pct_wt,geometry
0,260690002003,Block Group 3,1629.0,1581.0,0.0,0.0,0.0,5.0,43.0,0.971,0.000,0.000,0.000,0.003,0.026,"POLYGON ((-83.59161 44.27922, -83.59314 44.279..."
1,260690005001,Block Group 1,485.0,437.0,0.0,0.0,9.0,0.0,39.0,0.901,0.000,0.000,0.019,0.000,0.080,"POLYGON ((-83.50198 44.27641, -83.50254 44.276..."
2,260690005003,Block Group 3,687.0,669.0,0.0,0.0,18.0,0.0,0.0,0.974,0.000,0.000,0.026,0.000,0.000,"POLYGON ((-83.49249 44.28678, -83.49282 44.287..."
3,260690007001,Block Group 1,1524.0,1487.0,0.0,9.0,0.0,2.0,26.0,0.976,0.000,0.006,0.000,0.001,0.017,"POLYGON ((-83.83290 44.36319, -83.82857 44.363..."
4,260690007002,Block Group 2,1040.0,1036.0,0.0,4.0,0.0,0.0,0.0,0.996,0.000,0.004,0.000,0.000,0.000,"POLYGON ((-83.73661 44.27771, -83.74075 44.277..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8200,261614121002,Block Group 2,897.0,860.0,23.0,0.0,0.0,0.0,14.0,0.959,0.026,0.000,0.000,0.000,0.016,"POLYGON ((-83.58200 42.25373, -83.58204 42.253..."
8201,261614121004,Block Group 4,406.0,231.0,157.0,0.0,0.0,0.0,18.0,0.569,0.387,0.000,0.000,0.000,0.044,"POLYGON ((-83.54410 42.26245, -83.54373 42.262..."
8202,261614119001,Block Group 1,182.0,135.0,16.0,2.0,0.0,0.0,29.0,0.742,0.088,0.011,0.000,0.000,0.159,"POLYGON ((-83.54429 42.25153, -83.54499 42.251..."
8203,261614119002,Block Group 2,508.0,378.0,43.0,6.0,0.0,0.0,81.0,0.744,0.085,0.012,0.000,0.000,0.159,"POLYGON ((-83.56358 42.24824, -83.56588 42.247..."
