In [1]:
import os
import pandas as pd

# Wisconsin_PL_ZCTA_02_01_2023

## Background:
- We received a request for aggregated Public Law 94-171 data to ZCTA boundaries for Wisconsin

## Approach:
- Load in ZCTA BAF from the Census and PL data and join together.
- Query PL data to the fields listed above and rename.
- For blocks where the ZCTA assignment is null, assign 'NO ZCTA'.
- Group data by ZCTAs to aggregate block level data to ZCTAs.

## Links to datasets used:
- [Wisconsin block PL 94-171 2020 from the RDH](https://redistrictingdatahub.org/dataset/wisconsin-block-pl-94171-2020/)
- [ZCTA Block Assignment file from the US Census](https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_tabblock20_natl.txt)

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

Import BAF and clean

In [2]:
baf = pd.read_csv(os.path.join(os.getcwd(),'tab20_zcta520_tabblock20_natl.txt'),delimiter='|')
baf['GEOID20'] = baf['GEOID_TABBLOCK_20'].apply(lambda x: str(x).zfill(15))
baf['GEOID_ZCTA5_20'].fillna('N/A')
baf['ZCTA'] = baf['GEOID_ZCTA5_20'].apply(lambda x: str(x).split('.')[0] if x!='N/A' else str(x))
baf = baf[['GEOID20','ZCTA']]
baf.head()

  baf = pd.read_csv(os.path.join(os.getcwd(),'tab20_zcta520_tabblock20_natl.txt'),delimiter='|')


Unnamed: 0,GEOID20,ZCTA
0,10030101001007,
1,10030101001008,
2,10030101001009,
3,10030101001010,
4,10030101001011,


Query baf to Wisconsin

In [3]:
wi_baf = baf[baf['GEOID20'].str.startswith('55')]
wi_baf.reset_index(inplace=True, drop=True)
wi_baf['ZCTA'].fillna('N/A')
wi_baf.head()

Unnamed: 0,GEOID20,ZCTA
0,550039503001001,
1,550039508001000,
2,550039508001002,
3,550039508001003,
4,550039508001008,


Read in PL data and and query to interested columns and rename

In [4]:
wi_pl = pd.read_csv(os.path.join(os.getcwd(),'wi_pl2020_b.csv'))
wi_pl['GEOID20'] = wi_pl['GEOCODE'].astype(str)
rename_dict = {'P0040001': 'TOT_VAP20', 'P0040002': 'HSP_VAP20', 'P0040005': 'WHT_VAP20', 'P0040006': 'BLK_VAP20', 'P0040007': 'AIA_VAP20', 'P0040008': 'ASN_VAP20', 'P0040009': 'HPI_VAP20', 'P0040010': 'OTH_VAP20', 'P0040011': '2OM_VAP20', 'P0020001': 'TOT_POP20', 'P0020002': 'HSP_POP20', 'P0020005': 'WHT_POP20', 'P0020006': 'BLK_POP20', 'P0020007': 'AIA_POP20', 'P0020008': 'ASN_POP20', 'P0020009': 'HPI_POP20', 'P0020010': 'OTH_POP20', 'P0020011': '2OM_POP20'}
wi_pl.rename(columns=rename_dict,inplace=True)
wi_pl = wi_pl[['GEOID20','TOT_POP20', 'HSP_POP20', 'WHT_POP20', 'BLK_POP20', 'AIA_POP20', 'ASN_POP20', 'HPI_POP20', 'OTH_POP20', '2OM_POP20', 'TOT_VAP20', 'HSP_VAP20', 'WHT_VAP20', 'BLK_VAP20', 'AIA_VAP20', 'ASN_VAP20', 'HPI_VAP20', 'OTH_VAP20', '2OM_VAP20']]
wi_pl.head()

  wi_pl = pd.read_csv(os.path.join(os.getcwd(),'wi_pl2020_b.csv'))


Unnamed: 0,GEOID20,TOT_POP20,HSP_POP20,WHT_POP20,BLK_POP20,AIA_POP20,ASN_POP20,HPI_POP20,OTH_POP20,2OM_POP20,TOT_VAP20,HSP_VAP20,WHT_VAP20,BLK_VAP20,AIA_VAP20,ASN_VAP20,HPI_VAP20,OTH_VAP20,2OM_VAP20
0,550019501001000,5,0,5,0,0,0,0,0,0,5,0,5,0,0,0,0,0,0
1,550019501001001,6,0,5,0,0,0,0,0,1,6,0,5,0,0,0,0,0,1
2,550019501001002,12,0,12,0,0,0,0,0,0,9,0,9,0,0,0,0,0,0
3,550019501001003,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4,550019501001004,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


Merge PL data and BAF and confirm they all join

In [5]:
wi = pd.merge(wi_baf,wi_pl,on='GEOID20',how='outer',indicator=True)
print(len(wi[wi['_merge']!='both']))
wi['ZCTA'] = wi['ZCTA'].apply(lambda x: 'NO ZCTA' if str(x) == 'nan' else str(x))

0


In [6]:
Group by ZCTA

SyntaxError: invalid syntax (160542364.py, line 1)

In [None]:
zcta = wi.groupby('ZCTA').sum()
zcta.reset_index(inplace=True,drop=False)
zcta.head()

Extract data

In [None]:
zcta.to_csv('./wi_zcta_demographics.csv',index=False)