In [1]:
import os
import pandas as pd
import requests
import numpy as np

# Wisconsin_PL_ZCTA_02_01_2023

## Background:
- We received a request for aggregated Public Law 94-171 data to ZCTA boundaries by county for Wisconsin.

## Approach:
- Load in ZCTA BAF from the Census and PL data and join together.
- Query PL data to the fields listed above and rename.
- For blocks where the ZCTA assignment is null, assign 'NO ZCTA'.
- Retrieve county name (using FIPS) from the Census. 
- Group data by ZCTAs for each county to aggregate block level data to ZCTAs. Please note that this means that ZCTAs which cross counties would only be shown in partiality and not for the entire ZCTA.

## Links to datasets used:
- [Wisconsin block PL 94-171 2020 from the RDH](https://redistrictingdatahub.org/dataset/wisconsin-block-pl-94171-2020/)
- [ZCTA Block Assignment file from the US Census](https://www2.census.gov/geo/docs/maps-data/data/rel2020/zcta520/tab20_zcta520_tabblock20_natl.txt)

For a full 'raw-from-source' file, contact info@redistrictingdatahub.org

Import BAF and clean

In [2]:
baf = pd.read_csv(os.path.join(os.getcwd(),'tab20_zcta520_tabblock20_natl.txt'),delimiter='|')
baf['GEOID20'] = baf['GEOID_TABBLOCK_20'].apply(lambda x: str(x).zfill(15))
baf['GEOID_ZCTA5_20'].fillna('N/A')
baf['ZCTA'] = baf['GEOID_ZCTA5_20'].apply(lambda x: str(x).split('.')[0] if x!='N/A' else str(x))
baf = baf[['GEOID20','ZCTA']]
baf.head()

  baf = pd.read_csv(os.path.join(os.getcwd(),'tab20_zcta520_tabblock20_natl.txt'),delimiter='|')


Unnamed: 0,GEOID20,ZCTA
0,10030101001007,
1,10030101001008,
2,10030101001009,
3,10030101001010,
4,10030101001011,


Query baf to Wisconsin

In [3]:
wi_baf = baf[baf['GEOID20'].str.startswith('55')]
wi_baf.reset_index(inplace=True, drop=True)
wi_baf['ZCTA'].fillna('N/A')
wi_baf.head()

Unnamed: 0,GEOID20,ZCTA
0,550039503001001,
1,550039508001000,
2,550039508001002,
3,550039508001003,
4,550039508001008,


Retrieve dictionary of county fips to names using the US Census API

In [4]:
def counties_dictionary(state_fips='55'):
    """Inputs: state fips code
    Process: Retrieves a list of counties in the given state from the Census API.  
    Outputs: A list of county fips codes in the state. """
    #uses the fips input into the census api
    resp = requests.get(
        
        "https://api.census.gov/data/2020/dec/pl"
        "?get=NAME&for=county:*&in=state:{}".format(state_fips)  #uses the fips input to locate the state
    )
    #retrieves the data as a json 
    header, *rows = resp.json()
    #county column is "county"
    county_column_index = header.index("county")
    county_fips = set(row[county_column_index] for row in rows) #sequence of counties 
    county_name_index = header.index("NAME")
    county_names = set(row[county_name_index] for row in rows)
    county_fips = np.array(list(county_fips))
    county_names = np.array(list(county_names))
    df = pd.DataFrame({'COUNTYFP20': county_fips, 'COUNTY': county_names}) #make pd dataframe of arrays
    df['COUNTY'] = df['COUNTY'].apply(lambda x: str(x).split(',')[0])
    df['COUNTYFP20']=state_fips + df['COUNTYFP20']
    df['COUNTYFP20'] = df['COUNTYFP20'].astype(str)
    county_dictionary = dict(zip(list(df['COUNTYFP20']),list(df['COUNTY'])))
    return county_dictionary
                             
wi_counties = counties_dictionary()
print(wi_counties)

{'55141': 'Eau Claire County', '55017': 'Columbia County', '55121': 'Crawford County', '55023': 'Oneida County', '55131': 'Price County', '55005': 'Brown County', '55095': 'Green Lake County', '55123': 'Calumet County', '55089': 'Walworth County', '55078': 'Iron County', '55119': 'Marathon County', '55053': 'Dodge County', '55047': 'Oconto County', '55109': 'Kewaunee County', '55091': 'Manitowoc County', '55133': 'Shawano County', '55073': 'Adams County', '55051': 'Winnebago County', '55003': 'Lincoln County', '55087': 'Lafayette County', '55045': 'Rock County', '55117': 'Taylor County', '55115': 'Grant County', '55009': 'Richland County', '55139': 'Jackson County', '55041': 'Green County', '55011': 'Bayfield County', '55105': 'Outagamie County', '55135': 'Waukesha County', '55063': 'Portage County', '55001': 'Dane County', '55069': 'La Crosse County', '55099': 'Barron County', '55129': 'Vilas County', '55113': 'Ashland County', '55043': 'Clark County', '55103': 'Waupaca County', '5501

Read in PL data and and query to interested columns and rename

In [5]:
wi_pl = pd.read_csv(os.path.join(os.getcwd(),'wi_pl2020_b.csv'))
wi_pl['GEOID20'] = wi_pl['GEOCODE'].astype(str)
rename_dict = {'P0040001': 'TOT_VAP20', 'P0040002': 'HSP_VAP20', 'P0040005': 'WHT_VAP20', 'P0040006': 'BLK_VAP20', 'P0040007': 'AIA_VAP20', 'P0040008': 'ASN_VAP20', 'P0040009': 'HPI_VAP20', 'P0040010': 'OTH_VAP20', 'P0040011': '2OM_VAP20', 'P0020001': 'TOT_POP20', 'P0020002': 'HSP_POP20', 'P0020005': 'WHT_POP20', 'P0020006': 'BLK_POP20', 'P0020007': 'AIA_POP20', 'P0020008': 'ASN_POP20', 'P0020009': 'HPI_POP20', 'P0020010': 'OTH_POP20', 'P0020011': '2OM_POP20'}
wi_pl.rename(columns=rename_dict,inplace=True)
wi_pl['COUNTYFP20'] = wi_pl['GEOID20'].apply(lambda x: str(x)[0:5])
wi_pl['COUNTY'] = wi_pl['COUNTYFP20'].apply(lambda x: wi_counties.get(str(x)))
wi_pl = wi_pl[['GEOID20','COUNTYFP20','COUNTY','TOT_POP20', 'HSP_POP20', 'WHT_POP20', 'BLK_POP20', 'AIA_POP20', 'ASN_POP20', 'HPI_POP20', 'OTH_POP20', '2OM_POP20', 'TOT_VAP20', 'HSP_VAP20', 'WHT_VAP20', 'BLK_VAP20', 'AIA_VAP20', 'ASN_VAP20', 'HPI_VAP20', 'OTH_VAP20', '2OM_VAP20']]
wi_pl.head()

  wi_pl = pd.read_csv(os.path.join(os.getcwd(),'wi_pl2020_b.csv'))


Unnamed: 0,GEOID20,COUNTYFP20,COUNTY,TOT_POP20,HSP_POP20,WHT_POP20,BLK_POP20,AIA_POP20,ASN_POP20,HPI_POP20,...,2OM_POP20,TOT_VAP20,HSP_VAP20,WHT_VAP20,BLK_VAP20,AIA_VAP20,ASN_VAP20,HPI_VAP20,OTH_VAP20,2OM_VAP20
0,550019501001000,55001,Dane County,5,0,5,0,0,0,0,...,0,5,0,5,0,0,0,0,0,0
1,550019501001001,55001,Dane County,6,0,5,0,0,0,0,...,1,6,0,5,0,0,0,0,0,1
2,550019501001002,55001,Dane County,12,0,12,0,0,0,0,...,0,9,0,9,0,0,0,0,0,0
3,550019501001003,55001,Dane County,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,550019501001004,55001,Dane County,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


Merge PL data and BAF and confirm they all join

In [6]:
wi = pd.merge(wi_baf,wi_pl,on='GEOID20',how='outer',indicator=True)
print(len(wi[wi['_merge']!='both']))
wi['ZCTA'] = wi['ZCTA'].apply(lambda x: 'NO ZCTA' if str(x) == 'nan' else str(x))

0


Group by ZCTA and county

In [7]:
wi['COUNTY_ZCTA'] = wi['COUNTY'] + ' - '+ wi['ZCTA']
wi_reverse_co_dict = dict(zip(list(wi_counties.values()),list(wi_counties.keys())))
zcta = wi.groupby('COUNTY_ZCTA').sum()
zcta.reset_index(inplace=True,drop=False)
zcta['COUNTY'] = zcta['COUNTY_ZCTA'].apply(lambda x: str(x).split(' - ')[0])
zcta['ZCTA'] = zcta['COUNTY_ZCTA'].apply(lambda x: str(x).split(' - ')[1])
zcta['COUNTYFP20'] = zcta['COUNTY'].apply(lambda x: wi_reverse_co_dict.get(str(x)))
zcta_order = ['COUNTY_ZCTA','COUNTY','COUNTYFP20','ZCTA']+list(rename_dict.values())
zcta = zcta[zcta_order]
zcta.head()

Unnamed: 0,COUNTY_ZCTA,COUNTY,COUNTYFP20,ZCTA,TOT_VAP20,HSP_VAP20,WHT_VAP20,BLK_VAP20,AIA_VAP20,ASN_VAP20,...,2OM_VAP20,TOT_POP20,HSP_POP20,WHT_POP20,BLK_POP20,AIA_POP20,ASN_POP20,HPI_POP20,OTH_POP20,2OM_POP20
0,Adams County - 54401,Adams County,55073,54401,24822,597,20858,248,109,2308,...,617,31727,1078,25220,362,144,3683,8,110,1122
1,Adams County - 54403,Adams County,55073,54403,19363,515,16776,232,107,1116,...,574,24712,871,20522,332,130,1754,10,51,1042
2,Adams County - 54405,Adams County,55073,54405,782,215,553,1,1,5,...,5,1085,336,729,2,2,7,0,2,7
3,Adams County - 54408,Adams County,55073,54408,541,2,516,0,5,2,...,10,673,4,633,2,6,4,0,6,18
4,Adams County - 54409,Adams County,55073,54409,110,0,105,0,1,0,...,4,130,0,125,0,1,0,0,0,4


Extract data

In [8]:
zcta.to_csv('./wi_2020_demo_2020_zcta.csv',index=False)