# Sorting ADI by US Census Bureau's FIPS Code and County

### August 31, 2023

This script takes an ADI file downloaded from the Neighborhood Atlas https://www.neighborhoodatlas.medicine.wisc.edu/ and a county-Federal Information Processing Standard (FIPS) file from the 2020 US Census Bureau and converts each to a dataframe, "adidf" and "zipdf", respectively. 

The zipdf is first parsed to extract the FIPS codes and county information before the two dataframes are merged. The dataframes are merged on the basis of the 2 digit state fips code and the 3 digit fips county code. ADI state rank can then be grouped by county with the median ADI computed for each. The median values are stored in a dictionary by mapping it to the county and fips code keys. 

Source:
Kind AJH, Buckingham W. Making Neighborhood Disadvantage Metrics Accessible: The Neighborhood Atlas. New England Journal of Medicine, 2018. 378: 2456-2458. DOI: 10.1056/NEJMp1802313. PMCID: PMC6051533. AND University of Wisconsin School of Medicine and Public Health. 2021 Area Deprivation Index v.4. Downloaded from https://www.neighborhoodatlas.medicine.wisc.edu/ 2023-09-13

In [208]:
import pandas as pd
import csv
import numpy as np

#adi retrieved from https://www.neighborhoodatlas.medicine.wisc.edu
path_to_adi = "/Users/mavoeg/Desktop/SARS-CoV-2/Wisconsin/WI_Data_Counties/adi-download/WI_2021_ADI_zip-code.csv"

#fips codes retrieved from 2020 Census Bureau county equivalent
# https://www.census.gov/library/reference/code-lists/ansi.html#cou
path_to_county_zip = "/Users/mavoeg/Desktop/SARS-CoV-2/Wisconsin/WI_Data_Counties/adi-download/FIPS_code-censbur.txt"


# parse fips data
cb_fips= pd.read_csv(path_to_county_zip, sep="|", usecols=[1, 2, 3], 
                           header=None, names=['fips1', 'fips2', 'county'])
cb_fips['fips'] = cb_fips['fips1'].astype(str) + cb_fips['fips2'].astype(str)
cb_fips['county'] = cb_fips['county'].str.lower().str.split(' county').str[0]
cb_fips.drop(columns=['fips1', 'fips2'], inplace=True)

adidf = pd.read_csv(path_to_adi, header=0).dropna(subset=['FIPS'])
adidf['FIPS_short'] = adidf['FIPS'].astype(int).astype(str).str[:5]

print('\n\nThere are ', adidf.shape[0], ' records in the adidf.') 



There are  1242859  records in the adidf.


In [209]:
# Merge the fips census bureau & adi dataframes
result_df = pd.merge(adidf, cb_fips, left_on='FIPS_short', 
                     right_on='fips', how='left').drop(columns=['FIPS_short', 'fips'])


# count only numeric adi scores as some adi values may have non-numeric codes, 
#these show in the preview as 'NaN' after running this chunk

# from the Neighborhood Atlas:
#"When a Census block group falls into one or more of the suppression 
#criteria, ADI rank is replaced with a code describing the suppression reason"

result_df['ADI_STATERNK'] = pd.to_numeric(result_df['ADI_STATERNK'], errors='coerce')

# Filter out rows where ADI_STATERNK is NaN
result_df = result_df[result_df['ADI_STATERNK'].notna()]

#Remove duplicate rows based on the BENE_ZIP_CD column
result_df = result_df.drop_duplicates(subset='BENE_ZIP_CD')

In [210]:
# Compute median ADI_STATERNK by county
median_adi = result_df.groupby('county')['ADI_STATERNK'].median()
result_df['Median'] = result_df['county'].map(median_adi)

result_df.dropna(subset=['county'], inplace=True)
result_df.dropna(thresh=3, inplace=True)

In [211]:
# assign ADI Groups by quintile
def assign_adi_group(median):
    if median >= 8.1:
        return "most disadvantaged"
    elif median >= 6.1:
        return "disadvantaged"
    elif median >= 4.1:
        return 'middle'
    elif median >= 2.1:
        return 'advantaged'
    else:
        return "least disadvantaged"

result_df['ADI_Group'] = result_df['Median'].apply(assign_adi_group) # apply the function to the reultdf
adi_dict = dict(zip(result_df['county'], result_df['ADI_Group'])) 


In [213]:
# This chunk of code is for quality assurance
print(len(adi_dict.keys())) # 72 counties
print(result_df)

fipscode_to_check = '55140'  # Replace with the fips code to check
value = adidf.loc[adidf['FIPS_short'] == fipscode_to_check, 'ADI_STATERNK'].values

# double check the adi state rank value by locating the zipcode
if len(value) > 0:
    print(f"The ADI_STATERNK value for FIPs code {county} {fipscode_to_check} is: {value[0]}")
else:
    print(f"No data available for FIPs code {fipscode_to_check}")

72
                  GISJOIN TYPE  BENE_ZIP_CD ADI_NATRANK  ADI_STATERNK  \
0         G55011700112002         530010001          57           5.0   
29        G55011700112002         530010002          57           5.0   
58        G55011700112002         530010003          57           5.0   
87        G55011700112002         530010004          57           5.0   
116       G55011700112002         530010005          57           5.0   
...                   ...  ...          ...         ...           ...   
39445074  G55013900022014         549869804          51           4.0   
39445096  G55013900022014         549869805          51           4.0   
39445118  G55013900022014         549869806          51           4.0   
39445140  G55013900022012         549869998          51           4.0   
39445162  G55013900022014         549869999          51           4.0   

                  FIPS     county  Median ADI_Group  
0         5.511701e+11  sheboygan     5.0    middle  
29        5.

In [214]:
# check the results of the adi grouping
result_df.to_csv('/Users/mavoeg/phylo_data/sept_2023/county-adi-totals.csv', sep=',')


# Calculate summary statistics
summary_stats = result_df.groupby('county').describe()

# Display summary statistics
print(summary_stats)

# Save to a CSV file
summary_stats.to_csv('/Users/mavoeg/Desktop/test.csv', sep=',')


          BENE_ZIP_CD                                                         \
                count          mean           std          min           25%   
county                                                                         
adams          6099.0  5.407408e+08  2.869871e+06  539100001.0  5.391094e+08   
ashland        6974.0  5.470107e+08  1.383446e+06  545140001.0  5.454601e+08   
barron        13603.0  5.480926e+08  1.005555e+06  540042200.0  5.480570e+08   
bayfield      11432.0  5.484388e+08  2.816516e+05  545173407.0  5.482091e+08   
brown         32768.0  5.424595e+08  8.054645e+05  541109200.0  5.416209e+08   
...               ...           ...           ...          ...           ...   
waukesha      62810.0  5.310198e+08  5.965055e+05  530050001.0  5.305139e+08   
waupaca       13314.0  5.495861e+08  3.529714e+05  541709400.0  5.494592e+08   
waushara       9452.0  5.496562e+08  4.217883e+05  539648300.0  5.496073e+08   
winnebago     20156.0  5.493416e+08  2.8

In [215]:
# METHOD 2 urban/rural high/low vax categories.

# Assigning counties to Rural High-Low categories, use both upper and lowercase

m2rural_mid = ['Jefferson', 'jefferson'] # rural county which is between high and low vaccination
m2urban_mid = ['Washington', 'washington'] # urban county which is between high and low vaccination
mid = ['Jefferson', 'jefferson', 'Washington', 'washington'] # mid vax group

m2rural_high = ["Ashland",
"Bayfield",
"Crawford",
"Door",
'Eau Claire',
"Forest",
"Iron",
'La Crosse',
"Lafayette", 
'Marathon',
"Manitowoc",
"Menominee", 
"Oneida",
'Outagamie',
"Portage",
"Price",
"Richland", "Racine",
'Rock',
"Sauk",
'Sheboygan',
"Trempealeau", 
"Vilas",
"Washburn",
'Winnebago',
"Wood", 'ashland',
'bayfield',
'crawford',
'door',
'eau claire',
'forest',
'iron',
'la crosse',
'lafayette',
'marathon',
'manitowoc',
'menominee',
'oneida',
'outagamie',
'portage',
'price',
'richland', 'racine',
'rock',
'sauk',
'sheboygan',
'trempealeau',
'vilas',
'washburn',
'winnebago',
'wood']

m2rural_low = ["Adams", "Barron", 
"Buffalo",
"Burnett",
'Calumet',
'Chippewa',
"Clark",
"Dodge", 
"Dunn",
"Florence",
'Fond du Lac',
"Grant",
"Green Lake",
"Jackson", 
"Juneau",
"Langlade",
"Lincoln",
"Marinette",
"Marquette", 
"Monroe",
"Pepin",
"Polk",
"Rusk",
"Sawyer",
"Shawano",
"Taylor",
"Vernon",
"Walworth",
"Waupaca",
"Waushara", 'adams',
'barron',
'buffalo',
'burnett',
'calumet',
'chippewa',
'clark',
'dodge',
'dunn',
'florence',
'fond du lac',
'grant',
'green lake',
'jackson',
'juneau',
'langlade',
'lincoln',
'marinette',
'marquette',
'monroe',
'pepin',
'polk',
'rusk',
'sawyer',
'shawano',
'taylor',
'vernon',
'walworth',
'waupaca',
'waushara']

# Assigning counties to Urban High-Low Cateogries 
m2urban_high = ['Brown',
'Columbia',
'Dane',
'Green',
'Iowa',
'Kenosha',
'Milwaukee',
'Ozaukee',
'Waukesha','brown', 'columbia', 'dane', 'green', 'iowa', 'kenosha', 'milwaukee', 'ozaukee', 'waukesha']

m2urban_low = ['Douglas',
'Kewaunee',
'Oconto',
'Pierce',
'St. Croix', 'Saint Croix', 
'douglas',
'kewaunee',
'oconto',
'pierce',
'st.croix',
'douglas', 'kewaunee', 'oconto', 'pierce', 
'st. croix', 'saint croix', 'saint croix',
'douglas', 'kewaunee', 'oconto', 'pierce', 'st.croix']


In [216]:
# from m1m2_urbrur_hilo_vaxgroup_seqcounts py script:
# use dictionary unpacking to create the vax groups
m2county_category_dict = {
    **{county: "rural high-vax" for county in m2rural_high},
    **{county: "rural low-vax" for county in m2rural_low},
    **{county: "urban high-vax" for county in m2urban_high},
    **{county: "urban low-vax" for county in m2urban_low},
    **{county: "mid-vax" for county in mid}
}


In [217]:
# Add columns m1_urb_rur and m2_urb_rur "urban", "rural" by the new urban rural definition using the lists above
#result_df['m2_urbrur_hilo'] = result_df.county.map(m2county_category_dict)
result_df['vax_group'] = result_df['county'].map(m2county_category_dict)


result_df['overall_group'] = result_df['vax_group'] + ' ' + result_df['ADI_Group']

In [218]:
print(result_df)

                  GISJOIN TYPE  BENE_ZIP_CD ADI_NATRANK  ADI_STATERNK  \
0         G55011700112002         530010001          57           5.0   
29        G55011700112002         530010002          57           5.0   
58        G55011700112002         530010003          57           5.0   
87        G55011700112002         530010004          57           5.0   
116       G55011700112002         530010005          57           5.0   
...                   ...  ...          ...         ...           ...   
39445074  G55013900022014         549869804          51           4.0   
39445096  G55013900022014         549869805          51           4.0   
39445118  G55013900022014         549869806          51           4.0   
39445140  G55013900022012         549869998          51           4.0   
39445162  G55013900022014         549869999          51           4.0   

                  FIPS     county  Median ADI_Group       vax_group  \
0         5.511701e+11  sheboygan     5.0    middle 

In [219]:
# group the dataframe by 'county'
grouped = result_df.groupby('county').first()

# make counties in county column lowercase
grouped.index = grouped.index.astype(str).str.lower()

# convert the grouped dataframe to dictionary
adi_dict = grouped[['overall_group', 'ADI_Group', 'Median', 'vax_group']].to_dict(orient='index')
print(adi_dict)

{'adams': {'overall_group': 'rural low-vax most disadvantaged', 'ADI_Group': 'most disadvantaged', 'Median': 9.0, 'vax_group': 'rural low-vax'}, 'ashland': {'overall_group': 'rural high-vax disadvantaged', 'ADI_Group': 'disadvantaged', 'Median': 8.0, 'vax_group': 'rural high-vax'}, 'barron': {'overall_group': 'rural low-vax disadvantaged', 'ADI_Group': 'disadvantaged', 'Median': 7.0, 'vax_group': 'rural low-vax'}, 'bayfield': {'overall_group': 'rural high-vax middle', 'ADI_Group': 'middle', 'Median': 5.0, 'vax_group': 'rural high-vax'}, 'brown': {'overall_group': 'urban high-vax advantaged', 'ADI_Group': 'advantaged', 'Median': 4.0, 'vax_group': 'urban high-vax'}, 'buffalo': {'overall_group': 'rural low-vax middle', 'ADI_Group': 'middle', 'Median': 6.0, 'vax_group': 'rural low-vax'}, 'burnett': {'overall_group': 'rural low-vax disadvantaged', 'ADI_Group': 'disadvantaged', 'Median': 7.0, 'vax_group': 'rural low-vax'}, 'calumet': {'overall_group': 'rural low-vax middle', 'ADI_Group': 'mi

In [220]:
adi_result = pd.DataFrame.from_dict(adi_dict, orient ='index')
# save dictionary results to file
adi_result.to_csv('/Users/mavoeg/phylo_data/sept_2023/adi_group_cbfip.csv', index=True)

