In [2]:
# dependencies
import sys

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import inspect

# path for config file (project root folder)
sys.path.append('../../../food-insecurity-machine-learning/')

from config import protocol, username, password, host, port, database_name

# National & State Summary - ETL

### Data to populate 5 charts comparing sociodemographic factors between US census tracts that are flagged as food insecure and those that are not, based on the LAhalfand10 flag:
1. % low income population
2. % non-productive age groups (minors & seniors) by population
3. % race/ethnicity distribution by population
4. % households HUNV
5. % SNAP housing units

[documentation](https://www.ers.usda.gov/data-products/food-access-research-atlas/documentation/)


In [3]:
# Load the food access data from the S3 bucket into a DataFrame.
# CensusTract is read as text (dtype str) rather than being parsed as a number.
df = pd.read_csv(
    "https://gtbootcamp20230221.s3.amazonaws.com/FoodAccessResearchAtlasData2019.csv",
    dtype={'CensusTract': str},
)
df.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractSeniors,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP
0,1001020100,Alabama,Autauga County,1,1912,693,0,0.0,0.0,0,...,221.0,1622.0,217.0,14.0,0.0,14.0,45.0,44.0,6.0,102.0
1,1001020200,Alabama,Autauga County,1,2170,743,0,181.0,8.34,1,...,214.0,888.0,1217.0,5.0,0.0,5.0,55.0,75.0,89.0,156.0
2,1001020300,Alabama,Autauga County,1,3373,1256,0,0.0,0.0,0,...,439.0,2576.0,647.0,17.0,5.0,11.0,117.0,87.0,99.0,172.0
3,1001020400,Alabama,Autauga County,1,4386,1722,0,0.0,0.0,0,...,904.0,4086.0,193.0,18.0,4.0,11.0,74.0,85.0,21.0,98.0
4,1001020500,Alabama,Autauga County,1,10766,4082,0,181.0,1.68,0,...,1126.0,8666.0,1437.0,296.0,9.0,48.0,310.0,355.0,230.0,339.0


In [4]:
# create the state FIPS column for mapping: the first two characters of the tract id
df['StateFIPS'] = df['CensusTract'].str[:2]
df.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS
0,1001020100,Alabama,Autauga County,1,1912,693,0,0.0,0.0,0,...,1622.0,217.0,14.0,0.0,14.0,45.0,44.0,6.0,102.0,1
1,1001020200,Alabama,Autauga County,1,2170,743,0,181.0,8.34,1,...,888.0,1217.0,5.0,0.0,5.0,55.0,75.0,89.0,156.0,1
2,1001020300,Alabama,Autauga County,1,3373,1256,0,0.0,0.0,0,...,2576.0,647.0,17.0,5.0,11.0,117.0,87.0,99.0,172.0,1
3,1001020400,Alabama,Autauga County,1,4386,1722,0,0.0,0.0,0,...,4086.0,193.0,18.0,4.0,11.0,74.0,85.0,21.0,98.0,1
4,1001020500,Alabama,Autauga County,1,10766,4082,0,181.0,1.68,0,...,8666.0,1437.0,296.0,9.0,48.0,310.0,355.0,230.0,339.0,1


In [5]:
# list every column name of the raw data on one line
print(list(df.columns))

['CensusTract', 'State', 'County', 'Urban', 'Pop2010', 'OHU2010', 'GroupQuartersFlag', 'NUMGQTRS', 'PCTGQTRS', 'LILATracts_1And10', 'LILATracts_halfAnd10', 'LILATracts_1And20', 'LILATracts_Vehicle', 'HUNVFlag', 'LowIncomeTracts', 'PovertyRate', 'MedianFamilyIncome', 'LA1and10', 'LAhalfand10', 'LA1and20', 'LATracts_half', 'LATracts1', 'LATracts10', 'LATracts20', 'LATractsVehicle_20', 'LAPOP1_10', 'LAPOP05_10', 'LAPOP1_20', 'LALOWI1_10', 'LALOWI05_10', 'LALOWI1_20', 'lapophalf', 'lapophalfshare', 'lalowihalf', 'lalowihalfshare', 'lakidshalf', 'lakidshalfshare', 'laseniorshalf', 'laseniorshalfshare', 'lawhitehalf', 'lawhitehalfshare', 'lablackhalf', 'lablackhalfshare', 'laasianhalf', 'laasianhalfshare', 'lanhopihalf', 'lanhopihalfshare', 'laaianhalf', 'laaianhalfshare', 'laomultirhalf', 'laomultirhalfshare', 'lahisphalf', 'lahisphalfshare', 'lahunvhalf', 'lahunvhalfshare', 'lasnaphalf', 'lasnaphalfshare', 'lapop1', 'lapop1share', 'lalowi1', 'lalowi1share', 'lakids1', 'lakids1share', 'lase

In [6]:
# function to extract unique values from text column
def unique_text(x):
    """Return the distinct text values of ``x`` joined by single spaces.

    Parameters
    ----------
    x : iterable of str
        Values of a text column (for example one group of a groupby).

    Returns
    -------
    str
        The distinct values in sorted order, separated by a space. An empty
        input gives an empty string.
    """
    # sorted() makes the result deterministic when a group holds more than one
    # distinct value; joining a bare set leaves the order unspecified.
    return ' '.join(sorted(set(x)))

In [7]:
# Total number of population or households in the selected features,
# grouped by STATE and LAhalfand10 (flag for low food access).
state_sum_features = [
    'Pop2010', 'OHU2010',
    'lapophalf', 'lalowihalf', 'lakidshalf', 'laseniorshalf',
    'lawhitehalf', 'lablackhalf', 'laasianhalf', 'lanhopihalf',
    'laaianhalf', 'laomultirhalf', 'lahisphalf', 'lahunvhalf', 'lasnaphalf',
    'TractLOWI', 'TractKids', 'TractSeniors',
    'TractWhite', 'TractBlack', 'TractAsian', 'TractNHOPI',
    'TractAIAN', 'TractOMultir', 'TractHispanic', 'TractHUNV', 'TractSNAP',
]
state_aggregations = {feature: 'sum' for feature in state_sum_features}
# StateFIPS is text, so it is collapsed to its distinct value(s) per group instead of summed
state_aggregations['StateFIPS'] = unique_text

state_summary_df = (
    df.groupby(['State', 'LAhalfand10'])
      .agg(state_aggregations)
      .reset_index()                               # State and LAhalfand10 become regular columns
      .rename(columns={'State': 'Jurisdiction'})   # change column name
)

# display dataframe
state_summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop2010,OHU2010,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,1596873.0,321048.0,13113.0,1094.0,15662.0,64059.0,66205.0,38773.0,104751.0,01
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,1678521.0,930263.0,40482.0,1963.0,12556.0,104102.0,119397.0,76587.0,164852.0,01
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,136745.0,2989.0,5396.0,735.0,18974.0,14265.0,7935.0,4215.0,5371.0,02
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,336630.0,20273.0,32721.0,6674.0,78812.0,48558.0,31307.0,18259.0,20484.0,02
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,617568.0,31972.0,19973.0,2419.0,60861.0,124985.0,245099.0,24916.0,43053.0,04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97,West Virginia,1,921047,386789,750972.0,286984.0,156678.0,122285.0,694982.0,31467.0,...,846547.0,42880.0,8959.0,289.0,2015.0,20357.0,13740.0,34875.0,60530.0,54
98,Wisconsin,0,1953902,765486,1511624.0,335913.0,357276.0,218200.0,1436804.0,14898.0,...,1740172.0,93087.0,23524.0,561.0,23374.0,73184.0,107933.0,48700.0,81651.0,55
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,3161895.0,266061.0,105710.0,1266.0,31152.0,167000.0,228123.0,109439.0,187980.0,55
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,120061.0,640.0,756.0,103.0,8315.0,7895.0,13073.0,1580.0,3681.0,56


In [8]:
# Total number of population or households in the selected features for the US,
# grouped by LAhalfand10 (flag for low food access).
us_sum_features = [
    'Pop2010', 'OHU2010',
    'lapophalf', 'lalowihalf', 'lakidshalf', 'laseniorshalf',
    'lawhitehalf', 'lablackhalf', 'laasianhalf', 'lanhopihalf',
    'laaianhalf', 'laomultirhalf', 'lahisphalf', 'lahunvhalf', 'lasnaphalf',
    'TractLOWI', 'TractKids', 'TractSeniors',
    'TractWhite', 'TractBlack', 'TractAsian', 'TractNHOPI',
    'TractAIAN', 'TractOMultir', 'TractHispanic', 'TractHUNV', 'TractSNAP',
]
us_aggregations = {feature: 'sum' for feature in us_sum_features}

# aggregate, then turn LAhalfand10 from the index back into a column
us_summary_df = df.groupby(['LAhalfand10']).agg(us_aggregations).reset_index()

# label the rows as the national jurisdiction, with '00' as its FIPS code
us_summary_df = us_summary_df.assign(Jurisdiction='United States', StateFIPS='00')

# display data
us_summary_df


Unnamed: 0,LAhalfand10,Pop2010,OHU2010,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,laasianhalf,...,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,Jurisdiction,StateFIPS
0,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,647229.0,...,10152811.0,3909156.0,129476.0,943163.0,8389823.0,14436336.0,4542964.0,4715999.0,United States,0
1,1,217613053,81982340,159273701.0,48027446.0,39061810.0,20715139.0,116738059.0,20745886.0,7173398.0,...,28776503.0,10765064.0,410535.0,1968964.0,19726331.0,36040953.0,5879872.0,9916554.0,United States,0


In [9]:
# calculate total population and total households by state and for the US

# per-state totals, with State turned into a Jurisdiction column
state_pop_df = (
    df.groupby('State')
      .agg({'Pop2010': 'sum', 'OHU2010': 'sum'})
      .reset_index()
      .rename(columns={'State': 'Jurisdiction', 'Pop2010': 'Pop_Total', 'OHU2010': 'OHU_Total'})
)

# single-row frame holding the national totals
us_total = {
    'Jurisdiction': ['United States'],
    'Pop_Total': [df.Pop2010.sum()],
    'OHU_Total': [df.OHU2010.sum()]
}
us_pop_df = pd.DataFrame(us_total)

# DataFrame.append is deprecated (and removed in pandas 2.0); pd.concat with
# ignore_index=True gives the same stacked rows with a fresh 0..n-1 index.
state_pop_df = pd.concat([state_pop_df, us_pop_df], ignore_index=True)

state_pop_df

  state_pop_df = state_pop_df.append(us_pop_df).reset_index(drop=True)


Unnamed: 0,Jurisdiction,Pop_Total,OHU_Total
0,Alabama,4779736,1883791
1,Alaska,710231,258058
2,Arizona,6392017,2380990
3,Arkansas,2915918,1147084
4,California,37253956,12577498
5,Colorado,5029196,1972868
6,Connecticut,3574097,1371087
7,Delaware,897934,342297
8,District of Columbia,601723,266707
9,Florida,18801310,7420802


In [10]:
# stack the state rows and the US total rows into one table with a fresh 0..n-1 index
summary_df = pd.concat([state_summary_df, us_summary_df], ignore_index=True)
summary_df.tail()

Unnamed: 0,Jurisdiction,LAhalfand10,Pop2010,OHU2010,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,3161895.0,266061.0,105710.0,1266.0,31152.0,167000.0,228123.0,109439.0,187980.0,55
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,120061.0,640.0,756.0,103.0,8315.0,7895.0,13073.0,1580.0,3681.0,56
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,391218.0,4108.0,3670.0,324.0,5021.0,21515.0,37158.0,7293.0,9558.0,56
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,67587011.0,10152811.0,3909156.0,129476.0,943163.0,8389823.0,14436336.0,4542964.0,4715999.0,0
103,United States,1,217613053,81982340,159273701.0,48027446.0,39061810.0,20715139.0,116738059.0,20745886.0,...,155965656.0,28776503.0,10765064.0,410535.0,1968964.0,19726331.0,36040953.0,5879872.0,9916554.0,0


In [11]:
# attach each jurisdiction's total population and total households (left join on Jurisdiction)
summary_df = summary_df.merge(state_pop_df, how='left', on='Jurisdiction')
summary_df.tail()

Unnamed: 0,Jurisdiction,LAhalfand10,Pop2010,OHU2010,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS,Pop_Total,OHU_Total
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,105710.0,1266.0,31152.0,167000.0,228123.0,109439.0,187980.0,55,5686986,2279768
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,756.0,103.0,8315.0,7895.0,13073.0,1580.0,3681.0,56,563626,226879
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,3670.0,324.0,5021.0,21515.0,37158.0,7293.0,9558.0,56,563626,226879
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,3909156.0,129476.0,943163.0,8389823.0,14436336.0,4542964.0,4715999.0,0,308745538,116716292
103,United States,1,217613053,81982340,159273701.0,48027446.0,39061810.0,20715139.0,116738059.0,20745886.0,...,10765064.0,410535.0,1968964.0,19726331.0,36040953.0,5879872.0,9916554.0,0,308745538,116716292


In [12]:
# the population / household sums per LAhalfand10 group get *_byFlag names
flag_column_names = {'Pop2010': 'Pop_byFlag', 'OHU2010': 'OHU_byFlag'}
summary_df = summary_df.rename(columns=flag_column_names)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS,Pop_Total,OHU_Total
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,13113.0,1094.0,15662.0,64059.0,66205.0,38773.0,104751.0,01,4779736,1883791
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,40482.0,1963.0,12556.0,104102.0,119397.0,76587.0,164852.0,01,4779736,1883791
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,5396.0,735.0,18974.0,14265.0,7935.0,4215.0,5371.0,02,710231,258058
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,32721.0,6674.0,78812.0,48558.0,31307.0,18259.0,20484.0,02,710231,258058
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,19973.0,2419.0,60861.0,124985.0,245099.0,24916.0,43053.0,04,6392017,2380990
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,105710.0,1266.0,31152.0,167000.0,228123.0,109439.0,187980.0,55,5686986,2279768
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,756.0,103.0,8315.0,7895.0,13073.0,1580.0,3681.0,56,563626,226879
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,3670.0,324.0,5021.0,21515.0,37158.0,7293.0,9558.0,56,563626,226879
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,3909156.0,129476.0,943163.0,8389823.0,14436336.0,4542964.0,4715999.0,00,308745538,116716292


## Calculate percentages from totals and add to summary_df
1. % low income population 
    - TractLOWI/Pop2010
2. % non-productive age groups (minors & seniors) by population
    - TractKids/Pop2010
    - TractSeniors/Pop2010
    - (TractSeniors + TractKids)/Pop2010
3. % race/ethnicity distribution by population
    - TractWhite/Pop2010
    - TractBlack/Pop2010
    - TractAsian/Pop2010
    - TractNHOPI/Pop2010
    - TractAIAN/Pop2010
    - TractOMultir/Pop2010
    - TractHispanic/Pop2010
4. % households HUNV
    - TractHUNV/OHU2010
5. % SNAP housing units
    - TractSNAP/OHU2010

# % low income population 

In [13]:
# % low income population: low-income count over the population of each
# Jurisdiction / LAhalfand10 group
summary_df['LOWI_per'] = summary_df['TractLOWI'].div(summary_df['Pop_byFlag'])

# % low income population computed from the 1/2 mile ("half") columns
# NOTE(review): the USDA documentation appears to describe the la*half fields as
# counts *beyond* 1/2 mile from a supermarket, not within it - confirm the wording.
summary_df['LOWI_perhalf'] = summary_df['lalowihalf'].div(summary_df['lapophalf'])
summary_df = summary_df.reset_index(drop=True)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS,Pop_Total,OHU_Total,LOWI_per,LOWI_perhalf
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,15662.0,64059.0,66205.0,38773.0,104751.0,01,4779736,1883791,0.373203,0.371354
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,12556.0,104102.0,119397.0,76587.0,164852.0,01,4779736,1883791,0.376495,0.385451
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,18974.0,14265.0,7935.0,4215.0,5371.0,02,710231,258058,0.227119,0.244359
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,78812.0,48558.0,31307.0,18259.0,20484.0,02,710231,258058,0.256997,0.253434
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,60861.0,124985.0,245099.0,24916.0,43053.0,04,6392017,2380990,0.404045,0.314755
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,31152.0,167000.0,228123.0,109439.0,187980.0,55,5686986,2279768,0.283872,0.278845
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,8315.0,7895.0,13073.0,1580.0,3681.0,56,563626,226879,0.286303,0.281291
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,5021.0,21515.0,37158.0,7293.0,9558.0,56,563626,226879,0.274999,0.266921
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,943163.0,8389823.0,14436336.0,4542964.0,4715999.0,00,308745538,116716292,0.344635,0.302097


# % non-productive age groups (minors & seniors) by population

In [14]:

# % non-productive age groups (minors & seniors) over the population of each
# Jurisdiction / LAhalfand10 group
pop_by_flag = summary_df['Pop_byFlag']
kids_seniors = summary_df['TractKids'] + summary_df['TractSeniors']
summary_df['Kids_per'] = summary_df['TractKids'].div(pop_by_flag)
summary_df['Seniors_per'] = summary_df['TractSeniors'].div(pop_by_flag)
summary_df['Non_prod_ages_per'] = kids_seniors.div(pop_by_flag)

# the same three shares computed from the 1/2 mile ("half") columns
pop_half = summary_df['lapophalf']
kids_seniors_half = summary_df['lakidshalf'] + summary_df['laseniorshalf']
summary_df['Kids_perhalf'] = summary_df['lakidshalf'].div(pop_half)
summary_df['Seniors_perhalf'] = summary_df['laseniorshalf'].div(pop_half)
summary_df['Non_prod_ages_perhalf'] = kids_seniors_half.div(pop_half)

summary_df = summary_df.reset_index(drop=True)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,Pop_Total,OHU_Total,LOWI_per,LOWI_perhalf,Kids_per,Seniors_per,Non_prod_ages_per,Kids_perhalf,Seniors_perhalf,Non_prod_ages_perhalf
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,4779736,1883791,0.373203,0.371354,0.237576,0.142030,0.379607,0.238352,0.140967,0.379319
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,4779736,1883791,0.376495,0.385451,0.236459,0.134416,0.370875,0.238810,0.133579,0.372389
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,710231,258058,0.227119,0.244359,0.247686,0.074506,0.322191,0.266812,0.073578,0.340390
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,710231,258058,0.256997,0.253434,0.263656,0.077595,0.341251,0.264873,0.075304,0.340177
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,6392017,2380990,0.404045,0.314755,0.243877,0.136837,0.380714,0.236812,0.161630,0.398442
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,5686986,2279768,0.283872,0.278845,0.235594,0.136675,0.372269,0.238946,0.135726,0.374671
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,563626,226879,0.286303,0.281291,0.262844,0.112652,0.375495,0.264359,0.113151,0.377510
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,563626,226879,0.274999,0.266921,0.232919,0.128142,0.361061,0.235931,0.123960,0.359891
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,308745538,116716292,0.344635,0.302097,0.232798,0.132378,0.365176,0.238118,0.143616,0.381734


# % race/ethnicity distribution by population

In [15]:
# % race/ethnicity distribution over the population of each Jurisdiction / LAhalfand10 group
# (new column, source count column) - the list order fixes the order of the new columns
race_share_columns = [
    ('White_per', 'TractWhite'),
    ('Black_per', 'TractBlack'),
    ('Asian_per', 'TractAsian'),
    ('NHOPI_per', 'TractNHOPI'),
    ('AIAN_per', 'TractAIAN'),
    ('Multi_Race_per', 'TractOMultir'),
    ('Hispanic_per', 'TractHispanic'),
]
for share_column, count_column in race_share_columns:
    summary_df[share_column] = summary_df[count_column] / summary_df['Pop_byFlag']

# the same distribution computed from the 1/2 mile ("half") columns
race_share_half_columns = [
    ('White_perhalf', 'lawhitehalf'),
    ('Black_perhalf', 'lablackhalf'),
    ('Asian_perhalf', 'laasianhalf'),
    ('NHOPI_perhalf', 'lanhopihalf'),
    ('AIAN_perhalf', 'laaianhalf'),
    ('Multi_Race_perhalf', 'laomultirhalf'),
    ('Hispanic_perhalf', 'lahisphalf'),
]
for share_column, count_column in race_share_half_columns:
    summary_df[share_column] = summary_df[count_column] / summary_df['lapophalf']

summary_df = summary_df.reset_index(drop=True)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,AIAN_per,Multi_Race_per,Hispanic_per,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,0.007785,0.031841,0.032908,0.810040,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,0.004536,0.037611,0.043137,0.613927,0.331968,0.013706,0.000681,0.004683,0.035040,0.039759
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,0.101703,0.076462,0.042533,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,0.150500,0.092727,0.059784,0.658072,0.035004,0.055552,0.010073,0.153561,0.087716,0.056371
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,0.070952,0.145708,0.285737,0.754853,0.023872,0.016690,0.003049,0.089011,0.112524,0.230376
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,0.008345,0.044735,0.061108,0.858946,0.064169,0.026089,0.000335,0.008436,0.042025,0.056931
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,0.060354,0.057306,0.094890,0.870798,0.004381,0.004962,0.000796,0.066556,0.052524,0.085627
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,0.011790,0.050522,0.087255,0.920091,0.009529,0.007722,0.000764,0.011594,0.050309,0.086109
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,0.010349,0.092062,0.158410,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396


# % households HUNV
* The raw number of households within a 1/2 mile radius of a food market is not available at the state level, so only the overall percentage is calculated here.

In [16]:
# % households HUNV: TractHUNV over the housing units of each Jurisdiction / LAhalfand10 group
summary_df['HUNV_per'] = summary_df['TractHUNV'].div(summary_df['OHU_byFlag'])
summary_df = summary_df.reset_index(drop=True)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,Multi_Race_per,Hispanic_per,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf,HUNV_per
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,0.031841,0.032908,0.810040,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556,0.049612
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,0.037611,0.043137,0.613927,0.331968,0.013706,0.000681,0.004683,0.035040,0.039759,0.069482
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,0.076462,0.042533,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339,0.062601
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,0.092727,0.059784,0.658072,0.035004,0.055552,0.010073,0.153561,0.087716,0.056371,0.095734
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,0.145708,0.285737,0.754853,0.023872,0.016690,0.003049,0.089011,0.112524,0.230376,0.077380
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,0.044735,0.061108,0.858946,0.064169,0.026089,0.000335,0.008436,0.042025,0.056931,0.072271
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,0.057306,0.094890,0.870798,0.004381,0.004962,0.000796,0.066556,0.052524,0.085627,0.030233
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,0.050522,0.087255,0.920091,0.009529,0.007722,0.000764,0.011594,0.050309,0.086109,0.041765
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,0.092062,0.158410,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396,0.130793


# % SNAP housing units
* The raw number of households within a 1/2 mile radius of a food market is not available at the state level, so only the overall percentage is calculated here.

In [17]:
# % SNAP housing units: TractSNAP over the housing units of each Jurisdiction / LAhalfand10 group
summary_df['SNAP_per'] = summary_df['TractSNAP'].div(summary_df['OHU_byFlag'])
summary_df = summary_df.reset_index(drop=True)

summary_df

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,Hispanic_per,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf,HUNV_per,SNAP_per
0,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,0.032908,0.810040,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556,0.049612,0.134034
1,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,0.043137,0.613927,0.331968,0.013706,0.000681,0.004683,0.035040,0.039759,0.069482,0.149558
2,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,0.042533,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339,0.062601,0.079770
3,Alaska,1,523668,190727,408808.0,103606.0,108282.0,30785.0,269025.0,14310.0,...,0.059784,0.658072,0.035004,0.055552,0.010073,0.153561,0.087716,0.056371,0.095734,0.107400
4,Arizona,0,857778,321996,514911.0,162071.0,121937.0,83225.0,388682.0,12292.0,...,0.285737,0.754853,0.023872,0.016690,0.003049,0.089011,0.112524,0.230376,0.077380,0.133707
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99,Wisconsin,1,3733084,1514282,2837700.0,791278.0,678056.0,385149.0,2437432.0,182091.0,...,0.061108,0.858946,0.064169,0.026089,0.000335,0.008436,0.042025,0.056931,0.072271,0.124138
100,Wyoming,0,137770,52261,116879.0,32877.0,30898.0,13225.0,101778.0,512.0,...,0.094890,0.870798,0.004381,0.004962,0.000796,0.066556,0.052524,0.085627,0.030233,0.070435
101,Wyoming,1,425856,174618,349661.0,93332.0,82496.0,43344.0,321720.0,3332.0,...,0.087255,0.920091,0.009529,0.007722,0.000764,0.011594,0.050309,0.086109,0.041765,0.054737
102,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,0.158410,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396,0.130793,0.135775


In [18]:
# re-order table: sort by state FIPS code, then by the low-access flag, and renumber the rows
summary_df = summary_df.sort_values(by=['StateFIPS', 'LAhalfand10'], ignore_index=True)
summary_df.head()

Unnamed: 0,Jurisdiction,LAhalfand10,Pop_byFlag,OHU_byFlag,lapophalf,lalowihalf,lakidshalf,laseniorshalf,lawhitehalf,lablackhalf,...,Hispanic_per,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf,HUNV_per,SNAP_per
0,United States,0,91132485,34733952,55901364.0,16887651.0,13311123.0,8028355.0,48309471.0,3785381.0,...,0.15841,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396,0.130793,0.135775
1,United States,1,217613053,81982340,159273701.0,48027446.0,39061810.0,20715139.0,116738059.0,20745886.0,...,0.165619,0.73294,0.130253,0.045038,0.001732,0.009324,0.080718,0.146784,0.071721,0.12096
2,Alabama,0,2011849,781527,1849451.0,686801.0,440820.0,260712.0,1498129.0,266327.0,...,0.032908,0.81004,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556,0.049612,0.134034
3,Alabama,1,2767887,1102264,2247996.0,866493.0,536843.0,300285.0,1380105.0,746262.0,...,0.043137,0.613927,0.331968,0.013706,0.000681,0.004683,0.03504,0.039759,0.069482,0.149558
4,Alaska,0,186563,67331,162208.0,39637.0,43279.0,11935.0,123228.0,2684.0,...,0.042533,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339,0.062601,0.07977


In [19]:
# Get df information: prints the column names, non-null counts and dtypes of summary_df
summary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 56 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   Jurisdiction           104 non-null    object 
 1   LAhalfand10            104 non-null    int64  
 2   Pop_byFlag             104 non-null    int64  
 3   OHU_byFlag             104 non-null    int64  
 4   lapophalf              104 non-null    float64
 5   lalowihalf             104 non-null    float64
 6   lakidshalf             104 non-null    float64
 7   laseniorshalf          104 non-null    float64
 8   lawhitehalf            104 non-null    float64
 9   lablackhalf            104 non-null    float64
 10  laasianhalf            104 non-null    float64
 11  lanhopihalf            104 non-null    float64
 12  laaianhalf             104 non-null    float64
 13  laomultirhalf          104 non-null    float64
 14  lahisphalf             104 non-null    float64
 15  lahunv

In [20]:
# list the summary columns in their current order
print(list(summary_df.columns))

['Jurisdiction', 'LAhalfand10', 'Pop_byFlag', 'OHU_byFlag', 'lapophalf', 'lalowihalf', 'lakidshalf', 'laseniorshalf', 'lawhitehalf', 'lablackhalf', 'laasianhalf', 'lanhopihalf', 'laaianhalf', 'laomultirhalf', 'lahisphalf', 'lahunvhalf', 'lasnaphalf', 'TractLOWI', 'TractKids', 'TractSeniors', 'TractWhite', 'TractBlack', 'TractAsian', 'TractNHOPI', 'TractAIAN', 'TractOMultir', 'TractHispanic', 'TractHUNV', 'TractSNAP', 'StateFIPS', 'Pop_Total', 'OHU_Total', 'LOWI_per', 'LOWI_perhalf', 'Kids_per', 'Seniors_per', 'Non_prod_ages_per', 'Kids_perhalf', 'Seniors_perhalf', 'Non_prod_ages_perhalf', 'White_per', 'Black_per', 'Asian_per', 'NHOPI_per', 'AIAN_per', 'Multi_Race_per', 'Hispanic_per', 'White_perhalf', 'Black_perhalf', 'Asian_perhalf', 'NHOPI_perhalf', 'AIAN_perhalf', 'Multi_Race_perhalf', 'Hispanic_perhalf', 'HUNV_per', 'SNAP_per']


In [21]:
# Arrange the columns in a fixed order, grouped by what they hold.
id_and_total_columns = [
    'StateFIPS', 'Jurisdiction', 'Pop_Total', 'OHU_Total',
    'Pop_byFlag', 'OHU_byFlag', 'lapophalf', 'LAhalfand10',
]
tract_count_columns = [
    'TractLOWI', 'TractKids', 'TractSeniors', 'TractWhite', 'TractBlack', 'TractAsian',
    'TractNHOPI', 'TractAIAN', 'TractOMultir', 'TractHispanic', 'TractHUNV', 'TractSNAP',
]
tract_share_columns = [
    'LOWI_per', 'Kids_per', 'Seniors_per', 'Non_prod_ages_per', 'White_per', 'Black_per',
    'Asian_per', 'NHOPI_per', 'AIAN_per', 'Multi_Race_per', 'Hispanic_per', 'HUNV_per', 'SNAP_per',
]
half_count_columns = [
    'lalowihalf', 'lakidshalf', 'laseniorshalf', 'lawhitehalf', 'lablackhalf', 'laasianhalf',
    'lanhopihalf', 'laaianhalf', 'laomultirhalf', 'lahisphalf', 'lahunvhalf', 'lasnaphalf',
]
half_share_columns = [
    'LOWI_perhalf', 'Kids_perhalf', 'Seniors_perhalf', 'Non_prod_ages_perhalf', 'White_perhalf',
    'Black_perhalf', 'Asian_perhalf', 'NHOPI_perhalf', 'AIAN_perhalf', 'Multi_Race_perhalf',
    'Hispanic_perhalf',
]

summary_df = summary_df.reindex(
    columns=(
        id_and_total_columns
        + tract_count_columns
        + tract_share_columns
        + half_count_columns
        + half_share_columns
    )
)
summary_df.head()

Unnamed: 0,StateFIPS,Jurisdiction,Pop_Total,OHU_Total,Pop_byFlag,OHU_byFlag,lapophalf,LAhalfand10,TractLOWI,TractKids,...,Kids_perhalf,Seniors_perhalf,Non_prod_ages_perhalf,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf
0,0,United States,308745538,116716292,91132485,34733952,55901364.0,0,31407468.0,21215438.0,...,0.238118,0.143616,0.381734,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396
1,0,United States,308745538,116716292,217613053,81982340,159273701.0,1,69046369.0,52957586.0,...,0.24525,0.13006,0.37531,0.73294,0.130253,0.045038,0.001732,0.009324,0.080718,0.146784
2,1,Alabama,4779736,1883791,2011849,781527,1849451.0,0,750829.0,477968.0,...,0.238352,0.140967,0.379319,0.81004,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556
3,1,Alabama,4779736,1883791,2767887,1102264,2247996.0,1,1042096.0,654491.0,...,0.23881,0.133579,0.372389,0.613927,0.331968,0.013706,0.000681,0.004683,0.03504,0.039759
4,2,Alaska,710231,258058,186563,67331,162208.0,0,42372.0,46209.0,...,0.266812,0.073578,0.34039,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339


# Load data to AWS PostgreSQL

In [28]:
# Connection to the AWS PostgreSQL server.
# The database URL is assembled from the values imported from config.
# NOTE(review): username and password are interpolated as-is; presumably they contain
# no URL-reserved characters (e.g. '@', ':', '/') - confirm, otherwise they need escaping.
rds_connection_string = f'{protocol}://{username}:{password}@{host}:{port}/{database_name}'
engine = create_engine(rds_connection_string)

In [29]:
# Confirm table names
# Engine.table_names() is deprecated (and removed in SQLAlchemy 2.0);
# the inspector returns the same list of table names.
inspect(engine).get_table_names()

  engine.table_names()


['summary',
 'state',
 'food_access_1',
 'food_access_2',
 'food_access_3',
 'viz_data']

In [30]:
# Export the transformed data into PostgreSQL; an existing 'summary' table is replaced
# and the DataFrame index is not written as a column
summary_df.to_sql(name='summary', con=engine, if_exists='replace', index=False)

104

In [32]:
# Read the data back from the Postgres table and show the first rows
summary_from_db = pd.read_sql_query(sql='select * from summary', con=engine)
summary_from_db.head()

Unnamed: 0,StateFIPS,Jurisdiction,Pop_Total,OHU_Total,Pop_byFlag,OHU_byFlag,lapophalf,LAhalfand10,TractLOWI,TractKids,...,Kids_perhalf,Seniors_perhalf,Non_prod_ages_perhalf,White_perhalf,Black_perhalf,Asian_perhalf,NHOPI_perhalf,AIAN_perhalf,Multi_Race_perhalf,Hispanic_perhalf
0,0,United States,308745538,116716292,91132485,34733952,55901364.0,0,31407468.0,21215438.0,...,0.238118,0.143616,0.381734,0.864191,0.067715,0.011578,0.000881,0.011713,0.043925,0.069396
1,0,United States,308745538,116716292,217613053,81982340,159273701.0,1,69046369.0,52957586.0,...,0.24525,0.13006,0.37531,0.73294,0.130253,0.045038,0.001732,0.009324,0.080718,0.146784
2,1,Alabama,4779736,1883791,2011849,781527,1849451.0,0,750829.0,477968.0,...,0.238352,0.140967,0.379319,0.81004,0.144003,0.005857,0.000508,0.008039,0.031564,0.032556
3,1,Alabama,4779736,1883791,2767887,1102264,2247996.0,1,1042096.0,654491.0,...,0.23881,0.133579,0.372389,0.613927,0.331968,0.013706,0.000681,0.004683,0.03504,0.039759
4,2,Alaska,710231,258058,186563,67331,162208.0,0,42372.0,46209.0,...,0.266812,0.073578,0.34039,0.759691,0.016547,0.026429,0.003872,0.117522,0.075958,0.043339
