In [1]:
# dependencies
import sys
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text

# path for config file (project root folder)
sys.path.append('../../../food-insecurity-machine-learning/')

from config import protocol, username, password, host, port, database_name

# Extract, Transform, Load (ETL)

In [2]:
# Pull the Food Access Research Atlas 2019 CSV from the S3 bucket into a DataFrame.
# CensusTract is read as text so the identifier is kept as a string rather than parsed as a number.
raw_df = pd.read_csv(
    "https://gtbootcamp20230221.s3.amazonaws.com/FoodAccessResearchAtlasData2019.csv",
    dtype={"CensusTract": str},
)
raw_df.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractSeniors,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP
0,1001020100,Alabama,Autauga County,1,1912,693,0,0.0,0.0,0,...,221.0,1622.0,217.0,14.0,0.0,14.0,45.0,44.0,6.0,102.0
1,1001020200,Alabama,Autauga County,1,2170,743,0,181.0,8.34,1,...,214.0,888.0,1217.0,5.0,0.0,5.0,55.0,75.0,89.0,156.0
2,1001020300,Alabama,Autauga County,1,3373,1256,0,0.0,0.0,0,...,439.0,2576.0,647.0,17.0,5.0,11.0,117.0,87.0,99.0,172.0
3,1001020400,Alabama,Autauga County,1,4386,1722,0,0.0,0.0,0,...,904.0,4086.0,193.0,18.0,4.0,11.0,74.0,85.0,21.0,98.0
4,1001020500,Alabama,Autauga County,1,10766,4082,0,181.0,1.68,0,...,1126.0,8666.0,1437.0,296.0,9.0,48.0,310.0,355.0,230.0,339.0


In [3]:
# Derive the "StateFIPS" column from the leading two characters of the "CensusTract" string
raw_df["StateFIPS"] = raw_df["CensusTract"].str[:2]
raw_df.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractWhite,TractBlack,TractAsian,TractNHOPI,TractAIAN,TractOMultir,TractHispanic,TractHUNV,TractSNAP,StateFIPS
0,1001020100,Alabama,Autauga County,1,1912,693,0,0.0,0.0,0,...,1622.0,217.0,14.0,0.0,14.0,45.0,44.0,6.0,102.0,1
1,1001020200,Alabama,Autauga County,1,2170,743,0,181.0,8.34,1,...,888.0,1217.0,5.0,0.0,5.0,55.0,75.0,89.0,156.0,1
2,1001020300,Alabama,Autauga County,1,3373,1256,0,0.0,0.0,0,...,2576.0,647.0,17.0,5.0,11.0,117.0,87.0,99.0,172.0,1
3,1001020400,Alabama,Autauga County,1,4386,1722,0,0.0,0.0,0,...,4086.0,193.0,18.0,4.0,11.0,74.0,85.0,21.0,98.0,1
4,1001020500,Alabama,Autauga County,1,10766,4082,0,181.0,1.68,0,...,8666.0,1437.0,296.0,9.0,48.0,310.0,355.0,230.0,339.0,1


In [4]:
# Add one "<column>_PCT" column per count column, rounded to 2 decimals.
# Counts in the first group are divided by Pop2010, counts in the second group by OHU2010.
population_count_cols = ["TractLOWI", "TractKids", "TractSeniors", "TractWhite",
                         "TractBlack", "TractAsian", "TractNHOPI", "TractAIAN",
                         "TractOMultir", "TractHispanic"]
household_count_cols = ["TractHUNV", "TractSNAP"]

# A zero denominator combined with a non-zero count would produce +/-inf, which the
# dropna() calls further down do NOT remove. Masking zeros to NaN makes such tracts
# yield NaN percentages, so they are treated like any other missing value.
population = raw_df["Pop2010"].where(raw_df["Pop2010"] != 0)
housing_units = raw_df["OHU2010"].where(raw_df["OHU2010"] != 0)

# Row-wise division (axis=0 aligns the denominator with the frame's index).
# Target columns are filled positionally from the computed frame's columns.
raw_df[[f"{col}_PCT" for col in population_count_cols]] = (
    raw_df[population_count_cols].div(population, axis=0).mul(100).round(2)
)
raw_df[[f"{col}_PCT" for col in household_count_cols]] = (
    raw_df[household_count_cols].div(housing_units, axis=0).mul(100).round(2)
)

# Display Dataframe
raw_df.head()

Unnamed: 0,CensusTract,State,County,Urban,Pop2010,OHU2010,GroupQuartersFlag,NUMGQTRS,PCTGQTRS,LILATracts_1And10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
0,1001020100,Alabama,Autauga County,1,1912,693,0,0.0,0.0,0,...,11.56,84.83,11.35,0.73,0.0,0.73,2.35,2.3,0.87,14.72
1,1001020200,Alabama,Autauga County,1,2170,743,0,181.0,8.34,1,...,9.86,40.92,56.08,0.23,0.0,0.23,2.53,3.46,11.98,21.0
2,1001020300,Alabama,Autauga County,1,3373,1256,0,0.0,0.0,0,...,13.02,76.37,19.18,0.5,0.15,0.33,3.47,2.58,7.88,13.69
3,1001020400,Alabama,Autauga County,1,4386,1722,0,0.0,0.0,0,...,20.61,93.16,4.4,0.41,0.09,0.25,1.69,1.94,1.22,5.69
4,1001020500,Alabama,Autauga County,1,10766,4082,0,181.0,1.68,0,...,10.46,80.49,13.35,2.75,0.08,0.45,2.88,3.3,5.63,8.3


# 1st DataFrame (71,782 rows)
* Raw data includes total populations and households in each category per tract
* We included calculations for percentage of population or household per tract
* Null values are removed

In [5]:
# Build the first subset: tract descriptors, raw counts, and their percentage columns.
# The three groups are concatenated in this order, which fixes the column order of df1.
df1_descriptor_cols = ["CensusTract", "StateFIPS", "State", "County", "Urban", "Pop2010",
                       "OHU2010", "PovertyRate", "MedianFamilyIncome", "LAhalfand10"]
df1_count_cols = ["TractLOWI", "TractKids", "TractSeniors", "TractWhite", "TractBlack",
                  "TractAsian", "TractNHOPI", "TractAIAN", "TractOMultir", "TractHispanic",
                  "TractHUNV", "TractSNAP"]
df1_pct_cols = [f"{name}_PCT" for name in df1_count_cols]

df1 = raw_df[df1_descriptor_cols + df1_count_cols + df1_pct_cols]
df1.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
72526,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
72527,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
72528,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
72529,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
72530,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


In [6]:
# Discard every row that contains at least one NaN, then renumber the index from 0
df1 = df1.dropna().reset_index(drop=True)

# Show the last rows of the cleaned frame
df1.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
71777,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
71778,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
71779,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
71780,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
71781,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


# 2nd DataFrame (7,708 rows)
* Raw data includes the total population and households in each category per tract, as well as the share of each tract's population living more than 1/2 mile and more than 10 miles from the nearest food/grocery store (the low-access `la...halfshare` and `la...10share` columns)
* We included calculations for percentage of population or household per tract
* Null values are removed

In [7]:
# Select the columns for the second dataset; groups are concatenated in the order listed.
df2_descriptor_cols = ["CensusTract", "StateFIPS", "State", "County", "Urban", "Pop2010",
                       "OHU2010", "PovertyRate", "MedianFamilyIncome", "LAhalfand10"]
# Stems shared by the "la<stem>halfshare" and "la<stem>10share" column families
df2_share_stems = ["pop", "lowi", "kids", "seniors", "white", "black", "asian", "nhopi",
                   "aian", "omultir", "hisp", "hunv", "snap"]
df2_half_share_cols = [f"la{stem}halfshare" for stem in df2_share_stems]
df2_ten_share_cols = [f"la{stem}10share" for stem in df2_share_stems]
df2_count_cols = ["TractLOWI", "TractKids", "TractSeniors", "TractWhite", "TractBlack",
                  "TractAsian", "TractNHOPI", "TractAIAN", "TractOMultir", "TractHispanic",
                  "TractHUNV", "TractSNAP"]
df2_pct_cols = [f"{name}_PCT" for name in df2_count_cols]

df2 = raw_df[df2_descriptor_cols + df2_half_share_cols + df2_ten_share_cols
             + df2_count_cols + df2_pct_cols]
df2.tail()


Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
72526,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
72527,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
72528,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
72529,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
72530,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


In [8]:
# Discard every row that contains at least one NaN, then renumber the index from 0
df2 = df2.dropna().reset_index(drop=True)
df2.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
7703,56041975200,56,Wyoming,Uinta County,0,6505,2340,6.4,91350.0,0,...,9.16,96.88,0.22,0.2,0.17,0.57,1.97,2.87,3.03,3.46
7704,56041975300,56,Wyoming,Uinta County,0,7761,2696,13.6,62445.0,0,...,7.33,90.86,0.27,0.37,0.3,0.82,7.37,10.27,3.97,9.46
7705,56041975400,56,Wyoming,Uinta County,0,6852,2632,17.3,57248.0,0,...,10.35,89.9,0.29,0.28,0.03,0.98,8.52,12.71,4.71,8.24
7706,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
7707,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57


In [9]:
# Print the complete list of df2 column names on one line
print(list(df2.columns))

['CensusTract', 'StateFIPS', 'State', 'County', 'Urban', 'Pop2010', 'OHU2010', 'PovertyRate', 'MedianFamilyIncome', 'LAhalfand10', 'lapophalfshare', 'lalowihalfshare', 'lakidshalfshare', 'laseniorshalfshare', 'lawhitehalfshare', 'lablackhalfshare', 'laasianhalfshare', 'lanhopihalfshare', 'laaianhalfshare', 'laomultirhalfshare', 'lahisphalfshare', 'lahunvhalfshare', 'lasnaphalfshare', 'lapop10share', 'lalowi10share', 'lakids10share', 'laseniors10share', 'lawhite10share', 'lablack10share', 'laasian10share', 'lanhopi10share', 'laaian10share', 'laomultir10share', 'lahisp10share', 'lahunv10share', 'lasnap10share', 'TractLOWI', 'TractKids', 'TractSeniors', 'TractWhite', 'TractBlack', 'TractAsian', 'TractNHOPI', 'TractAIAN', 'TractOMultir', 'TractHispanic', 'TractHUNV', 'TractSNAP', 'TractLOWI_PCT', 'TractKids_PCT', 'TractSeniors_PCT', 'TractWhite_PCT', 'TractBlack_PCT', 'TractAsian_PCT', 'TractNHOPI_PCT', 'TractAIAN_PCT', 'TractOMultir_PCT', 'TractHispanic_PCT', 'TractHUNV_PCT', 'TractSNAP_PCT']

In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of df2
df2.describe()

Unnamed: 0,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,lapophalfshare,lalowihalfshare,lakidshalfshare,laseniorshalfshare,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
count,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,...,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0,7708.0
mean,0.048262,3817.423197,1455.493254,14.96104,62870.473145,0.457577,94.535147,33.729872,21.895384,15.241136,...,16.219402,84.767547,6.895898,0.55137,0.086785,3.118605,4.579817,7.511745,4.597898,12.03598
std,0.214332,1847.27087,645.236776,8.30629,16929.294665,0.498229,11.404792,12.676014,5.026505,5.079622,...,5.138068,19.210586,14.787106,1.709641,0.771337,12.062965,6.155449,14.103786,4.22931,7.658584
min,0.0,24.0,12.0,0.0,14539.0,0.0,12.71,0.0,1.16,0.0,...,0.0,0.39,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,2486.75,984.75,9.1,51535.5,0.0,95.815,24.55,19.0675,12.33,...,13.22,79.545,0.23,0.14,0.0,0.22,1.35,1.12,2.11,6.5675
50%,0.0,3520.5,1361.0,13.4,61462.0,0.0,100.0,32.645,22.38,14.94,...,15.755,93.2,0.56,0.27,0.0,0.46,2.26,2.19,3.67,10.6
75%,0.0,4817.0,1827.25,18.9,72163.75,1.0,100.0,41.7225,24.75,17.73,...,18.79,97.24,4.07,0.51,0.06,1.03,5.09,6.21,5.92,15.9125
max,1.0,37452.0,6395.0,69.7,231287.0,1.0,100.0,87.24,49.87,59.25,...,59.25,100.0,92.69,55.66,28.2,98.89,81.15,99.24,85.42,62.79


# 3rd DataFrame (67,286 rows)
* This is the option selected for the analysis and the machine learning models
* Raw data includes the total population and households in each category per tract, as well as the share of each tract's population living more than 1/2 mile from the nearest food/grocery store (the low-access `la...halfshare` columns)
* We included calculations for percentage of population or household per tract
* Null values are removed

In [11]:
# Select the columns for the third dataset; groups are concatenated in the order listed.
df3_descriptor_cols = ["CensusTract", "StateFIPS", "State", "County", "Urban", "Pop2010",
                       "OHU2010", "PovertyRate", "MedianFamilyIncome", "LAhalfand10"]
# Stems of the "la<stem>halfshare" column family
df3_share_stems = ["pop", "lowi", "kids", "seniors", "white", "black", "asian", "nhopi",
                   "aian", "omultir", "hisp", "hunv", "snap"]
df3_half_share_cols = [f"la{stem}halfshare" for stem in df3_share_stems]
df3_count_cols = ["TractLOWI", "TractKids", "TractSeniors", "TractWhite", "TractBlack",
                  "TractAsian", "TractNHOPI", "TractAIAN", "TractOMultir", "TractHispanic",
                  "TractHUNV", "TractSNAP"]
df3_pct_cols = [f"{name}_PCT" for name in df3_count_cols]

df3 = raw_df[df3_descriptor_cols + df3_half_share_cols + df3_count_cols + df3_pct_cols]
df3.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
72526,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
72527,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
72528,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
72529,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
72530,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


In [12]:
# Discard every row that contains at least one NaN, then renumber the index from 0
df3 = df3.dropna().reset_index(drop=True)
df3.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
67281,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
67282,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
67283,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
67284,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
67285,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


# State reference table

In [13]:
# One row per distinct (StateFIPS, State) pair, in order of first appearance in raw_df
state_df = (
    raw_df[["StateFIPS", "State"]]
    .drop_duplicates()
    .reset_index(drop=True)
)
state_df.tail()

Unnamed: 0,StateFIPS,State
46,51,Virginia
47,53,Washington
48,54,West Virginia
49,55,Wisconsin
50,56,Wyoming


# Data for visualizations (72,531 records)
* The schema is the same as the 3rd dataframe, the option selected for the analysis and the machine learning models
* Raw data includes the total population and households in each category per tract, as well as the share of each tract's population living more than 1/2 mile from the nearest food/grocery store (the low-access `la...halfshare` columns)
* We included calculations for percentage of population or household per tract
* Null values are NOT removed

In [14]:
# Select the visualization columns from raw_df; no rows are dropped here.
viz_descriptor_cols = ["CensusTract", "StateFIPS", "State", "County", "Urban", "Pop2010",
                       "OHU2010", "PovertyRate", "MedianFamilyIncome", "LAhalfand10"]
# Stems of the "la<stem>halfshare" column family
viz_share_stems = ["pop", "lowi", "kids", "seniors", "white", "black", "asian", "nhopi",
                   "aian", "omultir", "hisp", "hunv", "snap"]
viz_half_share_cols = [f"la{stem}halfshare" for stem in viz_share_stems]
viz_count_cols = ["TractLOWI", "TractKids", "TractSeniors", "TractWhite", "TractBlack",
                  "TractAsian", "TractNHOPI", "TractAIAN", "TractOMultir", "TractHispanic",
                  "TractHUNV", "TractSNAP"]
viz_pct_cols = [f"{name}_PCT" for name in viz_count_cols]

viz_df = raw_df[viz_descriptor_cols + viz_half_share_cols + viz_count_cols + viz_pct_cols]
viz_df.tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
72526,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
72527,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
72528,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
72529,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
72530,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


# Load data to AWS Postgres server

In [15]:
# connect to postgres database
# The username and password are percent-encoded before being placed in the URL;
# otherwise characters such as "@", ":", "/", "#" or "?" inside them would be parsed
# as URL delimiters and the connection would fail or target the wrong host.
# The values in config must therefore be stored unescaped (plain text).
rds_connection_string = (
    f'{protocol}://{quote_plus(str(username))}:{quote_plus(str(password))}'
    f'@{host}:{port}/{database_name}'
)
engine = create_engine(rds_connection_string)

In [16]:
# Drop the state table if it exists, together with any objects that depend on it (CASCADE).
# engine.begin() opens a transaction that is committed when the block exits without error,
# so the DDL is persisted on SQLAlchemy 1.x and 2.x alike. text() wraps the statement
# because SQLAlchemy 2.0 no longer accepts a plain string in Connection.execute().
with engine.begin() as conn:
    conn.execute(text('DROP TABLE IF EXISTS state CASCADE'))

In [17]:
# Write the state reference frame to the "state" table, replacing any existing table;
# rows are sent with multi-row INSERT statements and the DataFrame index is not stored
state_df.to_sql(
    name='state',
    con=engine,
    if_exists='replace',
    index=False,
    method='multi',
)

In [18]:
# Query the "state" table back from Postgres and show its last rows
pd.read_sql_query(sql='select * from state', con=engine).tail()

Unnamed: 0,StateFIPS,State
46,51,Virginia
47,53,Washington
48,54,West Virginia
49,55,Wisconsin
50,56,Wyoming


In [19]:
# Write the 1st dataframe to the "food_access_1" table, replacing any existing table;
# the DataFrame index is not stored
df1.to_sql(
    name='food_access_1',
    con=engine,
    if_exists='replace',
    index=False,
)

In [20]:
# Query the "food_access_1" table back from Postgres and show its last rows
pd.read_sql_query(sql='select * from food_access_1', con=engine).tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
71777,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
71778,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
71779,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
71780,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
71781,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


In [21]:
# Write the 2nd dataframe to the "food_access_2" table, replacing any existing table;
# the DataFrame index is not stored
df2.to_sql(
    name='food_access_2',
    con=engine,
    if_exists='replace',
    index=False,
)

In [22]:
# Query the "food_access_2" table back from Postgres and show its last rows
pd.read_sql_query(sql='select * from food_access_2', con=engine).tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
7703,56041975200,56,Wyoming,Uinta County,0,6505,2340,6.4,91350.0,0,...,9.16,96.88,0.22,0.2,0.17,0.57,1.97,2.87,3.03,3.46
7704,56041975300,56,Wyoming,Uinta County,0,7761,2696,13.6,62445.0,0,...,7.33,90.86,0.27,0.37,0.3,0.82,7.37,10.27,3.97,9.46
7705,56041975400,56,Wyoming,Uinta County,0,6852,2632,17.3,57248.0,0,...,10.35,89.9,0.29,0.28,0.03,0.98,8.52,12.71,4.71,8.24
7706,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
7707,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57


In [23]:
# Write the 3rd dataframe to the "food_access_3" table, replacing any existing table;
# the DataFrame index is not stored
df3.to_sql(
    name='food_access_3',
    con=engine,
    if_exists='replace',
    index=False,
)

In [24]:
# Query the "food_access_3" table back from Postgres and show its last rows
pd.read_sql_query(sql='select * from food_access_3', con=engine).tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
67281,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
67282,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
67283,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
67284,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
67285,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47


In [25]:
# Write the visualization dataframe (viz_df) to the "viz_data" table, replacing any
# existing table; the DataFrame index is not stored
viz_df.to_sql(
    name='viz_data',
    con=engine,
    if_exists='replace',
    index=False,
)

In [26]:
# Query the "viz_data" table back from Postgres and show its last rows
pd.read_sql_query(sql='select * from viz_data', con=engine).tail()

Unnamed: 0,CensusTract,StateFIPS,State,County,Urban,Pop2010,OHU2010,PovertyRate,MedianFamilyIncome,LAhalfand10,...,TractSeniors_PCT,TractWhite_PCT,TractBlack_PCT,TractAsian_PCT,TractNHOPI_PCT,TractAIAN_PCT,TractOMultir_PCT,TractHispanic_PCT,TractHUNV_PCT,TractSNAP_PCT
72526,56043000200,56,Wyoming,Washakie County,0,3326,1317,9.7,67254.0,1,...,17.83,93.39,0.18,0.45,0.0,0.81,5.17,9.29,4.63,4.86
72527,56043000301,56,Wyoming,Washakie County,1,2665,1154,11.6,64152.0,1,...,14.97,89.19,0.19,0.86,0.0,1.5,8.26,16.74,7.63,3.55
72528,56043000302,56,Wyoming,Washakie County,1,2542,1021,16.3,69605.0,1,...,20.3,90.95,0.43,0.39,0.04,1.02,7.16,16.01,2.25,6.27
72529,56045951100,56,Wyoming,Weston County,0,3314,1322,17.5,74500.0,1,...,15.06,95.93,0.45,0.3,0.03,1.42,1.87,2.75,3.56,2.57
72530,56045951300,56,Wyoming,Weston County,1,3894,1699,17.3,76838.0,1,...,16.69,95.17,0.15,0.26,0.05,1.13,3.24,3.21,2.0,6.47
