# Data Collection and Cleaning
## Projecting US Food Insecurity in 2020
### By Khyatee Desai

In [1]:
# import necessary libraries
import pandas as pd
import numpy as np
import os
from zipfile import ZipFile 
set_config(print_changed_only=False, display=None)
pd.set_option('display.max_columns', None)
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_style('darkgrid')
import warnings
warnings.filterwarnings('ignore')

# 1. Feeding America Datasets
### Import all the files

In [155]:
directory = "../datasets/feeding_america/"


def _read_fa(filename, **read_kwargs):
    """Read one Feeding America workbook located under `directory`.

    Extra keyword arguments are forwarded unchanged to `pd.read_excel`.
    """
    return pd.read_excel(directory + filename, **read_kwargs)


# One frame per workbook; the two-digit suffix follows the second year in
# the file name (e.g. FA_2011_2009.xlsx -> df_FA_09).
df_FA_09 = _read_fa('FA_2011_2009.xlsx')
df_FA_10 = _read_fa('FA_2012_2010.xlsx')
df_FA_11 = _read_fa('FA_2013_2011.xlsx')
df_FA_12 = _read_fa('FA_2014_2012.xlsx')
df_FA_13 = _read_fa('FA_2015_2013.xlsx')
df_FA_14 = _read_fa('FA_2016_2014.xlsx')
df_FA_15 = _read_fa('FA_2017_2015.xlsx')
df_FA_16 = _read_fa('FA_2018_2016.xlsx')
df_FA_17 = _read_fa('FA_2019_2017.xlsx')
# This workbook is read with header=1 (column labels taken from its second row).
df_FA_18 = _read_fa('FA_2020_2018.xlsx', header=1)
df_FAprojection_20 = _read_fa('projection_10.2020.xlsx')


In [159]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_09. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_09 = df_FA_09.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '% of children in FI HH with HH incomes at or below 185% FPL',
        'Number Food Insecure Children',
        '% of children in FI HH with HH incomes above 185% FPL',
    ]
)

In [160]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_10. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_10 = df_FA_10.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        # The trailing space is part of this label as written in the notebook.
        'Number of Food Insecure Children in 2010 ',
        '% FI > High Threshold',
        '% food insecure children in HH w/ HH incomes below 185 FPL',
        '% of food insecure children in HH w/ HH incomes above 185 FPL',
    ]
)

In [161]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_11. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_11 = df_FA_11.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        'Number of Food Insecure Children in 2011',
        '% FI > High Threshold',
        '% food insecure children in HH w/ HH incomes below 185 FPL',
        '% of food insecure children in HH w/ HH incomes above 185 FPL',
    ]
)

In [162]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_12. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_12 = df_FA_12.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2012',
        '% food insecure Children in HH w/HH Incomes Below 185 FPL in 2012',
        '% food insecure Children in HH w/HH Incomes Above 185 FPL in 2012',
    ]
)

In [163]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_13. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_13 = df_FA_13.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2013',
        '% food insecure Children in HH w/HH Incomes Below 185 FPL in 2013',
        '% food insecure Children in HH w/HH Incomes Above 185 FPL in 2013',
    ]
)

In [164]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_14. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_14 = df_FA_14.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2014',
        '% food insecure children in HH w/ HH incomes below 185 FPL in 2014',
        '% food insecure children in HH w/ HH incomes above 185 FPL in 2014',
    ]
)

In [165]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_15. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_15 = df_FA_15.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2015',
        '% food insecure children in HH w/ HH incomes below 185 FPL in 2015',
        '% food insecure children in HH w/ HH incomes above 185 FPL in 2015',
    ]
)

In [166]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_16. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_16 = df_FA_16.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2016',
        '% food insecure children in HH w/ HH incomes below 185 FPL in 2016',
        '% food insecure children in HH w/ HH incomes above 185 FPL in 2016',
    ]
)

In [167]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_17. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_17 = df_FA_17.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2017',
        '% food insecure children in HH w/ HH incomes below 185 FPL in 2017',
        '% food insecure children in HH w/ HH incomes above 185 FPL in 2017',
    ]
)

In [168]:
# Remove the state-threshold, threshold-band and child-level columns from
# df_FA_18. Raises KeyError if any label is missing (e.g. on a second run).
df_FA_18 = df_FA_18.drop(
    columns=[
        'Low Threshold in state',
        'High Threshold in state',
        '% FI ≤ Low Threshold',
        '% FI Btwn Thresholds',
        '% FI > High Threshold',
        '# of Food Insecure Children in 2018',
        '% food insecure children in HH w/ HH incomes below 185 FPL in 2018',
        '% food insecure children in HH w/ HH incomes above 185 FPL in 2018',
    ]
)

# 2. Unemployment Data

In [193]:
directory = "../datasets/unemployment/"


def _read_unemployment(filename):
    """Read one county unemployment workbook located under `directory`.

    Column labels are taken from row index 4 of the sheet (header=4), and the
    row labelled 0 in the resulting frame is then dropped.
    """
    sheet = pd.read_excel(directory + filename, header=4)
    return sheet.drop(index=0)


# One frame per yearly workbook (laucntyYY.xlsx -> df_unemp_YY).
df_unemp_09 = _read_unemployment('laucnty09.xlsx')
df_unemp_10 = _read_unemployment('laucnty10.xlsx')
df_unemp_11 = _read_unemployment('laucnty11.xlsx')
df_unemp_12 = _read_unemployment('laucnty12.xlsx')
df_unemp_13 = _read_unemployment('laucnty13.xlsx')
df_unemp_14 = _read_unemployment('laucnty14.xlsx')
df_unemp_15 = _read_unemployment('laucnty15.xlsx')
df_unemp_16 = _read_unemployment('laucnty16.xlsx')
df_unemp_17 = _read_unemployment('laucnty17.xlsx')
df_unemp_18 = _read_unemployment('laucnty18.xlsx')
df_unemp_19 = _read_unemployment('laucnty19.xlsx')


In [191]:
# Relabel df_unemp_09's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_09.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [195]:
# Relabel df_unemp_10's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_10.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [197]:
# Relabel df_unemp_11's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_11.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [198]:
# Relabel df_unemp_12's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_12.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [199]:
# Relabel df_unemp_13's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_13.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [200]:
# Relabel df_unemp_14's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_14.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [201]:
# Relabel df_unemp_15's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_15.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [202]:
# Relabel df_unemp_16's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_16.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [203]:
# Relabel df_unemp_17's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_17.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [210]:
# Relabel df_unemp_18's columns in place; labels that are not present in the
# frame are left alone by rename's default behaviour.
df_unemp_18.rename(
    {
        'Code': 'CN',
        'Code.1': 'FIPS_state',
        'Code.2': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Unnamed: 5': 'idk',  # placeholder name kept from the original notebook
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

In [211]:
# Relabel df_unemp_19's columns in place. This mapping differs from the one
# used for the earlier years: it starts from 'LAUS Code' / 'Code' / 'Code.1'
# and has no 'Unnamed: 5' entry.
df_unemp_19.rename(
    {
        'LAUS Code': 'LAUS',
        'Code': 'FIPS_state',
        'Code.1': 'FIPS_county',
        'County Name/State Abbreviation': 'State/County',
        'Force': 'Total_workforce',
        '(%)': 'Unemployment_rate',
    },
    axis='columns',
    inplace=True,
)

# 3. Household Income Data (2019 & 2020)
Data dictionary: https://www2.census.gov/programs-surveys/cps/techdocs/cpsmar19.pdf<br>
'GESTFIPS':'FIPS_state', 'GTCO':'FIPS_county', 'GTMETSTA':'Metro_status',
                               'HEFAMINC':'HH_income', 
                                'H_NUMPER':'HH_size', 'HUNDER18':'Num_minors','H_TENURE':'Rent_vs_Owned',
                               'HDIS_YN':'Disability', 'HCSP_YN':'Child_support', 'HINC_UC':'Unemployment_payments',
                               'NOW_HCOV':'Health_insurance'

In [243]:
# Read the two household CSV extracts (hhpub19 first, then hhpub20).
df_household_19, df_household_20 = map(
    pd.read_csv,
    [
        '../datasets/household/hhpub19.csv',
        '../datasets/household/hhpub20.csv',
    ],
)

### Map column values to data dictionary

In [244]:
# Decode the numeric codes of six df_household_19 columns into text labels.
# Each inner dict maps raw code -> label; any value not listed becomes 'N/A'.
household_19_code_labels = {
    'GTMETSTA': {1: 'HH_Metrop', 2: 'HH_Non-Metrop', 3: 'N/A'},
    'H_TENURE': {0: 'N/A', 1: 'HH_owned', 2: 'HH_rented', 3: 'HH_rented_noCash'},
    'HDIS_YN': {0: 'N/A', 1: 'HH_disabled', 2: 'HH_not_disabled'},
    'HCSP_YN': {0: 'N/A', 1: 'HH_Child_support', 2: 'HH_no_child_support'},
    'HINC_UC': {0: 'N/A', 1: 'HH_unemployment_pay', 2: 'HH_no_unemployment_pay'},
    'NOW_HCOV': {1: 'HH_health_insured', 2: 'HH_some_health_insured',
                 3: 'HH_no_health_insured'},
}

for column, code_labels in household_19_code_labels.items():
    raw_codes = df_household_19[column]
    choices = list(code_labels.values())

    # Idempotency guard: once a column holds labels, none of the `== code`
    # tests below can match, so re-running the cell used to overwrite every
    # value with 'N/A'. Leave an already-decoded column untouched instead.
    if not raw_codes.empty and raw_codes.isin(choices + ['N/A']).all():
        continue

    conditions = [raw_codes == code for code in code_labels]
    df_household_19[column] = np.select(conditions, choices, default='N/A')

In [246]:
# Decode the numeric codes of six df_household_20 columns into text labels.
# Each inner dict maps raw code -> label; any value not listed becomes 'N/A'.
household_20_code_labels = {
    'GTMETSTA': {1: 'HH_Metrop', 2: 'HH_Non-Metrop', 3: 'N/A'},
    'H_TENURE': {0: 'N/A', 1: 'HH_owned', 2: 'HH_rented', 3: 'HH_rented_noCash'},
    'HDIS_YN': {0: 'N/A', 1: 'HH_disabled', 2: 'HH_not_disabled'},
    'HCSP_YN': {0: 'N/A', 1: 'HH_Child_support', 2: 'HH_no_child_support'},
    'HINC_UC': {0: 'N/A', 1: 'HH_unemployment_pay', 2: 'HH_no_unemployment_pay'},
    'NOW_HCOV': {1: 'HH_health_insured', 2: 'HH_some_health_insured',
                 3: 'HH_no_health_insured'},
}

for column, code_labels in household_20_code_labels.items():
    raw_codes = df_household_20[column]
    choices = list(code_labels.values())

    # Idempotency guard: once a column holds labels, none of the `== code`
    # tests below can match, so re-running the cell used to overwrite every
    # value with 'N/A'. Leave an already-decoded column untouched instead.
    if not raw_codes.empty and raw_codes.isin(choices + ['N/A']).all():
        continue

    conditions = [raw_codes == code for code in code_labels]
    df_household_20[column] = np.select(conditions, choices, default='N/A')

### Rename columns for interpretability

In [247]:
# Keep eleven household columns, then give them readable names.
# NOTE(review): 'HEFAMINC' is not among the selected columns, so its rename
# entry never applies and the income column stays named 'HTOTVAL'.
# Confirm whether 'HTOTVAL' -> 'HH_income' was intended.
df_household_19 = (
    df_household_19[[
        'GESTFIPS', 'GTCO', 'GTMETSTA', 'HTOTVAL', 'H_NUMPER', 'HUNDER18',
        'H_TENURE', 'HDIS_YN', 'HCSP_YN', 'HINC_UC', 'NOW_HCOV',
    ]]
    .rename(columns={
        'GESTFIPS': 'FIPS_state',
        'GTCO': 'FIPS_county',
        'GTMETSTA': 'Metro_status',
        'HEFAMINC': 'HH_income',
        'H_NUMPER': 'HH_size',
        'HUNDER18': 'Num_minors',
        'H_TENURE': 'Rent_vs_Owned',
        'HDIS_YN': 'Disability',
        'HCSP_YN': 'Child_support',
        'HINC_UC': 'Unemployment_payments',
        'NOW_HCOV': 'Health_insurance',
    })
)


In [248]:
# Keep eleven household columns, then give them readable names.
# NOTE(review): 'HEFAMINC' is not among the selected columns, so its rename
# entry never applies and the income column stays named 'HTOTVAL'.
# Confirm whether 'HTOTVAL' -> 'HH_income' was intended.
df_household_20 = (
    df_household_20[[
        'GESTFIPS', 'GTCO', 'GTMETSTA', 'HTOTVAL', 'H_NUMPER', 'HUNDER18',
        'H_TENURE', 'HDIS_YN', 'HCSP_YN', 'HINC_UC', 'NOW_HCOV',
    ]]
    .rename(columns={
        'GESTFIPS': 'FIPS_state',
        'GTCO': 'FIPS_county',
        'GTMETSTA': 'Metro_status',
        'HEFAMINC': 'HH_income',
        'H_NUMPER': 'HH_size',
        'HUNDER18': 'Num_minors',
        'H_TENURE': 'Rent_vs_Owned',
        'HDIS_YN': 'Disability',
        'HCSP_YN': 'Child_support',
        'HINC_UC': 'Unemployment_payments',
        'NOW_HCOV': 'Health_insurance',
    })
)


Unnamed: 0,FIPS_state,FIPS_county,Metro_status,HTOTVAL,HH_size,Num_minors,Rent_vs_Owned,Disability,Child_support,Unemployment_payments,Health_insurance
0,23,0,,127449,2,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
1,23,0,,64680,2,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
2,23,0,,40002,1,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
3,23,0,,8424,2,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
4,23,0,,59114,4,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
...,...,...,...,...,...,...,...,...,...,...,...
91495,15,3,,40700,1,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
91496,15,3,,20421,1,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
91497,15,3,,72455,2,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_some_health_insured
91498,15,3,,13626,1,0,,,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured


In [261]:
# Spot-check ten randomly chosen rows of df_household_19. No random_state is
# passed, so a different set of rows is shown on every run.
df_household_19.sample(10)

Unnamed: 0,FIPS_state,FIPS_county,Metro_status,HTOTVAL,HH_size,Num_minors,Rent_vs_Owned,Disability,Child_support,Unemployment_payments,Health_insurance
10492,36,119,HH_Metrop,149750,2,0,HH_owned,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
21241,17,97,HH_Metrop,7800,3,1,HH_owned,HH_not_disabled,HH_Child_support,HH_no_unemployment_pay,HH_some_health_insured
47871,12,0,HH_Metrop,0,0,0,,,,,
47057,13,139,HH_Metrop,67000,6,2,HH_rented,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_some_health_insured
89632,6,73,HH_Metrop,79969,2,0,HH_owned,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
43595,45,0,HH_Non-Metrop,69000,3,0,HH_owned,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
39887,54,0,HH_Non-Metrop,10812,1,0,HH_owned,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
49348,12,86,HH_Metrop,50000,2,0,HH_owned,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_some_health_insured
83199,41,0,HH_Non-Metrop,30862,1,0,HH_rented,HH_not_disabled,HH_no_child_support,HH_no_unemployment_pay,HH_health_insured
57532,28,0,HH_Metrop,0,0,0,,,,,


# 4. Demographic Data
Data Dict: https://www2.census.gov/programs-surveys/popest/technical-documentation/file-layouts/2010-2019/cc-est2019-alldata.pdf

In [97]:
# Load the demographics CSV; the file is decoded as ISO-8859-1 instead of
# read_csv's default encoding.
df_demographics = pd.read_csv('../datasets/demographics/demographics.csv',encoding='iso-8859-1')
# Bare expression: renders the frame as the cell output.
df_demographics

Unnamed: 0,SUMLEV,STATE,COUNTY,STNAME,CTYNAME,YEAR,AGEGRP,TOT_POP,TOT_MALE,TOT_FEMALE,WA_MALE,WA_FEMALE,BA_MALE,BA_FEMALE,IA_MALE,IA_FEMALE,AA_MALE,AA_FEMALE,NA_MALE,NA_FEMALE,TOM_MALE,TOM_FEMALE,WAC_MALE,WAC_FEMALE,BAC_MALE,BAC_FEMALE,IAC_MALE,IAC_FEMALE,AAC_MALE,AAC_FEMALE,NAC_MALE,NAC_FEMALE,NH_MALE,NH_FEMALE,NHWA_MALE,NHWA_FEMALE,NHBA_MALE,NHBA_FEMALE,NHIA_MALE,NHIA_FEMALE,NHAA_MALE,NHAA_FEMALE,NHNA_MALE,NHNA_FEMALE,NHTOM_MALE,NHTOM_FEMALE,NHWAC_MALE,NHWAC_FEMALE,NHBAC_MALE,NHBAC_FEMALE,NHIAC_MALE,NHIAC_FEMALE,NHAAC_MALE,NHAAC_FEMALE,NHNAC_MALE,NHNAC_FEMALE,H_MALE,H_FEMALE,HWA_MALE,HWA_FEMALE,HBA_MALE,HBA_FEMALE,HIA_MALE,HIA_FEMALE,HAA_MALE,HAA_FEMALE,HNA_MALE,HNA_FEMALE,HTOM_MALE,HTOM_FEMALE,HWAC_MALE,HWAC_FEMALE,HBAC_MALE,HBAC_FEMALE,HIAC_MALE,HIAC_FEMALE,HAAC_MALE,HAAC_FEMALE,HNAC_MALE,HNAC_FEMALE
0,50,1,1,Alabama,Autauga County,1,0,54571,26569,28002,21295,22002,4559,5130,119,139,200,284,29,18,367,429,21633,22391,4704,5306,277,314,300,409,42,37,25875,27386,20709,21485,4512,5091,103,115,194,280,13,9,344,406,21026,21853,4647,5258,251,282,291,398,23,27,694,616,586,517,47,39,16,24,6,4,16,9,23,23,607,538,57,48,26,32,9,11,19,10
1,50,1,1,Alabama,Autauga County,1,1,3579,1866,1713,1411,1316,362,317,5,3,13,15,1,0,74,62,1479,1368,405,362,23,18,34,28,3,1,1778,1651,1337,1260,356,313,2,2,13,15,0,0,70,61,1402,1312,396,357,19,17,34,28,1,0,88,62,74,56,6,4,3,1,0,0,1,0,4,1,77,56,9,5,4,1,0,0,2,1
2,50,1,1,Alabama,Autauga County,1,2,3991,2001,1990,1521,1526,399,374,14,8,17,21,1,3,49,58,1570,1583,425,403,27,19,32,42,3,4,1933,1916,1460,1465,398,372,12,2,17,21,0,3,46,53,1506,1517,423,400,25,12,30,39,1,4,68,74,61,61,1,2,2,6,0,0,1,0,3,5,64,66,2,3,2,7,2,3,2,0
3,50,1,1,Alabama,Autauga County,1,3,4290,2171,2119,1658,1620,431,406,15,12,23,18,4,1,40,62,1694,1681,453,436,29,27,32,37,4,5,2105,2055,1613,1570,421,403,12,9,22,18,3,0,34,55,1643,1624,440,429,24,22,30,36,3,4,66,64,45,50,10,3,3,3,1,0,1,1,6,7,51,57,13,7,5,5,2,1,1,1
4,50,1,1,Alabama,Autauga County,1,4,4290,2213,2077,1628,1585,502,424,12,7,25,14,4,2,42,45,1664,1624,525,444,23,20,39,31,6,5,2153,2026,1580,1543,495,420,12,5,23,14,1,1,42,43,1616,1580,518,439,23,18,37,30,3,4,60,51,48,42,7,4,0,2,2,0,3,1,0,2,48,44,7,5,0,2,2,1,3,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
716371,50,56,45,Wyoming,Weston County,12,14,499,280,219,256,203,0,1,2,0,19,12,0,0,3,3,258,206,1,1,5,2,19,13,0,0,276,214,252,198,0,1,2,0,19,12,0,0,3,3,254,201,1,1,5,2,19,13,0,0,4,5,4,5,0,0,0,0,0,0,0,0,0,0,4,5,0,0,0,0,0,0,0,0
716372,50,56,45,Wyoming,Weston County,12,15,352,180,172,173,169,0,0,1,1,3,1,0,0,3,1,176,170,0,1,3,1,4,1,0,0,176,170,172,167,0,0,1,1,0,1,0,0,3,1,175,168,0,1,3,1,1,1,0,0,4,2,1,2,0,0,0,0,3,0,0,0,0,0,1,2,0,0,0,0,3,0,0,0
716373,50,56,45,Wyoming,Weston County,12,16,229,107,122,105,120,0,0,0,2,0,0,0,0,2,0,107,120,0,0,2,2,0,0,0,0,105,122,103,120,0,0,0,2,0,0,0,0,2,0,105,120,0,0,2,2,0,0,0,0,2,0,2,0,0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0,0,0,0,0
716374,50,56,45,Wyoming,Weston County,12,17,198,82,116,80,115,0,0,1,1,0,0,0,0,1,0,81,115,0,0,1,1,1,0,0,0,80,115,79,114,0,0,0,1,0,0,0,0,1,0,80,114,0,0,0,1,1,0,0,0,2,1,1,1,0,0,1,0,0,0,0,0,0,0,1,1,0,0,1,0,0,0,0,0


# 5. Houselessness
**TODO:** find a CoC (Continuum of Care) to county conversion.

In [98]:
# Load the houselessness workbook; its rows are keyed by 'CoC Number', not by
# county (see the displayed output).
df_houseless = pd.read_excel('../datasets/houseless/houseless_coc.xlsx')
# Bare expression: renders the frame as the cell output.
df_houseless

Unnamed: 0,CoC Number,CoC Name,CoC Category,"Overall Homeless, 2019","Sheltered ES Homeless, 2019","Sheltered TH Homeless, 2019","Sheltered SH Homeless, 2019","Sheltered Total Homeless, 2019","Unsheltered Homeless, 2019","Homeless Individuals, 2019","Sheltered ES Homeless Individuals, 2019","Sheltered TH Homeless Individuals, 2019","Sheltered SH Homeless Individuals, 2019","Sheltered Total Homeless Individuals, 2019","Unsheltered Homeless Individuals, 2019","Homeless People in Families, 2019","Sheltered ES Homeless People in Families, 2019","Sheltered TH Homeless People in Families, 2019","Sheltered Total Homeless People in Families, 2019","Unsheltered Homeless People in Families, 2019","Homeless Family Households, 2019","Sheltered ES Homeless Family Households, 2019","Sheltered TH Homeless Family Households, 2019","Sheltered Total Homeless Family Households, 2019","Unsheltered Homeless Family Households, 2019","Chronically Homeless, 2019","Sheltered ES Chronically Homeless, 2019","Sheltered SH Chronically Homeless, 2019","Sheltered Total Chronically Homeless, 2019","Unsheltered Chronically Homeless, 2019","Chronically Homeless Individuals, 2019","Sheltered ES Chronically Homeless Individuals, 2019","Sheltered SH Chronically Homeless Individuals, 2019","Sheltered Total Chronically Homeless Individuals, 2019","Unsheltered Chronically Homeless Individuals, 2019","Chronically Homeless People in Families, 2019","Sheltered ES Chronically Homeless People in Families, 2019","Sheltered Total Chronically Homeless People in Families, 2019","Unsheltered Chronically Homeless People in Families, 2019","Homeless Veterans, 2019","Sheltered ES Homeless Veterans, 2019","Sheltered TH Homeless Veterans, 2019","Sheltered SH Homeless Veterans, 2019","Sheltered Total Homeless Veterans, 2019","Unsheltered Homeless Veterans, 2019","Homeless Unaccompanied Youth (Under 25), 2019","Sheltered ES Homeless Unaccompanied Youth (Under 25), 2019","Sheltered TH Homeless Unaccompanied Youth (Under 
25), 2019","Sheltered SH Homeless Unaccompanied Youth (Under 25), 2019","Sheltered Total Homeless Unaccompanied Youth (Under 25), 2019","Unsheltered Homeless Unaccompanied Youth (Under 25), 2019","Homeless Unaccompanied Youth Under 18, 2019","Sheltered ES Homeless Unaccompanied Youth Under 18, 2019","Sheltered TH Homeless Unaccompanied Youth Under 18, 2019","Sheltered SH Homeless Unaccompanied Youth Under 18, 2019","Sheltered Total Homeless Unaccompanied Youth Under 18, 2019","Unsheltered Homeless Unaccompanied Youth Under 18, 2019","Homeless Unaccompanied Youth Age 18-24, 2019","Sheltered ES Homeless Unaccompanied Youth Age 18-24, 2019","Sheltered TH Homeless Unaccompanied Youth Age 18-24, 2019","Sheltered SH Homeless Unaccompanied Youth Age 18-24, 2019","Sheltered Total Homeless Unaccompanied Youth Age 18-24, 2019","Unsheltered Homeless Unaccompanied Youth Age 18-24, 2019","Homeless Parenting Youth (Under 25), 2019","Sheltered ES Homeless Parenting Youth (Under 25), 2019","Sheltered TH Homeless Parenting Youth (Under 25), 2019","Sheltered Total Homeless Parenting Youth (Under 25), 2019","Unsheltered Homeless Parenting Youth (Under 25), 2019","Homeless Parenting Youth Under 18, 2019","Sheltered ES Homeless Parenting Youth Under 18, 2019","Sheltered TH Homeless Parenting Youth Under 18, 2019","Sheltered Total Homeless Parenting Youth Under 18, 2019","Unsheltered Homeless Parenting Youth Under 18, 2019","Homeless Parenting Youth Age 18-24, 2019","Sheltered ES Homeless Parenting Youth Age 18-24, 2019","Sheltered TH Homeless Parenting Youth Age 18-24, 2019","Sheltered Total Homeless Parenting Youth Age 18-24, 2019","Unsheltered Homeless Parenting Youth Age 18-24, 2019","Homeless Children of Parenting Youth, 2019","Sheltered ES Homeless Children of Parenting Youth, 2019","Sheltered TH Homeless Children of Parenting Youth, 2019","Sheltered Total Homeless Children of Parenting Youth, 2019","Unsheltered Homeless Children of Parenting Youth, 2019"
0,AK-500,Anchorage CoC,Other Urban CoCs,1111.0,802.0,212.0,0.0,1014.0,97.0,799.0,632.0,70.0,0.0,702.0,97.0,312.0,170.0,142.0,312.0,0.0,89.0,49.0,40.0,89.0,0.0,91.0,73.0,0.0,73.0,18.0,79.0,61.0,0.0,61.0,18.0,12.0,12.0,12.0,0.0,47.0,34.0,3.0,0.0,37.0,10.0,106.0,69.0,36.0,0.0,105.0,1.0,3.0,3.0,0.0,0.0,3.0,0.0,103.0,66.0,36.0,0.0,102.0,1.0,17.0,5.0,12.0,17.0,0.0,0.0,0.0,0.0,0.0,0.0,17.0,5.0,12.0,17.0,0.0,24.0,12.0,12.0,24.0,0.0
1,AK-501,Alaska Balance of State CoC,Rural CoCs,796.0,416.0,204.0,0.0,620.0,176.0,585.0,306.0,110.0,0.0,416.0,169.0,211.0,110.0,94.0,204.0,7.0,72.0,40.0,29.0,69.0,3.0,178.0,97.0,0.0,97.0,81.0,152.0,73.0,0.0,73.0,79.0,26.0,24.0,24.0,2.0,64.0,19.0,29.0,0.0,48.0,16.0,70.0,33.0,17.0,0.0,50.0,20.0,11.0,9.0,2.0,0.0,11.0,0.0,59.0,24.0,15.0,0.0,39.0,20.0,3.0,1.0,2.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,2.0,3.0,0.0,3.0,1.0,2.0,3.0,0.0
2,AL-500,"Birmingham/Jefferson, St. Clair, Shelby Counti...",Suburban CoCs,981.0,444.0,182.0,29.0,655.0,326.0,805.0,341.0,125.0,29.0,495.0,310.0,176.0,103.0,57.0,160.0,16.0,64.0,39.0,20.0,59.0,5.0,101.0,70.0,13.0,83.0,18.0,97.0,66.0,13.0,79.0,18.0,4.0,4.0,4.0,0.0,91.0,66.0,9.0,0.0,75.0,16.0,67.0,19.0,7.0,0.0,26.0,41.0,4.0,3.0,0.0,0.0,3.0,1.0,63.0,16.0,7.0,0.0,23.0,40.0,7.0,0.0,7.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,7.0,0.0,7.0,7.0,0.0,7.0,0.0,7.0,7.0,0.0
3,AL-501,Mobile City & County/Baldwin County CoC,Other Urban CoCs,505.0,191.0,112.0,0.0,303.0,202.0,329.0,106.0,44.0,0.0,150.0,179.0,176.0,85.0,68.0,153.0,23.0,56.0,28.0,21.0,49.0,7.0,43.0,10.0,0.0,10.0,33.0,38.0,10.0,0.0,10.0,28.0,5.0,0.0,0.0,5.0,53.0,5.0,38.0,0.0,43.0,10.0,9.0,2.0,0.0,0.0,2.0,7.0,0.0,0.0,0.0,0.0,0.0,0.0,9.0,2.0,0.0,0.0,2.0,7.0,4.0,2.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,4.0,0.0,6.0,3.0,3.0,6.0,0.0
4,AL-502,Florence/Northwest Alabama CoC,Rural CoCs,390.0,113.0,76.0,0.0,189.0,201.0,383.0,106.0,76.0,0.0,182.0,201.0,7.0,7.0,0.0,7.0,0.0,3.0,3.0,0.0,3.0,0.0,21.0,19.0,0.0,19.0,2.0,21.0,19.0,0.0,19.0,2.0,0.0,0.0,0.0,0.0,20.0,15.0,0.0,0.0,15.0,5.0,156.0,8.0,0.0,0.0,8.0,148.0,148.0,0.0,0.0,0.0,0.0,148.0,8.0,8.0,0.0,0.0,8.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,WV-508,West Virginia Balance of State CoC,Rural CoCs,797.0,545.0,64.0,0.0,609.0,188.0,654.0,439.0,45.0,0.0,484.0,170.0,143.0,106.0,19.0,125.0,18.0,45.0,35.0,5.0,40.0,5.0,88.0,39.0,0.0,39.0,49.0,79.0,36.0,0.0,36.0,43.0,9.0,3.0,3.0,6.0,95.0,55.0,20.0,0.0,75.0,20.0,46.0,23.0,9.0,0.0,32.0,14.0,1.0,0.0,0.0,0.0,0.0,1.0,45.0,23.0,9.0,0.0,32.0,13.0,5.0,2.0,1.0,3.0,2.0,1.0,0.0,0.0,0.0,1.0,4.0,2.0,1.0,3.0,1.0,5.0,2.0,1.0,3.0,2.0
396,WY-500,Wyoming Statewide CoC,Rural CoCs,548.0,264.0,159.0,0.0,423.0,125.0,437.0,239.0,90.0,0.0,329.0,108.0,111.0,25.0,69.0,94.0,17.0,37.0,6.0,25.0,31.0,6.0,67.0,23.0,0.0,23.0,44.0,64.0,23.0,0.0,23.0,41.0,3.0,0.0,0.0,3.0,51.0,22.0,21.0,0.0,43.0,8.0,81.0,27.0,37.0,0.0,64.0,17.0,16.0,6.0,6.0,0.0,12.0,4.0,65.0,21.0,31.0,0.0,52.0,13.0,4.0,2.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,2.0,2.0,4.0,0.0,4.0,2.0,2.0,4.0,0.0
397,,Total,,567715.0,279327.0,75162.0,1933.0,356422.0,211293.0,396045.0,156673.0,40925.0,1933.0,199531.0,196514.0,171670.0,122654.0,34237.0,156891.0,14779.0,53692.0,38318.0,11026.0,49344.0,4348.0,105583.0,41638.0,969.0,42607.0,62976.0,96141.0,34231.0,969.0,35200.0,60941.0,9442.0,7407.0,7407.0,2035.0,37085.0,10802.0,11006.0,932.0,22740.0,14345.0,35038.0,11555.0,6107.0,46.0,17708.0,17330.0,3976.0,1478.0,396.0,0.0,1874.0,2102.0,31062.0,10077.0,5711.0,46.0,15834.0,15228.0,7564.0,5076.0,1982.0,7058.0,506.0,59.0,24.0,30.0,54.0,5.0,7505.0,5052.0,1952.0,7004.0,501.0,9779.0,6612.0,2597.0,9209.0,570.0
398,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


# 6. Rent Prices
Zillow Observed Rent Index (ZORI): A smoothed measure of the typical observed market rate rent across a given region. ZORI is a repeat-rent index that is weighted to the rental housing stock to ensure representativeness across the entire market, not just those homes currently listed for-rent. The index is dollar-denominated by computing the mean of listed rents that fall into the 40th to 60th percentile range for all homes and apartments in a given region, which is once again weighted to reflect the rental housing stock. Details available in ZORI methodology.
### TODO: find a ZIP code to county conversion

In [99]:
# Load the rent-price CSV: one row per region with one column per month
# (see the displayed output).
df_rent = pd.read_csv('../datasets/rent_prices/rent_prices.csv')
# Bare expression: renders the frame as the cell output.
df_rent

Unnamed: 0,RegionID,RegionName,SizeRank,MsaName,2014-01,2014-02,2014-03,2014-04,2014-05,2014-06,2014-07,2014-08,2014-09,2014-10,2014-11,2014-12,2015-01,2015-02,2015-03,2015-04,2015-05,2015-06,2015-07,2015-08,2015-09,2015-10,2015-11,2015-12,2016-01,2016-02,2016-03,2016-04,2016-05,2016-06,2016-07,2016-08,2016-09,2016-10,2016-11,2016-12,2017-01,2017-02,2017-03,2017-04,2017-05,2017-06,2017-07,2017-08,2017-09,2017-10,2017-11,2017-12,2018-01,2018-02,2018-03,2018-04,2018-05,2018-06,2018-07,2018-08,2018-09,2018-10,2018-11,2018-12,2019-01,2019-02,2019-03,2019-04,2019-05,2019-06,2019-07,2019-08,2019-09,2019-10,2019-11,2019-12,2020-01,2020-02,2020-03,2020-04,2020-05,2020-06,2020-07,2020-08,2020-09,2020-10
0,61639,10025,1,"New York, NY",3008.0,3020.0,3032.0,3045.0,3057.0,3069.0,3082.0,3094.0,3106.0,3118.0,3129.0,3141.0,3153.0,3162.0,3171.0,3180.0,3185.0,3190.0,3194.0,3198.0,3202.0,3206.0,3210.0,3213.0,3217.0,3219.0,3222.0,3225.0,3225.0,3226.0,3227.0,3225.0,3223.0,3221.0,3220.0,3219.0,3218.0,3219.0,3220.0,3220.0,3219.0,3217.0,3216.0,3214.0,3213.0,3211.0,3210.0,3209.0,3207.0,3207.0,3208.0,3208.0,3210.0,3211.0,3213.0,3216.0,3218.0,3221.0,3225.0,3229.0,3233.0,3239.0,3245.0,3252.0,3261.0,3271.0,3281.0,3289.0,3298.0,3306.0,3296.0,3287.0,3277.0,3249.0,3221.0,3193.0,3164.0,3134.0,3104.0,3071.0,3038.0,3005.0
1,84654,60657,2,"Chicago, IL",1577.0,1583.0,1588.0,1593.0,1598.0,1603.0,1608.0,1613.0,1618.0,1623.0,1628.0,1633.0,1638.0,1643.0,1647.0,1652.0,1656.0,1660.0,1664.0,1669.0,1673.0,1678.0,1683.0,1688.0,1693.0,1698.0,1703.0,1708.0,1712.0,1717.0,1722.0,1726.0,1731.0,1735.0,1738.0,1741.0,1744.0,1746.0,1747.0,1749.0,1751.0,1752.0,1754.0,1756.0,1757.0,1759.0,1760.0,1762.0,1763.0,1763.0,1764.0,1764.0,1764.0,1764.0,1764.0,1765.0,1766.0,1767.0,1769.0,1772.0,1774.0,1779.0,1784.0,1788.0,1795.0,1801.0,1807.0,1812.0,1817.0,1822.0,1822.0,1822.0,1822.0,1818.0,1814.0,1810.0,1806.0,1801.0,1797.0,1792.0,1786.0,1781.0
2,61637,10023,3,"New York, NY",3136.0,3145.0,3154.0,3163.0,3171.0,3179.0,3187.0,3195.0,3202.0,3210.0,3215.0,3220.0,3225.0,3229.0,3233.0,3236.0,3242.0,3248.0,3254.0,3261.0,3267.0,3274.0,3280.0,3285.0,3290.0,3290.0,3291.0,3291.0,3288.0,3284.0,3281.0,3278.0,3275.0,3272.0,3273.0,3274.0,3275.0,3276.0,3278.0,3279.0,3280.0,3281.0,3281.0,3280.0,3279.0,3279.0,3278.0,3276.0,3275.0,3277.0,3278.0,3280.0,3284.0,3288.0,3292.0,3296.0,3301.0,3305.0,3310.0,3315.0,3320.0,3327.0,3333.0,3340.0,3347.0,3355.0,3362.0,3368.0,3374.0,3380.0,3370.0,3360.0,3350.0,3324.0,3298.0,3272.0,3244.0,3217.0,3189.0,3158.0,3128.0,3097.0
3,91982,77494,4,"Houston, TX",1742.0,1746.0,1749.0,1753.0,1756.0,1759.0,1763.0,1766.0,1769.0,1772.0,1774.0,1776.0,1778.0,1778.0,1778.0,1778.0,1776.0,1775.0,1773.0,1769.0,1765.0,1762.0,1756.0,1751.0,1746.0,1739.0,1733.0,1727.0,1721.0,1715.0,1709.0,1706.0,1702.0,1699.0,1697.0,1695.0,1693.0,1692.0,1692.0,1692.0,1695.0,1698.0,1701.0,1706.0,1711.0,1715.0,1719.0,1723.0,1726.0,1729.0,1731.0,1733.0,1734.0,1734.0,1735.0,1735.0,1735.0,1735.0,1737.0,1738.0,1740.0,1743.0,1745.0,1747.0,1750.0,1752.0,1754.0,1756.0,1757.0,1759.0,1760.0,1761.0,1763.0,1765.0,1767.0,1769.0,1771.0,1773.0,1775.0,1777.0,1779.0,1781.0
4,84616,60614,5,"Chicago, IL",1747.0,1752.0,1757.0,1762.0,1767.0,1772.0,1776.0,1781.0,1785.0,1789.0,1794.0,1798.0,1803.0,1808.0,1813.0,1818.0,1824.0,1831.0,1837.0,1846.0,1854.0,1863.0,1871.0,1879.0,1887.0,1893.0,1898.0,1904.0,1908.0,1912.0,1915.0,1918.0,1920.0,1923.0,1924.0,1926.0,1928.0,1929.0,1930.0,1932.0,1934.0,1937.0,1940.0,1943.0,1947.0,1950.0,1953.0,1955.0,1958.0,1959.0,1961.0,1962.0,1963.0,1965.0,1966.0,1968.0,1971.0,1974.0,1977.0,1981.0,1984.0,1990.0,1996.0,2001.0,2008.0,2014.0,2021.0,2025.0,2030.0,2034.0,2035.0,2035.0,2036.0,2031.0,2027.0,2022.0,2017.0,2012.0,2007.0,2001.0,1995.0,1989.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3259,58624,2110,9469,"Boston, MA",4223.0,4217.0,4211.0,4204.0,,4196.0,4191.0,,,,,,4206.0,,4277.0,4313.0,4378.0,4444.0,4509.0,,4552.0,4574.0,4583.0,,4600.0,4615.0,4631.0,4647.0,4671.0,4695.0,4719.0,4756.0,4792.0,4829.0,,4873.0,,4896.0,4898.0,4899.0,4877.0,4854.0,4831.0,4808.0,,4763.0,4743.0,4722.0,4702.0,4680.0,4659.0,4637.0,4642.0,4646.0,4650.0,4666.0,4682.0,4698.0,4712.0,4726.0,4740.0,4760.0,4780.0,4799.0,4816.0,4832.0,4848.0,4838.0,4828.0,4818.0,4790.0,4762.0,4733.0,4686.0,,,4538.0,4486.0,4434.0,4377.0,4320.0,4262.0
3260,66128,20004,9592,"Washington, DC",,,2258.0,2264.0,,2276.0,,2289.0,2295.0,,,,2313.0,2313.0,2314.0,2314.0,2317.0,2320.0,2324.0,2329.0,2334.0,,2341.0,2343.0,,2355.0,2364.0,2373.0,2381.0,2389.0,2397.0,2400.0,2403.0,2406.0,2412.0,2418.0,2424.0,2425.0,2427.0,2428.0,2427.0,2426.0,2424.0,2423.0,2422.0,2421.0,2417.0,2413.0,2409.0,2406.0,2404.0,2401.0,2399.0,2396.0,2394.0,2392.0,2391.0,2389.0,2390.0,2391.0,2392.0,2394.0,2396.0,2397.0,2403.0,2408.0,2413.0,2421.0,2429.0,2437.0,2442.0,2448.0,2454.0,2457.0,2460.0,2463.0,2465.0,2468.0,2470.0,2472.0,2474.0,2476.0
3261,399647,80951,9634,"Colorado Springs, CO",,1237.0,1242.0,1247.0,1251.0,1256.0,1260.0,1265.0,1269.0,1274.0,1278.0,1282.0,1287.0,1291.0,1295.0,1299.0,1304.0,1308.0,1313.0,1319.0,1325.0,1331.0,1337.0,1343.0,1349.0,1354.0,1360.0,1366.0,1373.0,1379.0,1386.0,1394.0,1401.0,1408.0,1415.0,1422.0,1429.0,1435.0,1441.0,1447.0,1451.0,1455.0,1458.0,1462.0,1465.0,,1475.0,,1486.0,1495.0,1504.0,1512.0,1522.0,1531.0,1541.0,1549.0,,1566.0,1572.0,1578.0,1584.0,1588.0,1593.0,1597.0,1602.0,1607.0,1612.0,1617.0,,1628.0,1633.0,1637.0,1642.0,,1653.0,1659.0,1664.0,1670.0,1676.0,1681.0,1687.0,1692.0
3262,62128,11509,9912,"New York, NY",,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,14126.0,,,,,,,,,,,,,13311.0,,12935.0,,,,,,,,,,,,,,,,,,,,,,,,,


## 7. Businesses Data
Data dictionary (2018 county record layout): https://www2.census.gov/programs-surveys/cbp/technical-documentation/records-layouts/2018_record_layouts/county-layout-2018.txt
<br>
NAICS code descriptions (2017): https://www2.census.gov/programs-surveys/cbp/technical-documentation/reference/naics-descriptions/naics2017.txt

In [109]:
# Read every yearly business extract in one pass; files are loaded in the
# order listed and bound to the matching df_business_<yy> name.
(
    df_business_09,
    df_business_10,
    df_business_11,
    df_business_12,
    df_business_13,
    df_business_14,
    df_business_15,
    df_business_16,
    df_business_17,
    df_business_18,
) = map(pd.read_csv, [
    '../datasets/businesses/bus_09.txt',
    '../datasets/businesses/bus_10.txt',
    '../datasets/businesses/bus_11.txt',
    '../datasets/businesses/bus_12.txt',
    '../datasets/businesses/bus_13.txt',
    '../datasets/businesses/bus_14.txt',
    '../datasets/businesses/bus_15.txt',
    '../datasets/businesses/bus_16.txt',
    '../datasets/businesses/bus_17.txt',
    '../datasets/businesses/bus_18.txt',
])

# Show one of the loaded frames as a sanity check on the column layout.
df_business_11

Unnamed: 0,fipstate,fipscty,naics,empflag,emp_nf,emp,qp1_nf,qp1,ap_nf,ap,est,n1_4,n5_9,n10_19,n20_49,n50_99,n100_249,n250_499,n500_999,n1000,n1000_1,n1000_2,n1000_3,n1000_4,censtate,cencty
0,1,1,------,,G,10290,G,65926,G,276674,835,412,178,122,78,33,10,1,1,0,0,0,0,0,63,1
1,1,1,11----,,H,33,H,259,H,1163,6,5,0,1,0,0,0,0,0,0,0,0,0,0,63,1
2,1,1,113///,,H,31,H,237,H,1087,5,4,0,1,0,0,0,0,0,0,0,0,0,0,63,1
3,1,1,1133//,,H,31,H,237,H,1087,5,4,0,1,0,0,0,0,0,0,0,0,0,0,63,1
4,1,1,11331/,,H,31,H,237,H,1087,5,4,0,1,0,0,0,0,0,0,0,0,0,0,63,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2151502,56,999,811310,A,D,0,D,0,D,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,83,999
2151503,56,999,813///,A,D,0,D,0,D,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,83,999
2151504,56,999,8133//,A,D,0,D,0,D,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,83,999
2151505,56,999,81331/,A,D,0,D,0,D,0,2,2,0,0,0,0,0,0,0,0,0,0,0,0,83,999
