In [1]:
#import dependencies
import pandas as pd
from pathlib import Path

In [3]:
#read in homeless csv
home_df = pd.read_csv(Path('Resources/clean_homeless.csv'))
home_df.head()

Unnamed: 0,State_Year,Year,State,Measures,Count
0,2007_AK,2007,AK,Chronically Homeless Individuals,224
1,2007_AK,2007,AK,Homeless Individuals,696
2,2007_AK,2007,AK,Homeless People in Families,278
3,2007_AK,2007,AK,Sheltered Chronically Homeless Individuals,187
4,2007_AK,2007,AK,Sheltered Homeless,842


In [4]:
#read in education csv
edu_df = pd.read_csv(Path('Resources/clean_education.csv'))
edu_df.head()

Unnamed: 0,State_Year,STATE,YEAR,TOTAL_REVENUE,TOTAL_EXPENDITURE,GRADES_9_12_G,GRADES_ALL_G
0,1992_AL,AL,1992,2678885.0,2653798.0,,731634.0
1,1992_AK,AK,1992,1049591.0,972488.0,,122487.0
2,1992_AZ,AZ,1992,3258079.0,3401580.0,,673477.0
3,1992_AR,AR,1992,1711959.0,1743022.0,,441490.0
4,1992_CA,CA,1992,26260025.0,27138832.0,,5254844.0


In [5]:
#fill NaN with 0
edu_df.fillna(0)

Unnamed: 0,State_Year,STATE,YEAR,TOTAL_REVENUE,TOTAL_EXPENDITURE,GRADES_9_12_G,GRADES_ALL_G
0,1992_AL,AL,1992,2678885.0,2653798.0,0.0,731634.0
1,1992_AK,AK,1992,1049591.0,972488.0,0.0,122487.0
2,1992_AZ,AZ,1992,3258079.0,3401580.0,0.0,673477.0
3,1992_AR,AR,1992,1711959.0,1743022.0,0.0,441490.0
4,1992_CA,CA,1992,26260025.0,27138832.0,0.0,5254844.0
...,...,...,...,...,...,...,...
1710,2019_VA,VA,2019,0.0,0.0,0.0,0.0
1711,2019_WA,WA,2019,0.0,0.0,0.0,0.0
1712,2019_WEST_VIRGINIA,WEST_VIRGINIA,2019,0.0,0.0,0.0,0.0
1713,2019_WI,WI,2019,0.0,0.0,0.0,0.0


In [6]:
#get homeless types
homeless_measure = home_df['Measures'].unique()
homeless_measure

array(['Chronically Homeless Individuals', 'Homeless Individuals',
       'Homeless People in Families',
       'Sheltered Chronically Homeless Individuals', 'Sheltered Homeless',
       'Sheltered Homeless Individuals',
       'Sheltered Homeless People in Families', 'Total Homeless',
       'Unsheltered Chronically Homeless Individuals',
       'Unsheltered Homeless', 'Unsheltered Homeless Individuals',
       'Unsheltered Homeless People in Families', 'Chronically Homeless',
       'Chronically Homeless People in Families', 'Homeless Veterans',
       'Sheltered Chronically Homeless',
       'Sheltered Chronically Homeless People in Families',
       'Sheltered Homeless Veterans', 'Unsheltered Chronically Homeless',
       'Unsheltered Chronically Homeless People in Families',
       'Unsheltered Homeless Veterans', 'Children of Parenting Youth',
       'Homeless Unaccompanied Children (Under 18)',
       'Homeless Unaccompanied Young Adults (Age 18-24)',
       'Homeless Unaccompan

In [7]:
#bin sheltered vs nonsheltered
grouping_lists = [['Sheltered Chronically Homeless Individuals','Sheltered Homeless','Sheltered Homeless Individuals',
                  'Sheltered Homeless People in Families','Sheltered Chronically Homeless','Sheltered Chronically Homeless People in Families',
                  'Sheltered Homeless Veterans','Sheltered Children of Parenting Youth','Sheltered Homeless Unaccompanied Children (Under 18)',
                  'Sheltered Homeless Unaccompanied Young Adults (Age 18-24)','Sheltered Homeless Unaccompanied Youth (Under 25)','Sheltered Parenting Youth (Under 25)',
                  'Sheltered Parenting Youth Age 18-24','Sheltered Parenting Youth Under 18',],['Unsheltered Chronically Homeless Individuals',
                  'Unsheltered Homeless', 'Unsheltered Homeless Individuals','Unsheltered Homeless People in Families',
                  'Unsheltered Chronically Homeless','Unsheltered Chronically Homeless People in Families','Unsheltered Homeless Veterans',
                  'Unsheltered Children of Parenting Youth','Unsheltered Homeless Unaccompanied Children (Under 18)',
                  'Unsheltered Homeless Unaccompanied Young Adults (Age 18-24)','Unsheltered Homeless Unaccompanied Youth (Under 25)','Unsheltered Parenting Youth (Under 25)',
                  'Unsheltered Parenting Youth Age 18-24','Unsheltered Parenting Youth Under 18'], ['Chronically Homeless Individuals', 'Homeless Individuals',
                  'Homeless People in Families','Total Homeless','Chronically Homeless','Chronically Homeless People in Families', 'Homeless Veterans',
                  'Children of Parenting Youth','Homeless Unaccompanied Children (Under 18)','Homeless Unaccompanied Young Adults (Age 18-24)','Homeless Unaccompanied Youth (Under 25)',
                  'Parenting Youth (Under 25)', 'Parenting Youth Age 18-24','Parenting Youth Under 18',]]
group_names = ['Sheltered', 'Unsheltered','Other']

groups = (pd.DataFrame({'Groups':group_names, 'Measures': grouping_lists})
         .explode('Measures')
         .reset_index(drop=True))

home_df = home_df.merge(groups, on='Measures', how='left')
home_df.head()

Unnamed: 0,State_Year,Year,State,Measures,Count,Groups
0,2007_AK,2007,AK,Chronically Homeless Individuals,224,Other
1,2007_AK,2007,AK,Homeless Individuals,696,Other
2,2007_AK,2007,AK,Homeless People in Families,278,Other
3,2007_AK,2007,AK,Sheltered Chronically Homeless Individuals,187,Sheltered
4,2007_AK,2007,AK,Sheltered Homeless,842,Sheltered


In [8]:
#drop Measures col
home_df = home_df.drop(columns=['Measures'])
home_df.head()

Unnamed: 0,State_Year,Year,State,Count,Groups
0,2007_AK,2007,AK,224,Other
1,2007_AK,2007,AK,696,Other
2,2007_AK,2007,AK,278,Other
3,2007_AK,2007,AK,187,Sheltered
4,2007_AK,2007,AK,842,Sheltered


In [9]:
#merge datasets
merged_df = home_df.merge(edu_df, on='State_Year',how='inner')
merged_df.head()

Unnamed: 0,State_Year,Year,State,Count,Groups,STATE,YEAR,TOTAL_REVENUE,TOTAL_EXPENDITURE,GRADES_9_12_G,GRADES_ALL_G
0,2007_AK,2007,AK,224,Other,AK,2007,1800616.0,1938755.0,42049.0,131029.0
1,2007_AK,2007,AK,696,Other,AK,2007,1800616.0,1938755.0,42049.0,131029.0
2,2007_AK,2007,AK,278,Other,AK,2007,1800616.0,1938755.0,42049.0,131029.0
3,2007_AK,2007,AK,187,Sheltered,AK,2007,1800616.0,1938755.0,42049.0,131029.0
4,2007_AK,2007,AK,842,Sheltered,AK,2007,1800616.0,1938755.0,42049.0,131029.0


In [11]:
merged_df = merged_df.drop(columns=['STATE','YEAR'])
merged_df.head()

Unnamed: 0,State_Year,Year,State,Count,Groups,TOTAL_REVENUE,TOTAL_EXPENDITURE,GRADES_9_12_G,GRADES_ALL_G
0,2007_AK,2007,AK,224,Other,1800616.0,1938755.0,42049.0,131029.0
1,2007_AK,2007,AK,696,Other,1800616.0,1938755.0,42049.0,131029.0
2,2007_AK,2007,AK,278,Other,1800616.0,1938755.0,42049.0,131029.0
3,2007_AK,2007,AK,187,Sheltered,1800616.0,1938755.0,42049.0,131029.0
4,2007_AK,2007,AK,842,Sheltered,1800616.0,1938755.0,42049.0,131029.0


In [12]:
#export preprocessed data
edu_df.to_csv('Resources/homeless_edu.csv',index=False)