# Homelessness analysis (Homeless in the US in recent years)
This file analyzes homelessness data. The data comes from HUD (Housing and Urban Development), the U.S. Census Bureau (for the population of each state), and MIT (for red vs blue states). The exact links and access dates are in the readme.txt file.


In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import datetime as dt
import os
import statsmodels.api as sm
from IPython.display import display
%matplotlib inline
# Raise pandas' display limits so that long/wide frames shown in the cells below are not truncated.
pd.options.display.max_rows=5000 
pd.options.display.max_columns=50

### dfbeds -- the no. of beds dataframe
The number of shelter beds is imported. There are too many columns in this file to be useful, so "Total Beds (ES,TH,SH)" is the only column used -- this is the number of 'shelter beds', including ES (Emergency Shelter), TH (Transitional Housing), and SH (Safe Haven). There's one sheet per year in the workbook. The headings and the way of aggregating data were not completely consistent from year to year, and this is reflected in the data cleaning below.

In [None]:
# The Excel workbook has many sheets; here we load the one called "2008".
#   index_col=0   -> the first spreadsheet column (state abbreviations) becomes the dataframe index
#   header=[0, 1] -> the top two spreadsheet rows become a two-level (multi-index) column header
excel_sheet = '2008'
dftmp = pd.read_excel(
    '../input/homeless-in-america-version-2-20102019/2007-2019-Housing-Inventory-Count-by-State.xlsx',
    sheet_name=excel_sheet,
    header=[0, 1],
    index_col=0,
)

In [None]:
# preview the first rows of the sheet loaded above
dftmp.head()

In [None]:
# The sheet holds far more data than needed, so keep a single column as a Series.
# Columns have a two-level multi-index, hence the (top label, sub label) tuple used to select it.
# The Series is named after the year so it becomes the "2008" column when the years are combined later.
s2008 = dftmp[('Total Beds (ES,TH,SH)', 'Total Year-Round Beds (ES,TH,SH)')].rename('2008')

In [None]:
s2008.head() # a series whose index is the state abbreviations and whose values are the no. of beds

In [None]:
# Now do the same for the rest of the sheets (2009-2019).
# The workbook is opened once and every sheet is parsed from that single handle, instead of
# re-opening and re-parsing the whole .xlsx file for each year.


def _beds_series(workbook, sheet, column):
    """Return column ``column`` of sheet ``sheet`` as a Series named after the sheet.

    The sheet is read with its first column as the index and its top two rows as a
    two-level column header, so ``column`` must be a (top label, sub label) tuple.
    """
    frame = pd.read_excel(workbook, index_col=0, header=[0, 1], sheet_name=sheet)
    return frame[column].rename(sheet)


with pd.ExcelFile('../input/homeless-in-america-version-2-20102019/2007-2019-Housing-Inventory-Count-by-State.xlsx') as _hic_workbook:
    # 2009-2012 use the same column labels as the 2008 sheet.
    s2009, s2010, s2011, s2012 = (
        _beds_series(_hic_workbook, year, ('Total Beds (ES,TH,SH)', 'Total Year-Round Beds (ES,TH,SH)'))
        for year in ('2009', '2010', '2011', '2012')
    )

    # Sheet 2013 is a bit different in the header rows: the first row is skipped and a single header row
    # is used. Also, its total no. of year-round beds includes "RRH", so that is subtracted out for
    # consistency with the other years.
    dftmp = pd.read_excel(_hic_workbook, index_col=0, skiprows=[0], sheet_name='2013')
    s2013 = (dftmp['Total Year-Round Beds (ES,TH,RRH,SH)'] - dftmp['Total Year-Round RRH Beds']).rename('2013')

    # Sheet 2014 and subsequent sheets have extra spaces in the column labels ("ES, TH, SH").
    s2014, s2015, s2016, s2017, s2018, s2019 = (
        _beds_series(_hic_workbook, year, ('Total Beds (ES, TH, SH)', 'Total Year-Round Beds (ES, TH, SH)'))
        for year in ('2014', '2015', '2016', '2017', '2018', '2019')
    )

In [None]:
# Sanity check on the no. of rows per year. With 50 states one would expect about 50 entries, but there
# are about 55: the data also includes places like Wash. D.C. (not a state), and there are more
# entries in the later years.
yearly_series = [s2008, s2009, s2010, s2011, s2012, s2013,
                 s2014, s2015, s2016, s2017, s2018, s2019]
for series in yearly_series:
    print(series.name, ' ', len(series))

In [None]:
# These are supposed to be states, but may include territories etc. -- which entries were added after 2016?
# It turns out that there's an entry for "MP", which may be the Northern Marianas (a territory).
# Set difference is not symmetric: this is "labels in 2017 that are not in 2016".
set(s2017.index).difference(s2016.index)

In [None]:
# Here is what a typical series looks like. Some entries, like the one for Wash. DC and the "totals", are
# extraneous & will be deleted in a later cell.
s2016

In [None]:
# Combine all of the yearly series into a single dataframe with the years as column headings.
# Each series is named "2008", "2009", etc., and merge uses those names as the column labels.
# Start from 2008 and merge in one more year per iteration, matching rows on the index.
dftmp = s2008
for next_year in (s2009, s2010, s2011, s2012, s2013, s2014,
                  s2015, s2016, s2017, s2018, s2019):
    dftmp = pd.merge(dftmp, next_year, left_index=True, right_index=True)

In [None]:
# Keep only rows whose index label is one of the 50 state abbreviations -- this gets rid of
# territories and any other row that is not recognizable as a state.
state_list = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD',
    'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH',
    'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY',
]
is_state = dftmp.index.isin(state_list)  # True for the rows to keep
dfbeds = dftmp[is_state]  # the temp dataframe minus the extraneous rows

In [None]:
# Flip the table so that rows are years and columns are states, and label both axes accordingly.
# The row labels are still the year as a string value.
dfbeds = dfbeds.T.rename_axis(index='year', columns='state')
dfbeds.head()

In [None]:
# the index for dfbeds is still a bunch of strings like "2008" so make a range of years and make those the indices instead.
# NOTE(review): set_index returns a NEW dataframe and the result is not assigned here, so this cell only
# displays a re-indexed copy -- dfbeds itself keeps its string index. Later cells combine dfbeds with
# dfnohome by index label, so if this is ever turned into an assignment, dfnohome must be re-indexed
# the same way at the same time.
rng=pd.period_range('2008','2019',freq='Y')
dfbeds.set_index(rng)

In [None]:
# preview dfbeds: one row per year, one column per state
dfbeds.head()

### people dataframe

In [None]:
# For each yearly sheet in the Point-in-Time workbook, take the "Overall Homeless, <year>" column and
# put it into a series: each row in the series is a state (or other entity), the name of the series
# is the year in question, and the data is the number of homeless people.
# The workbook is opened once and every sheet is parsed from that single handle, instead of
# re-opening and re-parsing the whole .xlsx file for each year.
# NOTE: the names s2008 ... s2019 are reused, so from here on they hold homeless counts, not beds.


def _homeless_series(workbook, sheet):
    """Return the 'Overall Homeless, <sheet>' column of sheet ``sheet`` as a Series named ``sheet``."""
    frame = pd.read_excel(workbook, index_col=0, header=[0], sheet_name=sheet)
    return frame['Overall Homeless, ' + sheet].rename(sheet)


with pd.ExcelFile('../input/homeless-in-america-version-2-20102019/2007-2019-Point-in-Time-Estimates-by-state.xlsx') as _pit_workbook:
    (s2008, s2009, s2010, s2011, s2012, s2013,
     s2014, s2015, s2016, s2017, s2018, s2019) = (
        _homeless_series(_pit_workbook, year)
        for year in ('2008', '2009', '2010', '2011', '2012', '2013',
                     '2014', '2015', '2016', '2017', '2018', '2019')
    )

In [None]:
# Combine the yearly homeless-count series into a single dataframe, years as column headings.
# Same approach as for dfbeds: start from 2008 and merge in one more year per iteration,
# matching rows on the index.
dftmp = s2008
for next_year in (s2009, s2010, s2011, s2012, s2013, s2014,
                  s2015, s2016, s2017, s2018, s2019):
    dftmp = pd.merge(dftmp, next_year, left_index=True, right_index=True)

In [None]:
# summary of the merged frame: row count, columns (one per year) and their dtypes
dftmp.info()

In [None]:
# As with dfbeds, keep only rows whose index label is one of the 50 state abbreviations;
# anything that's not recognizable as a state is discarded.
state_list = [
    'AK', 'AL', 'AR', 'AZ', 'CA', 'CO', 'CT', 'DE', 'FL', 'GA',
    'HI', 'IA', 'ID', 'IL', 'IN', 'KS', 'KY', 'LA', 'MA', 'MD',
    'ME', 'MI', 'MN', 'MO', 'MS', 'MT', 'NC', 'ND', 'NE', 'NH',
    'NJ', 'NM', 'NV', 'NY', 'OH', 'OK', 'OR', 'PA', 'RI', 'SC',
    'SD', 'TN', 'TX', 'UT', 'VA', 'VT', 'WA', 'WI', 'WV', 'WY',
]
is_state = dftmp.index.isin(state_list)  # True for the rows to keep
dfnohome = dftmp[is_state]

In [None]:
# Flip the table so that rows are years and columns are states, and label both axes accordingly.
# The row labels are still the year as a string value.
dfnohome = dfnohome.T.rename_axis(index='year', columns='state')
dfnohome.head()

In [None]:
# check the dtypes of the per-state columns
dfnohome.info()

In [None]:
# those entries should have been integers above, but they're reported as dtype "object". Converting
# (astype(int) raises if any entry cannot be converted to an integer):
dfnohome=dfnohome.astype(int)

In [None]:
# as with dfbeds, make the entries for years a range of years, not strings
# NOTE(review): set_index returns a NEW dataframe and the result is not assigned here, so this cell only
# displays a re-indexed copy -- dfnohome itself keeps its string index. Later cells combine dfnohome with
# dfbeds by index label, so if this is ever turned into an assignment, dfbeds must be re-indexed
# the same way at the same time.
rng=pd.period_range('2008','2019',freq='Y')
dfnohome.set_index(rng)

In [None]:
# preview dfnohome: one row per year, one column per state
dfnohome.head()

### populations by state

In [None]:
# Get the populations by state from the census csv file.
# Only column 4 (read as the state name) and columns 7-16 (read as the years 2010-2019) are used;
# the file's own first row is skipped and the column names below are used instead.
my_cols = [4] + list(range(7, 17))
my_names = ['state_name'] + [str(year) for year in range(2010, 2020)]
dftmp = pd.read_csv(
    '../input/homeless-in-america-version-2-20102019/nst-est2019-alldata.csv',
    skiprows=1,
    usecols=my_cols,
    names=my_names,
)

In [None]:
dftmp # shows the whole frame (not just the first rows) -- extraneous entries will be dropped below.

In [None]:
# list the names in the "state_name" column; these full names get mapped to abbreviations below
dftmp.state_name

In [None]:
# NOTE(review): this repeats the display of the previous cell and could be removed
dftmp.state_name

In [None]:
# Build a {full state name: abbreviation} dictionary for all 50 states from a csv file.
dftmp2 = pd.read_csv(
    '../input/homeless-in-america-version-2-20102019/state_abbrev_dict.csv',
    index_col=None,
    header=None,
    names=['state', 'abbrev'],
)
abbr_dict = dict(zip(dftmp2.state, dftmp2.abbrev))

# Add a NEW column called "state" holding each row's abbreviation; names that are not in the
# dictionary (e.g., Puerto Rico) get the placeholder string "NOTASTATE".
dftmp['state'] = [abbr_dict.get(full_name, 'NOTASTATE') for full_name in dftmp.state_name]

# Drop everything that's not a state, dump the full-name column, and make the abbreviation the index.
dftmp = (
    dftmp[dftmp['state'] != 'NOTASTATE']
    .drop('state_name', axis=1)
    .set_index('state')
)

# see McKinney book on 'period' -- make the column names actual years, not strings that represent years
rng = pd.period_range('2010', '2019', freq='Y')
dftmp.columns = rng
dfpop = dftmp.copy()

In [None]:
dfpop.head() # rows are states and columns are years here, so it still needs to be transposed

In [None]:
# transposing: afterwards rows are the yearly periods and columns are the state abbreviations,
# the same orientation as dfbeds and dfnohome
dfpop=dfpop.T

In [None]:
# preview dfpop after transposing: one row per year, one column per state
dfpop.head()

### analysis
The analysis below has not been closely scrutinized lately, but at this point there are three dataframes you can work with:

- `dfbeds` -- no. of beds available
- `dfnohome` -- no. of homeless people
- `dfpop` -- populations of states

In [None]:
# It would be good to know the no. of homeless normalized by each state's population.
# Obviously there are fewer homeless in Alaska than in California, but how many are there relative to
# each state's residents?
# Get homeless and population data into the same shape -- the former has two more rows (years) than the latter:
dftmp1 = dfnohome[2:].copy()
# here's another temporary dataframe:
dftmp2 = dfpop.copy()

# dfpop is indexed by yearly Periods, while dfnohome may be indexed by year strings such as "2010".
# Compare both on their string form so rows are matched by year label whichever index type is in use.
year_labels = dftmp1.index.astype(str)
statepop = dftmp2.copy()
statepop.index = statepop.index.astype(str)
# .loc raises KeyError if a year or a state is missing from the population table, and it returns the
# population rows/columns in the same order as dftmp1.
statepop = statepop.loc[year_labels, dftmp1.columns]

# No. of homeless per 100,000 residents, computed for the whole table at once.
# This replaces a cell-by-cell loop that stored results with chained indexing
# (df.loc[row][col] = value); that form assigns into an intermediate object and is not guaranteed
# to update the dataframe itself.
dfhomepop = pd.DataFrame(
    dftmp1.to_numpy(dtype=float) / statepop.to_numpy(dtype=float) * 100000.0,
    index=dftmp1.index,
    columns=dftmp1.columns,
)

In [None]:
# last two years of homeless per 100,000 residents
dfhomepop.tail(2)

In [None]:
# Horizontal bar chart of the last row of dfhomepop (the 2019 data), states sorted by value.
latest_rate = dfhomepop.iloc[-1].sort_values()
sns.set_style('whitegrid')
fig, axis = plt.subplots(figsize=(6, 9))
sns.barplot(x=latest_rate.values, y=latest_rate.index, color='cyan', ax=axis)
axis.set_title('homeless per 100,000 residents (by state) (2019 only)')
axis.set_ylabel('')
axis.set_xlabel('');

In [None]:
# no. of shelter beds per homeless person:
dftmp5 = dfbeds.copy()
# here's another temporary dataframe:
dftmp6 = dfnohome.copy()

# Match the two tables by year label on the string form of their indexes, so this works whether the
# years are stored as strings such as "2008" or as yearly Periods.
year_labels = dftmp5.index.astype(str)
nohome = dftmp6.copy()
nohome.index = nohome.index.astype(str)
# .loc raises KeyError if a year or a state is missing from the homeless table, and it returns the
# rows/columns in the same order as dftmp5.
nohome = nohome.loc[year_labels, dftmp5.columns]

# Beds per homeless person, computed for the whole table at once.
# This replaces a cell-by-cell loop that stored results with chained indexing
# (df.loc[row][col] = value); that form assigns into an intermediate object and is not guaranteed
# to update the dataframe itself.
dfbedhome = pd.DataFrame(
    dftmp5.to_numpy(dtype=float) / nohome.to_numpy(dtype=float),
    index=dftmp5.index,
    columns=dftmp5.columns,
)

In [None]:
# preview beds per homeless person: one row per year, one column per state
dfbedhome.head()

In [None]:
# Horizontal bar chart of the last row of dfbedhome (the 2019 data), states sorted by value.
latest_ratio = dfbedhome.iloc[-1].sort_values()
sns.set_style('whitegrid')
fig, axis = plt.subplots(figsize=(6, 9))
sns.barplot(x=latest_ratio.values, y=latest_ratio.index, color='cyan', ax=axis)
axis.set_title('beds per homeless person (by state) (2019 only)')
axis.set_ylabel('')
axis.set_xlabel('');

In [None]:
# maybe states with the most homeless people per 100,000 residents have the lowest bed-to-homeless ratios?
# Take the LAST row of each table (the latest year) rather than hard-coded row positions: the two tables
# have different numbers of rows, and fixed positions would silently pick the wrong year if either changes.
homeless_rate = dfhomepop.iloc[-1].to_frame('homelessper100000')  # homeless per 100,000, one row per state
beds_ratio = dfbedhome.iloc[-1].to_frame('bedsperhomeless')  # beds per homeless person, one row per state
# merge on the state index -> a dataframe with states as index and two columns
dftmp12 = homeless_rate.merge(beds_ratio, left_index=True, right_index=True)
dftmp12.head()

In [None]:
# Scatter plot with a fitted regression line: beds per homeless person vs. homeless per 100,000 residents.
fig, axis = plt.subplots(figsize=(6, 6))
x = dftmp12['homelessper100000'].astype('float')
y = dftmp12['bedsperhomeless'].astype('float')
sns.regplot(x=x, y=y)
plt.xlabel('homeless per 100,000 residents')
plt.ylabel('beds per homeless person')
# State labels at hand-picked coordinates (fixed numbers, not looked up from the data).
for label_text, label_xy in (('HI', (450, 0.4)),
                             ('NY', (450, 0.9)),
                             ('OR', (335, 0.42)),
                             ('CA', (337, 0.28))):
    plt.annotate(label_text, xy=label_xy, size=15)

In [None]:
# Ordinary least squares fit of y on x. add_constant appends a constant column so the fit includes
# a (non-zero) y intercept; without it the line would be forced through the origin.
x2 = sm.add_constant(x)
model = sm.OLS(endog=y, exog=x2).fit()
pred = model.predict(exog=x2)
model.summary()

In [None]:
# Stuff you can get from the linear model.
# model.params holds the fitted coefficients as a labelled Series, in the column order of x2:
# the constant (intercept) first, then the slope. Use .iloc for access by position -- plain
# params[0] on a label-indexed Series relies on a deprecated positional fallback.
b = model.params.iloc[0]  # intercept
m = model.params.iloc[1]  # slope
rsqared = model.rsquared
rsqradj = model.rsquared_adj
pred = model.fittedvalues

In [None]:
# print the library's own description of the rainbow test for linearity
print(sm.stats.linear_rainbow.__doc__)

In [None]:
# Returns (test statistic, p-value). The null hypothesis is that the linear fit is adequate, so a LOW
# p-value (say less than 0.05) is evidence AGAINST linearity; a high p-value means a linear fit is not rejected.
sm.stats.linear_rainbow(model)

In [None]:
# Overlay the statsmodels OLS predictions on seaborn's regplot: the two lines coincide, which shows
# that both do the same fit.
fig, axis = plt.subplots(figsize=(6, 6))

sns.regplot(x=x, y=y)
sns.lineplot(x=x, y=pred)

plt.xlabel('homeless per 100,000 residents')
plt.ylabel('beds per homeless person')
# State labels at hand-picked coordinates (fixed numbers, not looked up from the data).
for label_text, label_xy in (('HI', (450, 0.4)),
                             ('NY', (450, 0.9)),
                             ('OR', (335, 0.42)),
                             ('CA', (337, 0.28))):
    plt.annotate(label_text, xy=label_xy, size=15)
my_text = 'weak correlation:\nrsqradj = %.2f' % (rsqradj)
plt.annotate(my_text, xy=(200, 1.2), size=15);

In [None]:
# Show the three time-trending values -- population, no. homeless, no. beds -- summed over the 50 states.
# Population data doesn't start until 2010, so the first two years of the other two are cut off to match.
SnohomeTOT = dfnohome.sum(axis=1)[2:]
SbedsTOT = dfbeds.sum(axis=1)[2:]
SpopTOT = dfpop.sum(axis=1)
rng = pd.period_range('2010', '2019', freq='Y')

fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(6, 9))
#plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

# one panel per quantity, top to bottom; population is shown in millions
for panel, totals in zip(axes, (SpopTOT / 1E6, SnohomeTOT, SbedsTOT)):
    sns.lineplot(x=rng.to_timestamp(), y=totals, marker='o', ax=panel)

axes[0].set_ylabel('US population (millions)')
axes[1].set_ylabel('no. of homeless in US')
axes[2].set_ylabel('no. of shelter beds in US')

In [None]:
# Normalized by 2010 data: each series is divided by its own first entry.
# .iloc[0] selects the first entry by position; plain [0] on these series (whose indexes are year
# labels, not integers) relies on a deprecated positional fallback.
SnohomeTOTNORM = SnohomeTOT / SnohomeTOT.iloc[0]
SbedsTOTNORM = SbedsTOT / SbedsTOT.iloc[0]
SpopTOTNORM = SpopTOT / SpopTOT.iloc[0]
rng = pd.period_range('2010', '2019', freq='Y')

fig, axes = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(6, 9))
#plt.subplots_adjust(left=None, bottom=None, right=None, top=None, wspace=None, hspace=0.4)

sns.lineplot(x=rng.to_timestamp(), y=SpopTOTNORM, marker='o', ax=axes[0])
axes[0].set_ylabel('US population\n(normalized to 2010 data)')

sns.lineplot(x=rng.to_timestamp(), y=SnohomeTOTNORM, marker='o', ax=axes[1])
axes[1].set_ylabel('homeless in US\n(normalized to 2010 data)')

sns.lineplot(x=rng.to_timestamp(), y=SbedsTOTNORM, marker='o', ax=axes[2])
axes[2].set_ylabel('shelter beds in US\n(normalized to 2010 data)');

In [None]:
# total shelter beds divided by total homeless, per year (element-wise, matched on the year index)
Sratio = SbedsTOT/SnohomeTOT # overall, the no. of beds to homeless seems to be improving a bit, at least until 2018

In [None]:
# display the yearly beds-per-homeless ratio
Sratio

In [None]:
# Line plot of the nationwide beds-per-homeless ratio, one point per year (2010-2019).
rng = pd.period_range('2010', '2019', freq='Y')
fig, axis = plt.subplots(figsize=(6, 6))
sns.lineplot(x=rng.to_timestamp(), y=Sratio, marker='o', ax=axis)
axis.set_ylabel('beds per homeless person (all of US)');

In [None]:
dfbedhome.head() # beds per homeless person, one row per year and one column per state

In [None]:
# do states that are blue (democrats) have a better or worse ratio than the red states (republicans)? 2019 values only
# THIS DOES NOT WORK RIGHT NOW.