In [1]:
import os
import warnings

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
pd.options.mode.chained_assignment = None

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 10.0)
import seaborn as sns

with warnings.catch_warnings():
    warnings.filterwarnings("ignore",category=DeprecationWarning)

Data source:https://www.cde.state.co.us/dropoutprevention/homeless_data

This data is commonly called "McKinney-Vento data" after the legislation that mandated the reporting and services for homeless students. The data used here is from the 2011 - 2012 school year.

In [2]:
homels_students_tmp = pd.read_excel('../../npsg_datafiles/McKinney_Vento_Data_2011-12_County_edited.xls') 

In [3]:
homels_students_tmp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 243 entries, 0 to 242
Data columns (total 7 columns):
District Code                                           243 non-null object
District Name                                           243 non-null object
Shelters, transitional housing, awaiting foster care    243 non-null int64
Doubled-up                                              243 non-null int64
Unsheltered                                             243 non-null int64
Hotels/Motels                                           243 non-null int64
TOTAL                                                   243 non-null int64
dtypes: int64(5), object(2)
memory usage: 13.4+ KB


In [4]:
homels_students_tmp.head(10)

Unnamed: 0,District Code,District Name,"Shelters, transitional housing, awaiting foster care",Doubled-up,Unsheltered,Hotels/Motels,TOTAL
0,0010,Mapleton 1,0,138,24,4,166
1,0020,Adams 12 Five Star Schools,91,1127,16,102,1336
2,0030,Adams County 14,67,823,4,40,934
3,0040,Brighton 27J,14,389,9,17,429
4,0050,Bennett 29J,0,14,0,0,14
5,0060,Strasburg 31J,0,0,0,0,0
6,0070,Westminster 50,27,1472,5,13,1517
7,ADAMS,TOTALS,199,3963,58,176,4396
8,0100,Alamosa RE-11J,17,25,5,2,49
9,0110,Sangre de Cristo RE-22J,0,26,8,0,34


In [5]:
homels_students_tmp.tail(10)

Unnamed: 0,District Code,District Name,"Shelters, transitional housing, awaiting foster care",Doubled-up,Unsheltered,Hotels/Motels,TOTAL
233,WELD,TOTALS,54,639,37,40,770
234,3200,Yuma 1,0,45,0,0,45
235,3210,Wray RD-2,1,20,0,0,21
236,3230,Liberty J-4,0,0,0,0,0
237,YUMA,TOTALS,1,65,0,0,66
238,8001,Charter School Institute,20,69,14,6,109
239,9000,Colorado School for the Deaf and the Blind,0,1,0,0,1
240,9035,Centennial BOCES,56,45,8,2,111
241,9055,San Luis Valley BOCES,9,252,36,20,317
242,MISC,TOTALS,85,367,58,28,538


In [6]:
homels_students_tmp = homels_students_tmp.loc[homels_students_tmp['District Name'] == 'TOTALS']

In [7]:
homels_students_tmp = homels_students_tmp.loc[homels_students_tmp['District Code'] != 'MISC']
homels_students_2012 = homels_students_tmp.loc[homels_students_tmp['District Code'] != 'NaN']

In [8]:
homels_students_2012.head(20)

Unnamed: 0,District Code,District Name,"Shelters, transitional housing, awaiting foster care",Doubled-up,Unsheltered,Hotels/Motels,TOTAL
7,ADAMS,TOTALS,199,3963,58,176,4396
10,ALAMOSA,TOTALS,17,51,13,2,83
18,ARAPAHOE,TOTALS,266,2492,22,269,3049
20,ARCHULETA,TOTALS,0,13,0,0,13
26,BACA,TOTALS,0,0,0,0,0
29,BENT,TOTALS,0,0,0,0,0
32,BOULDER,TOTALS,392,1092,54,61,1599
35,CHAFFEE,TOTALS,1,7,0,1,9
38,CHEYENNE,TOTALS,0,0,0,0,0
40,CLEAR CREEK,TOTALS,0,4,0,6,10


In [9]:
homels_students_2012.rename(columns=lambda x: x.replace(', ', '_'), inplace=True)
homels_students_2012.rename(columns=lambda x: x.replace(' ', '_'), inplace=True)
homels_students_2012.rename(columns=lambda x: x.replace('/', '_'), inplace=True)
homels_students_2012.rename(columns=lambda x: x.lower(), inplace=True)
homels_students_2012.rename(columns={'district_code':'county_name'}, inplace=True)
homels_students_2012 = homels_students_2012.drop(['district_name'], axis=1)

In [10]:
homels_students_2012.columns = [col_name + '_2011_12' for col_name in homels_students_2012.columns]

In [11]:
homels_students_2012['county_name_2011_12'] = homels_students_2012['county_name_2011_12'].str.title()

In [12]:
homels_students_2012.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 63 entries, 7 to 237
Data columns (total 6 columns):
county_name_2011_12                                           63 non-null object
shelters_transitional_housing_awaiting_foster_care_2011_12    63 non-null int64
doubled-up_2011_12                                            63 non-null int64
unsheltered__2011_12                                          63 non-null int64
hotels_motels_2011_12                                         63 non-null int64
total_2011_12                                                 63 non-null int64
dtypes: int64(5), object(1)
memory usage: 3.4+ KB


In [13]:
homels_students_2012.head(20)

Unnamed: 0,county_name_2011_12,shelters_transitional_housing_awaiting_foster_care_2011_12,doubled-up_2011_12,unsheltered__2011_12,hotels_motels_2011_12,total_2011_12
7,Adams,199,3963,58,176,4396
10,Alamosa,17,51,13,2,83
18,Arapahoe,266,2492,22,269,3049
20,Archuleta,0,13,0,0,13
26,Baca,0,0,0,0,0
29,Bent,0,0,0,0,0
32,Boulder,392,1092,54,61,1599
35,Chaffee,1,7,0,1,9
38,Cheyenne,0,0,0,0,0
40,Clear Creek,0,4,0,6,10


In [14]:
homels_students_2017 = pd.read_csv('../../npsg_datafiles/homeless_students_by_county_2016_17.csv') 

In [15]:
homels_students_2017 = homels_students_2017.drop(['county_only_totals_2016_17'], axis=1)

In [16]:
homels_students_2017.head(20)

Unnamed: 0,county_name_2016_17,shelters_transitional_housing_awaiting_foster_care_2016_17,doubled-up_due_to_economic_hardship_2016_17,unsheltered_2016_17,hotels_motels_2016_17,total_2016_17
0,Adams,221.0,3195.0,67.0,125.0,3608.0
1,Alamosa,16.0,98.0,12.0,2.0,128.0
2,Arapahoe,215.0,1304.0,32.0,230.0,1781.0
3,Archuleta,0.0,4.0,2.0,0.0,6.0
4,Baca,0.0,0.0,0.0,0.0,0.0
5,Bent,4.0,9.0,3.0,1.0,17.0
6,Boulder,382.0,586.0,42.0,85.0,1095.0
7,Chaffee,0.0,22.0,7.0,22.0,51.0
8,Cheyenne,0.0,0.0,0.0,0.0,0.0
9,Clear Creek,3.0,11.0,1.0,14.0,29.0


In [17]:
homels_students_2012_2017 = pd.merge(homels_students_2012, homels_students_2017, left_on = 'county_name_2011_12', right_on = 'county_name_2016_17')

In [18]:
homels_students_2012_2017.head(20)

Unnamed: 0,county_name_2011_12,shelters_transitional_housing_awaiting_foster_care_2011_12,doubled-up_2011_12,unsheltered__2011_12,hotels_motels_2011_12,total_2011_12,county_name_2016_17,shelters_transitional_housing_awaiting_foster_care_2016_17,doubled-up_due_to_economic_hardship_2016_17,unsheltered_2016_17,hotels_motels_2016_17,total_2016_17
0,Adams,199,3963,58,176,4396,Adams,221.0,3195.0,67.0,125.0,3608.0
1,Alamosa,17,51,13,2,83,Alamosa,16.0,98.0,12.0,2.0,128.0
2,Baca,0,0,0,0,0,Baca,0.0,0.0,0.0,0.0,0.0
3,Bent,0,0,0,0,0,Bent,4.0,9.0,3.0,1.0,17.0
4,Chaffee,1,7,0,1,9,Chaffee,0.0,22.0,7.0,22.0,51.0
5,Conejos,5,12,2,0,19,Conejos,0.0,22.0,0.0,0.0,22.0
6,Costilla,5,50,6,0,61,Costilla,0.0,34.0,23.0,0.0,57.0
7,El Paso,395,2067,42,282,2786,El Paso,388.0,1069.0,121.0,333.0,1911.0
8,Garfield,3,128,1,0,132,Garfield,8.0,171.0,43.0,56.0,278.0
9,Grand,1,2,1,0,4,Grand,0.0,36.0,0.0,5.0,41.0


In [19]:
file_n = '../../npsg_datafiles/homeless_students_by_county_2012_2017.csv'
homels_students_2012_2017.to_csv(file_n, index=False)