In [1]:
import os
import warnings

import numpy as np
import pandas as pd
pd.set_option('display.max_rows', 1000)
pd.options.mode.chained_assignment = None

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = (20.0, 10.0)
import seaborn as sns

# Silence DeprecationWarning for the rest of the session.
# The filter is installed directly instead of inside
# `warnings.catch_warnings()`: that context manager restores the previous
# filter list when its block exits, so a filter added inside it is discarded
# immediately and never applies to any later cell.
warnings.filterwarnings("ignore", category=DeprecationWarning)

Data source: https://www.cde.state.co.us/dropoutprevention/homeless_data

This data is commonly called "McKinney-Vento data" after the legislation that mandated the reporting and services for homeless students. The data used here is from the 2016 - 2017 school year.

The 2011 - 2012 data is not grouped by county, but it could be usable after matching the two files on district name.

In [2]:
# Read the edited 2016-17 McKinney-Vento Excel workbook into a DataFrame.
# The path is relative, so it resolves against the notebook's working directory.
homels_students_tmp = pd.read_excel('../../npsg_datafiles/McKinney_Vento_Data_2016_17_County_edited.xlsx') 

In [3]:
# Inspect the raw sheet: column names, dtypes and non-null counts.
homels_students_tmp.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 8 columns):
District Name                                           247 non-null object
District Code                                           247 non-null object
Shelters, transitional housing, awaiting foster care    247 non-null float64
Doubled-up due to economic hardship                     247 non-null float64
Unsheltered                                             247 non-null float64
Hotels/Motels                                           247 non-null float64
TOTAL                                                   247 non-null float64
County Only Totals                                      65 non-null float64
dtypes: float64(6), object(2)
memory usage: 15.7+ KB


In [4]:
# Preview the first 10 rows. District rows are followed by an upper-case
# county row whose 'District Code' is the string 'TOTALS'.
homels_students_tmp.head(10)

Unnamed: 0,District Name,District Code,"Shelters, transitional housing, awaiting foster care",Doubled-up due to economic hardship,Unsheltered,Hotels/Motels,TOTAL,County Only Totals
0,Mapleton 1,10,6.0,109.0,6.0,9.0,130.0,
1,Adams 12 Five Star Schools,20,80.0,1329.0,13.0,41.0,1463.0,
2,Adams County 14,30,31.0,459.0,11.0,29.0,530.0,
3,Brighton 27J,40,74.0,213.0,4.0,21.0,312.0,
4,Bennett 29J,50,0.0,20.0,0.0,2.0,22.0,
5,Strasburg 31J,60,0.0,21.0,0.0,0.0,21.0,
6,Westminster 50,70,30.0,1044.0,33.0,23.0,1130.0,
7,ADAMS,TOTALS,221.0,3195.0,67.0,125.0,3608.0,3608.0
8,Alamosa RE-11J,100,16.0,69.0,1.0,1.0,87.0,
9,Sangre de Cristo RE-22J,110,0.0,29.0,11.0,1.0,41.0,


In [5]:
# Preview the last 10 rows. The sheet ends with a 'MISC' totals row, blank
# rows, and a row holding only a 'County Only Totals' value
# (presumably the statewide total; confirm against the source workbook).
homels_students_tmp.tail(10)

Unnamed: 0,District Name,District Code,"Shelters, transitional housing, awaiting foster care",Doubled-up due to economic hardship,Unsheltered,Hotels/Motels,TOTAL,County Only Totals
240,YUMA,TOTALS,1.0,52.0,0.0,2.0,55.0,55.0
241,Charter School Institute,8001,18.0,92.0,7.0,9.0,126.0,
242,Colorado School for the Deaf and the Blind,9000,2.0,9.0,0.0,0.0,11.0,
243,Centennial BOCES,9035,16.0,432.0,96.0,1.0,545.0,
244,San Luis Valley BOCES,9055,22.0,278.0,49.0,21.0,370.0,
245,Colorado Digital BOCES,9170,0.0,1.0,3.0,1.0,5.0,
246,MISC,TOTALS,58.0,812.0,155.0,32.0,1057.0,1057.0
247,,,,,,,,
248,,,,,,,,
249,,,,,,,,21943.0


In [6]:
# Keep only the summary rows, which the sheet marks with the string
# 'TOTALS' in the 'District Code' column; individual district rows are dropped.
is_totals_row = homels_students_tmp['District Code'].eq('TOTALS')
homels_students_tmp = homels_students_tmp.loc[is_totals_row]

In [7]:
# Drop the 'MISC' summary row: it totals the charter institute, state school
# and BOCES entries listed at the end of the sheet, not a county.
homels_students_tmp = homels_students_tmp.loc[homels_students_tmp['District Name'] != 'MISC']

# Exclude rows with a missing 'District Code'. A comparison against the string
# 'NaN' is True for every row (a missing value is never equal to that text),
# so it filters nothing; notna() tests for missing values directly.
# .copy() makes the result an independent frame, so the column edits in the
# following cells do not operate on a slice of the original data.
homels_students_2017 = homels_students_tmp.loc[homels_students_tmp['District Code'].notna()].copy()

In [8]:
# Normalise the column labels in one chained pass:
#   1. replace ', ', ' ' and '/' with underscores (in that order) and lower-case,
#   2. rename 'district_name' to 'county_name' (only county rows remain),
#   3. drop 'district_code', which no longer carries information.
homels_students_2017 = (
    homels_students_2017
    .rename(columns=lambda label: (
        label.replace(', ', '_')
             .replace(' ', '_')
             .replace('/', '_')
             .lower()
    ))
    .rename(columns={'district_name': 'county_name'})
    .drop(['district_code'], axis=1)
)

In [9]:
# Tag every column with the school year so the names stay unambiguous
# when this table is combined with data from other years.
homels_students_2017 = homels_students_2017.add_suffix('_2016_17')

In [10]:
# County names arrive in upper case (e.g. 'ADAMS'); convert them to title case.
homels_students_2017 = homels_students_2017.assign(
    county_name_2016_17=lambda frame: frame['county_name_2016_17'].str.title()
)

In [11]:
# Check the cleaned frame's column names, dtypes and non-null counts before export.
homels_students_2017.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 63 entries, 7 to 240
Data columns (total 7 columns):
county_name_2016_17                                           63 non-null object
shelters_transitional_housing_awaiting_foster_care_2016_17    63 non-null float64
doubled-up_due_to_economic_hardship_2016_17                   63 non-null float64
unsheltered_2016_17                                           63 non-null float64
hotels_motels_2016_17                                         63 non-null float64
total_2016_17                                                 63 non-null float64
county_only_totals_2016_17                                    63 non-null float64
dtypes: float64(6), object(1)
memory usage: 3.9+ KB


In [12]:
# Write the county-level table to CSV next to the source workbook.
# index=False leaves the DataFrame index out of the file.
file_n = '../../npsg_datafiles/homeless_students_by_county_2016_17.csv'
homels_students_2017.to_csv(file_n, index=False)