In [None]:
# Import some standard modules

import numpy as np
import matplotlib.pylab as plt

%matplotlib notebook

import hmis

from datetime import datetime

import seaborn as sn
sn.set()

# Get the data and look at it

In [None]:
# Load the records from the saved dictionary file.
# NOTE(review): the .pkl extension suggests a pickle file; only load files
# that come from a trusted source - confirm how hmis reads it.
data_file = 'CARESNY_data_NEW.pkl'
data = hmis.read_dictionary_file(data_file)

In [None]:
# Count the entries by taking the length of the container,
# then show what kind of container we were handed back.
n_entries = len(data)

print(n_entries)
print(type(data))

In [None]:
# Inspect the first record. Remember, counting starts at 0,
# so the first record lives at index 0!

data[0]

In [None]:
# Slice off the first 10 records (indices 0 through 9) and print them
# with the hmis pretty-printer.
first_ten = data[0:10]

hmis.pretty_print(first_ten)

In [None]:
# Ask hmis for the project types and count how many there are
ptypes = hmis.project_types()
nptypes = len(ptypes)

# Show the count on the first line and the project types on the second
print(nptypes, ptypes, sep='\n')

# Data science!

In [None]:
# Pick out a single record by its position in the data
# and display the programs stored under its "Programs" key.
person_index = 16779
person = data[person_index]

person["Programs"]

In [None]:
# Look at the records of people who have more than `min_programs` recorded
# projects with which they are associated.
# Keeping the threshold in one named variable means the comment and the
# test below can no longer disagree.
min_programs = 35

for i,person in enumerate(data):
    if len(person["Programs"]) > min_programs:
        hmis.pretty_print(person)
        # Also print the position in the data, so the record can be looked up again
        print(i,person["DOB"],person["Personal ID"])

Let's now try to answer a very specific question:

*What is the distribution of ages of people who have stayed at least 1 night in an emergency shelter?*

We want to look at this separately for each year, because people age: the same person has a different age in each year. :)

In [None]:
# Make a list of the years we want to look at
years = [2011,2012,2013,2014,2015,2016]

# Define the project of interest.
# Swap in another project type (for example 'Emergency Shelter') to study
# a different kind of project.
project_of_interest = 'PH - Rapid Re-Housing'

# Average length of a year in days. Using 365.25 rather than 365 accounts
# for leap years, so ages are not slightly overestimated.
days_per_year = 365.25

# Let's store the information in a dictionary:
# one list of ages for each year, keyed by the year as a string.
ages = {}

# Loop over the years.
for year in years:

    print(year)

    # For each year, we will create a list into which we will put the ages of people.
    key = str(year) # We use the year as a string for the dictionary key
    ages[key] = []

    # Ages are measured at the end of the year we're looping over.
    # This date is the same for everyone, so build it once per year.
    end_of_year = datetime(year,12,31)

    # Loop over each person in our data
    for person in data:

        # Loop over every program they've been in
        for program in person["Programs"]:

            # Skip programs of other project types before doing any date
            # parsing: most programs are skipped here, which saves work.
            if program["Project type"] != project_of_interest:
                continue

            # Get the date they were admitted to that program
            date = hmis.get_date_from_string(program['Admission date'])

            # Only count admissions that fall in the year we're looping over
            if date.year != year:
                continue

            # Get their age at the end of that year
            dob = hmis.get_date_from_string(person['DOB'])
            age = (end_of_year - dob).days / days_per_year # Convert to years

            ages[key].append(age)

            # We can break out of the loop once we find one instance.
            # This saves us some computing time and makes sure we don't
            # double/triple/etc-count ages
            break

In [None]:
#ages["2014"]

In [None]:
# Once we've got the data in our ages dictionary, we can do all sorts of cool
# plotting stuff with it!

# Pull out the number of people in each year
num = [len(ages[str(year)]) for year in years]

plt.figure()
plt.plot(years,num,'o',markersize=20)
plt.xlabel('Year',fontsize=18)
plt.ylabel('# of people',fontsize=18)
# Build the title from project_of_interest so the plot is always labelled
# with the project type that was actually used to fill `ages`.
plt.title('# of people who used %s at least once' % project_of_interest,fontsize=12)


In [None]:
# Get the ages and plot them in a box plot

# Ages at or above this value are left out of the plots.
# NOTE(review): presumably this removes records with implausible dates of
# birth - confirm that 90 is the intended cutoff.
max_age = 90

values = []
for year in years:
    x = np.array(ages[str(year)])
    values.append(x[x < max_age])

plt.figure()
plt.boxplot(values,labels=years)

plt.xlabel('Year',fontsize=18)
plt.ylabel('Age (years)',fontsize=18)
# Build the title from project_of_interest so the plot is always labelled
# with the project type that was actually used to fill `ages`.
plt.title('Age of people who used %s at least once' % project_of_interest,fontsize=12)


In [None]:
# Violin plot
# This shows a bit more of the distribution of data

# Years with no ages have to be cut out before drawing the violins.
# Filtering on the data itself (rather than always dropping the first year)
# keeps this working whichever years happen to be empty.
years_with_data = [year for year, v in zip(years, values) if len(v) > 0]
values_with_data = [v for v in values if len(v) > 0]

plt.figure()

# The second argument gives the x position of each violin, here the year
plt.violinplot(values_with_data,years_with_data)

plt.xlabel('Year',fontsize=18)
plt.ylabel('Age (years)',fontsize=18)
# Build the title from project_of_interest so the plot is always labelled
# with the project type that was actually used to fill `ages`.
plt.title('Age of people who used %s at least once' % project_of_interest,fontsize=12)


In [None]:
# Histograms!

# We could do each histogram by hand: make a figure, claim the first panel
# of a 2x2 grid, and draw the third entry of `values` into it...
fig = plt.figure()
ax = fig.add_subplot(2,2,1)
ax.hist(values[2],label='2013')
ax.legend()

In [None]:
#...but better to automate things in a loop

plt.figure(figsize=(10,8))

# Walk through entries 2 to 5 of `years` and `values` together, so each
# label comes from the same position as the data it describes.
# `panel` counts 1, 2, 3, 4: the four slots of the 2x2 grid.
for panel,(year,v) in enumerate(zip(years[2:6],values[2:6]),start=1):
    plt.subplot(2,2,panel)
    plt.hist(v,label=str(year),bins=25,range=(0,80))
    # Same y range on every panel so the years can be compared by eye
    plt.ylim(0,200)
    plt.xlabel('Age (years)')
    plt.legend()

plt.savefig('plots.png')

In [None]:
# Print the average age for each year

for year,v in zip(years,values):
    if len(v) > 0:
        print(year,np.mean(v))
    else:
        # The mean of an empty array is nan (with a warning),
        # so say explicitly that this year has no entries.
        print(year,'no data')

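When you're done with this, go back and look at the distributions for another Project Type like 'Transitional Housing': change `project_of_interest` in the cell that fills the `ages` dictionary, then re-run that cell and the plotting cells below it.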

# Maps

In [None]:
# Zip codes to put on the map, stored as a list of integers per area name
zipcodes = {
    "Albany": [
        12023, 12193, 12202, 12203, 12204, 12205, 12206, 12041,
        12207, 12208, 12209, 12045, 12210, 12046, 12211, 12047,
        12053, 12054, 12059, 12233, 12234, 12067, 12077, 12083,
        12250, 12084, 12085, 12087, 12260, 12303, 12107, 12304,
        12110, 12309, 12120, 12128, 12143, 12147, 12158, 12159,
        12161, 12007, 12009, 12460, 12183, 12186, 12189, 12469,
    ],
}

In [None]:
import folium
import zipcode

# Albany: the map is centered here
lat = 42.65
lon = -73.75

# NOTE(review): newer folium releases may no longer ship the Stamen tile
# sets - confirm that 'Stamen Terrain' works with the installed version.
mymap = folium.Map(location=[lat,lon],
                   zoom_start=10,
                   tiles='Stamen Terrain')

folium.Marker([lat,lon], popup='Albany, NY').add_to(mymap)

# The "npeople" numbers in the popups are random placeholders, not real
# counts. A generator with a fixed seed gives the same numbers on every run.
rng = np.random.RandomState(0)

# Add markers for the different zip code centers
for zc in zipcodes["Albany"]:
    myzip = zipcode.isequal(str(zc))

    # Draw the placeholder count for every zip code, found or not, so the
    # numbers assigned to the other zip codes do not depend on lookups.
    npeople = rng.randint(45)

    # Assumes the lookup gives back None for a zip code it does not know
    # (TODO confirm); skip those instead of crashing on a missing attribute.
    if myzip is None:
        continue

    name = "%s npeople: %d" % (str(zc),npeople)

    # Use the zip code's own coordinates directly, so the Albany center
    # stored in lat/lon above is not overwritten.
    folium.Marker([myzip.lat,myzip.lon], popup=name).add_to(mymap)


# Display the map
mymap

In [None]:
# Membership test: is the zip code 12210 one of the Albany zip codes listed above?
# As the last expression in the cell, the True/False result is displayed.
12210 in zipcodes["Albany"]