In [None]:
import housing_cluster_visualisations
import employment_hotspots
import housing_insecurity as housing_helper
from helper_functions import ACS_helper

In [None]:
import pickle
import os
import json

# TODO: Add your [Census API Key](https://api.census.gov/data/key_signup.html)

In [None]:
# Read the Census API key from the CENSUS_API_KEY environment variable so the
# secret is never stored in (or committed with) the notebook. Set it before
# starting Jupyter, e.g. `export CENSUS_API_KEY=<your key>`.
census_key = os.environ.get('CENSUS_API_KEY', '').strip()
if not census_key:
    # Surface the problem here rather than as an opaque failure in a later cell.
    print('CENSUS_API_KEY is not set - request a key at '
          'https://api.census.gov/data/key_signup.html and export it '
          'before running the cells below.')

# Clustering Census Tracts based on Housing Insecurity

## Creating input for US cities

city:_city name_   
state_abbrv:_state abbreviation (lower case)_  
state:_state name_  
state_fips:_FIPS code for state_  
counties:[_list of FIPS codes for all counties in city_]

## Example:
city:boston  
state_abbrv:ma  
state:massachusetts  
state_fips:25  
counties:['025']  

### The following cities are those that have data in ```data/input/< city >_input.txt```

In [None]:
# Cities processed by this notebook; each one is looked up as
# data/input/<city>_input.txt by the loops further down.
cities = [
    'jacksonville',
    'new_york_city',
    'las_vegas',
    'albuquerque',
    'hartford',
    'phoenix',
    'greenville',
    'dallas',
    'st_louis',
    'bridgeport',
    'columbus',
    'charleston',
    'kansas_city',
    'gainesville',
    'fort_worth',
    'houston',
    'san_francisco',
    'cleveland',
    'new_orleans',
    'boston',
    'philadelphia',
    'cincinnati',
    'memphis',
    'milwaukee',
]

## Eviction data for these cities is provided by The Eviction Lab and can be found in ```data/eviction_lab_data.csv```

In [None]:
# One entry per city, all set to None: no custom eviction data frame is supplied here.
evictions_df = dict.fromkeys(cities)

### ```housing_insecurity.py``` consists of four steps:
1. Download relevant housing characteristics from US Census
    * By default, downloads ACS 5-Year Estimates from 2020  
2. Preprocess housing and eviction data
    * If an eviction data frame is not passed, filters Eviction Lab data to only include tract-level evictions for the focal city 
    * Removes census tracts that either...
        + Have a reported population of 0
        + Have at least one feature pertaining to Housing Affordability dimensions (Rent Burden, Mortgage, Housing Stock)
3. Standardise housing data
4. Apply spectral clustering
    + Estimate appropriate number of clusters using spectral gap of eigenvalues
    + Rank clusters based on median values of housing characteristics
    + Partition clusters into 3 groups: less, mildly, and most vulnerable to housing insecurity
    

In [None]:
# Run the housing-insecurity pipeline once per city, passing the Census key,
# the city's input file and that city's entry from evictions_df.
for city in cities:
    print(city)
    input_path = f'data/input/{city}_input.txt'
    housing_helper.main([census_key, input_path, evictions_df[city]])

## Once your housing clusters have been generated and saved to the ```housing_clusters/``` directory, we can visualise our results

In [None]:
# Flag forwarded as the last argument to housing_cluster_visualisations.main.
save_viz = True

# Call the visualisation entry point once per city; the third argument is
# passed as None.
for city in cities:
    print(city)
    input_path = f'data/input/{city}_input.txt'
    housing_cluster_visualisations.main(
        [census_key, input_path, None, save_viz]
    )

## Finally, we can use the LEHD Origin-Destination Employment Statistics (LODES) data to identify employment hotspots for each housing demographic

In [None]:
# Rebuild the city list from the files present in housing_clusters/.
# Entries containing 'ipynb' (e.g. notebook checkpoints) and hidden dotfiles
# are not cluster outputs, so they are skipped.
cluster_files = [
    name for name in os.listdir('housing_clusters/')
    if 'ipynb' not in name and not name.startswith('.')
]

# splitext removes the extension whatever its length (the old fixed [:-4]
# slice only worked for three-letter extensions). os.listdir returns names in
# arbitrary order, so sort and de-duplicate to get a reproducible run order.
cities = sorted({os.path.splitext(name)[0] for name in cluster_files})

# Keep each city's result; gsa/lsa on their own are overwritten on every
# iteration and would only hold the output for the last city.
hotspots = {}
for city in cities:
    print(city)
    input_path = 'data/input/'+city+'_input.txt'

    gsa,lsa = employment_hotspots.main([census_key, input_path])
    hotspots[city] = (gsa, lsa)