In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 4.4 MB/s 
Collecting fiona>=1.8
  Downloading Fiona-1.8.22-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 298 kB/s 
[?25hCollecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 32.2 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.22 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1


In [None]:
# import libraries
import pandas as pd
import os
import requests
import dask
import geopandas as gpd
from datetime import datetime
from google.colab import files
from time import sleep

%matplotlib inline

# Set the working directory to the project folder on Google Drive.
wrkDir = "/content/drive/MyDrive/Native_American_Tribal_Needs_Assessment_Code"
if not os.path.isdir(wrkDir):
  # The folder is only visible once Drive is mounted; on a fresh runtime it
  # may not be, so mount it here instead of failing in os.chdir.
  from google.colab import drive
  drive.mount("/content/drive")
os.chdir(wrkDir)

import warnings
# No category or module filter is given, so every warning raised after this
# point is suppressed for the rest of the session.
warnings.filterwarnings("ignore")

**Census place codes for the Native American villages, and the dataset year ranges**

In [None]:
# Census place codes for the villages queried below
vil_codes = [
  43475,  # Santee
  42250,  # Rosalie
  30170,  # Macy
  51245,  # Walthill
  53275,  # Winnebago
]

# Five-year ranges written "start-end", newest first (end years 2019 down to 2011)
years = [f"{end - 4}-{end}" for end in range(2019, 2010, -1)]

**CHAS API token**

In [None]:
# Read the API token from a local text file; the path is relative to the
# current working directory.
with open("data/hud_key.txt", "r") as f:
  # strip() removes surrounding whitespace (e.g. a trailing newline) so the
  # value can be concatenated directly into a request header.
  token = f.read().strip()

**Define the CHAS API request function (place, state, and national levels)**

In [None]:
#create request payload
# documentation: https://www.huduser.gov/portal/dataset/chas-api.html
# token = "<REDACTED>"  # never paste the key into the notebook; it is read from data/hud_key.txt

def _fetch_chas(payload, year, timeout=30):
  """Run one CHAS API query and return the response as a tagged dataframe.

  Parameters
  ----------
  payload : dict
    Query-string parameters sent with the GET request.
  year : str
    Year range such as "2015-2019"; its last four characters are used to
    tag the returned rows.
  timeout : int or float
    Seconds to wait for the server before the request is abandoned.

  Returns
  -------
  pandas.DataFrame
    The JSON response as a dataframe, with "year" set to June 1 of the last
    four characters of `year` and "pl_merge" set to those four characters.

  Raises
  ------
  requests.HTTPError
    If the server answers with an error status.
  """
  # The bearer token comes from the module-level `token` variable.
  r = requests.get(
    "https://www.huduser.gov/hudapi/public/chas",
    params=payload,
    headers={"Authorization": "Bearer " + token},
    timeout=timeout,
  )

  # Fail loudly on an error status instead of parsing an error payload into
  # a dataframe.
  r.raise_for_status()

  df = pd.DataFrame.from_dict(r.json())

  # Tag rows with the end year: once as a timestamp, once as a string merge key.
  df["year"] = pd.to_datetime(year[-4:] + "-06-01")
  df["pl_merge"] = year[-4:]

  return df


def request(year_list, geo_type=None, place_list=None):
  """Query the CHAS API for every year range and return the dataframes.

  Parameters
  ----------
  year_list : iterable of str
    Year ranges such as "2015-2019".
  geo_type : str, optional
    "place" issues one request per entry of `place_list` for each year,
    "state" issues one state-level request per year, and any other value
    (including the default None) issues one national request per year.
    All place and state requests use stateId 31.
  place_list : iterable, optional
    Place identifiers sent as `entityId`; only read when geo_type is "place".

  Returns
  -------
  list of pandas.DataFrame
    One dataframe per request, in request order. Place dataframes also carry
    an integer "entityID" column.
  """
  list_dfs = []

  for year in year_list:
    if geo_type == "place":
      for place in place_list:
        # type: 5 = place
        df = _fetch_chas({'type': 5, "stateId": 31, "entityId": place, "year": year}, year)
        df["entityID"] = int(place)
        list_dfs.append(df)

    elif geo_type == "state":
      # type: 2 = state
      list_dfs.append(_fetch_chas({'type': 2, "stateId": 31, "year": year}, year))

    else:
      # type: 1 = us
      list_dfs.append(_fetch_chas({'type': 1, "year": year}, year))

  return list_dfs

Execute requests

In [None]:
# Place-level pull: one call per (year, village) pair
places_dfs = request(year_list=years, geo_type="place", place_list=vil_codes)

# The API supports only 60 queries per minute and the place pull makes ~45,
# so wait a full minute before issuing the remaining calls.
sleep(60)

# State-level pull: one call per year
states_dfs = request(year_list=years, geo_type="state")

# National pull: geo_type is left at its default
us_dfs = request(year_list=years)

Concatenate the per-request results into a single dataframe per geography

In [None]:
# Stack each list of per-request frames into one dataframe per geography
# (place, state, us — in that order).
places, states, us = (
  pd.concat(frames) for frames in (places_dfs, states_dfs, us_dfs)
)

Compute the `*_sev_housing` percentage and trim each dataframe to the columns needed for merging

In [None]:
def update_df_cols(df, geo_name):
  """Compute the `<geo_name>_sev_housing` percentage and keep the merge columns.

  The percentage is C3 divided by A18, times 100, rounded to one decimal.
  NOTE(review): C3 and A18 are CHAS field codes; presumably a severe-housing
  count and a household total — confirm against the CHAS data dictionary.

  Parameters
  ----------
  df : pandas.DataFrame
    Must contain "geoname", "year", "pl_merge", "C3" and "A18"; when
    geo_name is "pl" it must also contain "entityID".
  geo_name : str
    Prefix for the new column name. The value "pl" additionally builds a
    "PLACEFIPS" column and keeps "entityID".

  Returns
  -------
  pandas.DataFrame
    A new dataframe with columns geoname, year, <geo_name>_sev_housing,
    pl_merge (plus entityID and PLACEFIPS for "pl"). The input is not modified.
  """
  # Work on a copy so the caller's dataframe is left untouched.
  df = df.copy()

  # Only the two columns used in the ratio need to be numeric; selecting them
  # by name avoids depending on the column order of the API response.
  numerator = pd.to_numeric(df["C3"], errors="coerce")
  denominator = pd.to_numeric(df["A18"], errors="coerce")

  # A zero denominator would give inf; turn it into a missing value instead.
  denominator = denominator.where(denominator != 0)

  geo_sev_housing = geo_name + "_sev_housing"
  df[geo_sev_housing] = (numerator / denominator * 100).round(1)

  ref_cols = ["geoname", "year", geo_sev_housing, "pl_merge"]

  if geo_name == "pl":
    # entityID is stored as an int, so left-pad it back to five digits before
    # prefixing the state code "31".
    df["PLACEFIPS"] = "31" + df["entityID"].astype(str).str.zfill(5)
    ref_cols = ref_cols + ["entityID", "PLACEFIPS"]

  return df[ref_cols]

In [None]:
# Run the column refinement on each geography with its column-name prefix:
# "pl" for places, "st" for states, "us" for the national frame.
places, states, us = (
  update_df_cols(frame, prefix)
  for frame, prefix in ((places, "pl"), (states, "st"), (us, "us"))
)

**Merge dataframes**

In [None]:
# Attach the state values to every place row via the end-year key. Columns
# that also exist in `states` get a "_drop" suffix on the state side.
places_st = places.merge(states, on="pl_merge", how="left", suffixes=(None, "_drop"))

# Attach the national values the same way; overlaps are tagged "_drop2".
places_all = places_st.merge(us, on="pl_merge", how="inner", suffixes=(None, "_drop2"))

# Remove every column whose name ends in one of the throw-away suffixes.
leftover_cols = [name for name in places_all.columns if name.endswith(("drop", "drop2"))]
places_all.drop(columns=leftover_cols, inplace=True)

places_all.head(10)

Unnamed: 0,geoname,year,pl_sev_housing,pl_merge,entityID,PLACEFIPS,st_sev_housing,us_sev_housing
0,"Santee village, Nebraska",2019-06-01,22.7,2019,43475,3143475,12.2,17.0
1,"Rosalie village, Nebraska",2019-06-01,13.3,2019,42250,3142250,12.2,17.0
2,"Macy CDP, Nebraska",2019-06-01,41.3,2019,30170,3130170,12.2,17.0
3,"Walthill village, Nebraska",2019-06-01,35.1,2019,51245,3151245,12.2,17.0
4,"Winnebago village, Nebraska",2019-06-01,15.8,2019,53275,3153275,12.2,17.0
5,"Santee village, Nebraska",2018-06-01,28.0,2018,43475,3143475,12.3,17.3
6,"Rosalie village, Nebraska",2018-06-01,12.3,2018,42250,3142250,12.3,17.3
7,"Macy CDP, Nebraska",2018-06-01,45.5,2018,30170,3130170,12.3,17.3
8,"Walthill village, Nebraska",2018-06-01,30.8,2018,51245,3151245,12.3,17.3
9,"Winnebago village, Nebraska",2018-06-01,24.4,2018,53275,3153275,12.3,17.3


Add the final columns (integer year)

In [None]:
# Integer calendar year taken from the datetime "year" column
places_all["year_int"] = places_all["year"].dt.year.astype(int)

places_all

Unnamed: 0,geoname,year,pl_sev_housing,pl_merge,entityID,PLACEFIPS,st_sev_housing,us_sev_housing,year_int
0,"Santee village, Nebraska",2019-06-01,22.7,2019,43475,3143475,12.2,17.0,2019
1,"Rosalie village, Nebraska",2019-06-01,13.3,2019,42250,3142250,12.2,17.0,2019
2,"Macy CDP, Nebraska",2019-06-01,41.3,2019,30170,3130170,12.2,17.0,2019
3,"Walthill village, Nebraska",2019-06-01,35.1,2019,51245,3151245,12.2,17.0,2019
4,"Winnebago village, Nebraska",2019-06-01,15.8,2019,53275,3153275,12.2,17.0,2019
5,"Santee village, Nebraska",2018-06-01,28.0,2018,43475,3143475,12.3,17.3,2018
6,"Rosalie village, Nebraska",2018-06-01,12.3,2018,42250,3142250,12.3,17.3,2018
7,"Macy CDP, Nebraska",2018-06-01,45.5,2018,30170,3130170,12.3,17.3,2018
8,"Walthill village, Nebraska",2018-06-01,30.8,2018,51245,3151245,12.3,17.3,2018
9,"Winnebago village, Nebraska",2018-06-01,24.4,2018,53275,3153275,12.3,17.3,2018


**Bring in Places (Census Designated Places) geometries**

In [None]:
#bring in places geometries
places_geos = gpd.read_file("https://services.arcgis.com/P3ePLMYs2RVChkJx/arcgis/rest/services/USA_Census_Populated_Places/FeatureServer/0/query?where=PLACEFIPS%20%3D%20'3143475'%20OR%20PLACEFIPS%20%3D%20'3142250'%20OR%20PLACEFIPS%20%3D%20'3130170'%20OR%20PLACEFIPS%20%3D%20'3138750'%20OR%20PLACEFIPS%20%3D%20'3151245'%20OR%20PLACEFIPS%20%3D%20'3148900'%20OR%20PLACEFIPS%20%3D%20'3153275'&outFields=NAME,STFIPS,PLACEFIPS&outSR=4326&f=json")
# places_geos.plot()

**Create merged dataframe**

In [None]:
# merge places to geos and keep all rows
print(places_geos.shape)
places_full = places_geos.merge(places_all, how="right", on="PLACEFIPS")
print(places_full.shape)
places_full.head(5)

(7, 4)
(45, 12)


Unnamed: 0,NAME,STFIPS,PLACEFIPS,geometry,geoname,year,pl_sev_housing,pl_merge,entityID,st_sev_housing,us_sev_housing,year_int
0,Santee,31,3143475,"POLYGON ((-97.85201 42.84192, -97.85185 42.841...","Santee village, Nebraska",2019-06-01,22.7,2019,43475,12.2,17.0,2019
1,Rosalie,31,3142250,"POLYGON ((-96.50927 42.06107, -96.50932 42.060...","Rosalie village, Nebraska",2019-06-01,13.3,2019,42250,12.2,17.0,2019
2,Macy,31,3130170,"POLYGON ((-96.37321 42.12605, -96.37278 42.125...","Macy CDP, Nebraska",2019-06-01,41.3,2019,30170,12.2,17.0,2019
3,Walthill,31,3151245,"POLYGON ((-96.49161 42.15520, -96.49145 42.151...","Walthill village, Nebraska",2019-06-01,35.1,2019,51245,12.2,17.0,2019
4,Winnebago,31,3153275,"POLYGON ((-96.46833 42.24599, -96.46834 42.245...","Winnebago village, Nebraska",2019-06-01,15.8,2019,53275,12.2,17.0,2019


**Export**

In [None]:
# Export geojson
file = "data/ne_ntcna_places_chas_updated.geojson"
places_full.to_file(file)
files.download(file)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>