# Notebook info

# Package setup

## System-level setup

In [None]:
#@title
# Useful for Colab
# !pip install geopandas
# !pip install rtree
# !sudo apt-get update && apt-get install -y libspatialindex-dev
# !pip install spatiallibindex

In [62]:
# #@title
# # This setup is useful for Colab and other environments with pesky setup for Geopandas
# !apt-get install -qq curl g++ make
# !curl -L http://download.osgeo.org/libspatialindex/spatialindex-src-1.8.5.tar.gz | tar xz
# import os
# os.chdir('spatialindex-src-1.8.5')
# !make
# !make install
# !pip install rtree
# !ldconfig
# from rtree import index
# from rtree.index import Rtree

## Import libraries

In [63]:
#@title
# PATH MANAGEMENT
# OS-independent setup built on pathlib:
# https://docs.python.org/3/library/pathlib.html
import os
import sys
from pathlib import Path

CWD = Path.cwd()
HOME = Path.home()

# USE_COLAB was read below without ever being assigned in this notebook, which
# raised NameError on a fresh kernel. Keep a value set by an earlier cell if
# there is one; otherwise treat a loaded `google.colab` module as "on Colab".
try:
    USE_COLAB
except NameError:
    USE_COLAB = "google.colab" in sys.modules

if USE_COLAB:
    # NOTE(review): other cells use "/content/drive/My Drive/Colab Notebooks/..."
    # - confirm whether a 'My Drive' segment is needed here as well.
    CWD = CWD / 'drive' / 'Colab Notebooks'
    print(f"We're running on Colab... alternative CWD: {CWD}")

# Convenience locations for inputs, outputs and figures, all relative to CWD.
DATA_IN = CWD / 'data_in'
DATA_OUT = CWD / 'data_out'
VIZ_OUT = CWD / "viz"

# os.chdir(HOME / 'LocalDev' / 'data-analysis-template')

print(f"""PATH SETUP: 
---
cwd is {CWD}

Refer to these paths for IO operations:
HOME: {HOME}
DATA_IN: {DATA_IN}
DATA_OUT: {DATA_OUT}
VIZ_OUT: {VIZ_OUT}
""")

PATH SETUP: 
---
cwd is /Users/riledigital/LocalDev/data-analysis-template

Refer to these paths for IO operations:
HOME: /Users/riledigital
DATA_IN: /Users/riledigital/LocalDev/data-analysis-template/data_in
DATA_OUT: /Users/riledigital/LocalDev/data-analysis-template/data_out
VIZ_OUT: /Users/riledigital/LocalDev/data-analysis-template/viz



In [None]:
# Importing analysis libraries
import datetime
import pandas as pd
import numpy as np

In [None]:
# Data Viz
# good ol matplotlib
import matplotlib.pyplot as plt
# Easier mplt's
import seaborn as sns
# Vega-Lite for interactive
import altair as alt

# high-performance plotting
# import bokeh
# from bokeh.io import output_notebook, show, output_file
# from bokeh.plotting import figure
# from bokeh.models import GeoJSONDataSource, LinearColorMapper, ColorBar
# from bokeh.palettes import brewer
# import json

In [None]:
# ML/deep learning
import sklearn as sk
# from keras.models import Sequential
# from keras import layers

In [None]:
# Geospatial stuff
# GIS
import geopandas as gpd
import fiona
import shapely.geometry
from shapely.geometry import Point

## Jupyter notebook options and viz options

In [59]:
# Notebook display options
from IPython.core.interactiveshell import InteractiveShell

# Run every top-level expression in a cell interactively, not only the last one.
InteractiveShell.ast_node_interactivity = "all"

# Raise the pandas row-display limit to 1000.
pd.options.display.max_rows = 1000

In [60]:
# High-DPI ("retina") inline figures. No plot theme is configured in this cell.
%config InlineBackend.figure_format = 'retina'


# Analysis

# Load and verify data

In [61]:
## URL parsing made EZ and OO-style
import urllib
import urllib.parse as up
import requests

# Web/data input libraries
import json

## For connections to PostgreSQL
# import psycopg2


In [58]:
## Census access: `us` supplies state metadata, `census` wraps the Census API
import us
import census
from census import Census

# The API key is read from the CENSUS_SECRET environment variable.
c = Census(os.getenv("CENSUS_SECRET"))

# Request NAME plus table variable B25034_010E from the ACS 5-year dataset,
# restricted to Maryland via its FIPS code.
acs_fields = ('NAME', 'B25034_010E')
acs_geo = {'for': 'state:{}'.format(us.states.MD.fips)}
myList = c.acs5.get(acs_fields, acs_geo)

# The returned rows can be loaded directly into a DataFrame
pd.DataFrame(myList)

Unnamed: 0,NAME,B25034_010E,state
0,Maryland,129556.0,24


In [1]:
# # Get a URL for county-level shapefile!
# # us.states.NY.shapefile_urls()['county']
# import zipfile
# def getZipFile(url):
#     from urllib.request import urlopen
#     from zipfile import ZipFile
#     zipurl = 'Valid URL to zip file'
#         # Download the file from the URL
#     zipresp = urlopen(url)
#         # Create a new file on the hard drive
#     tempzip = open(DATA_IN / "tempfile.zip", "wb")
#         # Write the contents of the downloaded file into the new file
#     tempzip.write(zipresp.read())
#         # Close the newly-created file
#     tempzip.close()
#         # Re-open the newly-created file with ZipFile()
#     zf = ZipFile(DATA_IN / "tempfile.zip")
#         # Extract its contents into <extraction_path>
#         # note that extractall will automatically create the path
#     zf.extractall(path = CWD / 'temp')
#         # close the ZipFile instance
#     zf.close()
# getZipFile(us.states.NY.shapefile_urls()['county'])

In [None]:
# Helper function to clean column names :-)
# https://medium.com/@chaimgluck1/working-with-pandas-fixing-messy-column-names-42a54a6659cd
def clean_column_names(dataframe: pd.DataFrame) -> pd.DataFrame:
    """Return ``dataframe`` with tidied column labels.

    Each string label is stripped of surrounding whitespace, lower-cased,
    has spaces replaced by underscores, and has parentheses removed.
    Non-string labels (e.g. integers) are left untouched.

    Args:
        dataframe: Frame whose column labels should be cleaned.

    Returns:
        A new DataFrame with the cleaned labels; the input frame's labels
        are not modified.
    """
    def _tidy(label):
        # Plain str methods are used so that "(" and ")" are never
        # interpreted as regular-expression syntax.
        if not isinstance(label, str):
            return label
        return (
            label.strip()
            .lower()
            .replace(" ", "_")
            .replace("(", "")
            .replace(")", "")
        )

    cleaned_labels = [_tidy(label) for label in dataframe.columns]
    return dataframe.set_axis(cleaned_labels, axis="columns")

# Visualization

In [None]:
# Pair each row's longitude and latitude into an (x, y) tuple.
# NOTE(review): `df` is not created in any cell shown in this notebook - load it first.
df['geometry'] = list(zip(df.longitude, df.latitude))

In [None]:
%%time
%store
# Convert the coordinate tuples to shapely Points, then wrap the frame as a
# GeoDataFrame that uses that column as its geometry.
df['geometry'] = df['geometry'].apply(Point)
gdf = gpd.GeoDataFrame(df, geometry='geometry')
# Spot-check four random rows and print the CRS (none is passed to the constructor above).
gdf.sample(4)
print(gdf.crs)

In [None]:
# Coordinate reference systems used below, written as EPSG authority strings.
# The older {'init': 'epsg:XXXX'} dict form is deprecated by pyproj/GeoPandas
# and triggers a FutureWarning, so the plain "EPSG:XXXX" form is used instead.
CRS_WGS = "EPSG:4326"  # WGS 84 longitude/latitude
CRS_LA = "EPSG:2229"  # NAD83 / California zone 5 (US survey feet)

In [None]:
# Declare the points' CRS (this labels the data; it does not transform coordinates).
gdf.crs = CRS_WGS

In [None]:
gdf.crs

In [None]:
# Reproject the points into the Los Angeles CRS.
airbnb_crs_la = gdf.to_crs(crs=CRS_LA)

In [None]:
# Points in their original CRS.
gdf.plot(figsize=(10, 50), color='teal', markersize=5)


In [None]:
# The same points after reprojection.
airbnb_crs_la.plot(figsize=(10, 50), color='pink', markersize=5)

In [None]:
# Neighborhoods base file
# NOTE(review): absolute Colab Drive path - consider deriving it from DATA_IN
# once the folder layout is confirmed.
neighborhoods_path = "/content/drive/My Drive/Colab Notebooks/IFX_Final_HomelessnessLA/data_in/LA-Neighborhoods.geojson"
neighborhoods = gpd.read_file(neighborhoods_path)

In [None]:
# CRS of the neighborhoods layer as loaded from the file.
neighborhoods.crs

In [None]:
# Reproject the neighborhoods into the same Los Angeles CRS as the points.
neighborhoods_crs_la =  neighborhoods.to_crs(crs=CRS_LA)

In [None]:
neighborhoods_crs_la

In [None]:
neighborhoods_crs_la.crs

In [None]:
neighborhoods_crs_la.plot()

In [None]:
# Print both layers' CRS side by side to check they match before the spatial join.
print(f'neighborhood:{neighborhoods_crs_la.crs}, airbnb:{airbnb_crs_la.crs}')

In [None]:
# 43,560 square feet = 1 acre. EPSG:2229 uses feet, so .area is in square feet.
neighborhood_sqft = neighborhoods_crs_la['geometry'].area
neighborhoods_crs_la['neighborhood_acres'] = neighborhood_sqft / 43560

In [None]:
#might have to restart kernel
%%time
%store
airbnb_la = gpd.sjoin(
    airbnb_crs_la,
    neighborhoods_crs_la,
    how='inner')

In [None]:
# Preview the joined rows.
airbnb_la.head()

In [None]:
%%time
%store
## Count points in neighborhood
# count() replaces every non-key column with its number of non-null values per
# group, so all columns of point_counts (including neighborhood_acres) hold counts.
point_counts = airbnb_la.groupby(['neighbourhood_left'], as_index=False).count()


In [None]:
point_counts.head()

In [None]:
# Use the count of `index_right` as the number of points in each neighborhood.
point_counts['points_in_neighborhood'] = point_counts['index_right']


In [None]:
# Attach each neighborhood's point count back onto the per-point rows.
# NOTE: `neighborhood_acres` exists on both sides of the merge, so pandas adds
# suffixes: `neighborhood_acres_x` is the acreage carried by airbnb_la, while
# `neighborhood_acres_y` is only a per-neighborhood count from point_counts.
_count_columns = ['neighbourhood_left', 'points_in_neighborhood', 'neighborhood_acres']
df2 = (
    airbnb_la
    .merge(point_counts[_count_columns], on='neighbourhood_left')
    .drop(['index_right', 'neighbourhood_group', 'neighbourhood_right'], axis='columns')
)


In [None]:
df2.head()

In [None]:
# Density = points in the neighborhood divided by its acreage. The `_x` suffix
# selects the acreage column that came from the left side of the merge.
df2['units_per_area'] = df2['points_in_neighborhood'] / df2['neighborhood_acres_x']

In [None]:
# Peek at the densest rows, then collapse to one row per neighborhood.
df2.sort_values('units_per_area', ascending=False).head()
# numeric_only=True: df2 still carries non-numeric columns (e.g. geometry), and
# pandas >= 2.0 raises TypeError when asked to average those.
summary_unit_density = (
    df2.groupby(['neighbourhood_left'], as_index=False)
    .mean(numeric_only=True)
    .sort_values(['units_per_area'], ascending=False)
)


In [None]:
# Ten densest neighborhoods.
summary_unit_density.head(10)

In [None]:
# Write the per-neighborhood summary to CSV.
# NOTE(review): absolute Colab Drive path - consider DATA_OUT once the folder layout is confirmed.
summary_unit_density.to_csv('/content/drive/My Drive/Colab Notebooks/IFX_Final_HomelessnessLA/data_out/airbnb_density_la.csv')

# Export/saving
- Use the convenience variables DATA_OUT and VIZ_OUT to save outputs and figures.