# Open Tools for Urban, Regional, and Neighborhood Science

**A Gentle Introduction to `geosnap` for Neighborhood Analytics**

https://github.com/oturns/geosnap

### Resources:

- [API Docs](https://oturns.github.io/geosnap/api.html)
- [Tutorial Materials](https://github.com/knaaptime/pysal-narsc22/)
- [Tutorial Video](https://www.youtube.com/watch?v=4AHJVMs7iH4)

`geosnap` - the geospatial neighborhood analysis package - provides a suite of tools for understanding the composition and extent of [endogenous] neighborhoods and regions in a study area. It provides: 

 - simple access to commonly-used datasets in urban and regional analyses (https://open.quiltdata.com/b/spatial-ucr)
     - demographic data (Census/ACS)
     - employment (LEHD)
     - environment (EPA)
     - travel infrastructure (OSM)
     - public education systems (NCES)
     - educational achievement (SEDA)
     - land use/land cover (NLCD)

 - an easy interface to build geodemographic typologies
     - classic aspatial typologies
     - constrained homogeneous regions

 - built-in functionality to facilitate spatiotemporal analysis
     - within time-period standardization
     - boundary harmonization
     - inflation adjustment

 - bespoke plotting tools to help visualize neighborhood dynamics
    - temporally-static choropleth mapping
    - animated mapping

 - state-of-the-art techniques for modeling neighborhood change over time
     - spatial Markov transition models
     - sequence analysis

In [None]:
from geosnap import DataStore
from geosnap.io import get_census, get_acs, get_nces
import geopandas as gpd
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## Datasets

In [None]:
# Create the data store handle that the data-access calls below receive.
# NOTE(review): "data/geosnap/" is presumably a local data directory — confirm it exists before running.
datasets = DataStore("data/geosnap/")

In [None]:
dir(datasets)

In [None]:
datasets.msas()

In [None]:
datasets.msas().plot()

In [None]:
datasets.msas().explore()

In [None]:
# County FIPS codes for the study area: 06075 (San Francisco) and 06001 (Alameda).
counties = ["06075", "06001"]

In [None]:
get_acs?

In [None]:
# Tract-level ACS records for the selected counties, for 2016 through 2018.
acs_teens = get_acs(
    datasets,
    level="tract",
    years=[2016, 2017, 2018],
    county_fips=counties,
)

In [None]:
acs_teens.head()

In [None]:
acs_teens[acs_teens.year==2018].explore('p_nonhisp_white_persons', scheme='quantiles', tiles='CartoDB Positron', cmap='GnBu')

In [None]:
acs_teens.year.unique()

In [None]:
acs_teens[acs_teens.year==2018].shape

In [None]:
# Print how many rows the ACS table holds for each year it contains.
for year in acs_teens["year"].unique():
    print(len(acs_teens[acs_teens["year"] == year]))

In [None]:
# Pull census records for the same counties; no year filter is passed here,
# so the years available are inspected in the cells that follow.
census = get_census(datasets, county_fips=counties)

In [None]:
census.year.unique()

In [None]:
# Print how many rows the census table holds for each year it contains.
for year in census["year"].unique():
    print(len(census[census["year"] == year]))

## Standardizing Boundaries Over Time

In [None]:
from geosnap.harmonize import harmonize

In [None]:
harmonize?

In [None]:
# Variables used both as harmonization inputs and as clustering features.
columns = [
    "median_household_income",
    "median_home_value",
    "p_asian_persons",
    "p_hispanic_persons",
    "p_nonhisp_black_persons",
    "p_nonhisp_white_persons",
]

In [None]:
# Standardize every census year to 2010 boundaries so years are comparable.
# `n_total_pop` is passed as an extensive variable and the `columns` list as
# intensive variables; the table is indexed by `geoid` for the call.
census = harmonize(
    census.set_index("geoid"),
    extensive_variables=["n_total_pop"],
    intensive_variables=columns,
    target_year=2010,
)

In [None]:
# Turn the index back into regular column(s) after harmonization.
# NOTE(review): presumably this restores `geoid` as a column — confirm the harmonize output index.
census = census.reset_index()

In [None]:
# Re-check the row count per year now that the boundaries are harmonized.
for year in census["year"].unique():
    print(len(census[census["year"] == year]))

In [None]:
census[census.year==1990].explore('p_nonhisp_white_persons', scheme='quantiles', k=6, cmap='GnBu')

## Geodemographic Neighborhood Types

In [None]:
from geosnap.analyze import cluster, transition
from geosnap.visualize import plot_timeseries, plot_transition_matrix, animate_timeseries

In [None]:
import seaborn as sns

In [None]:
# Single-year slice (2018) used for the cross-sectional clustering examples.
acs18 = acs_teens[acs_teens["year"] == 2018]

In [None]:
# One quantile choropleth (6 classes) per input variable, titled with its name.
for col in columns:
    ax = acs18.plot(column=col, k=6, scheme="quantiles")
    ax.set_title(col)

In [None]:
# Classify the 2018 observations into 6 k-means clusters using the `columns` variables.
# The cells below read the labels from a 'kmeans' column of the result.
sf18_kmeans = cluster(acs18, columns=columns, method='kmeans', n_clusters=6)

In [None]:
sf18_kmeans.head()

In [None]:
from geosnap import visualize as gvz

In [None]:
gvz.plot_violins_by_cluster?

In [None]:
sns.set_style('whitegrid')

In [None]:
gvz.plot_violins_by_cluster(sf18_kmeans, columns, cluster_col='kmeans', violin_kwargs={'cmap':'Set1'}, nrows=3, ncols=2)

In [None]:
sf18_kmeans.explore('kmeans', categorical=True, tiles='Stamen Toner Lite', tooltip=columns)

In [None]:
# Same 2018 data and variables, clustered with Ward's method into 6 groups.
sf18_ward = cluster(
    acs18,
    method="ward",
    n_clusters=6,
    columns=columns,
)

In [None]:
sf18_ward.explore('ward', categorical=True, tiles='Stamen Toner Lite', tooltip=columns)

In [None]:
# Repeat the k-means classification of the 2018 data with 10 clusters instead of 6.
sf18_kmeans10 = cluster(acs18, columns=columns, method='kmeans', n_clusters=10)

In [None]:
gvz.plot_violins_by_cluster(sf18_kmeans10, columns, cluster_col='kmeans', nrows=3, ncols=2)

In [None]:
sf18_kmeans10.explore('kmeans', categorical=True, tiles='Stamen Toner Lite', tooltip=columns)

In [None]:
from geosnap.analyze import regionalize

In [None]:
# Regionalize the 2018 data into 10 regions with the 'ward_spatial' method;
# the labels are read from a 'ward_spatial' column in the cells below.
sf_regions = regionalize(acs18, n_clusters=10, method="ward_spatial", columns=columns)

In [None]:
sf_regions.explore('ward_spatial', categorical=True, tiles='Stamen Toner Lite', tooltip=columns)

In [None]:
gvz.plot_violins_by_cluster(sf_regions, columns, 'ward_spatial', nrows=3, ncols=2)

## Modeling Neighborhood Change

In [None]:
# Same 6-cluster k-means as before, but run on the full multi-year `acs_teens`
# table rather than the 2018 slice, so every year gets a cluster label.
sf_types_kmeans = cluster(acs_teens, columns=columns, method='kmeans', n_clusters=6)

In [None]:
plot_timeseries?

In [None]:
import contextily as ctx

In [None]:
# Categorical maps of the k-means labels in a 1 x 3 grid over a CartoDB Positron basemap.
plot_timeseries(
    sf_types_kmeans,
    column="kmeans",
    categorical=True,
    nrows=1,
    ncols=3,
    figsize=(15, 5),
    ctxmap=ctx.providers.CartoDB.Positron,
)

In [None]:
# Render the k-means labels over time as an animation written to
# "sf_kmeans.gif" (displayed in a later cell), at 3 frames per second.
animate_timeseries(
    sf_types_kmeans,
    filename="sf_kmeans.gif",
    column="kmeans",
    categorical=True,
    fps=3,
    figsize=(12, 8),
    ctxmap=ctx.providers.CartoDB.Positron,
)

In [None]:
from IPython.display import Image

In [None]:
Image('sf_kmeans.gif')

In [None]:
# Fit a transition model on the 'kmeans' labels of the multi-year clustering.
# NOTE(review): presumably a spatial Markov model (the cells below read `.p` and `.LR_p_value`) — confirm.
spmk = transition(sf_types_kmeans, cluster_col='kmeans')

In [None]:
type(spmk)

In [None]:
spmk.p

In [None]:
spmk.LR_p_value  # strong evidence of spatial dependence in transition dynamics

In [None]:
plot_transition_matrix(sf_types_kmeans, cluster_col='kmeans')

## Accessibility Isochrones

In [None]:
import pandana as pdna

The travel network used below is handled with [`pandana`](https://github.com/udst/pandana).

In [None]:
# Load a previously saved pandana network from an HDF5 file.
# NOTE(review): "41860" is presumably the CBSA code of the San Francisco metro area — confirm.
sfnet = pdna.Network.from_hdf5("data/41860.h5")

In [None]:
from geosnap.analyze import isochrone, isochrones

In [None]:
from geopandas.tools import geocode

In [None]:
import geopy
# Set geopy's default user agent for geocoder requests.
# NOTE(review): presumably needed because Nominatim expects an identifying user agent — confirm.
geopy.geocoders.options.default_user_agent = "geosnap-demo"

In [None]:
# Geocode the campus by name through the Nominatim provider.
# NOTE(review): the explicit user_agent="geosnap" differs from the "geosnap-demo"
# default set on geopy.geocoders.options — confirm which one is intended.
berkeley = geocode('university of california, berkeley', provider='Nominatim', user_agent="geosnap")

In [None]:
berkeley.explore()

In [None]:
isochrone?

In [None]:
# The isochrone function takes a network node id as its origin, so look up the
# node id(s) for the geocoded point's x/y coordinates.
# NOTE(review): presumably this returns the nearest node per point — confirm in the pandana docs.
sfnet.get_node_ids(berkeley.geometry.x, berkeley.geometry.y)

In [None]:
# Isochrones from the first node id found for the geocoded point, at four
# thresholds. NOTE(review): presumably network distances in meters — confirm.
iso_berkeley = isochrone(
    sfnet.get_node_ids(berkeley.geometry.x, berkeley.geometry.y)[0],
    sfnet,
    [500, 1000, 2000, 5000],
)

In [None]:
iso_berkeley.explore()

In [None]:
isochrones?

In [None]:
# Load the NCES 'schools' dataset from the data store.
schools = datasets.nces(dataset='schools')

In [None]:
schools.plot()

In [None]:
# Keep only the schools that intersect the union of the census geometries,
# after reprojecting the census data to the schools' CRS.
# NOTE(review): `unary_union` is deprecated in geopandas >= 1.0 in favour of
# `union_all()` — confirm against the installed version before switching.
sf_schools = schools[schools.intersects(census.to_crs(schools.crs).unary_union)]

In [None]:
sf_schools.plot()

In [None]:
# Build isochrones on the network for the first 20 schools only.
# NOTE(review): threshold=2000 is presumably in meters (hence `iso_2km`) — confirm.
iso_2km = isochrones(sf_schools.head(20), network=sfnet, threshold=2000)

In [None]:
# Draw the 20 schools first, then pass that map as `m` so the isochrones are
# drawn on the same interactive map.
m = sf_schools.head(20).explore()
iso_2km.explore(m=m)