# Park Crime Analysis

This notebook is a self-contained tool for loading data and setting up an interactive widget for exploring which park vicinities have had the most reported crimes since a given year.

In [1]:
%matplotlib inline
from IPython.display import display, IFrame

import os

import pandas as pd
import geopandas as gpd
import folium
from folium.plugins import HeatMap

from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets

## Helper Functions

### Data Preprocessor

This function's purpose is to remove fields we're not interested in and to reproject the geometry to a CRS we want to work in.

In [2]:
def data_preprocessing(gdf, cols, epsg_code):
    """Keep only the requested columns and reproject to the given EPSG code.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame to trim and reproject; it is not modified.
    cols : iterable of str
        Column names to keep (should include the geometry column so the
        result can be reprojected).
    epsg_code : int or str
        EPSG code of the target coordinate reference system.

    Returns
    -------
    GeoDataFrame
        A new frame containing only ``cols``, reprojected to ``epsg_code``.
    """
    # list(...) so that a tuple of names is treated as a column selection
    # rather than as a single (multi-level) key.
    trimmed = gdf[list(cols)]
    # Pass the code through the `epsg` keyword instead of the deprecated
    # {'init': 'epsg:N'} dictionary form.
    return trimmed.to_crs(epsg = epsg_code)

### Multiple Geometry Field Setup

This function sets up a GeoDataFrame so that one can more easily work with multiple geometry columns.

In [3]:
def setup_for_multiple_geometry_fields(gdf):
    """Return a copy of ``gdf`` whose geometry lives in a column named ``geom``.

    The values of the ``geometry`` column are copied into a new ``geom``
    column, ``geom`` is made the active geometry, and the original
    ``geometry`` column is dropped.

    Parameters
    ----------
    gdf : GeoDataFrame
        Frame with a ``geometry`` column. It is left untouched.

    Returns
    -------
    GeoDataFrame
        New frame with ``geom`` in place of ``geometry``.
    """
    # Work on a copy so the caller's frame does not silently gain a column.
    result = gdf.copy()
    result['geom'] = result['geometry']
    result = result.set_geometry('geom')
    return result.drop(columns = ['geometry'])

### Load result map as iFrame

There's something odd about the combination of Folium, JupyterLab, Windows, and/or Chrome: maps with many features (though few enough that Leaflet should be able to handle them) sometimes do not render at all. A comment in the [Folium issues](https://github.com/python-visualization/folium/issues/812#issuecomment-437483792) suggests exporting the result to an HTML file and loading it into an iFrame, which is what this helper does.

In [4]:
def embed_map(m, height, filename = 'index.html'):
    """Save a map to an HTML file and return an IFrame that loads that file.

    Parameters
    ----------
    m : object with a ``save(path)`` method (e.g. a folium map)
        The map to export.
    height : int
        Height of the IFrame in pixels.
    filename : str, optional
        Path of the HTML file to write. It is overwritten on every call;
        defaults to ``'index.html'``.

    Returns
    -------
    IPython.display.IFrame
        A full-width frame pointing at ``filename``.
    """
    m.save(filename)
    return IFrame(filename, width = '100%', height = '{}px'.format(height))

## Load Data

### Parks

In [5]:
# Parks layer, fetched as GeoJSON from the ArcGIS open data portal.
PARKS_GEOJSON_URL = 'https://opendata.arcgis.com/datasets/43b5d6bf9d6e400599498d052545d331_0.geojson'
parks_read_gdf = gpd.read_file(PARKS_GEOJSON_URL)

In [6]:
# Keep only the park attributes used below and reproject to EPSG:2264.
parks_gdf = data_preprocessing(parks_read_gdf, ['PARKID', 'NAME', 'DEVELOPED', 'INITIAL_AQUISITION_DATE', 'geometry'], 2264)
parks_gdf = setup_for_multiple_geometry_fields(parks_gdf)

# The acquisition date is a string such as '1950-01-01T00:00:00'; the year is
# the text before the first '-'. Missing dates (None or NaN) become -1.
# This is vectorized rather than a row-wise apply, and int64 is requested
# explicitly so the dtype does not depend on the platform.
acquisition_year_text = parks_gdf['INITIAL_AQUISITION_DATE'].str.split('-', n = 1).str[0]
parks_gdf['acquisition_year'] = pd.to_numeric(acquisition_year_text).fillna(-1).astype('int64')
parks_gdf.head()

Unnamed: 0,PARKID,NAME,DEVELOPED,INITIAL_AQUISITION_DATE,geom,acquisition_year
0,34,Windemere Beaver Dam,Developed,1950-01-01T00:00:00,(POLYGON ((2097510.461880576 750949.1384854497...,1950
1,35,Walnut Creek North,Developed,1955-01-01T00:00:00,(POLYGON ((2126075.468296753 733175.2605656629...,1955
2,1,Thornton Road Property,Undeveloped,1998-10-07T00:00:00,"POLYGON ((2137024.335360867 783502.8727651152,...",1998
3,2,Mary Belle Pate,Undeveloped,2009-08-31T00:00:00,"POLYGON ((2095040.46173832 728970.3213452235, ...",2009
4,3,Eliza Pool,Developed,1996-02-29T00:00:00,(POLYGON ((2106009.598623645 731174.2719958539...,1996


### Crime (NIBRS)

In [7]:
# Crime incidents are read from a local GeoJSON file instead of over the network.
# The remote dataset URL that was previously used here is kept for reference:
#   https://opendata.arcgis.com/datasets/24c0b37fa9bb4e16ba8bcaa7e806c615_0.geojson
# NOTE(review): presumably the local file is a download of that dataset - confirm.
CRIMES_GEOJSON_PATH = 'crimes_nibrs_20190402.geojson'
crimes_read_gdf = gpd.read_file(CRIMES_GEOJSON_PATH)

In [8]:
# Keep only incidents that have both a location and a reported date.
# .copy() makes the filtered frame independent of crimes_read_gdf, so the
# column assignment below does not trigger SettingWithCopyWarning.
has_location_and_date = crimes_read_gdf.geometry.notnull() & crimes_read_gdf['reported_date'].notnull()
crimes_gdf = crimes_read_gdf[has_location_and_date].copy()

# Identifier built as '<case_number>-<OBJECTID>'.
crimes_gdf['case_incident_id'] = crimes_gdf['case_number'].astype(str) + '-' + crimes_gdf['OBJECTID'].astype(str)

crimes_gdf = data_preprocessing(crimes_gdf, [
    'case_incident_id', 'case_number', 'crime_category', 'crime_code', 'crime_type',
    'reported_date', 'geometry'
], 2264)

# reported_date is a string such as '2018-02-19T22:28:00'; the year is the
# text before the first '-'. Nulls were removed above, so the cast is safe.
crimes_gdf['reported_year'] = crimes_gdf['reported_date'].str.split('-', n = 1).str[0].astype('int64')
crimes_gdf.head()

Unnamed: 0,case_incident_id,case_number,crime_category,crime_code,crime_type,reported_date,geometry,reported_year
18180,P18009197-30509,P18009197,FRAUD,56A,CRIMES AGAINST PROPERTY,2018-02-19T22:28:00,POINT (2109454.101178873 734711.4020341579),2018
23390,P18004894-25636,P18004894,ASSAULT,25E,CRIMES AGAINST PERSONS,2018-01-28T08:18:00,POINT (2100772.629749156 764919.7760446103),2018
24864,P18006235-37323,P18006235,ASSAULT,25E,CRIMES AGAINST PERSONS,2018-02-04T03:16:00,POINT (2133259.957267007 778806.5223847849),2018
33163,P18011080-46792,P18011080,VANDALISM,80B,,2018-03-01T17:29:00,POINT (2137413.081753709 762926.4664529575),2018
44539,P17008917-57362,P17008917,LARCENY FROM MV,35F,CRIMES AGAINST PROPERTY,2017-02-17T13:16:00,POINT (2117889.356894409 767643.8970530435),2017


## Crime counts near parks

In [26]:
def crimes_near_parks(d = 300, since = 2019, result_length = 4):
    """Rank parks by nearby reported crimes and map the top results.

    Reads the module-level ``parks_gdf`` and ``crimes_gdf`` frames (neither is
    modified). Each park geometry is buffered by ``d``; crimes with
    ``reported_year >= since`` that fall within a buffer are counted for that
    park. The ``result_length`` parks with the highest counts are shown in a
    table, and a folium map with the outlines of those parks and a heat map of
    the crimes near them is rendered through ``embed_map``.

    Parameters
    ----------
    d : number
        Buffer distance around each park, in the units of ``parks_gdf``'s
        coordinate reference system.
    since : int
        Earliest ``reported_year`` to include.
    result_length : int
        Number of parks to report.

    Returns
    -------
    tuple
        The results of the two ``display`` calls (table, map). The useful
        output is the displayed content; the return shape is kept as it was
        for backward compatibility.
    """
    count_col = 'Crimes since {}'.format(since)
    crime_cols = ['case_incident_id', 'case_number', 'crime_category', 'crime_code', 'crime_type', 'reported_date', 'reported_year']

    # Buffer the parks on a new frame so the shared parks_gdf does not pick up
    # a 'buffer' column as a side effect of every call.
    parks_buffer_gdf = parks_gdf.assign(buffer = parks_gdf['geom'].buffer(d)).set_geometry('buffer')

    crimes_since_gdf = crimes_gdf[crimes_gdf['reported_year'] >= since]

    # One row per (crime, park) pair where the crime lies within the park's
    # buffer. An inner join yields the same rows as a left join followed by
    # dropping the unmatched ones, and keeps PARKID at its original dtype.
    near_park_crimes = gpd.sjoin(crimes_since_gdf, parks_buffer_gdf, 'inner', 'within')
    near_park_crimes = near_park_crimes[crime_cols + ['PARKID', 'NAME', 'geometry']]
    near_park_crimes = near_park_crimes.rename(columns = {'PARKID': 'parkid', 'NAME': 'park_name'})

    # Get top-n parks.
    # The table is built from the index and values of value_counts() directly,
    # so it does not depend on how a given pandas version names the result.
    top_counts = near_park_crimes['parkid'].value_counts().head(result_length)
    near_park_crimes_counts = pd.DataFrame({'parkid': top_counts.index, count_col: top_counts.values})
    near_park_crimes_counts = pd.merge(near_park_crimes_counts, parks_gdf[['PARKID', 'NAME']], left_on = 'parkid', right_on = 'PARKID', how = 'left')
    near_park_crimes_counts = near_park_crimes_counts[['PARKID', 'NAME', count_col]]
    near_park_crimes_counts = near_park_crimes_counts.rename(columns = {'NAME': 'Park'})
    top_park_ids = near_park_crimes_counts['PARKID']

    # Get crimes in top-n parks.
    # near_park_crimes already records which park each crime is near, so a
    # filter on parkid replaces a second spatial join.
    top_n_park_crimes = near_park_crimes[near_park_crimes['parkid'].isin(top_park_ids)]

    # A crime near several top-n parks appears once per park; keep a single
    # row per incident so it is not over-weighted in the heat map.
    top_n_park_unique_crimes = top_n_park_crimes.drop_duplicates(subset = 'case_incident_id')
    top_n_park_unique_crimes = top_n_park_unique_crimes[['case_incident_id', 'case_number', 'reported_date', 'reported_year', 'geometry']]
    top_n_park_unique_crimes = gpd.GeoDataFrame(top_n_park_unique_crimes, geometry = 'geometry', crs = crimes_gdf.crs)
    top_n_park_unique_crimes = top_n_park_unique_crimes.to_crs(epsg = 4326)

    # Park outlines (the original, unbuffered geometry) for the map.
    top_n_parks = parks_gdf[parks_gdf['PARKID'].isin(top_park_ids)]
    # Drop a 'buffer' column if one is present (e.g. left in parks_gdf by an
    # earlier run in the same kernel) so only one geometry column remains.
    top_n_parks = top_n_parks.drop(columns = ['buffer'], errors = 'ignore')
    top_n_parks = top_n_parks.rename(columns = {'geom': 'geometry'})
    top_n_parks = top_n_parks.set_geometry('geometry')
    top_n_parks = top_n_parks.to_crs(epsg = 4326)

    # Map near park crimes
    m = folium.Map(location = [35.779591, -78.638176], zoom_start = 13, tiles = 'CartoDB dark_matter')

    park_style = {
        'fillOpacity': 0,
        'weight': 1,
        'opacity': 1,
        'color': 'green'
    }
    if not top_n_parks.empty:
        folium.GeoJson(top_n_parks, style_function = lambda x: park_style).add_to(m)

    # Heat map input is [lat, lon] per crime; geometries are expected to be points.
    top_n_park_unique_crimes_locations = [[point.y, point.x] for point in top_n_park_unique_crimes.geometry]

    # With no matching crimes there is nothing to draw or zoom to; the map
    # then stays at its default location and zoom.
    if top_n_park_unique_crimes_locations:
        HeatMap(top_n_park_unique_crimes_locations, radius = 8, blur = 15, min_opacity = 0.1, gradient = {0.2: '#FFC107', 0.8: '#FBC02D', 0.9: '#FFF176', 1: '#ffffff'}).add_to(m)

        # Fit the view to the extent of the plotted crimes:
        # [[south, west], [north, east]].
        lats = [location[0] for location in top_n_park_unique_crimes_locations]
        lons = [location[1] for location in top_n_park_unique_crimes_locations]
        m.fit_bounds([[min(lats), min(lons)], [max(lats), max(lons)]])

    return display(near_park_crimes_counts[['Park', count_col]]), display(embed_map(m, 600))
    
# Shared style for the sliders: the description width is set to 'initial'.
wide_description_style = {'description_width': 'initial'}

# Park buffer distance passed to crimes_near_parks as `d`.
buffer_slider = widgets.IntSlider(
    value = 300,
    min = 0,
    max = 2640,
    step = 100,
    description = 'Park vicinity distance (ft)',
    style = wide_description_style,
)

# Earliest reported year passed to crimes_near_parks as `since`.
crimes_since_slider = widgets.IntSlider(
    value = 2019,
    min = 2014,
    max = 2019,
    step = 1,
    description = 'Crimes since (year)',
    style = wide_description_style,
)

# Number of parks to list, passed to crimes_near_parks as `result_length`.
result_length_input = widgets.BoundedIntText(
    value = 3,
    min = 1,
    max = 50,
    step = 1,
    description = 'Top-n results',
    disabled = False,
)

# interact_manual runs crimes_near_parks with the current widget values.
output = widgets.interact_manual(
    crimes_near_parks,
    d = buffer_slider,
    since = crimes_since_slider,
    result_length = result_length_input,
)

interactive(children=(IntSlider(value=300, description='Park vicinity distance (ft)', max=2640, step=100, styl…