In [1]:
import pandas as pd

from shapely.geometry import Point

import geopandas
from geopandas import GeoSeries
from geopandas.tools import sjoin

import folium

import sys

if not sys.warnoptions:
    import warnings
    warnings.simplefilter("ignore")

In [2]:
# Load the crime-data CSV straight from its URL, then
#   1. discard rows missing either coordinate (they cannot be
#      located on the map), and
#   2. keep one row per case number -- the last occurrence wins.

crime_data = (
    pd.read_csv('https://public.tableau.com/views/PPBOpenDataDownloads/CrimeData-2018.csv?:showVizHome=no')
      .dropna(subset = ['OpenDataLat', 'OpenDataLon'])
      .drop_duplicates(subset = 'CaseNumber', keep = 'last')
)

In [3]:
# Load the geocoded MAX station list from the previously exported,
# headerless csv. Fully duplicated rows are removed, and the
# positional columns (0, 1, 2) are given descriptive names because
# the geocoding step had to work on headerless files.
#
# Note: the row labels are kept as read, so they can contain gaps
# wherever a duplicate row was removed.

stations = (
    pd.read_csv('stations_geocoded.csv', header = None)
      .drop_duplicates()
      .rename(columns = {0 : 'name',
                         1 : 'latitude',
                         2 : 'longitude'})
)

In [4]:
# Build one shapely Point per station and one per crime record.
#
# Point takes (x, y), i.e. (longitude, latitude). Passing latitude
# first would still give a self-consistent point-in-polygon result
# (both series would be mirrored the same way), but the geometries
# written to disk later would carry swapped coordinates. Longitude is
# therefore passed as x and latitude as y for both series.

station_locations = GeoSeries([Point(lon, lat) for lat, lon in zip(stations.latitude, stations.longitude)])
crime_locations = GeoSeries([Point(lon, lat) for lat, lon in zip(crime_data.OpenDataLat, crime_data.OpenDataLon)])

In [5]:
# Surround every station with a buffer polygon whose radius is
# .0014 degrees (~154 meters, or 500 feet).
# NOTE(review): the radius is applied in degree units to raw lat/long
# points, so the footprint on the ground is only approximately a
# circle of that size -- confirm this is acceptable.

buffers = station_locations.buffer(distance = .0014)

# Persist both layers to disk: the station buffers as geoJSON and
# the crime points as a shapefile

buffers.to_file('polys.geojson', driver = 'GeoJSON')
crime_locations.to_file('points.shp')

In [6]:
# Load the two exported layers back in as geodataframes; these are
# the inputs to the "point-in-polygon" calculation

polys, points = (
    geopandas.read_file(layer_path)
    for layer_path in ('polys.geojson', 'points.shp')
)

In [7]:
# "Point-in-polygon" step: keep every crime point that lies within
# one of the station buffers. The inner join discards points that
# fall outside all buffers.
# NOTE(review): newer geopandas releases appear to name the `op`
# argument `predicate` -- confirm against the installed version.

pointsInPolys = sjoin(left_df = points,
                      right_df = polys,
                      how = 'inner',
                      op = 'within')

In [8]:
# Group the matched crime points by the buffer (station) they fell
# into, ready for per-station counting

grouped = pointsInPolys.groupby(by = 'index_right')

In [9]:
# Build a two-column dataframe (station_id, count) holding the number
# of matched crime points per station buffer.
#
# size() counts the rows in each group directly, so the result does
# not depend on an incidental attribute column (`id_right`) being
# present in the joined frame; whether that column exists depends on
# how the intermediate files were written and read back.
# reset_index(name=...) turns the group key into a regular column and
# names the counts column in one step.

crime_count = (
    grouped.size()
           .reset_index(name = 'count')
           .rename(columns = {'index_right' : 'station_id'})
)

# Cast station_id as int instead of float so that it can be joined
# to the integer station ids of the station dataframe

crime_count['station_id'] = crime_count['station_id'].astype(int)

In [10]:
# Give every station an explicit `station_id` column equal to its
# row position.
#
# The ids on the crime-count side come from the spatial join and
# refer to the position of each buffer polygon, which follows the
# row order of `stations`. The existing row labels cannot be used
# for this: duplicates were dropped earlier without renumbering, so
# the labels may contain gaps and would then pair counts with the
# wrong station (or lose stations) in the merge. The index is
# therefore renumbered 0..n-1 before it is exposed as `station_id`.
#
# Any `station_id` column left over from a previous run of this cell
# is discarded first, so re-running the cell is harmless.

stations = (
    stations.drop(columns = 'station_id', errors = 'ignore')
            .reset_index(drop = True)
)
stations.insert(0, 'station_id', stations.index)

In [11]:
# Attach station name and coordinates to each crime count.
# This is an inner join on station_id, so only stations that have a
# row in crime_count appear in the result.

crime_count_by_station_id = crime_count.merge(stations, on = 'station_id')

In [12]:
# Write the joined counts to csv for use in development of the web app

crime_count_by_station_id.to_csv(path_or_buf = 'crime_count.csv')

In [13]:
# Set basemap

crime_map = folium.Map(location = [45.54, -122.6750],
                       zoom_start = 11,
                       tiles = 'stamentoner')

# Add one circle per station. Each row is fetched once per iteration
# instead of being looked up again for every attribute.
# NOTE(review): the tooltip text says "2019 (YTD)" while the data URL
# read earlier in this notebook names CrimeData-2018 -- confirm which
# year is intended.

for _, station in crime_count_by_station_id.iterrows():

    crime_total = station['count']

    folium.Circle(
        location = [station['latitude'], station['longitude']],
        tooltip = "Crimes within ~500 ft in 2019 (YTD): " + str(crime_total),
        popup = station['name'],
        # The radius is a length in meters and must be numeric; float()
        # also converts the value to a plain Python number for
        # serialisation. The circle size scales with the crime count.
        radius = float(crime_total),
        color = 'crimson',
        fill = True,
        fill_color = 'crimson'
    ).add_to(crime_map)

# Display the finished map as the cell output

crime_map