# Seattle Collisions - Deployment

This final chapter covers deployment and the data product.  The end goal is to host this notebook in nbviewer as a dashboard.

In [8]:
from fbprophet.serialize import model_to_json, model_from_json
from fbprophet.plot import plot_cross_validation_metric 
from fbprophet.diagnostics import performance_metrics
from fbprophet.diagnostics import cross_validation

from sklearn.neighbors import BallTree
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

import calendar
import json
from math import sqrt
import pandas as pd
import types
import itertools
import numpy as np
from datetime import datetime, timedelta, timezone 
import time

import os
import folium
import folium.plugins

In [9]:
# Weekday ordinals, Monday=0 ... Sunday=6 (the same numbering calendar.day_name uses).
# Every name is bound to a list so it can be handed straight to a `days` filter.
monday, tuesday, wednesday, thursday, friday, saturday, sunday = (
    [ordinal] for ordinal in range(7)
)
week = list(range(7))
workweek = week[:5]
weekend = week[5:]

# Marker outline colour used for each weekday's layer.
(monday_color, tuesday_color, wednesday_color, thursday_color,
 friday_color, saturday_color, sunday_color) = (
    'red', 'yellow', 'orange', 'blue', 'green', 'brown', 'purple',
)

In [2]:
def nearest_neighbors(df_source, nearest_in_miles = 0.01, quantile = .9, minimum_neighbors = 0):
    """Keep only the rows of ``df_source`` that lie in its densest neighbourhoods.

    A haversine ``BallTree`` is built over the ``Y``/``X`` columns and, for every
    row, the number of rows within ``nearest_in_miles`` is counted.  Rows whose
    count reaches the ``quantile`` of all counts are returned.

    Parameters
    ----------
    df_source : pandas.DataFrame
        Needs ``Y`` and ``X`` columns.  They are passed through ``np.radians``,
        so they are presumably latitude/longitude in degrees (confirm against the data).
    nearest_in_miles : float
        Search radius around each row.
    quantile : float
        Quantile of the neighbour counts used as the cut-off.
    minimum_neighbors : int
        The cut-off must exceed this value for rows to be returned, unless some
        row has more than 10 neighbours.

    Returns
    -------
    pandas.DataFrame
        The selected rows of ``df_source``; an empty ``DataFrame`` when the
        input is empty or the density conditions are not met.
    """
    # BallTree cannot be built from zero samples; an empty input has no hot spots.
    if len(df_source) == 0:
        return pd.DataFrame()

    coordinates = df_source.loc[:, ['Y', 'X']].values
    # Dividing by an Earth radius of 3959.87433 miles turns the distance into
    # radians, the unit the haversine metric works in.
    radius = nearest_in_miles / 3959.87433
    bt = BallTree(np.radians(coordinates), metric='haversine')
    neighbors = bt.query_radius(np.radians(coordinates), r=radius, count_only=True, return_distance=False)

    # Computed once; 'nearest' makes the cut-off an actually observed count.
    threshold = np.quantile(neighbors, quantile, interpolation='nearest')
    if threshold > minimum_neighbors or np.max(neighbors) > 10:
        return df_source.iloc[np.flatnonzero(neighbors >= threshold)]

    return pd.DataFrame()

In [3]:
#nearest_neighbors_forecast(20, dft)
def nearest_neighbors_forecast(forecast_size, df_source, nearest_in_miles = 0.01, quantile = .9, minimum_neighbors = 0, verbose = False):
    """Pick up to ``forecast_size`` rows of ``df_source``, densest neighbourhood first.

    A haversine ``BallTree`` is built once over the ``Y``/``X`` columns of the
    full input.  On each pass the remaining row with the most neighbours in that
    tree is picked, and every row sharing its ``geohash_6`` value is then
    dropped so the next pick comes from a different cell.

    Parameters
    ----------
    forecast_size : number
        How many rows to pick.  A NaN size yields no rows because the loop
        condition is never true.
    df_source : pandas.DataFrame
        Needs ``Y``, ``X`` and ``geohash_6`` columns.
    nearest_in_miles : float
        Search radius forwarded to ``nearest_neighbors_max_index``.
    quantile, minimum_neighbors
        Accepted for signature parity with ``nearest_neighbors``; not used here.
    verbose : bool
        Print progress details for every pick.

    Returns
    -------
    pandas.DataFrame
        The picked rows with a fresh 0..n-1 index.  Fewer than ``forecast_size``
        rows come back when the candidates run out.  On any error the message
        is printed and an empty ``DataFrame`` is returned (best effort).
    """
    try:
        # Nothing to rank, and BallTree cannot be built from zero samples.
        if len(df_source) == 0:
            return pd.DataFrame()

        coordinates = df_source.loc[:, ['Y', 'X']].values
        bt = BallTree(np.radians(coordinates), metric='haversine')

        # Collect rows in a list and build the frame once: repeated
        # DataFrame.append is quadratic and no longer exists in pandas 2.
        picked = []
        n_count = 0
        # Stop early when every candidate has been consumed instead of failing
        # on an empty query and discarding the rows already picked.
        while n_count < forecast_size and len(df_source) > 0:
            max_index = nearest_neighbors_max_index(bt, df_source, nearest_in_miles=nearest_in_miles)
            hotspot = df_source.iloc[max_index]
            picked.append(hotspot)
            geohash = hotspot['geohash_6']
            df_source = df_source[df_source.geohash_6 != geohash]
            if verbose:
                print('Geohash -', geohash)
                print('Max Index -', max_index)
                print('Counter -', n_count)
                print('DF Len -', len(df_source))
            n_count += 1
        return pd.DataFrame(picked).reset_index(drop=True)
    except Exception as e:
        print(e)
        return pd.DataFrame()

In [4]:
def nearest_neighbors_max_index(bt, df_source, nearest_in_miles = 0.01, quantile = .9, minimum_neighbors = 0):
    """Return the position of the ``df_source`` row with the most neighbours in ``bt``.

    ``bt`` is queried with the ``Y``/``X`` columns of ``df_source`` (converted
    with ``np.radians``) using a radius of ``nearest_in_miles`` divided by
    3959.87433.  The result is the ``np.argmax`` of the per-row counts, i.e. a
    positional index into ``df_source``.  ``quantile`` and
    ``minimum_neighbors`` are accepted but not used.
    """
    search_radius = nearest_in_miles / 3959.87433
    points = np.radians(df_source.loc[:, ['Y', 'X']].values)
    counts = bt.query_radius(points, r=search_radius, count_only=True, return_distance=False)
    return np.argmax(counts)

In [5]:
# Helpers for Prophet
def get_day_max_temperature(dfw, target_date):
    """Return the highest ``TEMPERATURE`` among the rows of ``dfw`` whose ``DATE`` equals ``target_date``."""
    on_target_day = dfw.DATE == target_date
    return dfw.loc[on_target_day, 'TEMPERATURE'].max()

def get_day_min_temperature(dfw, target_date):
    """Return the lowest ``TEMPERATURE`` among the rows of ``dfw`` whose ``DATE`` equals ``target_date``."""
    on_target_day = dfw.DATE == target_date
    return dfw.loc[on_target_day, 'TEMPERATURE'].min()

def get_day_total_precipitation(dfw, target_date):
    """Return the sum of ``PRECIPITATION`` over the rows of ``dfw`` whose ``DATE`` equals ``target_date``."""
    on_target_day = dfw.DATE == target_date
    return dfw.loc[on_target_day, 'PRECIPITATION'].sum()

def get_day_solar_azimuth(dfw, target_date):
    """Return the largest ``SOLARAZIMUTH`` among the rows of ``dfw`` whose ``DATE`` equals ``target_date``."""
    on_target_day = dfw.DATE == target_date
    return dfw.loc[on_target_day, 'SOLARAZIMUTH'].max()

def get_daily_forecast_summary_string(target_day=[0]):
    """Build a layer label such as ``'Monday (20)'`` for a weekday.

    Parameters
    ----------
    target_day : list of int
        One-element list holding the weekday ordinal (0 = Monday, as in
        ``calendar.day_name``).

    Returns
    -------
    str
        The day name followed by the forecast in parentheses, or the bare day
        name when no forecast is available.
    """
    day_name = calendar.day_name[target_day[0]]
    try:
        prediction = get_daily_forecast(target_day)
    except Exception:
        print('Forecast not available')
        return day_name
    # get_daily_forecast reports a missing forecast as NaN rather than raising
    # (and prints its own notice), so handle that case here instead of
    # producing a label like 'Monday (nan)'.
    if pd.isna(prediction):
        return day_name
    return day_name + ' (' + str(prediction) + ')'

def get_daily_forecast(target_day=[0]):
    """Return the predicted value (``yhat``) for a weekday, rounded to an int.

    Reads the module-level ``forecast`` frame.  When it has a ``ds`` column the
    first row whose date falls on the requested weekday is used, so the value
    lines up with the actual calendar day instead of the row position.  Without
    a ``ds`` column the row labelled with the ordinal is used.

    Parameters
    ----------
    target_day : list of int
        One-element list holding the weekday ordinal (0 = Monday).

    Returns
    -------
    int or float
        The rounded prediction, or ``np.nan`` (after printing a notice) when
        the forecast is missing, has no row for that weekday, or is not a number.
    """
    try:
        weekday = target_day[0]
        if 'ds' in forecast.columns:
            on_weekday = pd.to_datetime(forecast['ds']).dt.dayofweek == weekday
            # .iloc[0] raises IndexError when no forecast row falls on that weekday.
            prediction = forecast.loc[on_weekday, 'yhat'].iloc[0]
        else:
            prediction = forecast['yhat'].loc[weekday]
        # Round first, then convert: int() alone would truncate 19.6 to 19.
        return int(round(float(prediction)))
    except Exception:
        print('Forecast not available')
        return np.nan

In [6]:
def plot_forecast_map(forecast_size, marker_cluster, days = [0,1,2,3,4,5,6], reduce = True,
                      start_date = '2015-12-31', stop_date = '2020-01-01',
                      with_rain = False, cluster_color = 'red', regional=True, quantile=0.9):
    """Add ``forecast_size`` hot-spot markers for the given weekdays to ``marker_cluster``.

    The module-level ``df`` is filtered to rows strictly between
    ``start_date`` and ``stop_date`` with ``HITPARKEDCAR == 0``, a ``WEEKDAY``
    in ``days``, and ``PRECIPITATION`` either equal to 0 (``with_rain`` False)
    or above 0.  ``nearest_neighbors_forecast`` then picks the rows to draw,
    and one ``folium.CircleMarker`` per picked row is added to
    ``marker_cluster`` with ``cluster_color`` as its outline colour.

    ``reduce``, ``regional`` and ``quantile`` are accepted but not used here.
    """
    rain_clause = "PRECIPITATION == 0" if with_rain == False else "PRECIPITATION > 0"
    query_filter = ("INCDTTM > @start_date & INCDTTM < @stop_date & HITPARKEDCAR == 0 & "
                    + rain_clause + " & WEEKDAY in @days")

    # df.query has to be called from this frame: the @names refer to this
    # function's own arguments.
    hotspots = nearest_neighbors_forecast(forecast_size, df.query(query_filter))
    for _, hotspot in hotspots.iterrows():
        marker = folium.CircleMarker(location=[hotspot['Y'], hotspot['X']],
                                     radius=10,
                                     color=cluster_color,
                                     fill_color='orange',
                                     fill_opacity=0.5,
                                     fill=True)
        marker.add_to(marker_cluster)

In [10]:
def display_week_left_forecast_right_actual(m, forecast_day = tuesday, rain_flag=False):
    """Fill a dual map with one layer per weekday: forecast on ``m.m1``, history on ``m.m2``.

    On ``m.m1`` each weekday layer is named by
    ``get_daily_forecast_summary_string`` and populated by
    ``plot_forecast_map`` with ``get_daily_forecast(day)`` markers.  On
    ``m.m2`` each layer carries the plain day name and is populated by
    ``plot_map``.  Only the Monday layers start visible.  A layer control is
    added to ``m`` at the end.

    ``rain_flag`` is forwarded as ``with_rain``; ``forecast_day`` is accepted
    but not used.
    """
    schedule = [
        ('Monday', monday, monday_color),
        ('Tuesday', tuesday, tuesday_color),
        ('Wednesday', wednesday, wednesday_color),
        ('Thursday', thursday, thursday_color),
        ('Friday', friday, friday_color),
        ('Saturday', saturday, saturday_color),
        ('Sunday', sunday, sunday_color),
    ]

    # Layer groups first (all of m1, then all of m2), so they are registered on
    # the maps in weekday order before any marker is computed.
    forecast_groups = []
    for position, (_, day, _) in enumerate(schedule):
        group = folium.FeatureGroup(name=get_daily_forecast_summary_string(day), show=(position == 0))
        forecast_groups.append(group.add_to(m.m1))

    actual_groups = []
    for position, (label, _, _) in enumerate(schedule):
        group = folium.FeatureGroup(name=label, show=(position == 0))
        actual_groups.append(group.add_to(m.m2))

    # Forecast side: build every cluster, then attach them to their groups.
    forecast_clusters = []
    for label, day, color in schedule:
        cluster = folium.plugins.MarkerCluster(name=label)
        plot_forecast_map(get_daily_forecast(day), cluster, day, with_rain=rain_flag, cluster_color=color)
        forecast_clusters.append(cluster)
    for cluster, group in zip(forecast_clusters, forecast_groups):
        cluster.add_to(group)

    # Historical side, same two steps.
    actual_clusters = []
    for label, day, color in schedule:
        cluster = folium.plugins.MarkerCluster(name=label)
        plot_map(cluster, day, with_rain=rain_flag, cluster_color=color)
        actual_clusters.append(cluster)
    for cluster, group in zip(actual_clusters, actual_groups):
        cluster.add_to(group)

    folium.LayerControl().add_to(m)

In [11]:
def _add_circle_markers(rows, marker_cluster, cluster_color):
    """Add one folium CircleMarker per row of ``rows`` (placed at its Y/X) to ``marker_cluster``."""
    for _, row in rows.iterrows():
        folium.CircleMarker(location=[row['Y'], row['X']],
                            radius=10,
                            color=cluster_color,
                            fill_color='orange',
                            fill_opacity=0.5,
                            fill=True).add_to(marker_cluster)


def plot_map(marker_cluster, days = [0,1,2,3,4,5,6], reduce = True, start_date = '2015-12-31', stop_date = '2020-01-01',
             with_rain = False, cluster_color = 'red', regional=True, quantile=0.9):
    """Add markers for historical collisions on the given weekdays to ``marker_cluster``.

    The module-level ``df`` is filtered to rows strictly between
    ``start_date`` and ``stop_date`` with ``HITPARKEDCAR == 0``, a ``WEEKDAY``
    in ``days``, and ``PRECIPITATION`` above 0 (``with_rain`` true) or equal
    to 0.

    Parameters
    ----------
    marker_cluster
        Object the ``folium.CircleMarker`` instances are added to.
    days : list of int
        Weekday ordinals to keep.
    reduce : bool
        When true, thin the rows with ``nearest_neighbors`` before drawing.
    start_date, stop_date : str
        Exclusive bounds compared against ``INCDTTM``.
    with_rain : bool
        Selects wet (true) or dry (false) rows.
    cluster_color : str
        Outline colour of the markers.
    regional : bool
        When true, rows are grouped by ``MCCP`` and each group is thinned
        separately; otherwise the selection is treated as a whole.
    quantile : float
        Forwarded to ``nearest_neighbors``.
    """
    if with_rain:
        query_filter = "INCDTTM > @start_date & INCDTTM < @stop_date & HITPARKEDCAR == 0 & PRECIPITATION > 0 & WEEKDAY in @days"
    else:
        query_filter = "INCDTTM > @start_date & INCDTTM < @stop_date & HITPARKEDCAR == 0 & PRECIPITATION == 0 & WEEKDAY in @days"

    # df.query has to be called from this frame: the @names refer to this
    # function's own arguments.
    selected = df.query(query_filter)

    if regional:
        # groupby never yields an empty group, so each group is safe to thin.
        for _, group in selected.groupby('MCCP'):
            if reduce:
                group = nearest_neighbors(group, quantile=quantile)
            _add_circle_markers(group, marker_cluster, cluster_color)
    else:
        # A filter that matches nothing must simply draw nothing; the density
        # search cannot run on zero rows.
        if reduce and not selected.empty:
            selected = nearest_neighbors(selected, quantile=quantile)
        _add_circle_markers(selected, marker_cluster, cluster_color)

In [12]:
def display_week_view(m, rain_flag=False):
    """Show the whole week on both halves of the dual map ``m``.

    ``m.m1`` gets a 'Week 1' layer drawn by ``plot_map`` with
    ``regional=False``; ``m.m2`` gets a 'Week 2' layer drawn with the default
    regional grouping.  Both use ``quantile=.99``.  ``rain_flag`` is forwarded
    as ``with_rain``.  A layer control is added to ``m`` at the end.
    """
    panels = [
        (m.m1, 'Week 1', monday_color, {'regional': False}),
        (m.m2, 'Week 2', tuesday_color, {}),
    ]
    # Register both layer groups before any markers are computed.
    groups = [folium.FeatureGroup(name=label, show=True).add_to(side)
              for side, label, _, _ in panels]

    for group, (_, label, color, extra) in zip(groups, panels):
        cluster = folium.plugins.MarkerCluster(name=label)
        plot_map(cluster, week, with_rain=rain_flag, cluster_color=color, quantile=.99, **extra)
        cluster.add_to(group)

    folium.LayerControl().add_to(m)

In [13]:
def display_workweek_weekend_view(m, rain_flag=False):
    """Show workweek collisions on ``m.m1`` and weekend collisions on ``m.m2``.

    Each side gets one visible layer drawn by ``plot_map``; ``rain_flag`` is
    forwarded as ``with_rain``.  A layer control is added to ``m`` at the end.
    """
    panels = [
        (m.m1, 'Workweek', workweek, monday_color),
        (m.m2, 'Weekend', weekend, tuesday_color),
    ]
    # Register both layer groups before any markers are computed.
    groups = [folium.FeatureGroup(name=label, show=True).add_to(side)
              for side, label, _, _ in panels]

    for group, (_, label, days, color) in zip(groups, panels):
        cluster = folium.plugins.MarkerCluster(name=label)
        plot_map(cluster, days, with_rain=rain_flag, cluster_color=color)
        cluster.add_to(group)

    folium.LayerControl().add_to(m)

In [14]:
def display_workweek_view(m, rain_flag=False):
    """Show workweek collisions on both halves (``m.m1`` and ``m.m2``) of the dual map.

    Each side gets one visible 'Workweek' layer drawn by ``plot_map`` with the
    same arguments; ``rain_flag`` is forwarded as ``with_rain``.  A layer
    control is added to ``m`` at the end.
    """
    sides = (m.m1, m.m2)
    # Register both layer groups before any markers are computed.
    groups = [folium.FeatureGroup(name='Workweek', show=True).add_to(side) for side in sides]

    for group in groups:
        cluster = folium.plugins.MarkerCluster(name="Workweek")
        plot_map(cluster, workweek, with_rain=rain_flag, cluster_color=monday_color)
        cluster.add_to(group)

    folium.LayerControl().add_to(m)

In [15]:
def display_weekend_view(m, rain_flag=False):
    """Show weekend collisions on both halves (``m.m1`` and ``m.m2``) of the dual map.

    Each side gets one visible 'Weekend' layer drawn by ``plot_map`` with the
    same arguments; ``rain_flag`` is forwarded as ``with_rain``.  A layer
    control is added to ``m`` at the end.
    """
    sides = (m.m1, m.m2)
    # Register both layer groups before any markers are computed.
    groups = [folium.FeatureGroup(name='Weekend', show=True).add_to(side) for side in sides]

    for group in groups:
        cluster = folium.plugins.MarkerCluster(name="Weekend")
        plot_map(cluster, weekend, with_rain=rain_flag, cluster_color=monday_color)
        cluster.add_to(group)

    folium.LayerControl().add_to(m)

In [16]:
def display_daily_view(m, rain_flag=False):
    """Add one historical-collision layer per weekday to both halves of the dual map ``m``.

    ``m.m1`` and ``m.m2`` each receive seven layers named after the weekdays,
    every one drawn by ``plot_map`` in that day's colour.  Only the Monday
    layers start visible.  ``rain_flag`` is forwarded as ``with_rain``.  A
    layer control is added to ``m`` at the end.
    """
    schedule = [
        ('Monday', monday, monday_color),
        ('Tuesday', tuesday, tuesday_color),
        ('Wednesday', wednesday, wednesday_color),
        ('Thursday', thursday, thursday_color),
        ('Friday', friday, friday_color),
        ('Saturday', saturday, saturday_color),
        ('Sunday', sunday, sunday_color),
    ]

    # Layer groups first (all of m1, then all of m2), so they are registered on
    # the maps in weekday order before any marker is computed.
    groups_by_side = []
    for side in (m.m1, m.m2):
        side_groups = []
        for position, (label, _, _) in enumerate(schedule):
            group = folium.FeatureGroup(name=label, show=(position == 0))
            side_groups.append(group.add_to(side))
        groups_by_side.append(side_groups)

    # Per side: build every day's cluster, then attach the clusters to their groups.
    for side_groups in groups_by_side:
        clusters = []
        for label, day, color in schedule:
            cluster = folium.plugins.MarkerCluster(name=label)
            plot_map(cluster, day, with_rain=rain_flag, cluster_color=color)
            clusters.append(cluster)
        for cluster, group in zip(clusters, side_groups):
            cluster.add_to(group)

    folium.LayerControl().add_to(m)

# Main

In [17]:
# Collision records; the first CSV column becomes the index.
df = pd.read_csv('Seattle_Collisions_Final.csv', index_col=0, parse_dates=True, low_memory=False)
# INCDTTM is compared against date strings in the map filters, so make it a real datetime.
df['INCDTTM'] = pd.to_datetime(df.INCDTTM)

# Weather observations used to fill the forecast regressors.
df_weather = pd.read_csv('Seattle_Weather_Daily.csv', index_col=0, parse_dates=True, low_memory=False)
df_weather.info()

<class 'pandas.core.frame.DataFrame'>
Index: 143567 entries, 2004-01-01-0 to 2020-06-01-0
Data columns (total 14 columns):
 #   Column             Non-Null Count   Dtype  
---  ------             --------------   -----  
 0   TEMPERATURE        143567 non-null  float64
 1   WINDCHILL          41012 non-null   float64
 2   PRECIPITATION      143567 non-null  float64
 3   SNOWDEPTH          2 non-null       float64
 4   VISIBILITY         143456 non-null  float64
 5   CLOUDCOVER         143567 non-null  float64
 6   WEATHERCONDITIONS  143567 non-null  object 
 7   SUNSET             143567 non-null  object 
 8   SUNRISE            143567 non-null  object 
 9   MOONPHASE          143567 non-null  float64
 10  TIMESTAMP          143567 non-null  object 
 11  HOUR               143567 non-null  int64  
 12  DATE               143567 non-null  object 
 13  SOLARAZIMUTH       143567 non-null  int64  
dtypes: float64(7), int64(2), object(5)
memory usage: 16.4+ MB


In [18]:
# Restore the Prophet model that the training notebook serialized to JSON.
with open('seattle_collision_model.json', 'r') as fin:
    m = model_from_json(json.load(fin))

# The model is hard-wired to a seven-day horizon for now; switch
# include_history to True to also dump the dates the model already holds.
future = m.make_future_dataframe(periods=7, freq='D', include_history=False)

# Fill the regressor columns from df_weather, matched on the 'YYYY-MM-DD' day string.
# Goal: make this dynamic and pull the weather from a live source instead.
future['rain'] = future['ds'].dt.strftime('%Y-%m-%d').map(
    lambda day: get_day_total_precipitation(df_weather, day))
future['temp'] = future['ds'].dt.strftime('%Y-%m-%d').map(
    lambda day: get_day_min_temperature(df_weather, day))
future['solar_azimuth'] = future['ds'].dt.strftime('%Y-%m-%d').map(
    lambda day: get_day_solar_azimuth(df_weather, day))

forecast = m.predict(future)
forecast

INFO:numexpr.utils:NumExpr defaulting to 1 threads.


Unnamed: 0,ds,trend,yhat_lower,yhat_upper,trend_lower,trend_upper,additive_terms,additive_terms_lower,additive_terms_upper,daily,...,temp,temp_lower,temp_upper,weekly,weekly_lower,weekly_upper,yearly,yearly_lower,yearly_upper,yhat
0,2020-01-01,3.827799,11.143914,27.107222,3.827799,3.827799,-0.16911,-0.16911,-0.16911,4.828173,...,-0.16911,-0.16911,-0.16911,0.30171,0.30171,0.30171,-0.848952,-0.848952,-0.848952,19.633146
1,2020-01-02,3.827595,12.890236,28.702004,3.827595,3.827595,-0.318158,-0.318158,-0.318158,4.828173,...,-0.318158,-0.318158,-0.318158,0.624014,0.624014,0.624014,-0.824234,-0.824234,-0.824234,20.811308
2,2020-01-03,3.827392,13.944902,30.227028,3.827392,3.827392,-0.034968,-0.034968,-0.034968,4.828173,...,-0.034968,-0.034968,-0.034968,0.797556,0.797556,0.797556,-0.789467,-0.789467,-0.789467,22.138069
3,2020-01-04,3.827188,9.532111,25.067961,3.827188,3.827188,-0.303253,-0.303253,-0.303253,4.828173,...,-0.303253,-0.303253,-0.303253,-0.516659,-0.516659,-0.516659,-0.74585,-0.74585,-0.74585,17.326394
4,2020-01-05,3.826985,6.409495,22.570039,3.826985,3.826985,-0.357903,-0.357903,-0.357903,4.828173,...,-0.357903,-0.357903,-0.357903,-1.318223,-1.318223,-1.318223,-0.694767,-0.694767,-0.694767,14.646118
5,2020-01-06,3.826781,10.150479,26.347497,3.826781,3.826781,-0.318158,-0.318158,-0.318158,4.828173,...,-0.318158,-0.318158,-0.318158,-0.272679,-0.272679,-0.272679,-0.637748,-0.637748,-0.637748,18.32712
6,2020-01-07,3.826578,14.227622,29.774099,3.826578,3.826578,0.014714,0.014714,0.014714,4.828173,...,0.014714,0.014714,0.014714,0.384282,0.384282,0.384282,-0.576434,-0.576434,-0.576434,21.984719


In [20]:
# Side-by-side maps sharing one view.  Note: this rebinds `m`, which held the
# Prophet model in the forecasting cell.
m = folium.plugins.DualMap(
    location=(47.6062, -122.3321),
    zoom_start=12,
    control_scale=True,
)

# Alternative views (enable exactly one display_* call per map):
#display_daily_view(m, rain_flag=False)
#display_workweek_view(m, rain_flag=True)
#display_workweek_weekend_view(m, rain_flag=False)
#display_week_view(m, rain_flag=False)
display_week_left_forecast_right_actual(m, rain_flag=True)
m