<center> 
<h1> Safe House </h1>
<h3> An Interactive Dashboard of the Crime Around Your Home in San Francisco </h3>
</center>

<center> <i>Data</i>$^x$ Project by Anya Stepnova, Nhut Nuygen, and Chad Wakamiya </center>



#### Instructions
1. Compare crime trends between two San Francisco addresses or police districts.
2. Select at least one crime type to forecast. 
3. Click <i> Show Forecast</i>. Our machine learning model leverages advanced algorithms to forecast the number of reported crimes per month into the future. The forecasts and historical data will be plotted in an interactive graph.

In [1]:
# Load required modules
import plotly.express as px
from ipywidgets import interactive, interact, interact_manual, HBox,VBox, Layout,ButtonStyle
import ipywidgets as widgets
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from geopy.geocoders import Nominatim
%matplotlib inline

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [3]:
%%capture
import nbimporter
from Forecasting_Models import ensemble_predict

In [4]:
# Read in data
# Historical SFPD incident reports; the file is expected next to this notebook.
INCIDENTS_CSV = 'Police_Department_Incident_Reports__Historical_2003_to_May_2018.csv'
df1 = pd.read_csv(INCIDENTS_CSV)

In [5]:
# Reformat date columns
# Parse the raw date strings once and reuse the parsed series for every derived column.
df1['Date'] = pd.to_datetime(df1['Date'])
incident_dates = df1['Date']

# 'Year-Month' keeps the un-padded "YYYY-M" text form (e.g. "2003-1"); it is built with
# vectorized string operations instead of a per-row Python lambda.
df1['Year-Month'] = incident_dates.dt.year.astype(str) + '-' + incident_dates.dt.month.astype(str)
df1['Year'] = incident_dates.dt.year.astype(int)
# Series.dt.week was deprecated in pandas 1.1 and removed in 2.0; isocalendar().week gives
# the same ISO week number. It returns a UInt32 column, so cast back to a plain integer.
df1['Week_Number'] = incident_dates.dt.isocalendar().week.astype(int)
df1['Month_Number'] = incident_dates.dt.month

In [6]:
# Define functions to help search by location
def address_to_coordinates(search_address):
    """Geocode a free-text address with the Nominatim service.

    Parameters
    ----------
    search_address : str
        Address to look up.

    Returns
    -------
    dict
        Keys 'latitude', 'longitude' and 'address' taken from the geocoder's match.

    Raises
    ------
    ValueError
        If the geocoder finds no match for ``search_address``.
    """
    geolocator = Nominatim(user_agent="sf-crime-dash")
    location = geolocator.geocode(search_address)
    # geocode() returns None when nothing matches; fail with a readable message instead of
    # an AttributeError on the attribute lookups below.
    if location is None:
        raise ValueError("Could not find a location for address: {!r}".format(search_address))
    return {'latitude': location.latitude, 'longitude': location.longitude, 'address': location.address}

def get_num_miles_equal_to_1_degree_long(lat):
    """Return how many miles one degree of longitude spans at a given latitude.

    Parameters
    ----------
    lat : float
        Latitude in decimal degrees.

    Returns
    -------
    float
        69.172 miles (the span at the equator) scaled by the cosine of the latitude.
    """
    # np.cos expects radians; passing degrees directly gives a meaningless scale factor.
    return np.cos(np.radians(lat)) * 69.172

def filter_around_coordinates(df, lat, long, radius):
    """Keep the rows of ``df`` that fall inside a box centred on (lat, long).

    The box extends ``radius`` miles from the centre in each direction, so the
    selection is a rectangle in degrees rather than a true circle. Column 'X' is
    compared against the longitude bounds and column 'Y' against the latitude bounds;
    all comparisons are strict.
    """
    # Convert the mile radius to a span in degrees for each axis.
    lat_span = 1/69 * radius
    long_span = 1/get_num_miles_equal_to_1_degree_long(lat) * radius

    inside_long = (df['X'] < long + long_span) & (df['X'] > long - long_span)
    inside_lat = (df['Y'] < lat + lat_span) & (df['Y'] > lat - lat_span)
    return df[inside_long & inside_lat]

def clean_address(address_raw):
    """Shorten a comma-separated address to its first two components.

    Parameters
    ----------
    address_raw : str
        Full address text, e.g. the 'address' value returned by the geocoder.

    Returns
    -------
    str
        The first two comma-separated parts joined by a comma (inner whitespace is
        left untouched). An address with no comma is returned unchanged rather than
        raising IndexError.
    """
    parts = address_raw.split(',')
    return ",".join(parts[:2])

In [7]:
# Filter by type of crime and year
def filter_by_crimes_year(crime):
    """Select incidents of the requested categories reported before 2018.

    Parameters
    ----------
    crime : str or iterable of str
        One category name or several; matching is done on the upper-cased names.

    Returns
    -------
    pandas.DataFrame
        Rows of the module-level ``df1`` restricted to the columns needed downstream.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(crime, str):
        crime = [crime]
    wanted_categories = [c.upper() for c in crime]

    # Keep only the relevant columns
    relevant_columns = ['IncidntNum', 'Category', 'Year', 'Week_Number', "Month_Number",
                        'Date', 'X', 'Y', 'PdDistrict']
    incidents = df1[relevant_columns]

    # Keep only the requested crime categories
    incidents = incidents[incidents['Category'].isin(wanted_categories)]

    # Drop records from 2018 since data from only part of the year is available
    return incidents[incidents['Year'] < 2018]

In [8]:
def filter_and_group_crimes_address(crime, location, radius):
    """Count monthly incidents of the given crime types around a location.

    Parameters
    ----------
    crime : iterable of str
        Crime categories to include.
    location : dict
        Must provide 'latitude' and 'longitude'.
    radius : float
        Distance in miles passed to ``filter_around_coordinates``.

    Returns
    -------
    pandas.DataFrame
        Indexed by month-end 'Date' with a single "Incidents/Month" column.
    """
    incidents = filter_by_crimes_year(crime)

    # Filter around coordinates
    nearby = filter_around_coordinates(incidents, location['latitude'], location['longitude'], radius)
    # Build a new frame instead of dropping in place: the filtered result is a slice of
    # another frame, and mutating it in place triggers SettingWithCopyWarning.
    nearby = nearby.drop(columns=['X', 'Y'])

    # Group by month
    monthly_counts = nearby.resample('M', on='Date')[['IncidntNum']].count()
    return monthly_counts.rename(columns={'IncidntNum': "Incidents/Month"})

def filter_and_group_crimes_pd(crime, police_district, radius=None):
    """Count monthly incidents of the given crime types in one police district.

    Parameters
    ----------
    crime : iterable of str
        Crime categories to include.
    police_district : str
        District name; it is upper-cased before being compared with 'PdDistrict'.
    radius : optional
        Unused. Accepted only so existing callers that pass a third argument keep working.

    Returns
    -------
    pandas.DataFrame
        Indexed by month-end 'Date' with a single "Incidents/Month" column.
    """
    incidents = filter_by_crimes_year(crime)

    # Filter to the requested police district
    in_district = incidents[incidents['PdDistrict'] == police_district.upper()]

    # Group by month
    monthly_counts = in_district.resample('M', on='Date')[['IncidntNum']].count()
    return monthly_counts.rename(columns={'IncidntNum': "Incidents/Month"})

In [9]:
def create_plot(train, forecast, fig, color, address):
    """Draw one location's observed series and its forecast on ``fig``.

    ``train`` is reset so that 'Date' becomes a column; ``forecast`` is wrapped in a
    DataFrame whose reset index is renamed to 'Date' and whose 'avg_fcst' column is
    renamed to 'Incidents/Month' (presumably the shape returned by ensemble_predict;
    confirm against that function). Both traces share ``color``; the forecast is dotted.
    The figure is modified in place and nothing is returned.
    """
    observed = train.reset_index()[['Date', 'Incidents/Month']]

    forecast_frame = pd.DataFrame(forecast).reset_index().rename(
        columns={'index': 'Date', 'avg_fcst': 'Incidents/Month'})

    # Start the forecast line at the last observed point so the two lines join up.
    forecast_line = pd.concat([observed.iloc[-1:], forecast_frame])

    # Create traces
    observed_trace = go.Scatter(
        x=observed['Date'],
        y=observed['Incidents/Month'],
        mode='lines',
        name='Observed Data (' + address + ')',
        line=dict(color=color),
    )
    forecast_trace = go.Scatter(
        x=forecast_line['Date'],
        y=forecast_line['Incidents/Month'],
        mode='lines',
        name='Forecast (' + address + ')',
        line=dict(color=color, dash='dot'),
    )
    fig.add_trace(observed_trace)
    fig.add_trace(forecast_trace)

    fig.update_xaxes(rangeslider_visible=True)
    

In [10]:
def _add_address_forecast(fig, search_address, months_to_forecast, radius, crime, color):
    """Geocode one address, forecast its monthly crime counts and draw them on ``fig``.

    Returns the shortened address text used to label the traces.
    """
    # Search Address
    location = address_to_coordinates(search_address)
    address_label = clean_address(location['address'])

    # Create time series and forecast; only months before 2017 are used for training
    time_series = filter_and_group_crimes_address(crime, location, radius)
    train = time_series[time_series.index.year < 2017]
    forecast = ensemble_predict(train, months_to_forecast)

    # Plot forecast on Plotly figure
    create_plot(train, forecast, fig, color, address_label)
    return address_label


def forecast_crime_address(search_address, search_address_2, months_to_forecast, radius, crime):
    """Forecast and plot reported crimes around one or two addresses.

    Parameters
    ----------
    search_address : str
        First address; always geocoded and plotted (in blue).
    search_address_2 : str
        Optional second address; skipped when empty or whitespace-only, else plotted in red.
    months_to_forecast : int
        Forecast horizon passed to ``ensemble_predict``.
    radius : float
        Search distance in miles around each address.
    crime : iterable of str
        Crime categories to include.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Create Plotly figure
    fig = go.Figure()

    # GRAPH FIRST ADDRESS
    address_names = _add_address_forecast(
        fig, search_address, months_to_forecast, radius, crime, 'royalblue')

    # GRAPH SECOND ADDRESS
    # A whitespace-only entry cannot be geocoded, so treat it like an empty field.
    if search_address_2 and search_address_2.strip():
        # Append cleaned address name for graph subtitle
        address_names += " and " + _add_address_forecast(
            fig, search_address_2, months_to_forecast, radius, crime, 'indianred')

    print("| Model Complete!")

    # Format Plots
    main_title = "<b> Reported Crimes Forecast " + "</b>"
    sub_title = "Within <i>" + str(radius) +"</i> Mile of " + address_names

    fig.update_layout(
        title={
        'text': main_title + " <br><sub>" + sub_title+ "</sub>",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        font=dict(size=14),
        yaxis_title="Reported Crimes Per Month",
        legend_orientation="h",
        legend=dict(x= 0, y=-0.5)
    )

    fig.show()


In [11]:
def _add_district_forecast(fig, police_district, months_to_forecast, crime, color):
    """Forecast monthly crime counts for one police district and draw them on ``fig``."""
    # Filter by PD and forecast; only months before 2017 are used for training.
    # The third argument (radius) is unused for districts, so pass None explicitly rather
    # than picking up whatever global happens to be named `radius`.
    time_series = filter_and_group_crimes_pd(crime, police_district, None)
    train = time_series[time_series.index.year < 2017]
    forecast = ensemble_predict(train, months_to_forecast)

    # Plot on Plotly figure
    create_plot(train, forecast, fig, color, police_district)


def forecast_crime_pd(pd1, pd2, months_to_forecast, crime):
    """Forecast and plot reported crimes for one or two police districts.

    Parameters
    ----------
    pd1 : str
        First police district; always plotted (in blue).
    pd2 : str
        Optional second police district; skipped when empty, otherwise plotted in red.
    months_to_forecast : int
        Forecast horizon passed to ``ensemble_predict``.
    crime : iterable of str
        Crime categories to include.

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # Create Plotly figure
    fig = go.Figure()

    # GRAPH FIRST POLICE DISTRICT
    _add_district_forecast(fig, pd1, months_to_forecast, crime, 'royalblue')

    # Save district name for graph subtitle
    pd_names = pd1

    # GRAPH SECOND POLICE DISTRICT
    if pd2:
        _add_district_forecast(fig, pd2, months_to_forecast, crime, 'indianred')

        # Append district name for graph subtitle
        pd_names += " and " + pd2

    print("| Model Complete!")

    # Format Plots
    main_title = "<b> Reported Crimes Forecast " + "</b>"
    sub_title = pd_names + " Police Districts"

    fig.update_layout(
        title={
        'text': main_title + " <br><sub>" + sub_title+ "</sub>",
        'y':0.9,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top'},
        font=dict(size=14),
        yaxis_title="Reported Crimes Per Month",
        legend_orientation="h",
        legend=dict(x= 0, y=-0.5)
    )

    fig.show()


In [12]:
# Create list of options for widgets
# Missing values are dropped from both columns before title-casing: a NaN left in the set
# cannot be ordered against strings and would make the sort fail.
crimes_list = sorted(set(df1.Category.dropna().str.title()))
pd_list = sorted(set(df1.PdDistrict.dropna().str.title()))

# Define CSS style shared by every input widget
style = {'description_width': 'initial'}

# Create widgets
# --- Address inputs ---
address = widgets.Text(
    description='Search an Address',
    value='415 Mission St, San Francisco, CA',
    placeholder='415 Mission St, San Francisco, CA',
    disabled=False,
    style=style,
    layout=widgets.Layout(width='45%'),
)

address2 = widgets.Text(
    description='Compare with Another Address',
    value='123 Market St, San Francisco, CA',
    disabled=False,
    style=style,
    layout=widgets.Layout(width='45%'),
)

# --- Crime categories (multi-select, used by both forecasting functions) ---
crimes = widgets.SelectMultiple(
    description='Type of Crime',
    options=crimes_list,
    value=['Assault'],
    disabled=False,
    style=style,
    layout=widgets.Layout(width='45%'),
)

# --- Police district inputs ---
police_district1 = widgets.Dropdown(
    description='Police District',
    options=pd_list,
    value='Tenderloin',
    disabled=False,
    style=style,
    layout=widgets.Layout(width='45%'),
)

police_district2 = widgets.Dropdown(
    description='Compare With Another Police District',
    options=pd_list,
    value='Northern',
    disabled=False,
    style=style,
    layout=widgets.Layout(width='45%'),
)

# --- Forecast horizon in months (0 to 24, default 12) ---
months_to_forecast = widgets.IntSlider(
    description='Number of Forecast Months',
    min=0,
    max=24,
    step=1,
    value=12,
    style=style,
    layout=widgets.Layout(width='45%'),
)

# --- Search radius in miles (0.25 to 5 in quarter-mile steps, default 1) ---
radius = widgets.FloatSlider(
    description='Crime Radius (Miles)',
    min=0.25,
    max=5,
    step=0.25,
    value=1,
    orientation='horizontal',
    readout=True,
    readout_format='.1f',
    style=style,
    layout=widgets.Layout(width='45%'),
)


In [13]:
# Pass widgets into graphing function
# 'manual' mode adds a button, so each forecast only runs when the button is clicked.
# The last two children of each interactive container are used as the button and the
# output area; everything before them is an input widget.

# Address-based forecast
widget = interactive(
    forecast_crime_address,
    {'manual': True},
    search_address=address,
    search_address_2=address2,
    months_to_forecast=months_to_forecast,
    radius=radius,
    crime=crimes,
)
input_fields = VBox(
    widget.children[:-2],
    layout=Layout(flex_flow='row wrap', justify_content='space-between'),
)
type_crime = widget.children[-3]
button = widget.children[-2]
output = widget.children[-1]

# Police-district-based forecast (reuses the same months and crime-type widgets)
widget2 = interactive(
    forecast_crime_pd,
    {'manual': True},
    pd1=police_district1,
    pd2=police_district2,
    months_to_forecast=months_to_forecast,
    crime=crimes,
)
input_fields2 = VBox(
    widget2.children[:-2],
    layout=Layout(flex_flow='row wrap', justify_content='space-between'),
)
type_crime2 = widget2.children[-3]
button2 = widget2.children[-2]
output2 = widget2.children[-1]

# Give both run buttons the same label and colour
for show_button in (button, button2):
    show_button.description = 'Show Forecast'
    show_button.style = ButtonStyle(button_color='moccasin')

In [14]:
# Create tabs for UI
# Each tab stacks its input fields, its run button and its output area.
tab1 = VBox(children=[input_fields, button, output])
tab2 = VBox(children=[input_fields2, button2, output2])
tab = widgets.Tab(children=[tab1, tab2])
for tab_index, tab_label in enumerate(['Address', 'Police District']):
    tab.set_title(tab_index, tab_label)

# Trailing expression so the notebook renders the dashboard as the cell output
VBox(children=[tab])

VBox(children=(Tab(children=(VBox(children=(VBox(children=(Text(value='415 Mission St, San Francisco, CA', des…

References
 - Interactive plots created with Voila: https://github.com/mkcor/covid-plots/blob/master/covid_it_fr_lag.ipynb
 - Data graphed with Plotly: https://medium.com/plotly/introducing-plotly-express-808df010143d