# Set-up
## Import all packages

In [1]:
import pandas as pd
import numpy as np
import glob
import os
from flask import Flask
import sys

import geopandas
import geopy
from geopy.geocoders import Nominatim
import folium
from geopy.extra.rate_limiter import RateLimiter
from folium import plugins
from folium.plugins import MarkerCluster
from folium.plugins import HeatMap

import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

from dash import Dash
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output
import dash_bootstrap_components as dbc
import flask

import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
from datetime import timedelta
import glob
import os
from tqdm import tqdm as tqdm
import time
import seaborn as sns

%matplotlib inline
import statsmodels.api as sm
from statsmodels.compat import lzip
from statsmodels.formula.api import ols
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller

import warnings
warnings.filterwarnings('ignore')

from numpy import cumsum
from datetime import datetime as dt

## Create functions

In [2]:
def getAllFiles(path, file_name_type, start_year, start_month, end_year, end_month, included_columns):
    li = []

    for x in tqdm(list(os.walk(path))):

        all_files = glob.glob(x[0] + "/*" + file_name_type + ".csv")

        for file_path in all_files:
            if (inTimePeriod(file_path, start_year, start_month, end_year, end_month)):
                df = pd.read_csv(file_path, index_col=None, header=0, usecols=included_columns)
                df['Year'] = df['Month'].apply(monthYearToYear)
                df['Month'] = df['Month'].apply(monthYearToMonth)
                li.append(df)
                
    complete_df = pd.concat(li, axis=0, ignore_index=True)
    complete_df.rename_axis('index')
    
    complete_df['Crime type'] = complete_df['Crime type'].replace('Violent crime', 'Violence and sexual offences')
    complete_df['Crime type'] = complete_df['Crime type'].replace('Public disorder and weapons','Other crime')

    return complete_df

def inTimePeriod(file_path, start_year, start_month, end_year, end_month):
    file_path_parts = file_path.split("\\")
    file_name = file_path_parts[len(file_path_parts) - 1]
    year = int(file_name[:4])
    month = int(file_name[5:7])
    start_bool = (year > start_year) or (year == start_year and month >= start_month)
    end_bool = (year < end_year) or (year == end_year and month <= end_month)
    if (start_bool and end_bool):
        return True
    return False

def LSOAtoMSOA(LSOA):
    return LSOA[:-1]

def monthYearToYear(monthYear):
    return int(monthYear[:4])

def monthYearToMonth(monthYear):
    return int(monthYear[5:7])

def MonthYearRegionDF(dataframe, month, year, region):
    return dataframe[(dataframe['Month'] == month) & (dataframe['Year'] == year) & 
                           (dataframe['Falls within'] == region)]

def ARIMA_DATA(df, msoa, category):
    ###Gets the data of a specific MSOA and category to use in the ARIMA_OPTIMAL function
    ###

    df = df[(df['MSOA'] == msoa) & (df['Crime type'] == category)]
    df = df[['Date', 'count']]
    df = df.set_index('Date')
    
    return df

def ARIMA_STATIONARY(df):
    ###Returns a stationary dataframe, created by ARIMA_DATA
    ###
    if adfuller(df['count'])[1] > 0.05:
        df = df.diff().dropna()
    
    return df

def ARIMA_PREDICT(df, df_parameters, MSOA, category):
    ###Final forecasting
    ###
    
    arima_data = ARIMA_DATA(df, MSOA, category)
    stationary_data = ARIMA_STATIONARY(arima_data)
    
    best_model = df_parameters[(df_parameters['MSOA'] == MSOA) & (df_parameters['Crime type'] == category)]
    p = best_model.iloc[0]['p']
    q = best_model.iloc[0]['q']
    
    model = SARIMAX(stationary_data, order=(p,0,q), trend='c')
    results = model.fit(disp=0)
    forecast = results.get_forecast(steps=10)
    mean_forecast = forecast.predicted_mean
    confidence_intervals = forecast.conf_int(alpha=0.10)
    return arima_data, mean_forecast.to_frame(), confidence_intervals

def ARIMA_OPTIMAL(stationary_data, stationary_test):
    ### Looks for the best ARMA(p,q) + constant model according to MSOA and crime type
    ###
    
    order_aic_bic = list()

    # Loop over AR order
    for p in range(1, 5):
        # Loop over MA order
        for q in range(1, 5):
            # Fit model
            model = SARIMAX(stationary_data, order=(p,0,q), trend='c')
            try:
                results = model.fit(disp=0)
                # Add order and scores to list
                order_aic_bic.append((p, q, results.aic, results))
            except:
                order_aic_bic.append((p, q, np.inf, None))
            
    order_df = pd.DataFrame(order_aic_bic, columns=['p', 'q', 'aic', 'results'])
    optimum = order_df[order_df['aic'] == order_df['aic'].min()]
    optimum.reset_index(inplace=True)

    # MASE
    mase = 0
    if optimum['results'][0] is not None:
        forecast = results.get_forecast(steps=len(stationary_test.index) + 1)
        mean_forecast = forecast.predicted_mean.to_frame()['predicted_mean']
        mean_forecast.index = pd.to_datetime(mean_forecast.index, format = '%Y-%m-%d').strftime('%Y-%m')
        mase = mase_loss(y_train=stationary_data['count'], y_pred=mean_forecast, y_test=stationary_test['count'])

    return optimum['p'][0], optimum['q'][0], optimum['aic'][0], mase

# From sktime, as package could not be imported
def mase_loss(y_test, y_pred, y_train, sp=1):
    #  naive seasonal prediction
    y_train = np.asarray(y_train)
    y_pred_naive = y_train[:-sp]
    
    # mean absolute error of naive seasonal prediction
    mae_naive = np.mean(np.abs(y_train[sp:] - y_pred_naive))
    
    # if training data is flat, mae may be zero,
    # return np.nan to avoid divide by zero error
    # and np.inf values
    if mae_naive == 0:
        return np.nan
    else:
        return np.mean(np.abs(y_test - y_pred)) / mae_naive


def get_Best_Models(df, df_test):
    param = list()
    
    for msoa in tqdm(df['MSOA'].unique()):
        for category in df['Crime type'].unique():
            arima_data = ARIMA_DATA(df, msoa, category)
            stationary_data = ARIMA_STATIONARY(arima_data)
            arima_test = ARIMA_DATA(df_test, msoa, category)
            stationary_test = ARIMA_STATIONARY(arima_test)
            p, q, aic, mase = ARIMA_OPTIMAL(stationary_data, stationary_test)
            param.append((msoa, category, p, q, aic, mase))
            
    return pd.DataFrame(param, columns=['MSOA', 'Crime type', 'p', 'q', 'aic', 'MASE'])

## Import data

In [None]:
dataset_path_1 = input("Enter the path to the dataset folder containing street data (provided by the course): ")

In [None]:
dataset_path_2 = input("Enter the path to the dataset folder containing complete.csv and best_models.csv: ")

In [4]:
df_streets = getAllFiles(dataset_path_1, "street", 2012, 1, 2021, 10,
                         included_columns=['Crime ID', 'Month', 'Falls within', 'Longitude', 'Latitude', 'LSOA name', 'Crime type'])
df_streets

100%|██████████| 132/132 [03:56<00:00,  1.79s/it]


Unnamed: 0,Crime ID,Month,Falls within,Longitude,Latitude,LSOA name,Crime type,Year
0,,1,Avon and Somerset Constabulary,-2.516919,51.423683,Bath and North East Somerset 001A,Anti-social behaviour,2012
1,,1,Avon and Somerset Constabulary,-2.510162,51.410998,Bath and North East Somerset 001A,Anti-social behaviour,2012
2,,1,Avon and Somerset Constabulary,-2.511927,51.409435,Bath and North East Somerset 001A,Anti-social behaviour,2012
3,b1a34824199f9d587ef05668511759f3cb9e69a35e9842...,1,Avon and Somerset Constabulary,-2.494870,51.422276,Bath and North East Somerset 001A,Other theft,2012
4,67ea8d3cff9fcf5bbea27e1bdab08608ea141883c23ad6...,1,Avon and Somerset Constabulary,-2.510162,51.410998,Bath and North East Somerset 001A,Other theft,2012
...,...,...,...,...,...,...,...,...
60418833,49092dec9f064d1f55e4c1274f8cdc1d6050766a0e95a4...,10,Wiltshire Police,-1.758262,50.993385,Wiltshire 062E,Criminal damage and arson,2021
60418834,baef9c23158d3a83a8c34fb25d27cfb78df23ab526c474...,10,Wiltshire Police,-1.761165,50.991215,Wiltshire 062E,Public order,2021
60418835,735b07a0bb5229f360aa31bb0e6ce458635bc550c3b1e5...,10,Wiltshire Police,-1.737950,50.999681,Wiltshire 062E,Vehicle crime,2021
60418836,368a849dc629c19ba90a46068503a6e4a922ae1e8bfc16...,10,Wiltshire Police,-1.757983,50.992242,Wiltshire 062E,Violence and sexual offences,2021


In [6]:
Complete = pd.read_csv(str(dataset_path_2) + r"\\complete.csv")
Complete.drop("Unnamed: 0", axis=1, inplace=True)

Complete

Unnamed: 0,Date,MSOA,Crime type,count
0,2012-01,"Abberley, Holt Heath & Hallow",Anti-social behaviour,6
1,2012-01,"Abberley, Holt Heath & Hallow",Bicycle theft,0
2,2012-01,"Abberley, Holt Heath & Hallow",Burglary,1
3,2012-01,"Abberley, Holt Heath & Hallow",Criminal damage and arson,0
4,2012-01,"Abberley, Holt Heath & Hallow",Drugs,0
...,...,...,...,...
11645083,2021-10,Ystradgynlais & Tawe Uchaf,Robbery,0
11645084,2021-10,Ystradgynlais & Tawe Uchaf,Shoplifting,2
11645085,2021-10,Ystradgynlais & Tawe Uchaf,Theft from the person,0
11645086,2021-10,Ystradgynlais & Tawe Uchaf,Vehicle crime,1


In [7]:
Best_Models = pd.read_csv(str(dataset_path_2) + r"\\best_models.csv") 
Best_Models.drop("Unnamed: 0", axis=1, inplace=True)
Best_Models.drop("Unnamed: 0.1", axis=1, inplace=True)

Best_Models

Unnamed: 0,MSOA,Crime type,p,q,aic,MASE
0,"Abberley, Holt Heath & Hallow",Anti-social behaviour,2,2,489.384443,2.331706
1,"Abberley, Holt Heath & Hallow",Bicycle theft,1,1,31.219505,0.589988
2,"Abberley, Holt Heath & Hallow",Burglary,3,3,426.688176,0.826900
3,"Abberley, Holt Heath & Hallow",Criminal damage and arson,2,2,341.449159,0.847437
4,"Abberley, Holt Heath & Hallow",Drugs,3,4,121.634511,0.828079
...,...,...,...,...,...,...
99857,Ystradgynlais & Tawe Uchaf,Robbery,1,1,127.626177,0.651113
99858,Ystradgynlais & Tawe Uchaf,Shoplifting,1,1,269.478958,1.588960
99859,Ystradgynlais & Tawe Uchaf,Theft from the person,2,2,-28.011987,1.756758
99860,Ystradgynlais & Tawe Uchaf,Vehicle crime,2,2,349.912842,1.322706


# Dashboard
## Elements

In [8]:
# Getting lists
police_list = df_streets['Falls within'].sort_values(ascending=True).unique().tolist() #police force list
crime_list = df_streets['Crime type'].sort_values(ascending=True).unique().tolist() #crime type list
msoa_list = Best_Models['MSOA'].sort_values(ascending=True).unique().tolist() #MSOA list
msoa_list = list(filter(("Abbey Hey").__ne__, msoa_list))
year_list = df_streets['Year'].sort_values(ascending=True).unique().tolist() #year list
month_list = df_streets['Month'].sort_values(ascending=True).unique().tolist() #month list

In [9]:
#Dataframe 
##Bar
df_selected = df_streets.loc[df_streets['Falls within'] == 'Avon and Somerset Constabulary']
df_grouped_bar = df_selected.groupby(['Falls within', 'Crime type']).agg({'Month' : 'count'})
df_grouped_bar_new = df_grouped_bar.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))
df_grouped_bar_new['Number of crimes'] = df_grouped_bar['Month']
df_grouped_bar_new = df_grouped_bar_new.squeeze()
df_grouped_bar_perc = df_grouped_bar_new.rename(columns = {'Month' : 'Percentage crimes per police force'}).reset_index()
df_grouped_bar_perc = df_grouped_bar_perc.round(2)

#Line chart
data, mean_forecasts, confidence_intervals = ARIMA_PREDICT(Complete, Best_Models, 'Abberley, Holt Heath & Hallow', 'Burglary')
lower_limits = confidence_intervals[['lower count']]
num = lower_limits._get_numeric_data()
num[num < 0] = 0
upper_limits = confidence_intervals[['upper count']]
num = upper_limits._get_numeric_data()
num[num < 0] = 0


lower_limits.reset_index(inplace=True)
lower_limits['Date'] = lower_limits['index'].dt.strftime('%Y-%m')
lower_limits.drop(columns=['index'], inplace=True, axis=1)
lower_limits = lower_limits.rename(columns={"lower count": "lower_count"})
lower_limits['lower_count'] = lower_limits['lower_count'].abs()
lower_limits.set_index('Date', inplace=True) 

upper_limits.reset_index(inplace=True)
upper_limits['Date'] = upper_limits['index'].dt.strftime('%Y-%m')
upper_limits.drop(columns=['index'], inplace=True, axis=1)
upper_limits = upper_limits.rename(columns={"upper count": "upper_count"})
upper_limits['upper_count'] = upper_limits['upper_count'].abs()
upper_limits.set_index('Date', inplace=True) 

df_plotting = mean_forecasts.reset_index()
df_plotting['Date'] = df_plotting['index'].dt.strftime('%Y-%m')
df_plotting.drop(columns=['index'], inplace=True, axis=1)
df_plotting.set_index('Date', inplace=True) 
df_plotting = pd.concat([df_plotting, lower_limits, upper_limits], axis=1, join="inner")
df_plotting = pd.concat([data, df_plotting], axis=1, join="outer")
num = df_plotting._get_numeric_data()
num[num < 0] = 0

dates = df_plotting.index.tolist()
df_plotting = df_plotting.reset_index()

Abberley, Holt Heath & Hallow Burglary
         count
Date          
2012-01      1
2012-02      4
2012-03      3
2012-04      4
2012-05      4
...        ...
2021-06      3
2021-07      2
2021-08      8
2021-09      5
2021-10      5

[118 rows x 1 columns]
data          count
Date          
2012-02    3.0
2012-03   -1.0
2012-04    1.0
2012-05    0.0
2012-06    1.0
...        ...
2021-06   -1.0
2021-07   -1.0
2021-08    6.0
2021-09   -3.0
2021-10    0.0

[117 rows x 1 columns]
PREDICT             lower count  upper count
2021-11-01    -3.504120     5.836918
2021-12-01    -6.920696     4.766772
2022-01-01    -7.603777     4.083771
2022-02-01    -4.918199     6.801596
2022-03-01    -4.857162     7.033039
2022-04-01    -7.029451     4.891205
2022-05-01    -6.641018     5.389616
2022-06-01    -4.973331     7.059235
2022-07-01    -5.783676     6.331389
2022-08-01    -6.959602     5.156516


In [10]:
lower_limits.describe()

Unnamed: 0,lower_count
count,10.0
mean,0.0
std,0.0
min,0.0
25%,0.0
50%,0.0
75%,0.0
max,0.0


In [11]:
#Bar chart
bar_chart = px.bar(data_frame=df_grouped_bar_perc,
                    x = 'Falls within',
                    y = 'Percentage crimes per police force',
                    color = 'Crime type',
                    barmode='group',
                    orientation = 'v',
                    text_auto=True,
                    hover_name="Falls within", 
                    hover_data={
                        'Falls within': False,
                        'Number of crimes' : True
                    },
                   color_discrete_sequence=px.colors.qualitative.Safe,
                   title = f'Comparison of {", ".join(crime_list)} in {police_list[0]}',
                   )

#Heatmap
hm = folium.Map([53.5500,-2.4333], zoom_start=6) 
hm.save('Heatmap.html')

#Line chart
line_chart = go.Figure()
line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['lower_count'],
                    mode='lines',
                    line={'color': 'rgb(221, 204, 119)'},
                    name = 'Confidence interval',
                    showlegend=False
                    ))
line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['upper_count'],
                    mode='lines', fill = 'tonexty', #fill_color = 'rgb(221, 204, 119)',
                    line={'color': 'rgb(221, 204, 119)'},
                    name = 'Confidence interval'
                    ))
line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['predicted_mean'],
                    mode='lines',
                    name='Predicted mean of crimes',
                    line={'color': 'rgb(102, 17, 0)'},
                    ))
line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['count'],
                    mode='lines',
                    name='Number of crimes',
                    line={'color': 'rgb(51, 34, 136)'},
                    ))
line_chart.update_layout(xaxis_title="Time", yaxis_title="Number of crimes",)


#Graph
bar_graph = dcc.Graph(
        id='bar_graph',
        figure=bar_chart,
    )

heat_graph = html.Iframe(id = 'heat_graph',
                        srcDoc = open('Heatmap.html', 'r',).read(),
                        width = '1100',
                        height = '600')

line_graph = dcc.Graph(
        id='line_graph',
        figure=line_chart,
    )

In [12]:
#Widgets
##Tab 1
police_dropdown = dcc.Dropdown(
    id = 'police_dropdown',
    options = police_list, 
    multi = True,
)
crime_checklist = dcc.Checklist(
    id = 'crime_checklist',
    options = crime_list, 
    value = crime_list,
    labelStyle = dict(display='block'))
value_type_radio = dcc.RadioItems(
    id  = 'value_type_radio',
    options = ['Percentages', 'Absolute values'], 
    value = 'Percentages')


##Tab 2
police_dropdown2 = dcc.Dropdown(
    id = 'police_dropdown2',
    options = police_list, 
)
deselect_button = html.Button("Deselect all", id="deselect_button")
crime_radio = dcc.RadioItems(
    id = 'crime_radio',
    options = crime_list, 
    value = crime_list[0],
    labelStyle = dict(display='block'))
submit_button = html.Button("Submit changes to map", id="submit_button")


##Tab 3
msoa_dropdown = dcc.Dropdown(
    id = 'msoa_dropdown',
    options = msoa_list, 
    value = msoa_list[0],)
crime_radio2 = dcc.RadioItems(
    id = 'crime_radio2',
    options = crime_list, 
    value = crime_list[0],
    labelStyle = dict(display='block'))
month_dropdown = dcc.Dropdown(
    id = 'month_dropdown',
    options = month_list, 
    value = month_list[0],)
year_dropdown = dcc.Dropdown(
    id = 'year_dropdown',
    options = year_list, 
    value = year_list[0],)

p_tool = html.Div(
    [html.Div(id='p_value', 
              children=['P: {}'.format(None)]),
    dbc.Tooltip("The number of lag observations in the model; also known as the lag orde.",
                target="p_value"),
    ])
q_tool = html.Div(
    [html.Div(id='q_value', 
              children=['Q: {}'.format(None)]),
    dbc.Tooltip("The size of the moving average window; also known as the order of the moving average.",
                target="q_value"),
    ])
aic_tool = html.Div(
    [html.Div(id='aic_value', 
              children=['AIC: {}'.format(None)]),
    dbc.Tooltip("The Akaike information criterion (AIC) is an estimator of out-of-sample prediction error and \
                thereby relative quality of statistical models for a given set of data.",
                target="aic_value"),
    ])
mase_tool = html.Div(
    [html.Div(id='mase_value', 
              children=['MASE: {}'.format(None)]),
    dbc.Tooltip("Mean Absolute Scaled Error (MASE) is a scale-free error metric that gives each error as a ratio \
                compared to a baseline’s average error.",
                target="mase_value"),
    ])

## Update charts

In [13]:
# server = flask.Flask(__name__)
# app = dash.Dash(__name__, server=server)
app = dash.Dash(__name__)

@app.callback(
    Output('bar_graph', 'figure'),
    [Input("police_dropdown", "value"), Input("crime_checklist", "value"), Input("value_type_radio", "value")]
)
def update_bar(police_force, crime_type, value_type):
    ##update dataframes
    df_selected = df_streets.loc[df_streets['Falls within'].isin(police_force) & \
    df_streets['Crime type'].isin(crime_type)]
    df_grouped = df_selected.groupby(['Falls within', 'Crime type']).agg({'Month' : 'count'})
    df_grouped_new = df_grouped.groupby(level=0).apply(lambda x: 100 * x / float(x.sum()))
    df_grouped_new['Number of crimes'] = df_grouped['Month']
    df_grouped_new = df_grouped_new.squeeze()
    df_grouped_perc = df_grouped_new.rename(columns = {'Month' : 'Percentage crimes per police force'}).reset_index()
    df_grouped_perc = df_grouped_perc.round(2)
    
    if value_type == 'Absolute values':
        y_value = 'Number of crimes'
    elif value_type == 'Percentages':
        y_value = 'Percentage crimes per police force'
    
    ##update graphs
    chart = px.bar(data_frame=df_grouped_perc,
                    x = 'Falls within',
                    y = y_value,
                    color = 'Crime type',
                    barmode='group',
                    orientation = 'v',
                    text_auto=True,
                    hover_name="Falls within", 
                    hover_data={
                        'Falls within': False,
                        'Percentage crimes per police force' : True,
                        'Number of crimes' : True
                    },
                   color_discrete_sequence=px.colors.qualitative.Safe,
                   title = f'Comparison of {", ".join(crime_type)} in {" and ".join(police_force)}',
                   )
    return chart
    
@app.callback(
    Output('crime_checklist', "value"),
    [Input('deselect_button', "n_clicks")]
)
def deselect_crime_type(n_clicks):
    if n_clicks is None:
        raise dash.exceptions.PreventUpdate
    else:
        return []

In [14]:
@app.callback(
    [Output('heat_graph', 'srcDoc'), Output('submit_button', "n_clicks")],
    [Input("police_dropdown2", "value"), Input("crime_radio", "value"), \
     Input('year_dropdown', 'value'), Input('month_dropdown', 'value'),
     Input('submit_button', "n_clicks")
    ]
)
def update_heat(police_force, crime_type, year, month, n_clicks):
    ##update dataframes
    df_grouped_heat = MonthYearRegionDF(df_streets, month, year, police_force)
    df_grouped_heat2 = df_grouped_heat[df_grouped_heat['Crime type'] == crime_type].dropna()
    heat_data = [[row['Latitude'],row['Longitude']] for index, row in df_grouped_heat2.iterrows()]
    
    hm = folium.Map([53.5500,-2.4333], zoom_start=6) 
    HeatMap(heat_data).add_to(hm)
    hm.save('Heatmap.html')
    if n_clicks is None:
        raise dash.exceptions.PreventUpdate
    else:
        return open('Heatmap.html', 'r',).read(), None

In [15]:
@app.callback(
    Output('line_graph', 'figure'),
    [Input("msoa_dropdown", "value"), Input("crime_radio2", "value")]
)
def update_line(MSOA, crime):
    #recalculating the data
    data, mean_forecasts, confidence_intervals = ARIMA_PREDICT(Complete, Best_Models, MSOA, crime)
    lower_limits = confidence_intervals[['lower count']]
    # num = lower_limits._get_numeric_data()
    # num[num < 0] = 0
    upper_limits = confidence_intervals[['upper count']]
    # num = upper_limits._get_numeric_data()
    # num[num < 0] = 0
    
    
    lower_limits.reset_index(inplace=True)
    lower_limits['Date'] = lower_limits['index'].dt.strftime('%Y-%m')
    lower_limits.drop(columns=['index'], inplace=True, axis=1)
    lower_limits = lower_limits.rename(columns={"lower count": "lower_count"})
    lower_limits['lower_count'] = lower_limits['lower_count'].abs()
    lower_limits.set_index('Date', inplace=True) 

    upper_limits.reset_index(inplace=True)
    upper_limits['Date'] = upper_limits['index'].dt.strftime('%Y-%m')
    upper_limits.drop(columns=['index'], inplace=True, axis=1)
    upper_limits = upper_limits.rename(columns={"upper count": "upper_count"})
    upper_limits['upper_count'] = upper_limits['upper_count'].abs()
    upper_limits.set_index('Date', inplace=True) 

    df_plotting = mean_forecasts.reset_index()
    df_plotting['Date'] = df_plotting['index'].dt.strftime('%Y-%m')
    df_plotting.drop(columns=['index'], inplace=True, axis=1)
    df_plotting.set_index('Date', inplace=True) 
    df_plotting = pd.concat([df_plotting, lower_limits, upper_limits], axis=1, join="inner")
    df_plotting = pd.concat([data, df_plotting], axis=1, join="outer")
    num = df_plotting._get_numeric_data()
    num[num < 0] = 0

    dates = df_plotting.index.tolist()
    df_plotting = df_plotting.reset_index()
    
    
    # updating chart
    line_chart = go.Figure()
    line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['lower_count'],
                    mode='lines',
                    line={'color': 'rgb(221, 204, 119)'},
                    name = 'Confidence interval',
                    showlegend=False
                    ))
    line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['upper_count'],
                    mode='lines', fill = 'tonexty', #fill_color = 'rgb(221, 204, 119)',
                    line={'color': 'rgb(221, 204, 119)'},
                    name = 'Confidence interval'
                    ))
    line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['predicted_mean'],
                    mode='lines',
                    name='Predicted mean of crimes',
                    line={'color': 'rgb(102, 17, 0)'},
                    ))
    line_chart.add_trace(go.Scatter(x=df_plotting['Date'], y=df_plotting['count'],
                    mode='lines',
                    name='Number of crimes',
                    line={'color': 'rgb(51, 34, 136)'},
                    ))
    line_chart.update_layout(title= f"Forecast crime counts for {crime} in {MSOA}",
                            xaxis_title="Time", yaxis_title="Number of crimes",)
    
    return line_chart


@app.callback(
    [Output('p_value', 'children'), Output('q_value', 'children'), Output('aic_value', 'children'), Output('mase_value', 'children')],
    [Input("msoa_dropdown", "value"), Input("crime_radio2", "value")]
)
def update_values(MSOA, crime):
    p = Best_Models[(Best_Models['MSOA']==MSOA) & (Best_Models['Crime type']==crime)]['p']
    p_str = f'P: {p}'
    
    q = Best_Models[
        Best_Models['MSOA']==MSOA][
        Best_Models['Crime type']==crime][
        'q']
    q_str = f'Q: {q}'

    AIC = Best_Models[
        Best_Models['MSOA']==MSOA][
        Best_Models['Crime type']==crime][
        'aic']
    aic_str = f'AIC: {AIC}'

    MASE = Best_Models[
        Best_Models['MSOA']==MSOA][
        Best_Models['Crime type']==crime][
        'MASE']
    mase_str = f'MASE: {MASE}'
    
    return p_str, q_str, aic_str, mase_str

## Layout dashboard

In [16]:
#Create several widgets and graphs layout
## Tab 1
widgets1 = [html.H2('Widgets', style={'font-family' : 'Verdana'}),
           html.H3('Location:', style={'font-family' : 'Verdana'}),
               dcc.Markdown('Police force:', style={'font-family' : 'Verdana'}), police_dropdown,
           html.Br(),
           html.H3('Crime type:', style={'font-family' : 'Verdana'}), \
               deselect_button, crime_checklist,
           html.Br(),
           html.H3('Unit crimes:', style={'font-family' : 'Verdana'}), value_type_radio
           ]
graphs1 = [html.H2('Graph', style={'font-family' : 'Verdana'}),
          bar_graph]

## Tab 2
widgets2 = [html.H2('Widgets', style={'font-family' : 'Verdana'}),
           html.H3('Location:', style={'font-family' : 'Verdana'}),
            dcc.Markdown('Police force:', style={'font-family' : 'Verdana'}), police_dropdown2,
           html.Br(),
           html.H3('Crime type:', style={'font-family' : 'Verdana'}), crime_radio,
           html.Br(),
           html.H3('Time:', style={'font-family' : 'Verdana'}), 
            dcc.Markdown('Month:', style={'font-family' : 'Verdana'}), month_dropdown, 
            dcc.Markdown('Year:', style={'font-family' : 'Verdana'}), year_dropdown, 
            html.Br(),
           submit_button]
graphs2 = [html.H2('Graph', style={'font-family' : 'Verdana'}),
           heat_graph]

## Tab 3
widgets3 = [html.H2('Widgets', style={'font-family' : 'Verdana'}),
            html.H3('Location:', style={'font-family' : 'Verdana'}), 
            dcc.Markdown('Select corresponding MSOA:', style={'font-family' : 'Verdana'}), msoa_dropdown,
            html.Br(),
            html.H3('Crime type:', style={'font-family' : 'Verdana'}), crime_radio2,
            html.Br(),
            html.H3('ARIMA model statistics:', style={'font-family' : 'Verdana'}),
            html.Br(), p_tool, 
            html.Br(), q_tool, 
            html.Br(), aic_tool, 
            html.Br(), mase_tool
           ]
        
graphs3 = [html.H2('Graph', style={'font-family' : 'Verdana'}),
          line_graph]

In [17]:
#Create column layouts
widget_layout1 = html.Div(children = widgets1, style = {'width' : '20%', 'display' : 'inline-block'})
graph_layout1 = html.Div(children = graphs1, style = {'width' : '78%', 'display' : 'inline-block', 'float' : 'right'})

widget_layout2 = html.Div(children = widgets2, style = {'width' : '20%', 'display' : 'inline-block'})
graph_layout2 = html.Div(children = graphs2, style = {'width' : '78%', 'display' : 'inline-block', 'float' : 'right'})

widget_layout3 = html.Div(children = widgets3, style = {'width' : '20%', 'display' : 'inline-block'})
graph_layout3 = html.Div(children = graphs3, style = {'width' : '78%', 'display' : 'inline-block', 'float' : 'right'})

In [18]:
#Create full layout
full_layout = [html.H1("Data exploration tool", style={'textAlign': 'center', 'font-family' : 'Verdana'}),
               dcc.Tabs(id="tabs", value='tab_1', children=[
                dcc.Tab(label = 'Analysis: Comparison', id='tab_1', value = 'tab_1', children = [widget_layout1, graph_layout1]), 
                dcc.Tab(label = 'Analysis: Hotspots', id='tab_2', children = [widget_layout2, graph_layout2]),
                dcc.Tab(label = 'Model: ARIMA forecasting', id='tab_3', children = [widget_layout3, graph_layout3])
               ]),
            html.Div(id='tabs_content')
            ]
#Create layout
app.layout = html.Div(children = full_layout)

## Run application

In [None]:
if __name__ == "__main__":
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app '__main__' (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [08/Apr/2022 22:19:13] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-component-suites/dash/dcc/async-markdown.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "GET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:14] "POST /_dash-update-component HTTP/1.1" 204 -


Abberley, Holt Heath & Hallow Anti-social behaviour


127.0.0.1 - - [08/Apr/2022 22:19:15] "GET /_dash-component-suites/dash/dcc/async-highlight.js HTTP/1.1" 200 -


         count
Date          
2012-01      6
2012-02     10
2012-03      7
2012-04     14
2012-05     13
...        ...
2021-06      0
2021-07      0
2021-08      0
2021-09      0
2021-10      0

[118 rows x 1 columns]
Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\20202810\.conda\envs\my_env\lib\site-packages\flask\app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\20202810\.conda\envs\my_env\lib\site-packages\flask\app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\20202810\.conda\envs\my_env\lib\site-packages\flask\app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\20202810\.conda\envs\my_env\lib\site-packages\flask\app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "C:\Users\20202810\.conda\envs\my_env\lib\site-packages\dash

127.0.0.1 - - [08/Apr/2022 22:19:15] "POST /_dash-update-component HTTP/1.1" 500 -
127.0.0.1 - - [08/Apr/2022 22:19:15] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [08/Apr/2022 22:19:15] "POST /_dash-update-component HTTP/1.1" 200 -


data          count
Date          
2012-01      6
2012-02     10
2012-03      7
2012-04     14
2012-05     13
...        ...
2021-06      0
2021-07      0
2021-08      0
2021-09      0
2021-10      0

[118 rows x 1 columns]
PREDICT             lower count  upper count
2021-11-01    -4.954392    13.896376
2021-12-01    -3.846024    18.315143
2022-01-01    -2.841179    20.071964
2022-02-01    -2.548897    20.401743
2022-03-01    -2.553826    20.396823
2022-04-01    -2.586873    20.364194
2022-05-01    -2.600316    20.350820
2022-06-01    -2.602744    20.348395
2022-07-01    -2.602445    20.348694
2022-08-01    -2.602079    20.349061


127.0.0.1 - - [08/Apr/2022 22:19:17] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [08/Apr/2022 22:19:31] "POST /_dash-update-component HTTP/1.1" 200 -


Abberley, Holt Heath & Hallow Burglary
         count
Date          
2012-01      1
2012-02      4
2012-03      3
2012-04      4
2012-05      4
...        ...
2021-06      3
2021-07      2
2021-08      8
2021-09      5
2021-10      5

[118 rows x 1 columns]


127.0.0.1 - - [08/Apr/2022 22:19:32] "POST /_dash-update-component HTTP/1.1" 200 -


data          count
Date          
2012-02    3.0
2012-03   -1.0
2012-04    1.0
2012-05    0.0
2012-06    1.0
...        ...
2021-06   -1.0
2021-07   -1.0
2021-08    6.0
2021-09   -3.0
2021-10    0.0

[117 rows x 1 columns]
PREDICT             lower count  upper count
2021-11-01    -3.504120     5.836918
2021-12-01    -6.920696     4.766772
2022-01-01    -7.603777     4.083771
2022-02-01    -4.918199     6.801596
2022-03-01    -4.857162     7.033039
2022-04-01    -7.029451     4.891205
2022-05-01    -6.641018     5.389616
2022-06-01    -4.973331     7.059235
2022-07-01    -5.783676     6.331389
2022-08-01    -6.959602     5.156516


127.0.0.1 - - [08/Apr/2022 22:19:42] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [08/Apr/2022 22:19:44] "POST /_dash-update-component HTTP/1.1" 204 -
127.0.0.1 - - [08/Apr/2022 22:19:47] "POST /_dash-update-component HTTP/1.1" 200 -
