In [None]:
import pandas as pd
import numpy as np
import glob
import os
from flask import Flask
import sys

In [None]:
import plotly.express as px
import plotly.graph_objects as go
import plotly.figure_factory as ff

from dash import Dash
import dash
from dash import html
from dash import dcc
from dash.dependencies import Input, Output

In [None]:
import pandas as pd
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
import math
from datetime import timedelta
import glob
import os
from tqdm import tqdm as tqdm
import time
import seaborn as sns

%matplotlib inline
import statsmodels.api as sm
from statsmodels.compat import lzip
from statsmodels.formula.api import ols
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller

import warnings
warnings.filterwarnings('ignore')

from numpy import cumsum
from datetime import datetime as dt

# Data import

## Iris method

In [None]:
# Data directories. The defaults are the original per-user locations; set the
# environment variables below to run the notebook on another machine without
# editing this cell.
path = os.environ.get('DC2_ALL_MONTHS_DIR', r'C:\Users\20201222\Courses\Y2\Data challenge 2\all_months')
path2 = os.environ.get('DC2_ALL_MONTHS_2012_DIR', r'C:\Users\20203927\Documents\BDS\Y2 - Q3\DC2\all_months_2012')

In [None]:
# glob returns matches in arbitrary order; sort so the concatenated row order
# is the same on every run.
all_streets = sorted(glob.glob(os.path.join(path2, "2012*street.csv")))
if not all_streets:
    # Fail with a message that names the folder instead of pandas'
    # "No objects to concatenate".
    raise FileNotFoundError("No '2012*street.csv' files found in {!r}".format(path2))

# ignore_index gives the combined frame one unique 0..n-1 index instead of
# repeating each file's own row numbers.
df_streets = pd.concat(map(pd.read_csv, all_streets), ignore_index=True)
df_streets

In [None]:
# all_outcomes = glob.glob(path + "/2012*outcomes.csv")
# df_outcomes = pd.concat(map(pd.read_csv, all_outcomes))
# df_outcomes

In [None]:
# Derive the 'MSOA name' column by dropping the last four characters of the
# LSOA name.
# NOTE(review): if LSOA names look like "<area> 001A", removing four characters
# also removes the numeric code and leaves a trailing space — confirm whether
# only the final letter should be dropped.
lsoa_names = df_streets['LSOA name']
df_streets['MSOA name'] = lsoa_names.str.slice(stop=-4)
df_streets

In [None]:
# Initial table behind the bar chart: number of crimes per crime type for one
# police force, and each type's share (in %) of that force's total.
df_selected = df_streets.loc[df_streets['Reported by'] == 'Avon and Somerset Constabulary']
df_grouped = df_selected.groupby(['Reported by', 'Crime type']).agg({'Month' : 'count'})

# Total per police force, broadcast back onto every (force, crime type) row.
# transform keeps the original index, so the result does not depend on how
# groupby.apply treats group keys in the installed pandas version.
force_totals = df_grouped.groupby(level='Reported by')['Month'].transform('sum')

# No .squeeze() here: on a one-row result it would collapse the frame to a
# Series and the column rename below would fail.
df_grouped_new = pd.DataFrame({
    'Month': 100 * df_grouped['Month'] / force_totals,
    'Number of crimes': df_grouped['Month'],
})
df_grouped_perc = df_grouped_new.rename(columns = {'Month' : 'Percentage crimes per police force'}).reset_index()
df_grouped_perc = df_grouped_perc.round(2)

## Stijn method

In [None]:
# Ask for the folder that holds the model CSV files. Surrounding whitespace and
# quotes are stripped so a path pasted as "C:\..." (with quotes) still resolves.
data_path = input("Enter the path to the data folder: ").strip().strip('"\'')
# C:\Users\20203927\Documents\BDS\Y2 - Q3\DC2\drive data

In [None]:
# Build file paths with os.path.join instead of appending a literal "\\", so the
# loads also work where "\" is not a path separator and when data_path already
# ends with a separator.
Train = pd.read_csv(os.path.join(str(data_path), "covid_train.csv"))
# The CSV carries a saved index column; drop it without mutating in place so the
# cell can be re-run safely.
Train = Train.drop(columns="Unnamed: 0")

#TestCovid = pd.read_csv(os.path.join(str(data_path), "covid_test.csv"))
#TestCovid = TestCovid.drop(columns="Unnamed: 0")

#TestNoCovid = pd.read_csv(os.path.join(str(data_path), "no_covid_test.csv"))
#TestNoCovid = TestNoCovid.drop(columns="Unnamed: 0")

#TrainWithNoCovid = pd.read_csv(os.path.join(str(data_path), "no_covid_train.csv"))
#TrainWithNoCovid = TrainWithNoCovid.drop(columns="Unnamed: 0")

best_models_covid = pd.read_csv(os.path.join(str(data_path), "covid_best_models.csv"))

# Functionality

## Model functions

In [None]:
def ARIMA_DATA(df, MSOA, category):
    """Select the count series of one MSOA and crime type.

    Parameters
    ----------
    df : DataFrame with at least the columns 'MSOA', 'Crime type', 'Date'
        and 'count'.
    MSOA : value convertible with int(); compared against the 'MSOA' column.
    category : value compared against the 'Crime type' column.

    Returns
    -------
    DataFrame indexed by 'Date' with the single column 'count'.
    """
    msoa_code = int(MSOA)
    wanted_rows = (df['MSOA'] == msoa_code) & (df['Crime type'] == category)
    return df.loc[wanted_rows, ['Date', 'count']].set_index('Date')

def ARIMA_STATIONARY(df):
    """First-difference the frame when its 'count' column fails the ADF test.

    The augmented Dickey-Fuller p-value of df['count'] is compared with 0.05.
    Above that threshold the frame is differenced once and the resulting NaN
    row is dropped; otherwise the frame is returned unchanged. The differenced
    result is not tested again.
    """
    adf_p_value = adfuller(df['count'])[1]
    needs_differencing = adf_p_value > 0.05
    return df.diff().dropna() if needs_differencing else df

def ARIMA_OPTIMAL(stationary_data, MSOA, category):
    """Grid-search the ARMA(p, q) + constant model with the lowest AIC.

    Every combination of p and q in 1..3 is fitted as
    SARIMAX(stationary_data, order=(p, 0, q), trend='c'). Combinations whose
    fit raises are skipped.

    Parameters
    ----------
    stationary_data : series/frame handed unchanged to SARIMAX.
    MSOA, category : only used in the error message; kept in the signature so
        existing callers keep working.

    Returns
    -------
    (p, q, aic) of the candidate with the smallest AIC; the first one wins
    on ties.

    Raises
    ------
    ValueError
        If no candidate produced a usable (non-NaN) AIC.
    """
    order_aic_bic = []

    for p in range(1, 4):          # AR order
        for q in range(1, 4):      # MA order
            try:
                results = SARIMAX(stationary_data, order=(p, 0, q), trend='c').fit()
                order_aic_bic.append((p, q, results.aic))
            except Exception:
                # Best-effort search: a candidate that cannot be fitted is
                # skipped. Unlike a bare `except:`, this lets KeyboardInterrupt
                # stop a long search.
                continue

    order_df = pd.DataFrame(order_aic_bic, columns=['p', 'q', 'aic']).dropna(subset=['aic'])
    if order_df.empty:
        raise ValueError(
            "No ARMA(p, q) candidate could be fitted for MSOA {!r}, crime type {!r}".format(MSOA, category)
        )

    best = order_df.loc[order_df['aic'].idxmin()]
    # The row is upcast to float because it mixes p, q and aic; cast the orders
    # back to int before they are reused as a model order.
    return int(best['p']), int(best['q']), float(best['aic'])

def ARIMA_PREDICT(df, MSOA, category):
    """Fit the AIC-optimal ARMA model for one MSOA / crime type and predict.

    Steps: extract the series (ARIMA_DATA), difference it if needed
    (ARIMA_STATIONARY), pick (p, q) with ARIMA_OPTIMAL, refit that model and
    request predictions with start=-25.

    Returns
    -------
    (arima_data, mean_forecast, confidence_intervals) where arima_data is the
    undifferenced series, mean_forecast is a one-column frame holding the
    cumulative sum of the predicted means plus an offset, and
    confidence_intervals is the cumulative sum of the prediction's
    confidence bounds.
    """
    arima_data = ARIMA_DATA(df, MSOA, category)
    stationary_data = ARIMA_STATIONARY(arima_data)

    best_p, best_q, _ = ARIMA_OPTIMAL(stationary_data, MSOA, category)

    fitted = SARIMAX(stationary_data, order=(best_p, 0, best_q), trend='c').fit()
    forecast = fitted.get_prediction(start=-25)

    # NOTE(review): the offset is the last value of stationary_data, which is
    # the *differenced* series whenever ARIMA_STATIONARY differenced it, and the
    # cumulative sum is applied even when no differencing happened — confirm
    # this is the intended way to get back to crime-count levels.
    offset = stationary_data.iloc[-1, 0]
    mean_forecast = cumsum(forecast.predicted_mean) + offset
    confidence_intervals = cumsum(forecast.conf_int())
    return arima_data, mean_forecast.to_frame(), confidence_intervals

def ARIMA_SUMMARY(df, MSOA, category):
    """Return the fit summary of the AIC-optimal ARMA model for one series.

    The series is prepared with ARIMA_DATA and ARIMA_STATIONARY, the order is
    chosen by ARIMA_OPTIMAL, and the model is refitted to produce the summary.
    """
    series = ARIMA_DATA(df, MSOA, category)
    stationary_data = ARIMA_STATIONARY(series)

    best_p, best_q = ARIMA_OPTIMAL(stationary_data, MSOA, category)[:2]

    fitted = SARIMAX(stationary_data, order=(best_p, 0, best_q), trend='c').fit()
    return fitted.summary()

def model_predict(df, msoa, category):
    """Run ARIMA_PREDICT and also return the confidence bounds separately.

    Returns
    -------
    (data, mean_forecasts, confidence_intervals, lower_limits, upper_limits),
    where the last two are the 'lower count' and 'upper count' columns of
    confidence_intervals.
    """
    data, mean_forecasts, confidence_intervals = ARIMA_PREDICT(df, msoa, category)
    return (
        data,
        mean_forecasts,
        confidence_intervals,
        confidence_intervals['lower count'],
        confidence_intervals['upper count'],
    )

In [None]:
# Initial forecast shown before any widget is touched: MSOA '001',
# crime type 'Anti-social behaviour'.
data, mean_forecasts, confidence_intervals, lower_limits, upper_limits = model_predict(
    Train, '001', 'Anti-social behaviour'
)

# Re-key the forecast by a 'YYYY-MM' string in a 'Date' index so it can sit
# next to the observed counts; the rename covers a forecast column named 0.
test = (
    mean_forecasts
    .reset_index()
    .assign(Date=lambda frame: frame['index'].dt.strftime('%Y-%m'))
    .drop(columns=['index'])
    .set_index('Date')
    .rename(columns={0: 'predicted_mean'})
)

plot_data = pd.concat([data, test], axis=1)

## Widgets

In [None]:
# getting lists

# Sorted, de-duplicated option lists for the widgets.
police_list = df_streets['Reported by'].drop_duplicates().sort_values(ascending=True).tolist()  # police forces
crime_list = df_streets['Crime type'].drop_duplicates().sort_values(ascending=True).tolist()  # crime types
msoa_list = best_models_covid['MSOA'].drop_duplicates().sort_values(ascending=True).tolist()  # MSOAs


In [None]:
#Widgets
# Multi-select dropdown of police forces.
police_dropdown = dcc.Dropdown(
    id = 'police_dropdown',
    options = police_list,
    # A multi=True dropdown carries a list of selections, so the initial value
    # must be a list as well; a bare string would reach the callback as a
    # string. The slice also copes with an empty police_list.
    value = police_list[:1],
    multi = True,
)

# Single-select MSOA dropdown, starting on the first MSOA.
msoa_dropdown = dcc.Dropdown(
    id='msoa_dropdown',
    options=msoa_list,
    value=msoa_list[0],
)

# Crime-type checklist, every type ticked initially.
crime_checklist = dcc.Checklist(
    id='crime_checklist',
    options=crime_list,
    value=crime_list,
    labelStyle={'display': 'block'},
)

# Two independent single-choice crime-type selectors.
crime_radio_1 = dcc.RadioItems(
    id='crime_radio_1',
    options=crime_list,
    value=crime_list[0],
    labelStyle={'display': 'block'},
)

crime_radio_2 = dcc.RadioItems(
    id='crime_radio_2',
    options=crime_list,
    value=crime_list[0],
    labelStyle={'display': 'block'},
)

# select_button = html.Button("Select all", id="select_button")
# Button that clears the crime-type checklist.
deselect_button = html.Button(children="Deselect all", id="deselect_button")

# Switch between percentage and absolute y-values.
value_type_radio = dcc.RadioItems(
    id='value_type_radio',
    options=['Percentages', 'Absolute values'],
    value='Percentages',
)

# Placeholders for the ARIMA orders; they show "None" until filled in.
p_value = html.Div(children=['P: {}'.format(None)], id='p_value')
q_value = html.Div(children=['Q: {}'.format(None)], id='q_value')

## Graphs

In [None]:
# Initial grouped bar chart: one bar per crime type, percentages on the y-axis.
bar_chart = px.bar(
    data_frame=df_grouped_perc,
    x='Reported by',
    y='Percentage crimes per police force',
    color='Crime type',
    color_discrete_sequence=px.colors.qualitative.Safe,
    barmode='group',
    orientation='v',
    text_auto=True,
    hover_name="Reported by",
    # hide the force (already the hover title), show the absolute count
    hover_data={'Reported by': False, 'Number of crimes': True},
)

# Graph component holding the chart
bar_graph = dcc.Graph(id='bar_graph', figure=bar_chart)

In [None]:
# Initial line chart of the observed counts. The frame's index is passed
# directly as x.
line_chart = px.line(
    data_frame=plot_data,
    x=plot_data.index,
    y="count",
)

# Graph component holding the chart
line_graph = dcc.Graph(id='line_graph', figure=line_chart)

## Callbacks

In [None]:
# The Dash application object; the callbacks and the layout attach to it.
app = Dash(__name__)

@app.callback(
    Output('bar_graph', 'figure'),
    [Input("police_dropdown", "value"), Input("crime_checklist", "value"), Input("value_type_radio", "value")]
)
def update_bar(police_force, crime_type, value_type):
    """Rebuild the grouped bar chart for the selected forces and crime types.

    Parameters
    ----------
    police_force : list of police force names; a single string or None is
        also accepted and treated as a one-element or empty selection.
    crime_type : list of crime types (None is treated as empty).
    value_type : 'Absolute values' plots the number of crimes; any other
        value plots the percentage per police force.

    Returns
    -------
    The plotly figure for the 'bar_graph' component.
    """
    # The dropdown may deliver a bare string (single value) or None (cleared)
    # instead of a list; Series.isin only accepts list-likes.
    if police_force is None:
        police_force = []
    elif isinstance(police_force, str):
        police_force = [police_force]
    if crime_type is None:
        crime_type = []

    ##update dataframes
    selected_rows = df_streets['Reported by'].isin(police_force) & df_streets['Crime type'].isin(crime_type)
    df_grouped = df_streets.loc[selected_rows].groupby(['Reported by', 'Crime type']).agg({'Month' : 'count'})

    # Per-force totals broadcast back onto each (force, crime type) row.
    # transform keeps the index intact regardless of pandas' group_keys
    # handling, and no .squeeze() is applied, so a single selected row stays a
    # DataFrame.
    force_totals = df_grouped.groupby(level='Reported by')['Month'].transform('sum')
    df_grouped_perc = pd.DataFrame({
        'Percentage crimes per police force': 100 * df_grouped['Month'] / force_totals,
        'Number of crimes': df_grouped['Month'],
    }).reset_index().round(2)

    # Fall back to percentages so y_value is always defined.
    if value_type == 'Absolute values':
        y_value = 'Number of crimes'
    else:
        y_value = 'Percentage crimes per police force'

    ##update graphs
    chart = px.bar(data_frame=df_grouped_perc,
                    x = 'Reported by',
                    y = y_value,
                    color = 'Crime type',
                    barmode='group',
                    orientation = 'v',
                    text_auto=True,
                    hover_name="Reported by",
                    hover_data={
                        'Reported by': False,
                        'Percentage crimes per police force' : True,
                        'Number of crimes' : True
                    },
                   color_discrete_sequence=px.colors.qualitative.Safe,
                   )
    return chart
    
@app.callback(
    Output('crime_checklist', "value"),
    [Input('deselect_button', "n_clicks")]
)
def deselect_crime_type(n_clicks):
    """Clear the crime-type checklist when the deselect button is clicked.

    While the button has never been clicked (n_clicks is None) the update is
    cancelled, so the checklist keeps its current value.
    """
    if n_clicks is not None:
        return []
    raise dash.exceptions.PreventUpdate

In [None]:
@app.callback(
    Output('line_graph', 'figure'),
    [Input("msoa_dropdown", "value"), Input("crime_radio_2", "value")]
)
def update_chart(MSOA, crime):
    """Rebuild the line chart for the selected MSOA and crime type.

    Parameters
    ----------
    MSOA : MSOA code chosen in 'msoa_dropdown'.
    crime : crime type chosen in 'crime_radio_2'.

    Returns
    -------
    The plotly figure for the 'line_graph' component.

    Raises
    ------
    dash.exceptions.PreventUpdate
        If either selection is empty, so the current figure stays in place.
    """
    # A cleared dropdown sends None, which model_predict cannot convert to an
    # MSOA code; keep the current figure instead of erroring.
    if MSOA is None or crime is None:
        raise dash.exceptions.PreventUpdate

    data, mean_forecasts, *_ = model_predict(Train, MSOA, crime)

    # Re-key the forecast by a 'YYYY-MM' string in a 'Date' index so it lines
    # up with the observed counts.
    test = mean_forecasts.reset_index()
    test['Date'] = test['index'].dt.strftime('%Y-%m')
    test = test.drop(columns=['index']).set_index('Date')
    test = test.rename(columns={0:'predicted_mean'})

    plot_data = pd.concat([data, test], axis=1)

    # 'Date' is the index of plot_data, not a column, so the index itself is
    # passed as x (plotly express only resolves column names from strings).
    # NOTE(review): only 'count' is drawn; 'predicted_mean' is available in
    # plot_data if the forecast should be shown as well.
    chart = px.line(plot_data, x=plot_data.index, y="count")
    # The figure has to be returned, otherwise the graph receives None.
    return chart

# Layout

## Page layouts

In [None]:
# Content lists per tab: one list for the widget column, one for the graph column.
## Tab 1
widgets1 = [
    html.H2('Widgets', style={'font-family': 'Verdana'}),
    html.H3('Location:', style={'font-family': 'Verdana'}),
    dcc.Markdown('Police force:', style={'font-family': 'Verdana'}),
    police_dropdown,
    html.Br(),
    html.H3('Crime type:', style={'font-family': 'Verdana'}),
    deselect_button,
    crime_checklist,
    html.Br(),
    html.H3('Unit crimes:', style={'font-family': 'Verdana'}),
    value_type_radio,
]
graphs1 = [
    html.H2('Graph', style={'font-family': 'Verdana'}),
    bar_graph,
]

## Tab 2
widgets2 = [
    html.H2('Widgets', style={'font-family': 'Verdana'}),
    html.H3('Location:', style={'font-family': 'Verdana'}),
    # dcc.Markdown('Police force:', style={'font-family' : 'Verdana'}), police_dropdown,
    html.Br(),
    html.H3('Crime type:', style={'font-family': 'Verdana'}),
    crime_radio_1,
]
graphs2 = [
    html.H2('Graph', style={'font-family': 'Verdana'}),
]

In [None]:
## Tab 3
#widgets3 = [html.H2('Widgets', style={'font-family' : 'Verdana'}),
#           html.H3('Location:', style={'font-family' : 'Verdana'}),
#               dcc.Markdown('MSOA name: *not linked yet*', style={'font-family' : 'Verdana'}), msoa_dropdown,
#           html.Br(),
#           html.H3('Crime type:', style={'font-family' : 'Verdana'}), crime_radio,
#           html.Br(),
#            html.H3('ARIMA model:', style={'font-family' : 'Verdana'}), \
#               dcc.Markdown('*not linked yet*', style={'font-family' : 'Verdana'}), p_value, q_value,
#           ]
#graphs3 = [html.H2('Graph', style={'font-family' : 'Verdana'}),]        

# Tab 3: MSOA and crime-type selectors plus the ARIMA order placeholders.
widgets3 = [
    html.H2('Widgets', style={'font-family': 'Verdana'}),

    html.H3('Location:', style={'font-family': 'Verdana'}),
    dcc.Markdown('Select corresponding MSOA:', style={'font-family': 'Verdana'}),
    msoa_dropdown,
    html.Br(),

    html.H3('Crime type:', style={'font-family': 'Verdana'}),
    crime_radio_2,
    html.Br(),

    html.H3('ARIMA model:', style={'font-family': 'Verdana'}),
    dcc.Markdown('*not linked yet*', style={'font-family': 'Verdana'}),
    p_value,
    q_value,
]

graphs3 = [
    html.H2('Graph', style={'font-family': 'Verdana'}),
    line_graph,
]

## Page layout options

In [None]:
# Column layouts: a 20%-wide widget column and a 78%-wide, right-floated graph
# column for each of the three tabs.
widget_layout1, widget_layout2, widget_layout3 = (
    html.Div(children=column, style={'width': '20%', 'display': 'inline-block'})
    for column in (widgets1, widgets2, widgets3)
)

graph_layout1, graph_layout2, graph_layout3 = (
    html.Div(children=column, style={'width': '78%', 'display': 'inline-block', 'float': 'right'})
    for column in (graphs1, graphs2, graphs3)
)

## Full layout

In [None]:
# Page skeleton: title, the three tabs (tab 1 selected initially) and an empty
# 'tabs_content' container.
full_layout = [
    html.H1("Title", style={'textAlign': 'center', 'font-family': 'Verdana'}),
    dcc.Tabs(
        id="tabs",
        value='tab_1',
        children=[
            dcc.Tab(
                label='Analysis: Comparison',
                id='tab_1',
                value='tab_1',
                children=[widget_layout1, graph_layout1],
            ),
            dcc.Tab(
                label='Analysis: Hotspots',
                id='tab_2',
                children=[widget_layout2, graph_layout2],
            ),
            dcc.Tab(
                label='Model: ARIMA forecasting',
                id='tab_3',
                children=[widget_layout3, graph_layout3],
            ),
        ],
    ),
    html.Div(id='tabs_content'),
]
# Attach the layout to the app
app.layout = html.Div(children=full_layout)

# Execution 

In [None]:
if __name__ == "__main__":
    # `run_server` is the older spelling of `run` and is no longer available in
    # recent Dash releases; use `run` when the installed Dash has it and fall
    # back to `run_server` on older installs.
    start_server = getattr(app, "run", None) or app.run_server
    start_server(debug=False)

In [None]:
# Extra cell in case previous cell gets deleted
# if __name__ == "__main__":
#     app.run_server(debug=False)

In [None]:
#Choose font family
# app.layout = html.Div(children=[
#                        html.H1('Times New Roman', style={'font-family' : 'Times New Roman'}),
#                        html.H1('Georgia', style={'font-family' : 'Georgia'}),
#                        html.H1('Garamond', style={'font-family' : 'Garamond'}),
#                        html.H1('Arial', style={'font-family' : 'Arial'}),
#                        html.H1('Verdana', style={'font-family' : 'Verdana'}),
#                        html.H1('Helvetica', style={'font-family' : 'Helvetica'}),
#                        html.H1('Courier New', style={'font-family' : 'Courier New'}),
#                        html.H1('Lucida Console', style={'font-family' : 'Lucida Console'}),
#                        html.H1('Monaco', style={'font-family' : 'Monaco'}),
    
#                        html.P('Times New Roman', style={'font-family' : 'Times New Roman'}),
#                        html.P('Georgia', style={'font-family' : 'Georgia'}),
#                        html.P('Garamond', style={'font-family' : 'Garamond'}),
#                        html.P('Arial', style={'font-family' : 'Arial'}),
#                        html.P('Verdana', style={'font-family' : 'Verdana'}),
#                        html.P('Helvetica', style={'font-family' : 'Helvetica'}),
#                        html.P('Courier New', style={'font-family' : 'Courier New'}),
#                        html.P('Lucida Console', style={'font-family' : 'Lucida Console'}),
#                        html.P('Monaco', style={'font-family' : 'Monaco'}),
#                         ])

# if __name__ == "__main__":
#     app.run_server(debug=False)