In [1]:
import dash
import dash_core_components as dcc
import dash_bootstrap_components as dbc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
import plotly.express as px

import pandas as pd, numpy as np
import pickle
import os
import glob
import base64
import warnings
warnings.filterwarnings('ignore')

from fbprophet import Prophet

In [2]:
import matplotlib.pyplot as plt

In [3]:
# Load the per-city time series (one column per city) and index it by date.
ts_df = pd.read_csv(
    os.path.join(os.getcwd(), "dashboard_components", "data", "time_series_df.csv")
).set_index('date')
# The CSV stores dates as text; convert the index to a DatetimeIndex.
ts_df.index = pd.DatetimeIndex(ts_df.index)
ts_df.head()

Unnamed: 0_level_0,Brampton,Calgary,Markham,Mississauga,Montreal,North York,Richmond Hill,Scarborough,Toronto,Vaughan
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2010-01-16,1,8,0,0,1,0,0,2,27,0
2010-01-17,0,10,0,0,3,1,0,1,29,0
2010-01-18,0,4,4,0,0,0,0,2,21,0
2010-01-19,0,2,0,0,0,1,0,0,25,0
2010-01-20,0,8,0,3,3,1,0,0,37,0


In [4]:
def get_city_ts(df, city_fil):
    """Extract one city's column as a two-column ``ds`` / ``y`` frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose index becomes the ``ds`` column and which contains a
        column labelled ``city_fil``.
    city_fil : str
        Label of the column to extract.

    Returns
    -------
    pandas.DataFrame
        New frame with the former index in ``ds`` and the selected column's
        values in ``y``; ``df`` itself is not modified.
    """
    city_series = df.loc[:, city_fil]
    # reset_index() moves the index into a regular column next to the values.
    city_ts = city_series.reset_index()
    city_ts.columns = ['ds', 'y']
    return city_ts

def get_forecast(fact_df, city_fil, num_steps):
    """Load a city's pickled forecasting model and predict ``num_steps`` weeks ahead.

    Parameters
    ----------
    fact_df : pandas.DataFrame
        Observed series for the city with a ``y`` column (the layout produced
        by ``get_city_ts``). Its ``y`` values are attached to the forecast,
        zero-padded over the forecast horizon.
    city_fil : str
        City name. It is lower-cased and spaces become underscores to build
        the model file name ``<city>_prophet.pkl`` under
        ``dashboard_components/models`` in the current working directory.
    num_steps : int
        Number of weeks to forecast; converted to ``num_steps * 7`` daily periods.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds``, ``y``, ``yhat``, ``yhat_lower``, ``yhat_upper`` for the
        model history followed by the forecast horizon.

    Raises
    ------
    FileNotFoundError
        If no model file exists for ``city_fil``.
    ValueError
        If ``len(fact_df) + num_steps * 7`` does not match the number of rows
        the model predicts (the ``y`` column cannot be assigned).
    """
    model_file = city_fil.lower().replace(' ', '_') + '_prophet.pkl'
    model_path = os.path.join(os.getcwd(), 'dashboard_components', 'models', model_file)
    # NOTE(review): pickle.load can execute arbitrary code; only load model
    # files that were produced by this project.
    with open(model_path, 'rb') as f:
        model = pickle.load(f)

    horizon_days = num_steps * 7
    future_pd = model.make_future_dataframe(
        periods=horizon_days,
        include_history=True,
        freq='D')

    fcast = model.predict(future_pd)
    # Use the observations passed by the caller. The previous code read the
    # notebook-level `city_df` here, so every city was given the `y` values of
    # whichever city that global happened to hold.
    # NOTE(review): assumes the model was fitted on exactly the rows of
    # `fact_df` — confirm against the training notebook.
    fcast['y'] = np.pad(fact_df['y'].values, pad_width=(0, horizon_days), mode='constant')

    return fcast[['ds', 'y', 'yhat', 'yhat_lower', 'yhat_upper']]

def get_anomalies(city_fil):
    """Read the stored anomaly table for a city.

    The file name is built from ``city_fil`` (lower-cased, spaces replaced by
    underscores) plus ``_anomalies.csv`` and is looked up under
    ``dashboard_components/data`` in the current working directory.

    Parameters
    ----------
    city_fil : str
        City name.

    Returns
    -------
    pandas.DataFrame
        The CSV contents without the ``Unnamed: 0`` column.

    Raises
    ------
    KeyError
        If the CSV has no ``Unnamed: 0`` column.
    """
    csv_name = city_fil.lower().replace(' ', '_') + '_anomalies.csv'
    csv_path = os.path.join(os.getcwd(), 'dashboard_components', 'data', csv_name)
    return pd.read_csv(csv_path).drop('Unnamed: 0', axis=1)

In [5]:
# Toronto's series in the two-column ds / y layout; also used further down as
# the initial figure of the dashboard graph.
city_df = get_city_ts(df=ts_df, city_fil='Toronto')
city_df.head()

Unnamed: 0,ds,y
0,2010-01-16,27
1,2010-01-17,29
2,2010-01-18,21
3,2010-01-19,25
4,2010-01-20,37


In [6]:
%%time
num_steps=52
fcast = get_forecast(city_df, 'Toronto', num_steps) 
fcast.head()

INFO:numexpr.utils:Note: NumExpr detected 12 cores but "NUMEXPR_MAX_THREADS" not set, so enforcing safe limit of 8.
INFO:numexpr.utils:NumExpr defaulting to 8 threads.
Wall time: 7.41 s


Unnamed: 0,ds,y,yhat,yhat_lower,yhat_upper
0,2010-01-16,27,33.882428,-39.665842,103.593982
1,2010-01-17,29,30.526631,-41.283199,97.549289
2,2010-01-18,21,16.501683,-48.882786,87.565894
3,2010-01-19,25,17.956621,-51.833419,85.46154
4,2010-01-20,37,19.794336,-50.512709,94.283527


In [7]:
# Preview the stored anomaly table for Toronto.
anomaly_df = get_anomalies(city_fil='Toronto')
anomaly_df.head()

Unnamed: 0,ds,trend,yhat,yhat_lower,yhat_upper,fact,anomaly,importance
0,2010-09-15,26.910482,125.744472,53.80857,193.84923,198,1,4.15077
1,2010-12-25,31.767883,135.691983,66.105116,203.134646,39,-1,27.105116
2,2010-12-26,31.815976,199.857948,125.789028,269.702765,75,-1,50.789028
3,2011-01-01,31.933782,200.479485,127.095393,273.241429,80,-1,47.095393
4,2011-03-15,31.289611,135.350913,64.498522,203.966552,211,1,7.033448


In [24]:
logo_path = os.path.join(os.getcwd(), 'dashboard_components', 'images', 'yelp_logo.png')
# Read the logo inside a context manager so the file handle is closed right
# away instead of being left open for the lifetime of the kernel.
with open(logo_path, 'rb') as logo_file:
    encoded_image = base64.b64encode(logo_file.read())

# Top navigation bar: logo (embedded as a base64 data URI) next to the brand text.
NAVBAR = dbc.Navbar(
    children=[
        # Use row and col to control vertical alignment of logo / brand
        dbc.Row(
            [
                dbc.Col(
                    html.Img(
                        src='data:image/png;base64,{}'.format(encoded_image.decode()),
                        height="30px",
                    ),
                    md=1,
                ),
                dbc.Col(
                    dbc.NavbarBrand("YELP Merchant Insights Dashboard", className="ml-2")
                ),
            ],
            align="center",
            no_gutters=True,
        ),
    ],
    color="dark",
    dark=True,
    sticky="top",
)

In [25]:
# Contents of the forecast card: a header plus a body holding the three
# controls (city, forecast horizon, anomaly share) and the graph they drive.
TS_FORECAST = [
    dbc.CardHeader(html.H5("Demand Forecast with anomalies")),
    dbc.CardBody(
        children=[
            dcc.Loading(
                id="loading-bigrams-comps",
                type='default',
                children=[
                    # Warning banner; rendered hidden (display: none).
                    dbc.Alert(
                        "Something's gone wrong! Give us a moment, but try loading this page again if problem persists.",
                        id="no-data-alert-bigrams_comp",
                        color="warning",
                        style={"display": "none"},
                    ),
                    dbc.Row(
                        children=[
                            dbc.Col(
                                html.P("Choose the city and number of forecast steps:"),
                                md=12,
                            ),
                            # City selector: one option per column of ts_df.
                            dbc.Col(
                                children=[
                                    html.Label('City:'),
                                    dcc.Dropdown(
                                        id="city-dropdown",
                                        options=[
                                            {"label": city, "value": city}
                                            for city in ts_df.columns
                                        ],
                                        value="Toronto",
                                    ),
                                ],
                                md=4,
                            ),
                            # Forecast horizon in weeks, in steps of 26.
                            dbc.Col(
                                children=[
                                    html.Label('No. weeks to forecast:'),
                                    dcc.Slider(
                                        id="forecast-slider",
                                        min=26,
                                        max=104,
                                        step=26,
                                        value=52,
                                        marks={
                                            26: '26',
                                            52: '52',
                                            78: '78',
                                            104: '104',
                                        },
                                    ),
                                ],
                                md=4,
                            ),
                            # Share of anomalies to display, in steps of 25%.
                            dbc.Col(
                                children=[
                                    html.Label('% of anomalies:'),
                                    dcc.Slider(
                                        id="anomaly-slider",
                                        min=25,
                                        max=100,
                                        step=25,
                                        value=25,
                                        marks={
                                            25: '25%',
                                            50: '50%',
                                            75: '75%',
                                            100: '100%',
                                        },
                                    ),
                                ],
                                md=4,
                            ),
                        ]
                    ),
                    # Initial figure: the raw series held in city_df.
                    dcc.Graph(
                        id='city-forecast',
                        figure=px.line(city_df, x='ds', y='y', title='Toronto'),
                    ),
                ],
            ),
        ],
        style={"marginTop": 0, "marginBottom": 0},
    ),
]

# Page body: a container with a single row holding the forecast card.
BODY = dbc.Container(
    children=[
        dbc.Row(
            children=[dbc.Col(dbc.Card(TS_FORECAST))],
            style={"marginTop": 30},
        ),
    ],
    className='mt-12',
)

# Dash application styled with the Bootstrap theme; the page is the navbar
# followed by the body.
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
server = app.server
app.layout = html.Div([NAVBAR, BODY])

In [26]:
%%time
@app.callback(
    Output('city-forecast', 'figure'),
    [Input('city-dropdown', 'value'),
     Input('forecast-slider', 'value'),
     Input('anomaly-slider', 'value')],
)
def update_business(input_city, num_steps, anomaly_per):
    """Rebuild the forecast figure whenever one of the three controls changes.

    Parameters
    ----------
    input_city : str or None
        Value of the city dropdown; ``None`` when the selection is cleared.
    num_steps : int
        Forecast horizon in weeks (the forecast covers ``num_steps * 7`` rows).
    anomaly_per : int
        Percentage of anomalies to display, keeping those with the highest
        ``importance`` first.

    Returns
    -------
    plotly.graph_objects.Figure
        Traces in drawing order: lower bound, upper bound (filled down to the
        lower bound), forecast, actual values, negative anomalies, positive
        anomalies.

    Raises
    ------
    dash.exceptions.PreventUpdate
        If any input is ``None``, so the figure already on screen is kept.
    """
    # Local import: the notebook's import cell only brings in `dash` itself.
    from dash.exceptions import PreventUpdate

    # A cleared dropdown sends None; looking that up in ts_df would raise and
    # surface as a callback error, so leave the current figure untouched.
    if input_city is None or num_steps is None or anomaly_per is None:
        raise PreventUpdate

    out_df = get_city_ts(ts_df, input_city)
    fcast = get_forecast(out_df, input_city, num_steps)
    horizon = num_steps * 7
    history_part = fcast.iloc[:-horizon]
    future_part = fcast.iloc[-horizon:]

    # Keep the requested share of anomalies, most important first. The copy
    # makes the frame independent of the sorted one before a column is
    # overwritten (avoids chained assignment on a slice).
    all_anomalies = get_anomalies(input_city)
    n_keep = int((anomaly_per / 100) * all_anomalies.shape[0])
    anomaly_df = all_anomalies.sort_values('importance', ascending=False).head(n_keep).copy()

    # Rescale importance so it can be used as marker size. Skip the division
    # when the norm is zero (no rows, or all importances zero) so the sizes
    # do not turn into NaN.
    importance_norm = np.linalg.norm(anomaly_df['importance'])
    if importance_norm > 0:
        anomaly_df['importance'] = 50 * anomaly_df['importance'].values / importance_norm

    anomaly_pos_df = anomaly_df.loc[anomaly_df.anomaly == 1, :].copy()
    anomaly_neg_df = anomaly_df.loc[anomaly_df.anomaly == -1, :].copy()

    # Forecast line over the future window only.
    yhat = go.Scatter(
        x=future_part['ds'],
        y=future_part['yhat'],
        mode='lines',
        marker={'color': 'Coral'},
        line={'width': 3},
        name='Forecast',
    )

    # NOTE(review): the interval band is drawn over the history window while
    # the forecast line covers the future window — confirm this is intended.
    # The lower bound is transparent; it only anchors the 'tonexty' fill of
    # the upper bound.
    yhat_lower = go.Scatter(
        x=history_part['ds'],
        y=history_part['yhat_lower'],
        marker={'color': 'rgba(0,0,0,0)'},
        showlegend=False,
        hoverinfo='none',
    )

    yhat_upper = go.Scatter(
        x=history_part['ds'],
        y=history_part['yhat_upper'],
        fill='tonexty',
        fillcolor='LightPink',
        name='Confidence',
        hoverinfo='none',
        mode='none',
    )

    actual = go.Scatter(
        x=out_df['ds'],
        y=out_df['y'],
        mode='lines',
        marker={'color': 'RoyalBlue'},
        line={'width': 3},
        name='Actual',
    )

    # Anomaly markers are sized by the rescaled importance.
    anomaly_pos = go.Scatter(
        x=anomaly_pos_df['ds'],
        y=anomaly_pos_df['fact'],
        mode='markers',
        marker={
            'color': 'MediumSeaGreen',
            'size': anomaly_pos_df.importance,
        },
        name='+ve Anomaly',
    )

    anomaly_neg = go.Scatter(
        x=anomaly_neg_df['ds'],
        y=anomaly_neg_df['fact'],
        mode='markers',
        marker={
            'color': 'IndianRed',
            'size': anomaly_neg_df.importance,
        },
        name='-ve Anomaly',
    )

    layout = go.Layout(
        yaxis={'title': input_city},
        margin={'t': 20, 'b': 50, 'l': 60, 'r': 10},
        legend={'bgcolor': 'White'},
    )

    # Order matters: the upper bound fills down to the trace just before it.
    data = [yhat_lower, yhat_upper, yhat, actual, anomaly_neg, anomaly_pos]

    return go.Figure(dict(data=data, layout=layout))

Wall time: 0 ns


In [27]:
# Start the development server with debug mode on and the auto-reloader off.
# NOTE(review): the reloader is presumably disabled because it does not work
# from inside a notebook kernel — confirm.
app.run_server(
    debug=True,
    use_reloader=False,
)

Running on http://127.0.0.1:8050/
Running on http://127.0.0.1:8050/
Running on http://127.0.0.1:8050/
Running on http://127.0.0.1:8050/
Running on http://127.0.0.1:8050/
INFO:__main__:Running on http://127.0.0.1:8050/
Debugger PIN: 911-327-694
Debugger PIN: 911-327-694
Debugger PIN: 911-327-694
Debugger PIN: 911-327-694
Debugger PIN: 911-327-694
INFO:__main__:Debugger PIN: 911-327-694
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: on
