In [6]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from prophet import Prophet

In [7]:
from pandas.tseries.holiday import *
from datetime import datetime
from sklearn.model_selection import train_test_split 

In [8]:
from prophet.diagnostics import performance_metrics
from prophet.diagnostics import cross_validation
import plotly.express as px

In [9]:
#Reading the data
data = pd.read_csv('GlobalSuperstoreData.csv')
data.head()




Unnamed: 0,Order Date,Segment,Market,Sales,Profit
0,31-07-2012,Consumer,US,2309.65,762.1845
1,05-02-2013,Corporate,APAC,3709.395,-288.765
2,17-10-2013,Consumer,APAC,5175.171,919.971
3,28-01-2013,Home Office,EU,2892.51,-96.54
4,05-11-2013,Consumer,Africa,2832.96,311.52


In [6]:
data.shape

(51290, 5)

In [7]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51290 entries, 0 to 51289
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Order Date  51290 non-null  object 
 1   Segment     51290 non-null  object 
 2   Market      51290 non-null  object 
 3   Sales       51290 non-null  float64
 4   Profit      51290 non-null  float64
dtypes: float64(2), object(3)
memory usage: 2.0+ MB


In [67]:
import plotly.graph_objects as go
import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
from dash import dash_table
from plotly.subplots import make_subplots
import dash_daq as daq

In [25]:
#EDA
data.head()

Unnamed: 0,Order Date,Segment,Market,Sales,Profit,Market_segment
0,31-07-2012,Consumer,US,2309.65,762.1845,US_Consumer
1,05-02-2013,Corporate,APAC,3709.395,-288.765,APAC_Corporate
2,17-10-2013,Consumer,APAC,5175.171,919.971,APAC_Consumer
3,28-01-2013,Home Office,EU,2892.51,-96.54,EU_Home Office
4,05-11-2013,Consumer,Africa,2832.96,311.52,Africa_Consumer


In [26]:
data['Order Date'] = pd.to_datetime(data['Order Date'])


In [27]:
data =data.sort_values(by='Order Date')


Exploratory Data Analysis

In [29]:
df_grouped = data.groupby(['Segment','Market'],as_index=False).sum()

In [36]:
#Time series
df_sales = data.groupby(['Order Date'],as_index=False).sum()
fig_salsets = px.line(df_sales, x='Order Date', y="Sales",markers =True,title='Sales time series')
fig_profitts = px.line(df_sales, x="Order Date", y="Profit",markers =True,title='Profit time series')


In [53]:
fig1 = make_subplots(rows=1,cols =4, subplot_titles = ("Segment wise Sales","Box Plot sales","Segment wise Box plot","Market wise boxplots"))
fig1.add_trace(
    go.Bar(x=df_grouped['Segment'], y=df_grouped['Sales']
           ),
             row=1,col=1
)
fig1.add_trace(
    go.Box(y=df_grouped['Sales'] ),
    row=1,col =2
)

fig1.add_trace(
    go.Box(x=df_grouped['Segment'],y=df_grouped['Sales'] ),
    row=1,col =3
)
fig1.add_trace(
    go.Box(x=df_grouped['Market'],y=df_grouped['Sales'] ),
    row=1,col =4
)

In [54]:
fig2 = make_subplots(rows=1,cols =4, subplot_titles = ("Segment wise Profit","Box Plot Profit","Segment wise Box plot","Market wise boxplots"))
fig2.add_trace(
    go.Bar(x=df_grouped['Segment'], y=df_grouped['Profit']
           ),
             row=1,col=1
)
fig2.add_trace(
    go.Box(y=df_grouped['Profit'] ),
    row=1,col =2
)

fig2.add_trace(
    go.Box(x=df_grouped['Segment'],y=df_grouped['Profit'] ),
    row=1,col =3
)
fig2.add_trace(
    go.Box(x=df_grouped['Market'],y=df_grouped['Profit'] ),
    row=1,col =4
)

In [60]:
df_sales.head()

Unnamed: 0,Order Date,Sales,Profit
0,2011-01-01,808.563,198.873
1,2011-01-02,2455.67,603.4736
2,2011-01-03,12565.7663,1905.1539
3,2011-01-04,2582.801,-308.7874
4,2011-01-06,11480.4927,2460.2536


In [61]:
fig3 = make_subplots(rows=1,cols =4, subplot_titles = ("Sales time series","Segment wise sales","Profit time series","Segment wise profit"))
fig3.add_trace(
    go.Scatter(x=df_sales['Order Date'], y=df_grouped['Sales']
           ),
             row=1,col=1
)
df_segment = data.groupby(['Order Date','Segment'],as_index=False).sum()
fig3.add_trace(
    go.Scatter(x=df_segment['Order Date'], y=df_segment['Sales'] ),
    row=1,col =2
)

fig3.add_trace(
    go.Scatter(x=df_sales['Order Date'], y=df_grouped['Profit']
           ),
             row=1,col=3
)
fig3.add_trace(
    go.Scatter(x=df_segment['Order Date'], y=df_segment['Profit'] ),
    row=1,col =4
)

In [76]:
#Importing Model trained earlier (https://github.com/kumarkaushaliimu/Time-series-forecasting/blob/c6abd75d10a5310c46f4e14df9f1e4f8aa30a7a1/Global_superstore_Time_series_forecasting.ipynb)
import pickle

model = pickle.load(open('model-globalstore.pkl', 'rb'))


In [83]:

# build jupyter dash app 
app = JupyterDash(__name__)
# add html components and figure to app
app.layout = html.Div([
    html.H1("Global-Store Analysis",style ={'fontSize':24,'color':'blue','textAlign':'center'}),
    dcc.Tabs([
        dcc.Tab(label='Exploratory Data Analysis',
                children=[
                    html.Div([
                        html.H3("Sales data"),
                        dcc.Graph(id='sales-plots',figure =fig1)
                    ]),
                    html.Div([
                        html.H3("Profit data"),
                        dcc.Graph(id='profit-plots',figure =fig2)
                    ]),
                    html.Div([
                        html.H3("Sales-Profit data time series"),
                        dcc.Graph(id='timeseries-plots',figure =fig3)
                    ])
                ]),
        dcc.Tab(label='Time Series Forecasting',
                children=[
                    html.Div([
                        html.H3("Input number of days for prediction"),
                        daq.NumericInput(
                            id='ndays',
                            value=0
                        ),
                         html.H3("Forecast data"),
                        dcc.Graph(id='tsforecast'),
                    ])
                ])
    ])




])

#callback for prediction
@app.callback(
    Output('tsforecast','figure'),
    Input('ndays','value')
)

def ts1_forecast(days):
  last_date = pd.to_datetime('31-12-2014')
  last_date = last_date.date()
  forecast_days = days
  future=list()
  for i in range(forecast_days):
    future.append([last_date])
    last_date +=timedelta(days=1)
  future = pd.DataFrame(future)
  future.columns = ['ds']
  future['ds']=pd.to_datetime(future['ds'])

  forecast = model.predict(future)
  fig = go.Scatter(x=forecast['ds'],y=forecast['yhat'])
  return fig
# run app inline
app.run_server(mode='inline')

<IPython.core.display.Javascript object>