In [21]:
# imports
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go

import warnings
# Silence every warning category for the rest of the kernel session.
# NOTE(review): this also hides pandas deprecation/future warnings raised by
# the cells below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
# Route DataFrame.plot / Series.plot through plotly instead of the default backend.
pd.options.plotting.backend = 'plotly'

In [2]:
# Load the per-store score table of every model (the `_7` result files) and
# index each one by its `store` column so they can be joined.
score_ARIMA_7 = pd.read_pickle("results/f8/ARIMA_result_7.pkl").set_index("store")
score_PROPHET_7 = pd.read_pickle("results/f8/PROPHET_result_7.pkl").set_index("store")
# NOTE(review): RF is read from results/grouping/ while the other models come
# from results/f8/ — confirm this is the intended baseline file.
score_RF_7 = pd.read_pickle('results/grouping/result_RF_base.pkl').set_index("store")
score_XGB_7 = pd.read_pickle("results/f8/XGB_result_7.pkl").set_index("store")

# Left-join everything onto the ARIMA stores, then keep only the MAPE columns.
mape_columns = ['mape_ARIMA', 'mape_PROPHET', 'mape_RF', 'mape_XGB']
joined_scores = score_ARIMA_7
for other_scores in (score_PROPHET_7, score_RF_7, score_XGB_7):
    joined_scores = joined_scores.join(other_scores)
score_ALL_7 = joined_scores[mape_columns].reset_index()

# Drop the first six characters of every store identifier.
score_ALL_7['store'] = score_ALL_7['store'].map(lambda store_id: store_id[6:])
score_ALL_7.head(3)



Unnamed: 0,store,mape_ARIMA,mape_PROPHET,mape_RF,mape_XGB
0,307222,0.281,0.234,0.048,0.1905
1,307244,0.3455,0.309,0.0625,0.20525
2,307248,0.363,0.239,0.0585,0.185


In [3]:
# Collect one score table per (model, grouping) pair, keyed "<model>_<group>".
models = ["ARIMA", "PROPHET", "RF", "XGB"]
groups = ["base", "store_level", "province", "cluster"]
data = {}
for model in models:
    base_column = f"mape_{model}"
    for group in groups:
        key = f"{model}_{group}"
        if group != "base":
            # Every non-base grouping is read from its own pickle file.
            data[key] = pd.read_pickle(f"results/grouping/result_{model}_{group}.pkl")
            continue
        # The "base" entry is taken from the joined score table loaded above,
        # with the model-specific column renamed to a common 'mape'.
        data[key] = score_ALL_7[['store', base_column]].rename({base_column: 'mape'}, axis=1)



In [4]:
# Stack every (model, grouping) score table into one long frame with the
# columns store / mape / group / model.
#
# The pieces are collected first and concatenated once. Growing a frame with
# pd.concat inside the loop copies all previous rows on every iteration, and
# seeding it with an empty frame relies on pandas' deprecated handling of
# empty entries when it determines the result dtypes.
score_frames = [
    pd.DataFrame({
        'store': data[model + "_" + group]['store'],
        'mape': data[model + "_" + group]['mape'],
        'group': group,   # scalar, broadcast to every row
        'model': model,   # scalar, broadcast to every row
    })
    for model in models
    for group in groups
]
# pd.concat rejects an empty list, so keep the empty-but-typed frame for that case.
df = (
    pd.concat(score_frames)
    if score_frames
    else pd.DataFrame(columns=['store', 'mape', 'group', 'model'])
)

In [5]:
# Box plot of the MAPE values in `df`: one group of boxes per model on the
# x axis, one colour per grouping strategy.
title_text = 'Grouping Results'
fig = px.box(df, x="model", y="mape", color="group")
fig.update_traces(quartilemethod="exclusive")  # or "inclusive", or "linear" by default
# A previous update_traces(mode='lines+markers', opacity=0.8,
# selector=dict(type='scatter')) call was removed here: px.box only creates
# box traces, so the scatter selector never matched anything.
fig.update_yaxes(title_text='Mean Absolute Percentage Error')
fig.update_layout(
    height=500, width=800,
    title_text=title_text,
    paper_bgcolor='rgb(243, 243, 243)',
    plot_bgcolor='rgb(243, 243, 243)',
)
fig.show()

#fig.write_image(file="results/f8/plots/"+title_text+".png")

In [7]:
# Average MAPE for every (model, grouping) combination.
mape_by_model_group = df.groupby(['model', 'group'])['mape']
mape_by_model_group.mean()

model    group      
ARIMA    base           0.377895
         cluster        0.353553
         province       0.358724
         store_level    0.360336
PROPHET  base           0.338375
         cluster        0.328105
         province       0.341961
         store_level    0.325638
RF       base           0.104414
         cluster        0.173243
         province       0.159770
         store_level    0.150118
XGB      base           0.243296
         cluster        0.264368
         province       0.252724
         store_level    0.251579
Name: mape, dtype: float64

In [8]:

def _load_model_results(model_name, suffixes=(7, 14, 21, 28)):
    """Load and combine the result pickles of one model.

    Reads ``results/f8/<model_name>_result_<suffix>.pkl`` for every entry of
    ``suffixes``, indexes each frame by its ``store`` column and places the
    frames side by side.

    Parameters
    ----------
    model_name : str
        Prefix of the pickle file names, e.g. ``"ARIMA"``.
    suffixes : iterable of int
        Numeric file suffixes to load (presumably forecast horizons — TODO
        confirm).

    Returns
    -------
    pandas.DataFrame
        Column-wise concatenation whose outer column level is the suffix as a
        string (``'7'``, ``'14'``, ...).
    """
    suffixes = list(suffixes)
    frames = [
        pd.read_pickle("results/f8/" + model_name + "_result_" + str(suffix) + ".pkl").set_index("store")
        for suffix in suffixes
    ]
    return pd.concat(frames, axis=1, keys=[str(suffix) for suffix in suffixes])


# One combined frame per model; replaces five copy-pasted loading loops.
SNAIVE = _load_model_results("SNAIVE")
ARIMA = _load_model_results("ARIMA")
PROPHET = _load_model_results("PROPHET")
RF = _load_model_results("RF")
XGB = _load_model_results("XGB")


In [23]:
# Build a summary table with the mean rmse / mae / mape of every model for
# each of the 7/14/21/28 result sets. Rows are indexed by that number, so the
# index repeats once per model.
#
# Rows are collected in a list and turned into a frame once, instead of
# concatenating onto an empty frame inside the loop. Only the three metric
# columns are averaged, and only once per result set.
metric_names = ['rmse', 'mae', 'mape']
score_rows = []
score_index = []
for results, name in zip([SNAIVE, ARIMA, PROPHET, RF, XGB],
                         ['SNAIVE', 'ARIMA', 'PROPHET', 'RF', 'XGB']):
    # Metric columns carry the model name as a suffix, e.g. 'rmse_ARIMA'.
    metric_columns = [metric + '_' + name for metric in metric_names]
    for suffix in [7, 14, 21, 28]:
        means = results[str(suffix)][metric_columns].mean()
        row = {'model': name}
        for metric, column in zip(metric_names, metric_columns):
            row[metric] = means[column]
        score_rows.append(row)
        score_index.append(suffix)

scores = pd.DataFrame(score_rows, index=score_index,
                      columns=['model', 'rmse', 'mae', 'mape'])

In [25]:
# Display the table of mean error metrics per model.
scores

Unnamed: 0,model,rmse,mae,mape
7,SNAIVE,11.243112,8.907645,0.418559
14,SNAIVE,10.204243,7.875046,0.41377
21,SNAIVE,12.528967,9.523651,0.463493
28,SNAIVE,11.432289,8.404243,0.455599
7,ARIMA,9.321796,7.471224,0.377895
14,ARIMA,9.331454,7.297184,0.415862
21,ARIMA,12.4365,9.586493,0.493171
28,ARIMA,11.067072,8.253395,0.478355
7,PROPHET,8.870763,7.010829,0.338375
14,PROPHET,8.707921,6.64152,0.367395
