# 02 - M4 Example Model Evaluation

In [2]:
import pandas as pd
import numpy as np
from scalecast.util import metrics
import os

In [3]:
# Frequencies processed by the cells below. Only the names left uncommented
# are loaded and evaluated; uncomment a line to include that frequency.
ready = tuple([
    'Hourly',
    # 'Daily',
    # 'Weekly',
    # 'Monthly',
    # 'Quarterly',
    # 'Yearly',
])

In [4]:
# Hard-coded reference scores used as the comparison baseline
# (presumably the published M4 benchmark results -- verify against the source).

# Overall scores: one row per benchmark, one column per metric.
df_overall = pd.DataFrame(
    [
        [13.564, 1.912, 1.0],
        [12.669, 1.666, 0.903],
    ],
    index=['Naive2', 'ARIMA - Standard for Comp.'],
    columns=['sMAPE', 'MASE', 'OWA'],
)

# Naive2 sMAPE per frequency; model rows are appended later.
df_smape = pd.DataFrame(
    [
        dict(
            Hourly=18.383,
            Daily=3.045,
            Weekly=9.161,
            Monthly=14.427,
            Quarterly=11.012,
            Yearly=16.342,
        ),
    ],
    index=['Naive2'],
)

# Naive2 MASE per frequency; model rows are appended later.
df_mase = pd.DataFrame(
    [
        dict(
            Hourly=2.395,
            Daily=3.278,
            Weekly=2.777,
            Monthly=1.063,
            Quarterly=1.371,
            Yearly=3.974,
        ),
    ],
    index=['Naive2'],
)

# Naive2 is the reference model, so its OWA is 1 for every frequency.
df_owa = pd.DataFrame(
    1,
    index=['Naive2'],
    columns=list(df_smape.columns),
)

In [5]:
# Value passed as `m` to the MASE metric for each frequency.
mmap = dict(
    Yearly=1,
    Quarterly=4,
    Monthly=12,
    Weekly=1,
    Daily=1,
    Hourly=24,
)

# M4-info.csv (presumably per-series metadata -- confirm); the first column
# becomes the index and StartingDate is parsed day-first.
info = pd.read_csv(
    'm4/M4-info.csv',
    index_col=0,
    parse_dates=['StartingDate'],
    dayfirst=True,
)

# Training and test tables, keyed by frequency name, for every frequency in
# `ready`. The first CSV column is used as the row index in both.
train = dict(
    (freq, pd.read_csv(f'm4/train/{freq}-train.csv', index_col=0))
    for freq in ready
)
test = dict(
    (freq, pd.read_csv(f'm4/test/{freq}-test.csv', index_col=0))
    for freq in ready
)

In [9]:
# Score every saved forecast file against the test set, one frequency at a time.
# For each model CSV found for a frequency this computes a per-series sMAPE
# (multiplied by 100) and MASE, stores the mean of each in df_smape / df_mase,
# and finally writes the per-series tables to disk.

# Single base directory for everything read or written here. It matches the
# 'm4/...' paths the info/train/test CSVs are loaded from; the original mixed
# 'M4' and 'm4', which only works on case-insensitive filesystems.
base_dir = 'm4'

for freq in ready:
    print('----------------------')
    print(freq)
    print('----------------------')
    results_dir = os.path.join(base_dir, 'model_results', freq)
    template = pd.DataFrame(
        index=test[freq].index,
    )
    smapes = template.copy()
    mases = template.copy()
    # os.listdir returns entries in arbitrary order; sort so the column order
    # of the saved tables is reproducible from run to run.
    for model_csv in sorted(os.listdir(results_dir)):
        if not model_csv.endswith('.csv'):
            continue
        model = model_csv.split('.csv')[0]
        print(f'evaluating {model}')
        # Read from the same directory that was just listed.
        fcst = pd.read_csv(
            os.path.join(results_dir, model_csv),
            index_col=0,
        )
        # Restrict/reorder the score tables to the series this model forecast,
        # so the positional lists assigned below line up with the row labels.
        smapes = smapes.loc[fcst.index]
        mases = mases.loc[fcst.index]
        smapes[model] = [
            metrics.smape(
                test[freq].loc[i].dropna().to_list(),
                fcst.loc[i].dropna().to_list(),
            )*100 for i in fcst.index
        ]
        df_smape.loc[model,freq] = smapes[model].mean()

        mases[model] = [
            metrics.mase(
                test[freq].loc[i].dropna().to_list(),
                fcst.loc[i].dropna().to_list(),
                train[freq].loc[i].dropna(),
                m = mmap[freq],
            ) for i in fcst.index
        ]
        df_mase.loc[model,freq] = mases[model].mean()

    # Persist the per-series scores for this frequency.
    smapes.to_csv(os.path.join(base_dir, 'smape', f'{freq}.csv'))
    mases.to_csv(os.path.join(base_dir, 'mase', f'{freq}.csv'))

----------------------
Hourly
----------------------
evaluating auto_select
evaluating avg_all
evaluating avg_top2
evaluating avg_top4
evaluating elasticnet
evaluating knn
evaluating lasso
evaluating lightgbm
evaluating ridge
evaluating xgboost


In [10]:
# Combine the per-frequency scores into OWA and overall (weighted) scores.
#
# This cell is safe to re-run: the frequency list excludes the derived 'Avg'
# column, and the model list is taken from df_smape itself rather than from
# the `smapes` variable left over from the last iteration of the evaluation
# loop (which only knows the models of the final frequency).

# Number of series per frequency in the M4 competition (100,000 in total);
# each frequency's score is weighted by its share of the series.
n_series = {
    'Yearly': 23_000,
    'Quarterly': 24_000,
    'Monthly': 48_000,
    'Weekly': 359,
    'Daily': 4_227,
    'Hourly': 414,
}
total_series = sum(n_series.values())
weights = {freq: count / total_series for freq, count in n_series.items()}

freqs = [col for col in df_smape.columns if col != 'Avg']
models = [mod for mod in df_smape.index if mod != 'Naive2']

# Frequencies a model was not evaluated on fall back to the Naive2 score.
for freq in freqs:
    df_smape[freq] = df_smape[freq].fillna(df_smape.loc['Naive2',freq])
    df_mase[freq] = df_mase[freq].fillna(df_mase.loc['Naive2',freq])

# OWA per frequency: mean of sMAPE and MASE, each relative to Naive2.
# The Naive2 row evaluates to exactly 1 by construction.
df_owa = (
    df_smape[freqs] / df_smape.loc['Naive2',freqs] +
    df_mase[freqs] / df_mase.loc['Naive2',freqs]
) / 2

# Overall sMAPE / MASE per model: series-count-weighted mean over frequencies.
# The hard-coded Naive2 and ARIMA rows of df_overall are left untouched.
for mod in models:
    df_overall.loc[mod,'sMAPE'] = sum(
        df_smape.loc[mod,freq] * w for freq, w in weights.items()
    )
    df_overall.loc[mod,'MASE'] = sum(
        df_mase.loc[mod,freq] * w for freq, w in weights.items()
    )

# Unweighted mean across frequencies, computed over the frequency columns only
# so that an existing 'Avg' column never feeds into itself.
df_smape['Avg'] = df_smape[freqs].mean(axis=1)
df_mase['Avg'] = df_mase[freqs].mean(axis=1)
df_owa['Avg'] = df_owa[freqs].mean(axis=1)

# Overall OWA, relative to the overall Naive2 scores.
df_overall['OWA'] = (
    df_overall['sMAPE'] /
    df_overall.loc['Naive2','sMAPE'] +
    df_overall['MASE'] /
    df_overall.loc['Naive2','MASE']
) / 2

In [11]:
# Overall scores ordered by OWA, smallest first.
df_overall.sort_values(by='OWA', ascending=True)

Unnamed: 0,sMAPE,MASE,OWA
ARIMA - Standard for Comp.,12.669,1.666,0.902678
avg_top4,13.549495,1.905824,0.99785
auto_select,13.549427,1.905848,0.997854
avg_top2,13.550002,1.905848,0.997875
avg_all,13.551131,1.905995,0.997955
xgboost,13.551239,1.906468,0.998083
knn,13.552741,1.90626,0.998084
ridge,13.557688,1.905997,0.998197
elasticnet,13.556275,1.906803,0.998356
lightgbm,13.557709,1.906741,0.998393


In [12]:
# Per-frequency sMAPE ordered by the 'Avg' column, smallest first.
df_smape.sort_values(by='Avg', ascending=True)

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
auto_select,14.813316,3.045,9.161,14.427,11.012,16.342,11.466719
avg_top4,14.829776,3.045,9.161,14.427,11.012,16.342,11.469463
avg_top2,14.952168,3.045,9.161,14.427,11.012,16.342,11.489861
avg_all,15.22473,3.045,9.161,14.427,11.012,16.342,11.535288
xgboost,15.250889,3.045,9.161,14.427,11.012,16.342,11.539648
knn,15.613829,3.045,9.161,14.427,11.012,16.342,11.600138
elasticnet,16.467247,3.045,9.161,14.427,11.012,16.342,11.742375
ridge,16.808676,3.045,9.161,14.427,11.012,16.342,11.799279
lightgbm,16.81364,3.045,9.161,14.427,11.012,16.342,11.800107
lasso,18.121653,3.045,9.161,14.427,11.012,16.342,12.018109


In [13]:
# Per-frequency MASE ordered by the 'Avg' column, smallest first.
df_mase.sort_values(by='Avg', ascending=True)

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg_top4,0.964543,3.278,2.777,1.063,1.371,3.974,2.237924
auto_select,0.970512,3.278,2.777,1.063,1.371,3.974,2.238919
avg_top2,0.970529,3.278,2.777,1.063,1.371,3.974,2.238922
avg_all,1.005899,3.278,2.777,1.063,1.371,3.974,2.244817
ridge,1.006334,3.278,2.777,1.063,1.371,3.974,2.244889
knn,1.069931,3.278,2.777,1.063,1.371,3.974,2.255488
xgboost,1.120133,3.278,2.777,1.063,1.371,3.974,2.263855
lightgbm,1.186036,3.278,2.777,1.063,1.371,3.974,2.274839
elasticnet,1.201209,3.278,2.777,1.063,1.371,3.974,2.277368
lasso,1.467493,3.278,2.777,1.063,1.371,3.974,2.321749


In [14]:
# Per-frequency OWA ordered by the 'Avg' column, smallest first.
df_owa.sort_values(by='Avg', ascending=True)

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg_top4,0.604722,1.0,1.0,1.0,1.0,1.0,0.93412
auto_select,0.60552,1.0,1.0,1.0,1.0,1.0,0.934253
avg_top2,0.6093,1.0,1.0,1.0,1.0,1.0,0.934883
avg_all,0.624098,1.0,1.0,1.0,1.0,1.0,0.93735
knn,0.648049,1.0,1.0,1.0,1.0,1.0,0.941341
xgboost,0.648658,1.0,1.0,1.0,1.0,1.0,0.941443
ridge,0.667271,1.0,1.0,1.0,1.0,1.0,0.944545
elasticnet,0.698668,1.0,1.0,1.0,1.0,1.0,0.949778
lightgbm,0.704922,1.0,1.0,1.0,1.0,1.0,0.95082
lasso,0.799258,1.0,1.0,1.0,1.0,1.0,0.966543
