# 02 - M4 Example Model Evaluation

In [1]:
import pandas as pd
import numpy as np
from scalecast.util import metrics
import os

In [6]:
# Frequencies included in this evaluation run.
# 'Daily', 'Monthly' and 'Quarterly' are left out for now; add them to the
# tuple to evaluate them as well.
ready = ('Hourly', 'Weekly', 'Yearly')

In [7]:
# Baseline rows that every evaluated model is compared against.
# NOTE(review): these look like published M4 benchmark scores — confirm the
# source before changing any of the numbers.
df_overall = pd.DataFrame(
    [
        [13.564, 1.912, 1.0],
        [12.669, 1.666, 0.903],
    ],
    index=['Naive2', 'ARIMA - Standard for Comp.'],
    columns=['sMAPE', 'MASE', 'OWA'],
)

# Naive2 sMAPE per frequency; later cells add one row per evaluated model.
df_smape = pd.DataFrame(
    {
        'Hourly': 18.383,
        'Daily': 3.045,
        'Weekly': 9.161,
        'Monthly': 14.427,
        'Quarterly': 11.012,
        'Yearly': 16.342,
    },
    index=['Naive2'],
)

# Naive2 MASE per frequency, same layout as df_smape.
df_mase = pd.DataFrame(
    {
        'Hourly': 2.395,
        'Daily': 3.278,
        'Weekly': 2.777,
        'Monthly': 1.063,
        'Quarterly': 1.371,
        'Yearly': 3.974,
    },
    index=['Naive2'],
)

# Naive2 is the reference of the OWA ratio, so it is 1 for every frequency.
df_owa = pd.DataFrame(
    dict.fromkeys(df_smape.columns, 1),
    index=['Naive2'],
)

In [8]:
# Value handed to the MASE metric as its `m` argument for each frequency
# (presumably the seasonal period — verify against scalecast's metrics.mase).
mmap = dict(
    Yearly=1,
    Quarterly=4,
    Monthly=12,
    Weekly=1,
    Daily=1,
    Hourly=24,
)

# Series-level metadata; the first CSV column becomes the index and
# 'StartingDate' is parsed as a day-first date.
info = pd.read_csv(
    'm4/M4-info.csv', index_col=0, parse_dates=['StartingDate'], dayfirst=True
)

# Training history for every frequency being evaluated, keyed by frequency.
train = {}
for freq in ready:
    train[freq] = pd.read_csv(f'm4/train/{freq}-train.csv', index_col=0)

# Matching held-out test sets, keyed the same way.
test = {}
for freq in ready:
    test[freq] = pd.read_csv(f'm4/test/{freq}-test.csv', index_col=0)

In [9]:
# Score every saved forecast file against the held-out test set, one
# frequency at a time. For each frequency this cell
#   * lists the forecast CSVs in m4/model_results/<freq>/ (one file per model),
#   * computes a per-series sMAPE (in percent) and MASE for each model,
#   * stores the mean of each metric in df_smape / df_mase, and
#   * writes the per-series scores to m4/smape/<freq>.csv and m4/mase/<freq>.csv.
#
# The data directory is spelled 'm4' everywhere so the cell also works on
# case-sensitive filesystems.
for freq in ready:
    print(freq)
    results_dir = os.path.join('m4', 'model_results', freq)

    # One row per test series; every evaluated model becomes a column.
    smapes = pd.DataFrame(index=test[freq].index)
    mases = pd.DataFrame(index=test[freq].index)

    # Keep only forecast files (the folder may also hold e.g. notebook
    # checkpoint directories) and sort them so the run order is reproducible.
    model_csvs = sorted(
        name for name in os.listdir(results_dir) if name.endswith('.csv')
    )
    for model_csv in model_csvs:
        model = model_csv.split('.csv')[0]
        print(f'evaluating {model}')
        fcst = pd.read_csv(
            os.path.join(results_dir, model_csv),
            index_col=0,
        )

        smape_scores = []
        mase_scores = []
        for i in fcst.index:
            # Rows are padded with NaN up to the longest horizon/history.
            actuals = test[freq].loc[i].dropna().to_list()
            preds = fcst.loc[i].dropna().to_list()
            smape_scores.append(metrics.smape(actuals, preds) * 100)
            mase_scores.append(
                metrics.mase(
                    actuals,
                    preds,
                    train[freq].loc[i].dropna(),
                    m=mmap[freq],
                )
            )

        # Assign as Series keyed by the forecast's own index so scores are
        # aligned by series id instead of by position; a forecast file that
        # orders its rows differently from the test set is still scored
        # correctly, and missing series become NaN rather than raising.
        smapes[model] = pd.Series(smape_scores, index=fcst.index, dtype=float)
        mases[model] = pd.Series(mase_scores, index=fcst.index, dtype=float)
        df_smape.loc[model, freq] = smapes[model].mean()
        df_mase.loc[model, freq] = mases[model].mean()

    # Make sure the output folders exist before writing the per-series scores.
    smape_dir = os.path.join('m4', 'smape')
    mase_dir = os.path.join('m4', 'mase')
    os.makedirs(smape_dir, exist_ok=True)
    os.makedirs(mase_dir, exist_ok=True)
    smapes.to_csv(os.path.join(smape_dir, f'{freq}.csv'))
    mases.to_csv(os.path.join(mase_dir, f'{freq}.csv'))

Hourly
Weekly


In [10]:
# Aggregate the per-frequency scores into OWA and overall competition metrics.
#
# Weight of each frequency in the overall score, expressed as a count out of
# M4_TOTAL_SERIES (the six counts add up to 100,000; they match the number of
# M4 series per frequency).
M4_SERIES_COUNTS = {
    'Yearly': 23_000,
    'Quarterly': 24_000,
    'Monthly': 48_000,
    'Weekly': 359,
    'Daily': 4_227,
    'Hourly': 414,
}
M4_TOTAL_SERIES = 100_000


def _m4_weighted_score(scores, model):
    """Return the series-count-weighted average of one model's scores.

    Parameters
    ----------
    scores : pd.DataFrame
        Per-frequency metric table (models as rows, frequencies as columns).
    model : str
        Row label of the model to aggregate.

    Returns
    -------
    float
        Weighted average over all six frequencies. The result is NaN as long
        as any frequency has not been evaluated for this model.
    """
    return sum(
        scores.loc[model, freq] * n_series / M4_TOTAL_SERIES
        for freq, n_series in M4_SERIES_COUNTS.items()
    )


# Take the model list from the score table itself rather than from a loop
# variable left over from the evaluation cell, so every model that was scored
# for any frequency is included. 'Naive2' is the reference row and keeps its
# preset values.
evaluated_models = [mod for mod in df_smape.index if mod != 'Naive2']

# OWA per frequency: mean of the sMAPE and MASE ratios relative to Naive2.
for freq in ready:
    for mod in evaluated_models:
        df_owa.loc[mod, freq] = (
            df_smape.loc[mod, freq] / df_smape.loc['Naive2', freq]
            + df_mase.loc[mod, freq] / df_mase.loc['Naive2', freq]
        ) / 2

# Overall sMAPE / MASE across frequencies, weighted by series count.
for mod in evaluated_models:
    df_overall.loc[mod, 'sMAPE'] = _m4_weighted_score(df_smape, mod)
    df_overall.loc[mod, 'MASE'] = _m4_weighted_score(df_mase, mod)

# Row-wise average over the frequency columns only; dropping a pre-existing
# 'Avg' column keeps the cell idempotent when it is run more than once.
df_smape['Avg'] = df_smape.drop(columns='Avg', errors='ignore').mean(axis=1)
df_mase['Avg'] = df_mase.drop(columns='Avg', errors='ignore').mean(axis=1)
df_owa['Avg'] = df_owa.drop(columns='Avg', errors='ignore').mean(axis=1)

# Overall OWA relative to the Naive2 row of the overall table.
df_overall['OWA'] = (
    df_overall['sMAPE'] / df_overall.loc['Naive2', 'sMAPE']
    + df_overall['MASE'] / df_overall.loc['Naive2', 'MASE']
) / 2

In [11]:
df_overall.sort_values('OWA')

Unnamed: 0,sMAPE,MASE,OWA
ARIMA - Standard for Comp.,12.669,1.666,0.902678
Naive2,13.564,1.912,1.0
auto_select,,,
avg_top_2,,,
avg_top_4,,,
avg_top_6,,,
avg_top_8,,,
elasticnet,,,
knn,,,
lightgbm,,,


In [12]:
df_smape.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg_top_4,13.30035,,6.971702,,,,10.136026
avg_top_6,13.302063,,6.971505,,,,10.136784
avg_top_8,13.304469,,6.972633,,,,10.138551
avg_top_2,13.319552,,6.985012,,,,10.152282
weighted_top_8,13.337885,,6.991446,,,,10.164666
weighted_top_6,13.344149,,6.994252,,,,10.169201
weighted_top_4,13.358293,,6.998967,,,,10.17863
weighted_top_2,13.352395,,7.007183,,,,10.179789
auto_select,13.39412,,7.018052,,,,10.206086
xgboost,12.663358,,7.9397,,,,10.301529


In [13]:
df_mase.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
weighted_top_8,0.887651,,2.172838,,,,1.530244
avg_top_8,0.886085,,2.175077,,,,1.530581
weighted_top_6,0.888135,,2.173534,,,,1.530834
avg_top_6,0.886119,,2.176221,,,,1.53117
weighted_top_4,0.889193,,2.175232,,,,1.532213
avg_top_4,0.886296,,2.17931,,,,1.532803
weighted_top_2,0.89168,,2.178085,,,,1.534883
auto_select,0.89043,,2.182381,,,,1.536405
avg_top_2,0.887925,,2.191491,,,,1.539708
mlr,0.91421,,2.195848,,,,1.555029


In [14]:
df_owa.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg_top_8,0.546855,,0.772184,,,,0.65952
avg_top_6,0.546797,,0.772329,,,,0.659563
avg_top_4,0.546787,,0.772896,,,,0.659841
weighted_top_8,0.548091,,0.772808,,,,0.660449
weighted_top_6,0.548362,,0.773086,,,,0.660724
weighted_top_4,0.548968,,0.773649,,,,0.661309
avg_top_2,0.54765,,0.775815,,,,0.661732
weighted_top_2,0.549327,,0.774612,,,,0.661969
auto_select,0.550201,,0.775978,,,,0.663089
mlr,0.620949,,0.775802,,,,0.698375
