# 02 - M4 Example Model Evaluation

In [1]:
import pandas as pd
import numpy as np
from scalecast.util import metrics
import os

In [2]:
# Frequencies evaluated in this notebook, in processing order.
# 'Yearly' is intentionally left out for now; add it back here once it is ready.
ready = ('Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly')

In [3]:
# Hard-coded reference scores that later cells extend with one row per model.
# NOTE(review): these look like the published M4 benchmark figures -- confirm
# against the competition report before changing any of them.

# Overall scores: one row per reference method, one column per metric.
df_overall = pd.DataFrame(
    [
        [13.564, 1.912, 1.0],
        [12.669, 1.666, 0.903],
    ],
    index=['Naive2', 'ARIMA - Standard for Comp.'],
    columns=['sMAPE', 'MASE', 'OWA'],
)

# Per-frequency sMAPE of the Naive2 reference.
df_smape = pd.DataFrame(
    [
        {
            'Hourly': 18.383,
            'Daily': 3.045,
            'Weekly': 9.161,
            'Monthly': 14.427,
            'Quarterly': 11.012,
            'Yearly': 16.342,
        }
    ],
    index=['Naive2'],
)

# Per-frequency MASE of the Naive2 reference.
df_mase = pd.DataFrame(
    [
        {
            'Hourly': 2.395,
            'Daily': 3.278,
            'Weekly': 2.777,
            'Monthly': 1.063,
            'Quarterly': 1.371,
            'Yearly': 3.974,
        }
    ],
    index=['Naive2'],
)

# Naive2 is the OWA reference, so its OWA is 1 for every frequency (integer dtype).
df_owa = pd.DataFrame(
    1,
    index=['Naive2'],
    columns=['Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'],
)

In [4]:
# Per-frequency value passed as `m` to metrics.mase in the evaluation cell.
mmap = dict(
    Yearly=1,
    Quarterly=4,
    Monthly=12,
    Weekly=1,
    Daily=1,
    Hourly=24,
)

# Series metadata; the first column is the index and 'StartingDate' is parsed day-first.
info = pd.read_csv(
    'm4/M4-info.csv',
    index_col=0,
    parse_dates=['StartingDate'],
    dayfirst=True,
)

# One frame per frequency listed in `ready`, keyed by frequency name.
# The first CSV column becomes the index in both the train and the test frames.
train = dict(
    (freq, pd.read_csv(f'm4/train/{freq}-train.csv', index_col=0))
    for freq in ready
)
test = dict(
    (freq, pd.read_csv(f'm4/test/{freq}-test.csv', index_col=0))
    for freq in ready
)

In [5]:
# Score every saved forecast file against the M4 test set, one frequency at a time.
#
# For each frequency in `ready`:
#   * every '<model>.csv' under <M4_DIR>/model_results/<freq>/ is read as a forecast frame;
#   * sMAPE (scaled to percent) and MASE are computed per series;
#   * the per-frequency mean of each metric is stored in df_smape / df_mase
#     (row = model name, column = frequency);
#   * the per-series scores are written to <M4_DIR>/smape/<freq>.csv and
#     <M4_DIR>/mase/<freq>.csv.
#
# The original cell mixed 'M4/...' and 'm4/...' for the same directory, which only
# works on case-insensitive filesystems. A single root is used here, matching the
# 'm4' spelling the data-loading cell reads the train/test CSVs from.
M4_DIR = 'm4'

for freq in ready:
    print('----------------------')
    print(freq)
    print('----------------------')
    results_dir = os.path.join(M4_DIR, 'model_results', freq)
    template = pd.DataFrame(
        index=test[freq].index,
    )
    smapes = template.copy()
    mases = template.copy()
    # os.listdir returns entries in arbitrary order; sorting keeps the column
    # order of the saved CSVs and the printed log reproducible across runs.
    for model_csv in sorted(os.listdir(results_dir)):
        if not model_csv.endswith('.csv'):
            continue
        # splitext strips only the final extension, so a model name that happens
        # to contain '.csv' in the middle is not truncated.
        model = os.path.splitext(model_csv)[0]
        print(f'evaluating {model}')
        fcst = pd.read_csv(
            os.path.join(results_dir, model_csv),
            index_col=0,
        )
        # Restrict the score frames to the series this model actually forecast.
        smapes = smapes.loc[fcst.index]
        mases = mases.loc[fcst.index]

        # Build the NaN-free actual/forecast lists once and reuse them for both metrics.
        actuals = [test[freq].loc[i].dropna().to_list() for i in fcst.index]
        preds = [fcst.loc[i].dropna().to_list() for i in fcst.index]

        smapes[model] = [
            metrics.smape(a, f) * 100 for a, f in zip(actuals, preds)
        ]
        df_smape.loc[model, freq] = smapes[model].mean()

        mases[model] = [
            metrics.mase(
                a,
                f,
                train[freq].loc[i].dropna(),
                m=mmap[freq],
            ) for i, a, f in zip(fcst.index, actuals, preds)
        ]
        df_mase.loc[model, freq] = mases[model].mean()

    # Create the output folders on first use so a fresh checkout does not fail here.
    for metric_name, scores in (('smape', smapes), ('mase', mases)):
        out_dir = os.path.join(M4_DIR, metric_name)
        os.makedirs(out_dir, exist_ok=True)
        scores.to_csv(os.path.join(out_dir, f'{freq}.csv'))

----------------------
Hourly
----------------------
evaluating auto_select
evaluating avg_top_2
evaluating avg_top_4
evaluating avg_top_6
evaluating elasticnet
evaluating gbt
evaluating knn
evaluating lightgbm
evaluating svr
evaluating weighted_top_2
evaluating weighted_top_4
evaluating weighted_top_6
evaluating xgboost
----------------------
Daily
----------------------
evaluating auto_select
evaluating avg_top_2
evaluating avg_top_4
evaluating avg_top_6
evaluating elasticnet
evaluating gbt
evaluating knn
evaluating lightgbm
evaluating svr


KeyboardInterrupt: 

In [None]:
# Aggregate the per-frequency scores into OWA tables and an overall leaderboard.
#
# Weight of each frequency in the overall score: count / 100_000, exactly the
# factors the original expression used (the counts sum to 100_000).
# NOTE(review): these appear to be the number of series per frequency in M4 -- confirm.
series_counts = {
    'Yearly': 23_000,
    'Quarterly': 24_000,
    'Monthly': 48_000,
    'Weekly': 359,
    'Daily': 4_227,
    'Hourly': 414,
}
weights = {freq: count / 100_000 for freq, count in series_counts.items()}

# Work on the frequency columns only. Iterating over the frame itself would also
# pick up the 'Avg' column once this cell has run, making a re-run give
# different numbers.
freqs = list(series_counts)

# Take the model list from the score table rather than from `smapes`, which is
# just whatever the evaluation loop left behind for the last frequency it
# touched (and is unreliable if that loop was interrupted).
models = [mod for mod in df_smape.index if mod != 'Naive2']

# A frequency with no result for a model (e.g. one not evaluated yet) falls back
# to the Naive2 score, i.e. it is treated as "no better than the benchmark".
for freq in freqs:
    df_smape[freq] = df_smape[freq].fillna(df_smape.loc['Naive2', freq])
    df_mase[freq] = df_mase[freq].fillna(df_mase.loc['Naive2', freq])

# Per-frequency OWA: mean of the sMAPE and MASE ratios relative to Naive2.
for freq in freqs:
    for mod in models:
        df_owa.loc[mod, freq] = (
            df_smape.loc[mod, freq] / df_smape.loc['Naive2', freq]
            + df_mase.loc[mod, freq] / df_mase.loc['Naive2', freq]
        ) / 2

# Overall sMAPE / MASE: weighted average over frequencies.
for mod in models:
    df_overall.loc[mod, 'sMAPE'] = sum(
        df_smape.loc[mod, freq] * weights[freq] for freq in freqs
    )
    df_overall.loc[mod, 'MASE'] = sum(
        df_mase.loc[mod, freq] * weights[freq] for freq in freqs
    )

# Unweighted mean across the frequency columns only (never across an old 'Avg').
df_smape['Avg'] = df_smape[freqs].mean(axis=1)
df_mase['Avg'] = df_mase[freqs].mean(axis=1)
df_owa['Avg'] = df_owa[freqs].mean(axis=1)

# Overall OWA for every row, relative to the Naive2 row of df_overall.
df_overall['OWA'] = (
    df_overall['sMAPE'] / df_overall.loc['Naive2', 'sMAPE']
    + df_overall['MASE'] / df_overall.loc['Naive2', 'MASE']
) / 2

In [None]:
# Overall leaderboard, sorted ascending by OWA.
df_overall.sort_values(by='OWA')

In [None]:
# Per-frequency sMAPE, sorted ascending by the cross-frequency average.
df_smape.sort_values(by='Avg')

In [None]:
# Per-frequency MASE, sorted ascending by the cross-frequency average.
df_mase.sort_values(by='Avg')

In [None]:
# Per-frequency OWA, sorted ascending by the cross-frequency average.
df_owa.sort_values(by='Avg')