# 02 - M4 Example Model Evaluation

In [1]:
import pandas as pd
import numpy as np
from scalecast.util import metrics
import os

In [2]:
# Frequencies to process, in the order the later cells iterate over them.
ready = ('Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly')

In [3]:
# Hard-coded reference rows that the evaluated models are later appended to.
# NOTE(review): the numbers are presumably the published M4 benchmark
# results -- confirm against the competition tables.

# Overall scores: one row per benchmark, one column per metric.
df_overall = pd.DataFrame(
    [
        [13.564, 1.912, 1.0],
        [12.669, 1.666, 0.903],
    ],
    index=['Naive2', 'ARIMA - Standard for Comp.'],
    columns=['sMAPE', 'MASE', 'OWA'],
)

# Per-frequency sMAPE of the Naive2 row.
df_smape = pd.DataFrame(
    [[18.383, 3.045, 9.161, 14.427, 11.012, 16.342]],
    index=['Naive2'],
    columns=['Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'],
)

# Per-frequency MASE of the Naive2 row.
df_mase = pd.DataFrame(
    [[2.395, 3.278, 2.777, 1.063, 1.371, 3.974]],
    index=['Naive2'],
    columns=['Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'],
)

# Per-frequency OWA of the Naive2 row: 1 for every frequency.
df_owa = pd.DataFrame(
    [[1, 1, 1, 1, 1, 1]],
    index=['Naive2'],
    columns=['Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'],
)

In [4]:
# Value passed as `m` to metrics.mase for each frequency.
mmap = {
    'Yearly': 1,
    'Quarterly': 4,
    'Monthly': 12,
    'Weekly': 1,
    'Daily': 1,
    'Hourly': 24,
}

# Series-level metadata; the first CSV column becomes the index and
# StartingDate is parsed day-first.
info = pd.read_csv(
    'm4/M4-info.csv',
    index_col=0,
    parse_dates=['StartingDate'],
    dayfirst=True,
)

# One wide frame per frequency, keyed by frequency name; the first CSV
# column becomes the index.
train = dict(
    (freq, pd.read_csv(f'm4/train/{freq}-train.csv', index_col=0))
    for freq in ready
)
test = dict(
    (freq, pd.read_csv(f'm4/test/{freq}-test.csv', index_col=0))
    for freq in ready
)

In [10]:
# Score every saved forecast file against the held-out test data, one
# frequency at a time.  For each frequency this:
#   * computes a per-series sMAPE and MASE for every model CSV found in
#     m4/model_results/<freq>/,
#   * stores the mean of each metric in df_smape / df_mase (row = model,
#     column = frequency),
#   * writes the per-series scores to m4/smape/<freq>.csv and
#     m4/mase/<freq>.csv.
for freq in ready:
    print('----------------------')
    print(freq)
    print('----------------------')
    template = pd.DataFrame(
        index=test[freq].index,
    )
    smapes = template.copy()
    mases = template.copy()

    # Use the same lower-case 'm4' root as the data-loading paths; mixing
    # 'M4' and 'm4' only works on case-insensitive filesystems.
    results_dir = f'm4/model_results/{freq}'
    # os.listdir returns entries in arbitrary order and may contain non-CSV
    # entries; keep only the CSVs and sort them so the column order of the
    # saved score files is reproducible.
    model_csvs = sorted(
        name for name in os.listdir(results_dir) if name.endswith('.csv')
    )
    for model_csv in model_csvs:
        model = model_csv.split('.csv')[0]
        print(f'evaluating {model}')
        fcst = pd.read_csv(
            f'{results_dir}/{model_csv}',
            index_col=0,
        )

        series_smape = []
        series_mase = []
        for i in fcst.index:
            # Rows are padded with NaN to a common width, hence the dropna.
            actual = test[freq].loc[i].dropna().to_list()
            predicted = fcst.loc[i].dropna().to_list()
            # Scaled by 100 -- presumably to match the scale of the
            # hard-coded benchmark rows in df_smape (confirm).
            series_smape.append(metrics.smape(actual, predicted) * 100)
            series_mase.append(
                metrics.mase(
                    actual,
                    predicted,
                    train[freq].loc[i].dropna(),
                    m=mmap[freq],
                )
            )

        # Label the scores with the forecast file's own index so pandas
        # aligns them to the test index by series id instead of by position;
        # a forecast file with a different row order is then still scored
        # correctly, and missing series show up as NaN.
        smapes[model] = pd.Series(series_smape, index=fcst.index, dtype=float)
        df_smape.loc[model, freq] = smapes[model].mean()

        mases[model] = pd.Series(series_mase, index=fcst.index, dtype=float)
        df_mase.loc[model, freq] = mases[model].mean()

    # Create the output folders on first run instead of failing in to_csv.
    os.makedirs('m4/smape', exist_ok=True)
    os.makedirs('m4/mase', exist_ok=True)
    smapes.to_csv(f'm4/smape/{freq}.csv')
    mases.to_csv(f'm4/mase/{freq}.csv')

------------
Hourly
------------
evaluating weighted_top_4
evaluating weighted_top_6
evaluating weighted_top_2
evaluating svr
evaluating lightgbm
evaluating auto_select
evaluating avg_top_8
evaluating avg_top_4
evaluating knn
evaluating avg_top_6
evaluating mlr
evaluating avg_top_2
evaluating mlp
evaluating sgd
evaluating xgboost
evaluating weighted_top_8
evaluating elasticnet
------------
Daily
------------
evaluating weighted_top_4
evaluating weighted_top_6
evaluating weighted_top_2
evaluating svr
evaluating lightgbm
evaluating auto_select
evaluating avg_top_8
evaluating avg_top_4
evaluating knn
evaluating avg_top_6
evaluating mlr
evaluating avg_top_2
evaluating mlp
evaluating sgd
evaluating xgboost
evaluating weighted_top_8
evaluating elasticnet
------------
Weekly
------------
evaluating weighted_top_4
evaluating weighted_top_6
evaluating weighted_top_2
evaluating svr
evaluating lightgbm
evaluating auto_select
evaluating avg_top_8
evaluating avg_top_4
evaluating knn
evaluating avg_

In [11]:
# Turn the per-frequency sMAPE / MASE tables into per-frequency OWA scores
# and a weighted overall leaderboard.  The cell derives everything from the
# score tables themselves, so it does not depend on loop variables left over
# from an earlier cell and can be re-run without changing its results.
benchmark = 'Naive2'

# Frequency columns only: skipping a previously added 'Avg' column keeps a
# re-run from treating it as a frequency or averaging it into itself.
freqs = [col for col in df_smape.columns if col != 'Avg']

# Every evaluated model, regardless of which frequencies it was run on.
models = [mod for mod in df_smape.index if mod != benchmark]

# Weight of each frequency in the overall score.
# NOTE(review): presumably the number of M4 series per frequency (they sum
# to 100,000) -- confirm.
series_counts = {
    'Yearly': 23_000,
    'Quarterly': 24_000,
    'Monthly': 48_000,
    'Weekly': 359,
    'Daily': 4_227,
    'Hourly': 414,
}
weights = pd.Series(series_counts) / sum(series_counts.values())

# A model with no result for a frequency is scored as the benchmark there.
for freq in freqs:
    df_smape[freq] = df_smape[freq].fillna(df_smape.loc[benchmark, freq])
    df_mase[freq] = df_mase[freq].fillna(df_mase.loc[benchmark, freq])

# OWA per model and frequency: mean of the two metrics, each taken relative
# to the benchmark row.
for freq in freqs:
    for mod in models:
        df_owa.loc[mod, freq] = (
            df_smape.loc[mod, freq] / df_smape.loc[benchmark, freq]
            + df_mase.loc[mod, freq] / df_mase.loc[benchmark, freq]
        ) / 2

# Overall sMAPE / MASE per model: weighted average across frequencies.
# The multiplication aligns on frequency name, so column order is irrelevant.
for mod in models:
    df_overall.loc[mod, 'sMAPE'] = (
        df_smape.loc[mod, weights.index] * weights
    ).sum()
    df_overall.loc[mod, 'MASE'] = (
        df_mase.loc[mod, weights.index] * weights
    ).sum()

# Unweighted mean across the frequency columns only.
df_smape['Avg'] = df_smape[freqs].mean(axis=1)
df_mase['Avg'] = df_mase[freqs].mean(axis=1)
df_owa['Avg'] = df_owa[freqs].mean(axis=1)

# Overall OWA, recomputed for every row from its overall sMAPE and MASE
# relative to the benchmark row.
df_overall['OWA'] = (
    df_overall['sMAPE'] / df_overall.loc[benchmark, 'sMAPE']
    + df_overall['MASE'] / df_overall.loc[benchmark, 'MASE']
) / 2

In [12]:
# Overall leaderboard, lowest OWA first (sort_values sorts ascending by default).
df_overall.sort_values('OWA')

Unnamed: 0,sMAPE,MASE,OWA
ARIMA - Standard for Comp.,12.669,1.666,0.902678
elasticnet,13.259215,1.678357,0.927666
sgd,13.774556,1.774934,0.971918
mlr,13.983649,1.746158,0.972101
knn,14.428374,1.763285,0.992973
Naive2,13.564,1.912,1.0
avg_top_2,14.458214,1.81419,1.007385
avg_top_4,14.554557,1.827766,1.014486
avg_top_6,14.601984,1.834514,1.017999
avg_top_8,14.629504,1.838442,1.020041


In [13]:
# Per-frequency sMAPE, ordered by the row-wise 'Avg' column, lowest first.
df_smape.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
elasticnet,16.485414,3.212941,7.558592,14.346776,11.008481,15.215352,11.304593
knn,11.771977,3.34391,9.197263,16.159735,11.946412,15.571571,11.331811
avg_top_2,13.24208,3.467041,7.075014,15.600218,11.915365,16.88543,11.364191
avg_top_4,13.192382,3.483951,7.074487,15.697314,11.991784,17.019729,11.409941
avg_top_6,13.179528,3.493048,7.079302,15.746778,12.027585,17.083832,11.435012
avg_top_8,13.17284,3.497954,7.083289,15.776177,12.047945,17.120039,11.449707
mlr,15.812724,3.561807,6.970394,15.086237,11.470332,16.297084,11.533096
weighted_top_8,13.15733,3.539029,7.127401,16.004577,12.202408,17.377742,11.568081
weighted_top_6,13.155843,3.545458,7.133839,16.036054,12.221986,17.405599,11.58313
weighted_top_4,13.153432,3.556098,7.143848,16.082718,12.255299,17.452031,11.607238


In [14]:
# Per-frequency MASE, ordered by the row-wise 'Avg' column, lowest first.
df_mase.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
mlr,0.91421,3.704913,2.195848,1.026944,1.239512,3.423773,2.0842
avg_top_2,0.895053,3.630907,2.19214,1.051444,1.301071,3.618201,2.114803
avg_top_4,0.890885,3.648478,2.189538,1.057396,1.311645,3.650655,2.124766
avg_top_6,0.88968,3.657394,2.191181,1.060558,1.316867,3.666305,2.130331
avg_top_8,0.889118,3.662595,2.193108,1.062455,1.319916,3.675269,2.133744
weighted_top_8,0.887519,3.702246,2.212811,1.078313,1.3421,3.740863,2.160642
weighted_top_6,0.887596,3.707662,2.215901,1.080292,1.344855,3.747521,2.163971
weighted_top_4,0.887983,3.716932,2.220904,1.083568,1.349469,3.75877,2.169604
weighted_top_2,0.889507,3.736082,2.229031,1.090614,1.359221,3.782552,2.181168
auto_select,0.889513,3.740155,2.233623,1.090649,1.361419,3.79639,2.185291


In [15]:
# Per-frequency OWA, ordered by the row-wise 'Avg' column, lowest first.
df_owa.sort_values('Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
mlr,0.620949,1.149979,0.775802,1.005888,0.972857,0.929397,0.909145
avg_top_2,0.547031,1.12313,0.780844,1.035225,1.015515,0.971861,0.912268
avg_top_4,0.544809,1.128587,0.780347,1.04139,1.022841,0.980053,0.916338
avg_top_6,0.544208,1.131441,0.780906,1.044591,1.026371,0.983984,0.918583
avg_top_8,0.543908,1.13304,0.78147,1.046503,1.028407,0.986219,0.919924
elasticnet,0.905185,1.038109,0.825118,0.965414,0.944762,0.875266,0.925642
weighted_top_8,0.543153,1.145832,0.787425,1.061877,1.043511,1.002357,0.930692
weighted_top_6,0.543128,1.147714,0.788333,1.063899,1.045404,1.004047,0.932088
weighted_top_4,0.543143,1.150875,0.78978,1.067057,1.0486,1.006883,0.93439
weighted_top_2,0.541714,1.157384,0.792112,1.07367,1.055619,1.012487,0.938831
