In [3]:
import pandas as pd
import numpy as np
from scalecast.util import metrics

In [4]:
# Hard-coded benchmark rows that seed the comparison tables. Rows for the
# models scored in this notebook are appended to these frames further down.
df_overall = pd.DataFrame(
    [
        {'sMAPE': 13.564, 'MASE': 1.912, 'OWA': 1.0},
        {'sMAPE': 12.669, 'MASE': 1.666, 'OWA': 0.903},
    ],
    index=['Naive2', 'ARIMA - Standard for Comp.'],
)

# Naive2 sMAPE, one column per frequency.
df_smape = pd.DataFrame(
    [
        {
            'Hourly': 18.383,
            'Daily': 3.045,
            'Weekly': 9.161,
            'Monthly': 14.427,
            'Quarterly': 11.012,
            'Yearly': 16.342,
        },
    ],
    index=['Naive2'],
)

# Naive2 MASE, one column per frequency.
df_mase = pd.DataFrame(
    [
        {
            'Hourly': 2.395,
            'Daily': 3.278,
            'Weekly': 2.777,
            'Monthly': 1.063,
            'Quarterly': 1.371,
            'Yearly': 3.974,
        },
    ],
    index=['Naive2'],
)

# Naive2 is the OWA reference row, so it is 1 for every frequency.
df_owa = pd.DataFrame(
    [
        dict.fromkeys(
            ['Hourly', 'Daily', 'Weekly', 'Monthly', 'Quarterly', 'Yearly'],
            1,
        ),
    ],
    index=['Naive2'],
)

In [5]:
# Show the hard-coded benchmark rows before any model results are added.
df_overall

Unnamed: 0,sMAPE,MASE,OWA
Naive2,13.564,1.912,1.0
ARIMA - Standard for Comp.,12.669,1.666,0.903


In [5]:
# Per-frequency value handed to MASE as `m` further down
# (presumably the seasonal period of each M4 frequency -- confirm).
mmap = dict(
    Yearly=1,
    Quarterly=4,
    Monthly=12,
    Weekly=1,
    Daily=1,
    Hourly=24,
)

# One training frame and one test frame per frequency, keyed in the same
# order as `mmap`; the first CSV column becomes the index.
train = dict(
    (name, pd.read_csv(f'm4/train/{name}-train.csv', index_col=0))
    for name in mmap
)
test = dict(
    (name, pd.read_csv(f'm4/test/{name}-test.csv', index_col=0))
    for name in mmap
)

# Series metadata; StartingDate is parsed as a day-first date.
info = pd.read_csv(
    'm4/M4-info.csv',
    index_col=0,
    parse_dates=['StartingDate'],
    dayfirst=True,
)

In [7]:
# get rid of models that are unreasonable
def _unreasonable_models(f):
    """Names of the models in ``f.history`` whose level forecast peaks above
    ten times the largest value in ``f.levely`` or dips below zero."""
    return [
        m for m, r in f.history.items()
        if np.max(r['LevelForecast']) > np.max(f.levely)*10
        or min(r['LevelForecast']) < 0
    ]


for freq, res in results.items():
    for i, f in res.items():
        mpop = _unreasonable_models(f)
        # Drop the flagged models only when at least two models would remain;
        # otherwise the history is left untouched.
        if len(f.history) - len(mpop) > 1:
            f.pop(*mpop)

In [8]:
def _clean_values(frames, freq, i):
    """Row ``i`` of ``frames[freq]`` with missing values dropped, as an array."""
    return frames[freq].loc[i].dropna().values


def _top_level_forecast(f):
    """Second column of the level-forecast export for the single best model,
    where "best" is decided by ``LevelTestSetMAPE``."""
    exported = f.export(
        'lvl_fcsts',
        models='top_1',
        determine_best_by='LevelTestSetMAPE',
    )
    return exported.iloc[:,1].values


# freq -> list of (actuals, predictions, obs) for the auto-selected model.
ffa = {
    freq: [
        (
            _clean_values(test, freq, i),   # actuals
            _top_level_forecast(f),         # predictions
            _clean_values(train, freq, i),  # obs
        )
        for i, f in d.items()
    ]
    for freq, d in results.items()
}

# Same layout, but the predictions slot holds the full export frame for every
# model so individual model columns can be selected later.
ffa_allmods = {
    freq: [
        (
            _clean_values(test, freq, i),           # actuals
            f.export('lvl_fcsts', models='all'),    # predictions
            _clean_values(train, freq, i),          # obs
        )
        for i, f in d.items()
    ]
    for freq, d in results.items()
}

In [9]:
def _mean_or_nan(scores):
    """Mean of ``scores``; NaN for an empty list, without NumPy's
    empty-slice RuntimeWarning."""
    return np.mean(scores) if len(scores) else np.nan


def _series_scores(entries, m, mod=None):
    """Score each ``(actuals, predictions, obs)`` entry exactly once.

    Parameters
    ----------
    entries : list of tuples
        ``(actuals, predictions, obs)`` as stored in ``ffa`` / ``ffa_allmods``.
    m : int
        Passed through to ``MASE`` as its ``m`` keyword.
    mod : str, optional
        ``None`` means ``predictions`` is used as stored. Otherwise
        ``predictions`` is treated as a frame, ``mod`` is selected as a column,
        and entries that do not contain that column are skipped.

    Returns
    -------
    (list, list)
        Per-series sMAPE values and per-series MASE values, in entry order.
    """
    smapes, mases = [], []
    for actuals, preds, obs in entries:
        if mod is not None:
            if mod not in preds:
                continue
            # Always hand the metrics a plain array so the per-frequency and
            # overall figures are computed from identical inputs.
            preds = preds[mod].values
        smapes.append(sMAPE(actuals, preds))
        mases.append(MASE(actuals, preds, obs, m=m))
    return smapes, mases


def _record_scores(label, entries_by_freq, mod=None):
    """Write row ``label`` into ``df_smape``, ``df_mase`` and ``df_overall``.

    Per-frequency cells hold the mean score over that frequency's series.
    The ``df_overall`` cells hold the mean over all series pooled across
    frequencies, so frequencies with more series weigh more.
    """
    pooled_smape, pooled_mase = [], []
    for freq, entries in entries_by_freq.items():
        smapes, mases = _series_scores(entries, mmap[freq], mod=mod)
        df_smape.loc[label,freq] = _mean_or_nan(smapes)
        df_mase.loc[label,freq] = _mean_or_nan(mases)
        pooled_smape.extend(smapes)
        pooled_mase.extend(mases)
    df_overall.loc[label,'sMAPE'] = _mean_or_nan(pooled_smape)
    df_overall.loc[label,'MASE'] = _mean_or_nan(pooled_mase)


# Auto-selected model first, then each individual model, so the rows are
# appended in the order: 'auto select', then `models` in order.
_record_scores('auto select', ffa)
for mod in models:
    _record_scores(mod, ffa_allmods, mod=mod)

In [10]:
# Overall OWA: average of each row's sMAPE and MASE, each expressed relative
# to the Naive2 row. This overwrites the hard-coded OWA column.
df_overall['OWA'] = (
    df_overall['sMAPE'] /
    df_overall.loc['Naive2','sMAPE'] +
    df_overall['MASE'] /
    df_overall.loc['Naive2','MASE']
) / 2

# Work on the frequency columns only. After this cell has run once the frames
# also carry an 'Avg' column; looping over it or averaging it back in would
# change the results on re-execution.
freq_cols = [col for col in df_owa.columns if col != 'Avg']

for freq in freq_cols:
    # Unpacking accepts `models` as a tuple or a list.
    for mod in ('auto select', *models):
        df_owa.loc[mod,freq] = (
            df_smape.loc[mod,freq] /
            df_smape.loc['Naive2',freq] +
            df_mase.loc[mod,freq] /
            df_mase.loc['Naive2',freq]
        ) / 2

# NOTE: mean() skips NaN, so a row with missing frequencies is averaged over
# fewer columns than Naive2. Compare 'Avg' across rows with that in mind.
df_smape['Avg'] = df_smape[freq_cols].mean(axis=1)
df_mase['Avg'] = df_mase[freq_cols].mean(axis=1)
df_owa['Avg'] = df_owa[freq_cols].mean(axis=1)

In [11]:
# Rows in ascending OWA order; this will change with more results coming in.
df_overall.sort_values(by='OWA')

Unnamed: 0,sMAPE,MASE,OWA
Theta - Benchmark,12.309,1.696,0.897252
Comb - Benchmark,12.555,1.663,0.897691
ARIMA - Standard for Comp.,12.669,1.666,0.902678
Naive2,13.564,1.912,1.0
avg,12.481354,2.145746,1.021217
elasticnet,12.51081,2.150086,1.023438
knn,12.874399,2.191516,1.047675
lightgbm,12.989806,2.222369,1.059997
svr,13.174093,2.320395,1.092425
auto select,13.420214,2.313637,1.09973


In [12]:
# Per-frequency sMAPE, rows in ascending order of their 'Avg' column.
df_smape.sort_values(by='Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
Naive2,18.383,3.045,9.161,14.427,11.012,16.342,12.061667
avg,,,,,10.876075,14.160376,12.518226
elasticnet,,,,,11.077288,14.013179,12.545234
knn,,,,,11.268728,14.552217,12.910473
lightgbm,,,,,11.623757,14.40869,13.016224
svr,,,,,11.841246,14.565091,13.203169
auto select,,,,,11.564414,15.356701,13.460557
xgboost,,,,,12.656762,16.786555,14.721659


In [13]:
# Per-frequency MASE, rows in ascending order of their 'Avg' column.
df_mase.sort_values(by='Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg,,,,,1.234773,3.098567,2.16667
elasticnet,,,,,1.273991,3.068258,2.171124
knn,,,,,1.249872,3.175471,2.212672
lightgbm,,,,,1.317309,3.162435,2.239872
auto select,,,,,1.289012,3.382812,2.335912
svr,,,,,1.444929,3.234058,2.339494
Naive2,2.395,3.278,2.777,1.063,1.371,3.974,2.476333
xgboost,,,,,1.505896,3.916675,2.711285


In [14]:
# Per-frequency OWA, rows in ascending order of their 'Avg' column.
df_owa.sort_values(by='Avg')

Unnamed: 0,Hourly,Daily,Weekly,Monthly,Quarterly,Yearly,Avg
avg,,,,,0.944147,0.823106,0.883626
elasticnet,,,,,0.967586,0.814789,0.891187
knn,,,,,0.967482,0.844771,0.906126
lightgbm,,,,,1.008196,0.838739,0.923467
auto select,,,,,0.995181,0.895472,0.945327
svr,,,,,1.064614,0.852536,0.958575
Naive2,1.0,1.0,1.0,1.0,1.0,1.0,1.0
xgboost,,,,,1.123877,1.006389,1.065133


In [15]:
# Report how many entries `results` holds for each frequency.
for freq in results:
    r = results[freq]
    print(f'forecasts evaluated for {freq} series: {len(r)}')

forecasts evaluated for Yearly series: 23000
forecasts evaluated for Quarterly series: 24000
