In [1]:
import pycaret
from pycaret.time_series import *
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sktime.performance_metrics.forecasting import MeanAbsolutePercentageError, MeanAbsoluteScaledError, MedianAbsoluteScaledError 


In [126]:
df_in = pd.read_csv("data/monthly_in.csv")
df_out = pd.read_csv("data/monthly_oos.csv")[3:]

In [69]:
forecast_horizon = [1,2,3,6,12,18,24]

In [155]:
mape = MeanAbsolutePercentageError()
mase = MeanAbsoluteScaledError()
mdase = MedianAbsoluteScaledError()

In [178]:
experiments = []
for col in df_in.columns:
    df_train = df_in.loc[:df_in[col].notna()[::-1].idxmax(),col]
    df_test = df_out.loc[:df_out[col].notna()[::-1].idxmax(),col]
    for fh in forecast_horizon:
        print(f"training model: {col}, fh: {fh}")
        exp = {}
        s = setup(df_train,fh=fh, session_id=42,verbose=False)
        best = compare_models(verbose=False, sort="MAPE")
        best_final = finalize_model(best)
        
        df_pred = predict_model(best_final, fh=fh)
        df_test_fh = df_test[:fh]
        exp["serie"] = col
        exp["fh"] = fh
        exp["best_model"] = best_final
        exp["model_path"] = f"models/{col}_{fh}"
        exp["MAPE"] = mape(y_true=df_test_fh,y_pred=df_pred)
        exp["MASE"] = mase(y_true=df_test_fh,y_pred=df_pred,y_train=df_train)
        exp["MdASE"] = mdase(y_true=df_test_fh,y_pred=df_pred,y_train=df_train)
        save_model(best_final, f"models/{col}_{fh}")
        experiments.append(exp)
        if(len(experiments)%50==0):
            pd.DataFrame(experiments).to_csv("results/model_results.csv",index=False)

pd.DataFrame(experiments).to_csv("results/model_results.csv",index=False)

training model: m1, fh: 1
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 2
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 3
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 6
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 12
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 18
Transformation Pipeline and Model Successfully Saved
training model: m1, fh: 24
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 1
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 2
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 3
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 6
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 12
Transformation Pipeline and Model Successfully Saved
training model: m2, fh: 18
Transformation Pipeli

ValueError: Not Enough Data Points, set a lower number of folds or fh

In [2]:
df = pd.read_csv("results/model_results_146.csv")

In [4]:
df.groupby("fh")[["MAPE","MASE","MdASE"]].mean()

Unnamed: 0_level_0,MAPE,MASE,MdASE
fh,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,0.155821,1.077929,2.183776
2,0.133685,0.881015,1.617559
3,0.147418,0.882172,1.567817
6,0.195138,1.210358,2.098275
12,0.211483,1.279099,2.169362
18,0.199667,1.15302,1.968321
24,0.201196,1.277571,2.223683


In [8]:
#1-3
df.groupby("fh")[["MAPE","MASE","MdASE"]].mean()[:3].mean()

MAPE     0.145642
MASE     0.947038
MdASE    1.789717
dtype: float64

In [7]:
#1-12
df.groupby("fh")[["MAPE","MASE","MdASE"]].mean()[:5].mean()

MAPE     0.168709
MASE     1.066114
MdASE    1.927358
dtype: float64

In [6]:
#1-24
df.groupby("fh")[["MAPE","MASE","MdASE"]].mean().mean()

MAPE     0.177773
MASE     1.108737
MdASE    1.975542
dtype: float64