##### Imports

In [25]:
import pytest
import os
import pathlib
import pandas as pd
import logging
import matplotlib.pyplot as plt

from tot.dataset import Dataset
from tot.models import BayesianRidgeModel, LinearRegressionModel, NeuralProphetModel, ProphetModel, SeasonalNaiveModel
from tot.experiment import SimpleExperiment, CrossValidationExperiment
from tot.benchmark import SimpleBenchmark, CrossValidationBenchmark
from tot.benchmark import ManualBenchmark, ManualCVBenchmark
from tot.metrics import ERROR_FUNCTIONS
from neuralprophet import set_random_seed

In [2]:
# Logger used by this notebook for its own messages. Both this logger and
# its parent in the logging hierarchy are set to INFO.
log = logging.getLogger("tot.benchmarking")
for _logger in (log, log.parent):
    _logger.setLevel(logging.INFO)

In [3]:
# TODO: datasets should have a simple .load() command

# --- Paths -----------------------------------------------------------------
# DIR is the parent of the current working directory; all data and log paths
# below are derived from it.
# NOTE(review): this presumably assumes the notebook is run from a direct
# sub-folder of the repository root -- confirm.
DIR = os.path.abspath('..')
DATA_DIR = os.path.join(DIR, "tests", "test-data")
PEYTON_FILE = os.path.join(DATA_DIR, "wp_log_peyton_manning.csv")
AIR_FILE = os.path.join(DATA_DIR, "air_passengers.csv")
ERCOT_FILE = os.path.join(DATA_DIR, "ercot_load.csv")
SAVE_DIR = os.path.join(DIR, "tests", "test-logs")

# Create the log directory up front. exist_ok=True keeps the cell idempotent
# on re-runs and avoids the check-then-create race of a separate isdir() test.
os.makedirs(SAVE_DIR, exist_ok=True)

# --- Experiment constants --------------------------------------------------
# Defined once (the previous version assigned ERCOT_REGIONS twice with the
# same value).
ERCOT_REGIONS = ["NORTH", "EAST", "FAR_WEST"]

# Training settings. NOTE(review): none of these are referenced by the cells
# visible in this notebook section (EPOCHS only appears in commented-out
# parameters) -- confirm whether they are still needed.
EPOCHS = 40
BATCH_SIZE = 64
LR = 1.0

PLOT = False

In [4]:
# Read the air-passengers series from the CSV file configured in AIR_FILE.
air_passengers_df = pd.read_csv(AIR_FILE)

# Only the air-passengers dataset is benchmarked here. The Peyton Manning
# dataset is currently disabled; to re-enable it, load the frame and add the
# corresponding entry to dataset_list:
#   peyton_manning_df = pd.read_csv(PEYTON_FILE)
#   print(peyton_manning_df.shape)
#   Dataset(df=peyton_manning_df, name="peyton_manning", freq="D", seasonalities=[7, 365.25])
air_passengers_dataset = Dataset(
    df=air_passengers_df,
    name="air_passengers",
    freq="MS",
    seasonality_mode="multiplicative",
)
dataset_list = [air_passengers_dataset]

In [5]:
# Candidate models for the benchmark, as (model class, constructor params)
# pairs. Parameter order inside each dict is kept exactly as before.
# Previously tried but disabled for the NeuralProphet variants 2-5:
#   "learning_rate": 0.003, "epochs": EPOCHS
model_classes_and_params = [
    (
        NeuralProphetModel,
        dict(
            n_lags=9,
            n_forecasts=3,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
            num_hidden_layers=1,
            d_hidden=5,
        ),
    ),
    (
        NeuralProphetModel,
        dict(
            n_lags=12,
            n_forecasts=3,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
            num_hidden_layers=5,
            d_hidden=12,
        ),
    ),
    (
        NeuralProphetModel,
        dict(
            n_lags=9,
            n_forecasts=3,
            ar_reg=1,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
        ),
    ),
    (
        NeuralProphetModel,
        dict(
            n_lags=9,
            n_forecasts=3,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
        ),
    ),
    (
        NeuralProphetModel,
        dict(
            n_forecasts=3,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
        ),
    ),
    (ProphetModel, dict(seasonality_mode="multiplicative")),
    (SeasonalNaiveModel, dict(n_forecasts=3, season_length=12)),
    (LinearRegressionModel, dict(n_lags=9, output_chunk_length=3, n_forecasts=3)),
    # Disabled:
    # (BayesianRidgeModel, {"n_lags": 36, "output_chunk_length": 3, "n_forecasts": 3}),
]
# Record the chosen configuration in the notebook's logger.
log.info("%s", model_classes_and_params)

In [20]:
# Reduced model selection: one NeuralProphet configuration against Prophet.
# NOTE(review): this rebinds model_classes_and_params, so any list assigned
# to that name by an earlier cell is no longer used after this cell runs.
model_classes_and_params = [
    (
        NeuralProphetModel,
        dict(
            n_lags=10,
            n_forecasts=3,
            seasonality_mode="multiplicative",
            yearly_seasonality=True,
            num_hidden_layers=6,
            d_hidden=5,
            epochs=200,
        ),
    ),
    (ProphetModel, dict(seasonality_mode="multiplicative")),
]
# Record the chosen configuration in the notebook's logger.
log.info("%s", model_classes_and_params)

In [28]:
# Seed before building and running the benchmark so results are reproducible.
set_random_seed(42)

# Benchmark every (model, params) pair on every dataset, scoring with MAPE.
# Other metrics that were used previously:
#   ["MAE", "MSE", "MASE", "RMSE", "MAPE", "SMAPE"]
benchmark = SimpleBenchmark(
    datasets=dataset_list,
    model_classes_and_params=model_classes_and_params,
    metrics=["MAPE"],
    test_percentage=40,
)
results_train, results_test = benchmark.run()

# Emit the test-set results table through the notebook logger.
# NOTE(review): WARNING level is presumably used so the table is shown even
# when no handler is configured for this logger -- confirm before lowering it.
log.warning("%s", results_test)



INFO - (NP.df_utils._infer_frequency) - Major frequency MS corresponds to 89.655% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - MS
INFO - (NP.config.init_data_params) - Setting normalization to global as only one dataframe provided for training.
INFO - (NP.utils.set_auto_seasonalities) - Disabling weekly seasonality. Run NeuralProphet with weekly_seasonality=True to override this.
INFO - (NP.utils.set_auto_seasonalities) - Disabling daily seasonality. Run NeuralProphet with daily_seasonality=True to override this.
INFO - (NP.config.set_auto_batch_epoch) - Auto-set batch_size to 16


Finding best initial lr:   0%|          | 0/206 [00:00<?, ?it/s]

Training: 0it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency MS corresponds to 89.655% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - MS
INFO - (NP.df_utils._infer_frequency) - Major frequency MS corresponds to 90.0% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - MS


Predicting: 5it [00:00, ?it/s]

INFO - (NP.df_utils._infer_frequency) - Major frequency MS corresponds to 91.045% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - MS
INFO - (NP.df_utils._infer_frequency) - Major frequency MS corresponds to 90.0% of the data.
INFO - (NP.df_utils._infer_frequency) - Defined frequency is equal to major frequency - MS


Predicting: 5it [00:00, ?it/s]

18:31:42 - cmdstanpy - INFO - Chain [1] start processing
18:31:43 - cmdstanpy - INFO - Chain [1] done processing
             data          model  \
0  air_passengers  NeuralProphet   
1  air_passengers        Prophet   

                                              params  \
0  {'n_lags': 10, 'n_forecasts': 3, 'seasonality_...   
1  {'seasonality_mode': 'multiplicative', '_data_...   

                                          experiment      MAPE  
0  air_passengers_NeuralProphet_n_lags_10_n_forec...  5.639420  
1  air_passengers_Prophet_seasonality_mode_multip...  5.045631  
