In [None]:
import sys
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import root_mean_squared_error

# statsmodels
import statsmodels
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.stattools import adfuller
from statsmodels.tsa.stattools import kpss
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.seasonal import seasonal_decompose

# skforecast
import skforecast
from skforecast.datasets import fetch_dataset
from skforecast.plot import set_dark_theme
from skforecast.sarimax import Sarimax
from skforecast.recursive import ForecasterSarimax
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_sarimax
from skforecast.model_selection import grid_search_sarimax

import warnings
# Report each distinct warning only the first time it is raised.
warnings.filterwarnings('once')

# ANSI escape sequences: bold + 256-colour orange for the version banner, and a
# reset so the styling does not bleed into anything printed afterwards.
color = '\033[1m\033[38;5;208m'
color_reset = '\033[0m'
print(f"{color}Versión skforecast: {skforecast.__version__}{color_reset}")
print(f"{color}Versión statsmodels: {statsmodels.__version__}{color_reset}")
print(f"{color}Versión pandas: {pd.__version__}{color_reset}")
print(f"{color}Versión numpy: {np.__version__}{color_reset}")

In [None]:
def load_datasets():
    """Import the project's ``root`` module and load the preprocessed training data.

    The current working directory and its parent directory are made importable
    through ``sys.path`` so that ``import root`` can resolve. A directory that is
    already on ``sys.path`` is not inserted again, so calling this function
    repeatedly (e.g. re-running the notebook cell) does not keep growing
    ``sys.path``.

    Returns:
        tuple: ``(root, train)`` where ``root`` is the imported module and
        ``train`` is the object unpickled from
        ``root.DIR_DATA_STAGE + 'train_preprocessed.pkl'``.
    """
    current_dir = os.getcwd()
    ROOT_PATH = os.path.dirname(current_dir)
    # Both go to index 1, parent first, so the working directory ends up ahead
    # of its parent in the import search order.
    for candidate in (ROOT_PATH, current_dir):
        if candidate not in sys.path:
            sys.path.insert(1, candidate)
    import root

    # NOTE(review): unpickling can execute arbitrary code; only load trusted files.
    # NOTE(review): the directory is concatenated as-is, so DIR_DATA_STAGE
    # presumably ends with a path separator - confirm in the root module.
    train = pd.read_pickle(root.DIR_DATA_STAGE + 'train_preprocessed.pkl')
    return root, train


def create_forecaster(train, series, levels):
    """Build a ``ForecasterSarimax`` and fit it on the training data.

    The forecast target is ``train['target']``. The exogenous variables are the
    columns named in ``series`` minus the target itself and minus anything
    listed in ``levels``: feeding the target to the model as its own exogenous
    regressor would leak the answer into the fit.

    Args:
        train: Training frame; must contain ``'target'`` and every remaining
            column named in ``series``.
        series: Iterable of candidate column names for the exogenous variables.
            It may include the target column, which is filtered out.
        levels: Name(s) of the series being predicted (a string, an iterable of
            strings, or ``None``); these are never used as exogenous variables.

    Returns:
        The fitted ``ForecasterSarimax`` instance.
    """
    if isinstance(levels, str):
        levels = [levels]
    excluded = {'target', *(levels or [])}
    exog_columns = [column for column in series if column not in excluded]

    forecaster = ForecasterSarimax(
        regressor=Sarimax(
            order=(1, 1, 1),  # Placeholder replaced in the grid search
            seasonal_order=(1, 1, 1, 12),
            maxiter=500
        )
    )
    forecaster.fit(
        y    = train['target'],
        # No usable exogenous column left -> fit a model without exog.
        exog = train[exog_columns] if exog_columns else None
    )
    return forecaster


def backtesting(data, train, forecaster, param_grid, steps, exog=None):
    """Run a SARIMAX grid search evaluated by backtesting.

    Args:
        data: Container indexed by column name; ``data['target']`` is the
            series passed to the grid search as ``y``.
        train: Training partition; only its length is used, as the initial
            training size of the backtest.
        forecaster: Forecaster handed to ``grid_search_sarimax``.
        param_grid: Mapping of parameter name to the candidate values to try.
        steps: Value forwarded to ``TimeSeriesFold`` as ``steps``.
        exog: Optional exogenous variables forwarded to the grid search.
            Defaults to ``None``, i.e. the search runs without exogenous
            variables, which is what this function did before the parameter
            existed.

    Returns:
        Whatever ``grid_search_sarimax`` returns (the search results).
    """
    cv = TimeSeriesFold(
        steps              = steps,
        initial_train_size = len(train),
        refit              = False,
    )
    return grid_search_sarimax(
        forecaster            = forecaster,
        y                     = data['target'],
        exog                  = exog,
        cv                    = cv,
        param_grid            = param_grid,
        metric                = root_mean_squared_error,
        # The caller's forecaster is left untouched by the search.
        return_best           = False,
        n_jobs                = 'auto',
        suppress_warnings_fit = True,
        verbose               = False,
    )

In [None]:
root, train = load_datasets()
# Keep a copy of the full frame before `train` is narrowed to the training span.
data = train.copy()

# Split point: rows up to `end_val` are training data, rows after it validation.
end_val = '2022-08-31 23:59:59'
val = train.loc[end_val:]
train = train.loc[:end_val]

series = ['target', 'temperature', 'rain', 'snowfall', 'surface_pressure', 'cloudcover_total', 'windspeed_10m', 'winddirection_10m', 'shortwave_radiation', 'euros_per_mwh', 'installed_capacity'] 
levels = ['target']  # Series to be predicted

# Restrict every partition to the columns of interest.
data = data[series].copy()
data_train = train[series].copy()
data_val = val[series].copy()

forecaster = create_forecaster(data_train, series, levels)

# Candidate SARIMAX configurations. `trend=None` is statsmodels' default and
# means "no trend", exactly like 'n'; listing both only repeated every fit, so
# the no-trend case is listed once.
param_grid = {
    'order': [(0, 1, 0), (0, 1, 1), (1, 1, 0), (1, 1, 1), (2, 1, 1)],
    'seasonal_order': [(0, 0, 0, 0), (0, 1, 0, 12), (1, 1, 1, 12)],
    'trend': ['n', 'c']
}
# 72 is forwarded to the cross-validation fold as its `steps` value.
resultados_grid = backtesting(data, train, forecaster, param_grid, steps=72)
resultados_grid.to_excel(root.DIR_DATA_ANALYTICS + 'SARIMAX_grid_search_results.xlsx')