In [31]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
c:\Users\jaesc2\GitHub\skforecast


In [32]:
import re
import pytest
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from skforecast.recursive import ForecasterRecursive
from skforecast.direct import ForecasterDirect
from skforecast.model_selection._split import TimeSeriesFold
from skforecast.model_selection._validation import _backtesting_forecaster
from skforecast.preprocessing import RollingFeatures

# Fixtures
from skforecast.exceptions import IgnoredArgumentWarning
from skforecast.model_selection.tests.fixtures_model_selection import y
from skforecast.model_selection.tests.fixtures_model_selection import exog
from skforecast.model_selection.tests.fixtures_model_selection import out_sample_residuals

In [33]:
# Print the installed versions of numpy, pandas and skforecast, one per line.
import skforecast

for library in (np, pd, skforecast):
    print(library.__version__)

1.26.4
2.2.3
0.14.0


In [34]:
# Reference 'mean_absolute_error' values for ForecasterRecursive, one case per
# set of rolling statistics. `'stats': False` marks the case that is run
# without any window features.
expected_ForecasterRecursive = {
    0: {'stats': False,
        'mean_absolute_error': 0.1780043},
    1: {'stats': ['mean'],
        'mean_absolute_error': 0.18347593},
    2: {'stats': ['mean', 'std'],
        'mean_absolute_error': 0.22006699},
    3: {'stats': ['mean', 'std', 'min'],
        'mean_absolute_error': 0.25699072},
    4: {'stats': ['mean', 'std', 'min', 'max'],
        'mean_absolute_error': 0.25101236},
    5: {'stats': ['mean', 'std', 'min', 'max', 'sum'],
        'mean_absolute_error': 0.2481226},
    6: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median'],
        'mean_absolute_error': 0.24823951},
    7: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median', 'ratio_min_max'],
        'mean_absolute_error': 0.26102419},
    8: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median', 'ratio_min_max', 'coef_variation'],
        'mean_absolute_error': 0.2706073},
}

# Loop-invariant setup: the last `n_backtest` observations are held out and
# the remaining ones define the initial training size for every case.
n_backtest = 12
y_train = y[:-n_backtest]

for k, v in expected_ForecasterRecursive.items():

    # `False` is the sentinel for "no window features"; any list of stats is
    # turned into a RollingFeatures instance with a window of 3.
    if v['stats'] is False:
        window_features = None
    else:
        window_features = RollingFeatures(
            stats        = v['stats'],
            window_sizes = 3,
        )

    forecaster = ForecasterRecursive(
        regressor=LinearRegression(), lags=3, window_features=window_features
    )

    # A fresh fold definition is built for every case so that no object is
    # shared between consecutive backtesting runs.
    cv = TimeSeriesFold(
            steps                 = 4,
            initial_train_size    = len(y_train),
            window_size           = None,
            differentiation       = None,
            refit                 = True,
            fixed_train_size      = False,
            gap                   = 0,
            skip_folds            = None,
            allow_incomplete_fold = True,
            return_all_indexes    = False,
        )
    metric, _ = _backtesting_forecaster(
                    forecaster    = forecaster,
                    y             = y,
                    exog          = exog,
                    cv            = cv,
                    metric        = 'mean_absolute_error',
                    verbose       = False,
                    show_progress = False,
                    n_jobs        = 'auto',
                )

    # Report which case failed instead of raising a bare AssertionError.
    assert np.isclose(metric, v['mean_absolute_error'], atol=1e-8), (
        f"ForecasterRecursive case {k} (stats={v['stats']}): expected "
        f"mean_absolute_error {v['mean_absolute_error']}, got {metric}"
    )

In [35]:
# Reference 'mean_absolute_error' values for ForecasterDirect, one case per
# set of rolling statistics. `'stats': False` marks the case that is run
# without any window features.
expected_ForecasterDirect = {
    0: {'stats': False,
        'mean_absolute_error': 0.18601159},
    1: {'stats': ['mean'],
        'mean_absolute_error': 0.18934429},
    2: {'stats': ['mean', 'std'],
        'mean_absolute_error': 0.20115662},
    3: {'stats': ['mean', 'std', 'min'],
        'mean_absolute_error': 0.23965493},
    4: {'stats': ['mean', 'std', 'min', 'max'],
        'mean_absolute_error': 0.25311027},
    5: {'stats': ['mean', 'std', 'min', 'max', 'sum'],
        'mean_absolute_error': 0.25312519},
    6: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median'],
        'mean_absolute_error': 0.25508699},
    7: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median', 'ratio_min_max'],
        'mean_absolute_error': 0.26398719},
    8: {'stats': ['mean', 'std', 'min', 'max', 'sum', 'median', 'ratio_min_max', 'coef_variation'],
        'mean_absolute_error': 0.26606391},
}

# Loop-invariant setup: the last `n_backtest` observations are held out and
# the remaining ones define the initial training size for every case.
n_backtest = 12
y_train = y[:-n_backtest]

for k, v in expected_ForecasterDirect.items():

    # `False` is the sentinel for "no window features"; any list of stats is
    # turned into a RollingFeatures instance with a window of 3.
    if v['stats'] is False:
        window_features = None
    else:
        window_features = RollingFeatures(
            stats        = v['stats'],
            window_sizes = 3,
        )

    # The forecaster's `steps` matches the `steps` of the fold definition below.
    forecaster = ForecasterDirect(
        regressor=LinearRegression(), steps=4, lags=3, window_features=window_features
    )

    # A fresh fold definition is built for every case so that no object is
    # shared between consecutive backtesting runs.
    cv = TimeSeriesFold(
            steps                 = 4,
            initial_train_size    = len(y_train),
            window_size           = None,
            differentiation       = None,
            refit                 = True,
            fixed_train_size      = False,
            gap                   = 0,
            skip_folds            = None,
            allow_incomplete_fold = True,
            return_all_indexes    = False,
        )
    metric, _ = _backtesting_forecaster(
                    forecaster    = forecaster,
                    y             = y,
                    exog          = exog,
                    cv            = cv,
                    metric        = 'mean_absolute_error',
                    verbose       = False,
                    show_progress = False,
                    n_jobs        = 1,
                )

    # Report which case failed instead of raising a bare AssertionError.
    assert np.isclose(metric, v['mean_absolute_error'], atol=1e-8), (
        f"ForecasterDirect case {k} (stats={v['stats']}): expected "
        f"mean_absolute_error {v['mean_absolute_error']}, got {metric}"
    )