In [None]:
# Load the autoreload extension and enable mode 2, so edits made to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Parent of the current working directory, printed so the resolved location is
# visible in the cell output.
path = str(Path.cwd().parent)
print(path)
# Put that directory at position 1 of the module search path — presumably so the
# local checkout in the parent directory is importable; confirm against the
# repository layout.
sys.path.insert(1, path)

import numpy as np
import pandas as pd

In [None]:
from skforecast.recursive import ForecasterRecursive, ForecasterRecursiveMultiSeries
from skforecast.model_selection import TimeSeriesFold, backtesting_forecaster_multiseries
from lightgbm import LGBMRegressor
# Profiler extensions: the `%%pyinstrument` cell magic and the `%lprun` line
# magic used in later cells are only available once these are loaded.
%load_ext pyinstrument
%load_ext line_profiler

In [None]:
# Synthetic benchmark data: `n` hourly observations for each of `n_series`
# independent standard-normal series, one column per series.
n = 1_000
n_series = 500

# Seeded generator so every run of the notebook profiles exactly the same data.
rng = np.random.default_rng(seed=123)
series = pd.DataFrame(
    rng.normal(loc=0, scale=1, size=(n, n_series)),
    index=pd.date_range('2020-01-01', periods=n, freq='h'),
    columns=[f'series_{i}' for i in range(n_series)]
)

In [None]:
# Multi-series recursive forecaster wrapping a LightGBM regressor
# (verbose=-1) and configured with lags=48.
forecaster = ForecasterRecursiveMultiSeries(
    regressor=LGBMRegressor(verbose=-1),
    lags=48,
)

# Fit on the whole `series` frame; in-sample residuals are not stored.
forecaster.fit(
    series=series,
    store_in_sample_residuals=False,
)

In [None]:
# Fold definition for backtesting: the first 80% of the rows of `series` form
# the initial training window, steps=24 and refit=False are passed to the folds.
cv = TimeSeriesFold(
    steps=24,
    initial_train_size=int(len(series) * 0.8),
    refit=False,
)

In [None]:
%%pyinstrument
# Profile the public backtesting entry point with pyinstrument: string metric,
# no progress bar, add_aggregated_metric=True. The return value is discarded;
# only the profiler report matters here.
_ = backtesting_forecaster_multiseries(
    forecaster    = forecaster,
    series        = series,
    cv            = cv,
    metric        = "mean_absolute_error",
    show_progress = False,
    add_aggregated_metric = True
)

In [None]:
# Profiling backtesting_forecaster_multiseries()
# ==============================================================================
from skforecast.model_selection._validation import _backtesting_forecaster_multiseries
def funt_to_profile(forecaster, series, cv, metric, show_progress, add_aggregated_metric):
    _backtesting_forecaster_multiseries(
        forecaster    = forecaster,
        series        = series,
        cv            = cv,
        metric        = metric,
        show_progress = show_progress,
        add_aggregated_metric = False
    )

%lprun -f _backtesting_forecaster_multiseries funt_to_profile(forecaster, series, cv, "mean_absolute_error", False, False)

In [None]:
# NOTE(review): the cells that follow unpack `inputs_calculate_metrics` with **
# into `_calculate_metrics_backtesting_multiseries`, so this cell presumably
# relies on a modified `_backtesting_forecaster_multiseries` that ends with the
# snippet below — confirm before running.
#
#     returns = {
#         'series'                : series,
#         'predictions'           : backtest_predictions[['level', 'pred']],
#         'folds'                 : folds,
#         'span_index'            : span_index,
#         'window_size'           : forecaster.window_size,
#         'metrics'               : metrics,
#         'levels'                : levels,
#         'add_aggregated_metric' : add_aggregated_metric
#     }
#
#     return returns

from skforecast.model_selection._validation import _backtesting_forecaster_multiseries

# Capture the value returned by the private backtesting routine for later reuse.
inputs_calculate_metrics = _backtesting_forecaster_multiseries(
    forecaster=forecaster,
    series=series,
    cv=cv,
    metric=["mean_absolute_error"],
    show_progress=False,
    add_aggregated_metric=True,
)

In [None]:
# Profiling _calculate_metrics_backtesting_multiseries()
# ==============================================================================
from skforecast.model_selection._utils import _calculate_metrics_backtesting_multiseries
# NOTE: this rebinds the name `funt_to_profile`, which an earlier profiling cell
# defines with a different signature.
def funt_to_profile(inputs_calculate_metrics):
    """
    Unpack `inputs_calculate_metrics` as keyword arguments into
    `_calculate_metrics_backtesting_multiseries`, giving `%lprun` a single call
    to execute. The result of the call is discarded.
    """
    _calculate_metrics_backtesting_multiseries(**inputs_calculate_metrics)

%lprun -f _calculate_metrics_backtesting_multiseries funt_to_profile(inputs_calculate_metrics)

In [None]:
%%pyinstrument
# Same metric-calculation call as the %lprun cell, profiled with pyinstrument;
# the return value is discarded.
_ = _calculate_metrics_backtesting_multiseries(**inputs_calculate_metrics)

In [None]:
%%pyinstrument
# Profile the public backtesting call when prediction intervals are requested:
# interval=[10, 90] (presumably percentiles — confirm against the skforecast
# docs), n_boot=10 and use_binned_residuals=True. The return value is discarded.
_ = backtesting_forecaster_multiseries(
    forecaster           = forecaster,
    series               = series,
    cv                   = cv,
    metric               = "mean_absolute_error",
    interval             = [10, 90],
    n_boot               = 10,
    use_binned_residuals = True,
    show_progress        = False
)

In [None]:
# Toy example: two short daily series plus a long-format copy that plays the
# role of the backtesting predictions.
# NOTE: this rebinds `series`, replacing the large frame used by earlier cells.
dates = pd.date_range(start='2020-01-01', periods=10, freq='D')
series = pd.DataFrame(
    {'level_1': np.random.rand(10), 'level_2': np.random.rand(10)},
    index=dates
)

# Long format indexed by ('idx', 'level') with a single 'pred' column. It is
# built from a copy taken before the NaNs below are written, so 'pred' keeps
# every value.
backtest_predictions = (
    series.copy()
    .melt(ignore_index=False, value_name='pred', var_name='level')
    .rename_axis('idx', axis=0)
    .set_index('level', append=True)
)

# Remove part of the observed values of level_1
series.loc['2020-01-05':'2020-01-09', 'level_1'] = np.nan

display(series)
display(backtest_predictions)

In [None]:
backtest_levels = ['level_1', 'level_2']
backtest_predictions_grouped = backtest_predictions.groupby('level', sort=False)

# For each requested level, set to NaN every prediction whose timestamp has no
# observed (non-NaN) value in `series`. The intermediate indexes are printed
# for inspection.
for level, indices in backtest_predictions_grouped.groups.items():
    if level not in backtest_levels:
        continue

    observed_index = series[level].dropna().index
    print(observed_index)

    # Same timestamps, expressed as ('idx', 'level') pairs so they can be
    # compared with the MultiIndex of the predictions.
    observed_index = pd.MultiIndex.from_product(
        [observed_index, [level]],
        names=['idx', 'level']
    )
    print(observed_index)

    missing_index = indices.difference(observed_index, sort=False)
    print(missing_index)

    backtest_predictions.loc[missing_index, 'pred'] = np.nan

# Back to a frame indexed by timestamp only, with 'level' as a regular column.
backtest_predictions = backtest_predictions.reset_index('level').rename_axis(None, axis=0)

backtest_predictions

In [None]:
# Baseline timing: 500 calls of sklearn's mean_absolute_error on NumPy arrays
from sklearn.metrics import mean_absolute_error
import time

# 100_000 observations per series
y_true = pd.Series(np.random.normal(0, 1, 100_000))
y_pred = pd.Series(np.random.normal(0, 1, 100_000))

# perf_counter() is the clock intended for measuring elapsed time; time.time()
# follows the wall clock and can be adjusted while the loop runs.
start = time.perf_counter()
for _ in range(500):
    mean_absolute_error(y_true.to_numpy(), y_pred.to_numpy())
print(time.perf_counter() - start)

In [None]:
from skforecast.metrics import add_y_train_argument

# Metric as wrapped by skforecast's add_y_train_argument
custom_metric = add_y_train_argument(mean_absolute_error)

# Time the wrapped metric itself, so the wrapper built above is what gets
# measured. Inputs are passed as pandas Series here, not NumPy arrays.
start = time.perf_counter()
for _ in range(500):
    custom_metric(y_true, y_pred)
print(time.perf_counter() - start)

In [None]:
# Small frame with three categories to try out GroupBy.get_group
data = {
    'Category': list('ABABACC'),
    'Value': list(range(10, 80, 10)),
}

df = pd.DataFrame(data)

# Group the 'Value' column by 'Category' and pull out the rows of category 'A'
grouped = df.groupby(by='Category', as_index=False)['Value']
grouped.get_group('A')