In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Insert the parent of the current working directory into the module search
# path at position 1 (index 0 is left untouched) and print it for reference.
# NOTE(review): presumably this makes `import skforecast` below resolve to a
# local checkout living in that directory — confirm the notebook is always
# started from a sub-folder of the repository.
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

import numpy as np
import pandas as pd
import skforecast

print(skforecast.__version__)

c:\Users\jaesc2\GitHub\skforecast
0.20.0


In [2]:
import numpy as np
import pandas as pd
import skforecast
from sklearn.linear_model import LinearRegression
from skforecast.recursive import ForecasterRecursive, ForecasterRecursiveMultiSeries
from skforecast.preprocessing import RollingFeatures
import time
import warnings
from skforecast.exceptions import DataTransformationWarning

warnings.simplefilter('ignore', category=DataTransformationWarning)  

In [3]:
# Synthetic benchmark data: a dictionary with 1000 daily time series of 1000
# standard-normal observations each, all sharing a single DatetimeIndex.
# A seeded generator keeps the data (and therefore every model fitted on it)
# identical across kernel restarts, so timings are measured on the same inputs.
data_rng = np.random.default_rng(seed=123)
series_index = pd.date_range(start='2000-01-01', periods=1000, freq='D')
series = {
    f'series_{i}': pd.Series(data_rng.standard_normal(1000), index=series_index)
    for i in range(1000)
}

# Single series used by the single-series forecaster benchmarks.
y = series['series_0']

In [3]:
# Baseline benchmark: single-series recursive forecaster built on lags only.
forecaster = ForecasterRecursive(estimator=LinearRegression(), lags=48)
forecaster.fit(y=y, store_in_sample_residuals=True)

# Wall-clock time of one bootstrapped prediction. The return value is
# discarded on purpose: only the elapsed time is of interest here.
start = time.perf_counter()
forecaster.predict_bootstrapping(steps=48, n_boot=500)
end = time.perf_counter()
print(f'Time taken: {end - start} seconds')

Time taken: 2.7000052479997976 seconds


In [4]:
# Single-series benchmark with four rolling statistics added on top of the lags.
window_features = RollingFeatures(window_sizes=48, stats=['mean', 'std', 'min', 'max'])
forecaster = ForecasterRecursive(
    estimator=LinearRegression(),
    lags=48,
    window_features=window_features,
)
forecaster.fit(y=y, store_in_sample_residuals=True)

# Wall-clock time of one bootstrapped prediction (result discarded).
start = time.perf_counter()
forecaster.predict_bootstrapping(steps=48, n_boot=500)
end = time.perf_counter()
print(f'Time taken: {end - start} seconds')

Time taken: 4.429095293999126 seconds


In [9]:
# Multi-series forecaster (lags + rolling statistics) fitted on every series.
window_features = RollingFeatures(window_sizes=48, stats=['mean', 'std', 'min', 'max'])
forecaster = ForecasterRecursiveMultiSeries(
    estimator=LinearRegression(),
    lags=48,
    window_features=window_features,
)
forecaster.fit(series=series, store_in_sample_residuals=True)

# Benchmark configuration.
steps, n_boot = 24, 100
use_in_sample_residuals, use_binned_residuals = True, False

# Build the inputs that the private recursive bootstrapping routine expects,
# so that routine can be timed in isolation in the following cells.
last_window, exog_values_dict, levels, prediction_index = forecaster._create_predict_inputs(
    steps=steps,
    levels=None,
    last_window=None,
    exog=None,
    predict_probabilistic=True,
    use_in_sample_residuals=use_in_sample_residuals,
    use_binned_residuals=use_binned_residuals,
)

# Only the attributes of the selected residual source are accessed.
residuals = (
    forecaster.in_sample_residuals_
    if use_in_sample_residuals
    else forecaster.out_sample_residuals_
)
residuals_by_bin = (
    forecaster.in_sample_residuals_by_bin_
    if use_in_sample_residuals
    else forecaster.out_sample_residuals_by_bin_
)

n_levels = len(levels)
rng = np.random.default_rng(seed=123)

if not use_binned_residuals:
    # One (steps, n_boot) draw per level, stored as (steps, n_levels, n_boot).
    sampled_residuals = np.full(
        shape=(steps, n_levels, n_boot), fill_value=np.nan, order='C', dtype=float
    )
    for level_pos, level_name in enumerate(levels):
        level_residuals = residuals.get(level_name, residuals['_unknown_level'])
        sampled_residuals[:, level_pos, :] = rng.choice(
            a=level_residuals, size=(steps, n_boot), replace=True
        )
else:
    # 4D array filled in place: (n_bins, steps, n_boot, n_levels).
    # The bin-outer / level-inner iteration order is kept as is because the
    # sequence of RNG draws (and hence reproducibility) depends on it.
    n_bins = forecaster.binner_kwargs['n_bins']
    sampled_residuals = np.empty(
        (n_bins, steps, n_boot, n_levels), order='C', dtype=float
    )
    for bin_idx in range(n_bins):
        for level_pos, level_name in enumerate(levels):
            level_bins = residuals_by_bin.get(
                level_name, residuals_by_bin['_unknown_level']
            )
            sampled_residuals[bin_idx, :, :, level_pos] = rng.choice(
                a=level_bins[bin_idx], size=(steps, n_boot), replace=True
            )

boot_columns = [f"pred_boot_{i}" for i in range(n_boot)]

In [10]:
%%timeit

with warnings.catch_warnings():
    # Suppress the "X does not have valid feature names" UserWarning only for
    # the duration of the timed call; the filter is undone on leaving the block.
    warnings.filterwarnings(
        action="ignore",
        message="X does not have valid feature names",
        category=UserWarning,
    )
    # Time the current (reference) implementation on the prepared inputs.
    forecaster._recursive_predict_bootstrapping(
        steps=steps,
        levels=levels,
        last_window=last_window,
        n_boot=n_boot,
        sampled_residuals=sampled_residuals,
        use_binned_residuals=use_binned_residuals,
        exog_values_dict=exog_values_dict,
    )

3.63 s ± 44.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:


    def _recursive_predict_bootstrapping_new(
        self,
        steps: int,
        levels: list,
        last_window: pd.DataFrame,
        n_boot: int,
        sampled_residuals: np.ndarray,
        use_binned_residuals: bool,
        exog_values_dict: dict[str, np.ndarray] | None = None,
    ) -> np.ndarray:
        """
        Vectorized recursive bootstrapping for all levels and all bootstrap
        iterations at once.

        The rolling window is stored as `(window_size + steps, n_boot, n_levels)`.
        Its last two axes follow the row order of the feature matrix
        (`row = boot * n_levels + level`), so the window-feature input is
        obtained with a plain reshape instead of a transpose + reshape.

        NOTE(review): a ~4x speedup was previously quoted here, but the
        `%%timeit` cells of the benchmark notebook show roughly 1.3x
        (3.63 s vs 2.84 s per call). Re-measure before quoting a figure.

        Parameters
        ----------
        steps : int
            Number of steps to predict.
        levels : list
            Names of the series (levels) to predict. Their order defines the
            level axis of every array handled here.
        last_window : pandas DataFrame
            Values used to create the predictors of the first step. It is
            converted with `to_numpy()` and read as `(window_size, n_levels)`.
        n_boot : int
            Number of bootstrap iterations.
        sampled_residuals : numpy ndarray
            Residuals added to the predictions at each step. Indexed as
            `(steps, n_levels, n_boot)` when `use_binned_residuals` is False
            and as `(n_bins, steps, n_boot, n_levels)` when it is True.
        use_binned_residuals : bool
            If True, the residual added to each prediction is selected with
            the bin that the level's binner (`self.binner`) assigns to that
            prediction.
        exog_values_dict : dict, default None
            Exogenous values per step, looked up with keys `1, ..., steps`.
            Each value is tiled `n_boot` times along the rows before being
            written into the feature matrix.

        Returns
        -------
        boot_predictions : numpy ndarray
            Bootstrapped predictions with shape `(steps, n_levels, n_boot)`.

        """

        original_device = set_cpu_gpu_device(estimator=self.estimator, device='cpu')

        # The estimator device is restored in `finally` so that an exception
        # raised while predicting does not leave the estimator switched to CPU.
        try:
            n_levels = len(levels)
            n_lags = len(self.lags) if self.lags is not None else 0
            n_window_features = (
                len(self.X_train_window_features_names_out_)
                if self.window_features is not None
                else 0
            )
            n_autoreg = n_lags + n_window_features
            n_exog = len(self.X_train_exog_names_out_) if exog_values_dict is not None else 0

            # Rows of `features` are ordered boot-major, level-minor:
            # [level0_boot0, level1_boot0, ..., levelN_boot0, level0_boot1, ...]
            n_samples = n_levels * n_boot

            if self.encoding is not None:
                if self.encoding == "onehot":
                    levels_encoded = np.zeros(
                        (n_levels, len(self.X_train_series_names_in_)), dtype=float
                    )
                    for i, level in enumerate(levels):
                        if level in self.X_train_series_names_in_:
                            levels_encoded[i, self.X_train_series_names_in_.index(level)] = 1.
                else:
                    levels_encoded = np.array(
                        [self.encoding_mapping_.get(level, np.nan) for level in levels],
                        dtype="float64"
                    ).reshape(-1, 1)
                levels_encoded_shape = levels_encoded.shape[1]
                # Repeat the per-level encoding once per bootstrap iteration
                levels_encoded = np.tile(levels_encoded, (n_boot, 1))
            else:
                levels_encoded_shape = 0

            # Column layout of `features`: [autoreg | level encoding | exog]
            exog_start = n_autoreg + levels_encoded_shape
            features_shape = exog_start + n_exog
            features = np.full(
                shape=(n_samples, features_shape), fill_value=np.nan, order='C', dtype=float
            )
            if self.encoding is not None:
                features[:, n_autoreg:exog_start] = levels_encoded

            boot_predictions = np.full(
                shape=(steps, n_levels, n_boot), fill_value=np.nan, order='C', dtype=float
            )

            # Rolling window laid out as (window_size + steps, n_boot, n_levels).
            # Every bootstrap iteration starts from the same last_window values.
            last_window = last_window.to_numpy()
            window_size = last_window.shape[0]
            last_window_boot = np.empty(
                (window_size + steps, n_boot, n_levels), dtype=float, order='C'
            )
            last_window_boot[:window_size, :, :] = last_window[:, np.newaxis, :]
            last_window_boot[window_size:, :, :] = np.nan

            # Loop-invariant index used for the binned residual lookup
            boot_indices = np.arange(n_boot)

            for step in range(steps):

                if self.lags is not None:
                    lags_indices = window_size + step - self.lags
                    # lagged_values shape: (n_lags, n_boot, n_levels)
                    lagged_values = last_window_boot[lags_indices, :, :]
                    # (n_boot, n_levels, n_lags) -> (n_boot x n_levels, n_lags)
                    features[:, :n_lags] = (
                        lagged_values.transpose(1, 2, 0).reshape(n_samples, n_lags)
                    )

                if self.window_features is not None:
                    wf_col_offset = n_lags
                    for wf in self.window_features:
                        wf_in = last_window_boot[:window_size + step, :, :]
                        # Columns already follow the row order of `features`,
                        # so a direct reshape is enough (no transpose).
                        wf_in = wf_in.reshape(window_size + step, n_samples)
                        wf_out = wf.transform(wf_in)
                        n_wf_cols = wf_out.shape[1]
                        features[:, wf_col_offset:wf_col_offset + n_wf_cols] = wf_out
                        wf_col_offset += n_wf_cols

                if exog_values_dict is not None:
                    # Explicit start column: `-n_exog:` would select every
                    # column if n_exog were 0.
                    features[:, exog_start:] = np.tile(
                        exog_values_dict[step + 1], (n_boot, 1)
                    )

                pred = self.estimator.predict(features)
                # (n_boot x n_levels,) -> (n_levels, n_boot)
                pred = pred.reshape(n_boot, n_levels).T

                # NOTE: CatBoost makes the input array read-only.
                if not features.flags.writeable:
                    features.flags.writeable = True

                if use_binned_residuals:
                    # sampled_residuals shape: (n_bins, steps, n_boot, n_levels)
                    for j, level in enumerate(levels):
                        binner = self.binner.get(level, self.binner['_unknown_level'])
                        predicted_bins = binner.transform(pred[j, :]).astype(int)
                        pred[j, :] += sampled_residuals[predicted_bins, step, boot_indices, j]
                else:
                    # sampled_residuals shape: (steps, n_levels, n_boot)
                    pred += sampled_residuals[step, :, :]

                boot_predictions[step, :, :] = pred

                # Window rows are (n_boot, n_levels), hence the transpose
                last_window_boot[window_size + step, :, :] = pred.T
        finally:
            set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return boot_predictions

In [None]:


        original_device = set_cpu_gpu_device(estimator=self.estimator, device='cpu')

        n_levels = len(levels)
        n_lags = len(self.lags) if self.lags is not None else 0
        n_window_features = (
            len(self.X_train_window_features_names_out_)
            if self.window_features is not None
            else 0
        )
        n_autoreg = n_lags + n_window_features
        n_exog = len(self.X_train_exog_names_out_) if exog_values_dict is not None else 0

        # Total samples per step: n_boot × n_levels
        # Row ordering: [level0_boot0, level1_boot0, ..., levelN_boot0, level0_boot1, ...]
        n_samples = n_levels * n_boot

        # Build level encoding (repeated for all bootstrap samples)
        if self.encoding is not None:
            if self.encoding == "onehot":
                levels_encoded = np.zeros(
                    (n_levels, len(self.X_train_series_names_in_)), dtype=float
                )
                for i, level in enumerate(levels):
                    if level in self.X_train_series_names_in_:
                        levels_encoded[i, self.X_train_series_names_in_.index(level)] = 1.
            else:
                levels_encoded = np.array(
                    [self.encoding_mapping_.get(level, np.nan) for level in levels],
                    dtype="float64"
                ).reshape(-1, 1)
            levels_encoded_shape = levels_encoded.shape[1]
            # Tile to (n_boot × n_levels, encoded_shape): pattern repeats n_boot times
            levels_encoded = np.tile(levels_encoded, (n_boot, 1))
        else:
            levels_encoded_shape = 0

        features_shape = n_autoreg + levels_encoded_shape + n_exog
        features = np.full(
            shape=(n_samples, features_shape), fill_value=np.nan, order='C', dtype=float
        )
        if self.encoding is not None:
            features[:, n_autoreg: n_autoreg + levels_encoded_shape] = levels_encoded

        boot_predictions = np.full(
            shape=(steps, n_levels, n_boot), fill_value=np.nan, order='C', dtype=float
        )

        # Expand last_window to 3D: (window_size + steps, n_levels, n_boot)
        # All bootstrap samples start with identical last_window values
        last_window = last_window.to_numpy()
        window_size = last_window.shape[0]
        last_window_boot = np.empty(
            (window_size + steps, n_levels, n_boot), dtype=float, order='C'
        )
        last_window_boot[:window_size, :, :] = last_window[:, :, np.newaxis]
        last_window_boot[window_size:, :, :] = np.nan

        for step in range(steps):

            if self.lags is not None:
                lags_indices = window_size + step - self.lags
                # lagged_values shape: (n_lags, n_levels, n_boot)
                lagged_values = last_window_boot[lags_indices, :, :]
                # Reshape to (n_boot x n_levels, n_lags) with correct row ordering
                features[:, :n_lags] = lagged_values.transpose(2, 1, 0).reshape(n_samples, n_lags)

            if self.window_features is not None:
                wf_col_offset = n_lags
                for wf in self.window_features:
                    wf_in = last_window_boot[:window_size + step, :, :]
                    # Reshape to (window_length, n_samples) with correct column ordering
                    wf_in = wf_in.transpose(0, 2, 1).reshape(window_size + step, n_samples)
                    wf_out = wf.transform(wf_in)
                    n_wf_cols = wf_out.shape[1]
                    features[:, wf_col_offset:wf_col_offset + n_wf_cols] = wf_out
                    wf_col_offset += n_wf_cols

            if exog_values_dict is not None:
                # Reshape (n_levels, n_exog) to (n_boot × n_levels, n_exog)
                features[:, -n_exog:] = np.tile(exog_values_dict[step + 1], (n_boot, 1))

            pred = self.estimator.predict(features)
            # Reshape from (n_boot × n_levels,) to (n_levels, n_boot)
            pred = pred.reshape(n_boot, n_levels).T
            
            # NOTE: CatBoost makes the input array read-only.
            if not features.flags.writeable:
                features.flags.writeable = True

            if use_binned_residuals:
                boot_indices = np.arange(n_boot)
                # Vectorized residual lookup for all levels and boots
                # sampled_residuals shape: (n_bins, steps, n_boot, n_levels)
                for j, level in enumerate(levels):
                    binner = self.binner.get(level, self.binner['_unknown_level'])
                    # Transform all predictions for this level at once (n_boot predictions)
                    predicted_bins = binner.transform(pred[j, :]).astype(int)
                    # Vectorized lookup: sampled_residuals[predicted_bins, step, boot_indices, j]
                    pred[j, :] += sampled_residuals[predicted_bins, step, boot_indices, j]
            else:
                # sampled_residuals shape: (steps, n_levels, n_boot)
                pred += sampled_residuals[step, :, :]

            boot_predictions[step, :, :] = pred

            # Update last_window_boot with new predictions for next step
            last_window_boot[window_size + step, :, :] = pred

        set_cpu_gpu_device(estimator=self.estimator, device=original_device)

        return boot_predictions

In [11]:
%%timeit

with warnings.catch_warnings():
    # Suppress the "X does not have valid feature names" UserWarning only for
    # the duration of the timed call; the filter is undone on leaving the block.
    warnings.filterwarnings(
        action="ignore",
        message="X does not have valid feature names",
        category=UserWarning,
    )
    # Time the candidate implementation on the same prepared inputs.
    # NOTE(review): assumes `_recursive_predict_bootstrapping_new` is defined in
    # the (autoreloaded) skforecast source; the draft cell in this notebook is
    # never bound to the forecaster class — confirm before a fresh run.
    forecaster._recursive_predict_bootstrapping_new(
        steps=steps,
        levels=levels,
        last_window=last_window,
        n_boot=n_boot,
        sampled_residuals=sampled_residuals,
        use_binned_residuals=use_binned_residuals,
        exog_values_dict=exog_values_dict,
    )

2.84 s ± 98.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [6]:
# End-to-end multi-series benchmark through the public API
# (lags + rolling statistics, fitted on every series).
window_features = RollingFeatures(window_sizes=48, stats=['mean', 'std', 'min', 'max'])
forecaster = ForecasterRecursiveMultiSeries(
    estimator=LinearRegression(),
    lags=48,
    window_features=window_features,
)
forecaster.fit(series=series, store_in_sample_residuals=True)

# Wall-clock time of one bootstrapped prediction (result discarded).
start = time.perf_counter()
forecaster.predict_bootstrapping(steps=10, n_boot=100)
end = time.perf_counter()
print(f'Time taken: {end - start} seconds')

Time taken: 28.04260306800097 seconds
