In [52]:
import time
import platform
import psutil
import threadpoolctl
import sklearn
import numpy as np
import pandas as pd
import lightgbm
import skforecast
from packaging.version import parse as parse_version

from lightgbm import LGBMRegressor
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from skforecast.utils import check_predict_input
from skforecast.recursive import ForecasterRecursive
from skforecast.model_selection import TimeSeriesFold, backtesting_forecaster, backtesting_forecaster_multiseries

%load_ext pyinstrument
%load_ext line_profiler

The pyinstrument extension is already loaded. To reload it, use:
  %reload_ext pyinstrument
The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler


In [53]:
# !pip install pyinstrument
# !pip install line_profiler
# !pip install lightgbm==4.6.0
# !pip install -U scikit-learn==1.6.1 lightgbm==4.6.0 pandas==2.2.3 numpy==2.1.3

# conda create -n skforecast_benchmark_3_12_9 python=3.12.9

In [54]:
# Environment report: library versions, host/OS details and threading back-ends,
# printed so the timings below can be traced back to the setup that produced them.
print(f"Python version: {platform.python_version()}")
print(f"skforecast version: {skforecast.__version__}")
print(f"scikit-learn version: {sklearn.__version__}")
print(f"lightgbm version: {lightgbm.__version__}")
print(f"pandas version: {pd.__version__}")
print(f"numpy version: {np.__version__}")
print(f"Computer network name: {platform.node()}")
print(f"Processor type: {platform.processor()}")
print(f"Platform type: {platform.platform()}")
print(f"Operating system: {platform.system()}")
print(f"Operating system release: {platform.release()}")
print(f"Operating system version: {platform.version()}")
print(f"Number of physical cores: {psutil.cpu_count(logical=False)}")
print(f"Number of logical cores: {psutil.cpu_count(logical=True)}")
print("Threadpools:", threadpoolctl.threadpool_info())
# Detailed BLAS info. The previous `numpy.__config__.get_info('blas_opt_info')`
# call is not available in current NumPy builds, so it always fell through to the
# `except` branch; `np.show_config(mode="dicts")` returns the build configuration
# as a dictionary instead.
try:
    build_config = np.show_config(mode="dicts")
    print("BLAS:", build_config["Build Dependencies"]["blas"])
except Exception as e:
    # Best-effort diagnostic: e.g. an older NumPy without the `mode` argument,
    # or a build that carries no BLAS metadata.
    print("No BLAS info:", e)

Python version: 3.12.11
skforecast version: 0.16.0
scikit-learn version: 1.7.1
lightgbm version: 4.6.0
pandas version: 2.3.1
numpy version: 2.2.6
Computer network name: joaquin-HP-ProBook-440-G6
Processor type: x86_64
Platform type: Linux-6.14.0-27-generic-x86_64-with-glibc2.39
Operating system: Linux
Operating system release: 6.14.0-27-generic
Operating system version: #27~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Tue Jul 22 17:38:49 UTC 2
Number of physical cores: 4
Number of logical cores: 8
Threadpools: [{'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 8, 'prefix': 'libscipy_openblas', 'filepath': '/home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12/site-packages/numpy.libs/libscipy_openblas64_-56d6093b.so', 'version': '0.3.29', 'threading_layer': 'pthreads', 'architecture': 'Haswell'}, {'user_api': 'blas', 'internal_api': 'openblas', 'num_threads': 8, 'prefix': 'libscipy_openblas', 'filepath': '/home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12

In [55]:
# Mock data for benchmarking: an hourly random series plus calendar features
# ==========================================================
def _calendar_features(index):
    """Return a frame indexed by `index` with day-of-week, ISO week and month columns."""
    return pd.DataFrame(index=index).assign(
        day_of_week=lambda df: df.index.dayofweek,
        week_of_year=lambda df: df.index.isocalendar().week.astype(int),
        month=lambda df: df.index.month,
    )


len_series = 2000
rng = np.random.default_rng(321)
y = pd.Series(
    data=rng.normal(loc=20, scale=5, size=len_series),
    index=pd.date_range(start='2010-01-01', periods=len_series, freq='h'),
    name='y',
)
# The generator is re-created here, leaving `rng` in its freshly seeded state.
rng = np.random.default_rng(321)

# Exogenous features aligned with the training series.
exog = _calendar_features(y.index)

# Exogenous features for the 100 hours immediately after the training range.
exog_prediction = _calendar_features(
    pd.date_range(
        start=exog.index.max() + pd.Timedelta(hours=1),
        periods=100,
        freq='h',
    )
)

In [56]:
import hashlib

def hash_dataframe(df):
    """
    Return a SHA-256 hex digest for a pandas object.

    Every row (index included) is hashed with `pd.util.hash_pandas_object`; the
    raw bytes of those per-row hashes are then fed to SHA-256.

    Parameters
    ----------
    df : pandas object accepted by `pd.util.hash_pandas_object`
        The notebook calls this with both a Series and DataFrames.

    Returns
    -------
    str
        Hexadecimal SHA-256 digest.
    """
    per_row_hashes = pd.util.hash_pandas_object(df, index=True)
    digest = hashlib.sha256()
    digest.update(per_row_hashes.to_numpy().tobytes())
    return digest.hexdigest()

# Fingerprint the three benchmark inputs and show their dtypes.
hash_y, hash_exog, hash_exog_prediction = map(
    hash_dataframe, (y, exog, exog_prediction)
)
print(f"Hash of y: {hash_y}")
print(f"Hash of exog: {hash_exog}")
print(f"Hash of exog_prediction: {hash_exog_prediction}")
display(y.dtypes, exog.dtypes, exog_prediction.dtypes)

Hash of y: f1ba70f786d503ec78f1cd228723bf3d073a5dfe3345f2faa26800d02cabe439
Hash of exog: 10fb78eb4190dd80df8de7bff30fa2fc8e974c5da5064ef2eaf6b69a5c17cf3d
Hash of exog_prediction: b533b86d8d547d466cdb52ceba8ae4ce5261765906d976d76078c6f2cfa0c5e4


dtype('float64')

day_of_week     int32
week_of_year    int64
month           int32
dtype: object

day_of_week     int32
week_of_year    int64
month           int32
dtype: object

In [57]:
# Choose the regressor to benchmark. Both estimators used to be assigned to
# `regressor` back to back, so the LightGBM instance was silently discarded.
# The flag keeps the effective default (LinearRegression) and makes the switch explicit.
USE_LIGHTGBM = False
regressor = (
    LGBMRegressor(random_state=8520, verbose=-1)
    if USE_LIGHTGBM
    else LinearRegression()
)

# Recursive forecaster with 50 lags and no transformers, fitted on the mock data.
forecaster = ForecasterRecursive(
    regressor=regressor,
    lags=50,
    transformer_y=None,
    transformer_exog=None,
)
forecaster.fit(y=y, exog=exog)

## skforecast 0.16.0

In [7]:
%%timeit -n 200 -r 100
# Time `check_predict_input` in isolation: 200 loops per run, 100 runs.
# Every argument is read from the fitted `forecaster`, except `steps` (100) and
# `exog`, which is the 100-row `exog_prediction` frame.

check_predict_input(
    forecaster_name  = type(forecaster).__name__,
    steps            = 100,
    is_fitted        = forecaster.is_fitted,
    exog_in_         = forecaster.exog_in_,
    index_type_      = forecaster.index_type_,
    index_freq_      = forecaster.index_freq_,
    window_size      = forecaster.window_size,
    last_window      = forecaster.last_window_,
    exog             = exog_prediction,
    exog_names_in_   = forecaster.exog_names_in_,
    interval         = None
)

265 μs ± 78.5 μs per loop (mean ± std. dev. of 100 runs, 200 loops each)


In [8]:
# skforecast 0.16.0
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    """
    Call `check_predict_input` once with arguments taken from `forecaster`.

    Thin wrapper used as the statement executed under the line profiler.

    Parameters
    ----------
    forecaster : object
        Must expose `is_fitted`, `exog_in_`, `index_type_`, `index_freq_`,
        `window_size`, `last_window_` and `exog_names_in_`.
    steps : forwarded unchanged as `steps`.
    exog : forwarded unchanged as `exog`.

    Returns
    -------
    None
    """
    validation_kwargs = {
        "forecaster_name": type(forecaster).__name__,
        "steps": steps,
        "is_fitted": forecaster.is_fitted,
        "exog_in_": forecaster.exog_in_,
        "index_type_": forecaster.index_type_,
        "index_freq_": forecaster.index_freq_,
        "window_size": forecaster.window_size,
        "last_window": forecaster.last_window_,
        "exog": exog,
        "exog_names_in_": forecaster.exog_names_in_,
        "interval": None,
    }
    check_predict_input(**validation_kwargs)


# Line-by-line profile of `check_predict_input` (the -f target) while the wrapper runs once.
%lprun -f check_predict_input funt_to_profile(forecaster, 100, exog_prediction)

Timer unit: 1e-09 s

Total time: 0.00206185 s
File: /home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12/site-packages/skforecast/utils/utils.py
Function: check_predict_input at line 773

Line #      Hits         Time  Per Hit   % Time  Line Contents
   773                                           def check_predict_input(
   774                                               forecaster_name: str,
   775                                               steps: int | list[int],
   776                                               is_fitted: bool,
   777                                               exog_in_: bool,
   778                                               index_type_: type,
   779                                               index_freq_: str,
   780                                               window_size: int,
   781                                               last_window: pd.Series | pd.DataFrame | None,
   782                                               last_

In [9]:
%%timeit -n 100 -r 150
# Time the private `_create_predict_inputs` step on its own: 100 loops per run, 150 runs.

forecaster._create_predict_inputs(
    steps=100, exog=exog_prediction
)

2.26 ms ± 611 μs per loop (mean ± std. dev. of 150 runs, 100 loops each)


In [10]:
# skforecast 0.16.0
# ==============================================================================
def funt_to_profile(steps, exog):
    """
    Call `_create_predict_inputs` on the notebook-level `forecaster` once.

    The result is discarded; the function exists only as the statement run
    under the line profiler. Returns None.
    """
    build_inputs = forecaster._create_predict_inputs
    build_inputs(steps=steps, exog=exog)


# Line-by-line profile of `forecaster._create_predict_inputs` for a 100-step call.
%lprun -f forecaster._create_predict_inputs funt_to_profile(100, exog_prediction)

Timer unit: 1e-09 s

Total time: 0.015776 s
File: /home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive.py
Function: ForecasterRecursive._create_predict_inputs at line 1098

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1098                                               def _create_predict_inputs(
  1099                                                   self,
  1100                                                   steps: int | str | pd.Timestamp, 
  1101                                                   last_window: pd.Series | pd.DataFrame | None = None,
  1102                                                   exog: pd.Series | pd.DataFrame | None = None,
  1103                                                   predict_probabilistic: bool = False,
  1104                                                   use_in_sample_residuals: bool = True,
  1105                                                   

In [11]:
%%timeit -n 20 -r 10
# Time the full public `predict` call: 20 loops per run, 10 runs.

# skforecast 0.16.0
forecaster.predict(
    steps=100, exog=exog_prediction
)

43 ms ± 6.05 ms per loop (mean ± std. dev. of 10 runs, 20 loops each)


In [12]:
# skforecast 0.16.0
# ==============================================================================
def funt_to_profile(steps, exog):
    """
    Call `predict` on the notebook-level `forecaster` once.

    The predictions are discarded; the function exists only as the statement
    run under the line profiler. Returns None.
    """
    run_predict = forecaster.predict
    run_predict(steps=steps, exog=exog)


# Line-by-line profile of `forecaster.predict` for a 100-step call.
%lprun -f forecaster.predict funt_to_profile(100, exog_prediction)

Timer unit: 1e-09 s

Total time: 0.140169 s
File: /home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive.py
Function: ForecasterRecursive.predict at line 1429

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1429                                               def predict(
  1430                                                   self,
  1431                                                   steps: int | str | pd.Timestamp,
  1432                                                   last_window: pd.Series | pd.DataFrame | None = None,
  1433                                                   exog: pd.Series | pd.DataFrame | None = None,
  1434                                                   check_inputs: bool = True
  1435                                               ) -> pd.Series:
  1436                                                   """
  1437                                                   Predict

In [16]:
import warnings

# Ignore UserWarnings whose message matches "X does not have valid feature names".
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    message=".*X does not have valid feature names.*",
)

In [17]:
# Inputs for calling `_recursive_predict` directly in the timing cell.
# The exogenous array is taken from `exog_prediction` (the 100 future rows), the
# same frame the profiling cell for this method passes. The training `exog` was
# used here before, which fed the benchmark rows from the wrong period.
last_window_values = forecaster.last_window_.to_numpy().flatten()
exog_values = exog_prediction.to_numpy()

In [18]:
%%timeit -n 20 -r 20
# Time the private recursive loop only: 20 loops per run, 20 runs.
# `last_window_values` and `exog_values` are NumPy arrays prepared in a previous cell.

# skforecast 0.16.0
forecaster._recursive_predict(
    steps=100,
    last_window_values=last_window_values,
    exog_values=exog_values,
    residuals=None,
    use_binned_residuals=True
)

11.2 ms ± 2.42 ms per loop (mean ± std. dev. of 20 runs, 20 loops each)


In [19]:
# skforecast 0.16.0
# ==============================================================================

import warnings

# Ignore UserWarnings whose message matches "X does not have valid feature names"
# (the same filter is also installed by an earlier cell of this notebook).
warnings.filterwarnings(
    action="ignore",
    category=UserWarning,
    message=".*X does not have valid feature names.*",
)


def funt_to_profile(steps, exog):
    """
    Call `_recursive_predict` on the notebook-level `forecaster` once.

    The last window stored on the forecaster is flattened to a 1-D array and
    `exog` is converted with `to_numpy()` before the call. No residuals are
    passed. The result is discarded and the function returns None.
    """
    window_values = forecaster.last_window_.to_numpy().flatten()
    exog_array = exog.to_numpy()
    forecaster._recursive_predict(
        steps=steps,
        last_window_values=window_values,
        exog_values=exog_array,
        residuals=None,
        use_binned_residuals=True,
    )

# Line-by-line profile of `forecaster._recursive_predict` for a 100-step call.
%lprun -f forecaster._recursive_predict funt_to_profile(100, exog_prediction)

Timer unit: 1e-09 s

Total time: 0.182641 s
File: /home/joaquin/miniconda3/envs/skforecast_benchmark/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive.py
Function: ForecasterRecursive._recursive_predict at line 1234

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1234                                               def _recursive_predict(
  1235                                                   self,
  1236                                                   steps: int,
  1237                                                   last_window_values: np.ndarray,
  1238                                                   exog_values: np.ndarray | None = None,
  1239                                                   residuals: np.ndarray | dict[str, np.ndarray] | None = None,
  1240                                                   use_binned_residuals: bool = True,
  1241                                               ) -> np.ndarray:
  1242                  

In [20]:
import numpy as np

# Single random float64 row with one column per lag plus one per exogenous
# variable; its memory-layout flags are printed.
rng = np.random.default_rng(321)
n_features = len(forecaster.lags) + exog.shape[1]
X_test = rng.random((1, n_features)).astype(np.float64)
print(X_test.flags)

  C_CONTIGUOUS : True
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False



In [23]:
%%timeit
# Time one call of the underlying regressor's `predict` on the single-row `X_test`.
forecaster.regressor.predict(X_test)

251 μs ± 10.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [22]:
# Raw matrix-vector sanity check: time one (200000 x 50) @ (50,) product.
# `np` and `time` come from the notebook's import cell.
# A dedicated seeded generator makes the operands reproducible, and the product is
# stored in `dot_result` so the global series `y` used to fit the forecaster is
# no longer overwritten by this cell.
n, d = 200000, 50
dot_rng = np.random.default_rng(321)
X = dot_rng.standard_normal((n, d))
w = dot_rng.standard_normal(d)
t0 = time.perf_counter()
dot_result = X @ w
dt = time.perf_counter() - t0
print("dot:", dt, "s")

dot: 0.015585570999974152 s
