In [59]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
path = str(Path.cwd().parent)
print(path)
sys.path.insert(1, path)

import numpy as np
import pandas as pd
import skforecast

print(skforecast.__version__)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
/home/joaquin/Documents/GitHub/skforecast
0.19.1


In [60]:
from sklearn.datasets import make_regression
from sklearn.linear_model import Ridge
from lightgbm import LGBMRegressor
from xgboost import XGBRegressor
import time

In [61]:
# Synthetic regression data set used to fit the models: 1000 samples with
# 20 features, generated with a fixed seed.
X, y = make_regression(
    n_samples=1000,
    n_features=20,
    noise=0.1,
    random_state=42,
)

# Single-row 2D slice used as the prediction input; displayed as the cell output.
X_predict = X[0:1]
X_predict

array([[ 0.22584183,  1.55137772, -0.10734682,  0.85969505, -0.94296337,
        -1.09662504, -1.19716659, -1.73376709, -0.95004188,  1.27405964,
         1.6729897 ,  1.47737256,  0.33512613, -0.6211348 , -1.52510564,
         1.01109701,  1.47818394, -1.35154745,  0.3646018 ,  0.13316649]])

In [62]:
# Compare the latency of `Ridge.predict` with a hand-written
# `X @ coef_ + intercept_` for the rows held in `X_predict`.
results = []
n_iterations = 1000

# Train Ridge model
model_ridge = Ridge(alpha=1.0)
model_ridge.fit(X, y)

# Sanity check, outside the timed regions: the manual dot product must
# reproduce the estimator's prediction, otherwise the speedup below would
# compare two different computations. It also warms up both code paths.
np.testing.assert_allclose(
    np.dot(X_predict, model_ridge.coef_) + model_ridge.intercept_,
    model_ridge.predict(X_predict),
    rtol=1e-6,
    atol=1e-8,
)

# Benchmark sklearn predict
start = time.perf_counter()
for _ in range(n_iterations):
    pred = model_ridge.predict(X_predict)
end = time.perf_counter()
sklearn_time = end - start

# Benchmark numpy dot product
start = time.perf_counter()
for _ in range(n_iterations):
    pred = np.dot(X_predict, model_ridge.coef_) + model_ridge.intercept_
end = time.perf_counter()
numpy_time = end - start

speedup = sklearn_time / numpy_time
results.append({
    # Derived from the input rather than hard-coded, so the reported batch
    # size stays correct if `X_predict` is changed.
    'batch_size': X_predict.shape[0],
    'sklearn_time': sklearn_time,
    'numpy_time': numpy_time,
    'speedup': speedup
})

results_df = pd.DataFrame(results)
print(f"\nPerformance Comparison ({n_iterations} iterations):")
print("="*60)
print(results_df.to_string(index=False))


Performance Comparison (1000 iterations):
 batch_size  sklearn_time  numpy_time   speedup
          1       0.05641    0.001965 28.703614


In [63]:
import warnings

# Compare the latency of `LGBMRegressor.predict` with calling `predict`
# on the fitted booster directly, for a single-row input.
X_predict = X[:1]
n_iterations = 1000

model_lightgbm = LGBMRegressor(n_estimators=100, random_state=42, verbose=-1)
model_lightgbm.fit(X, y)

# Resolve the booster before any timer starts so that the attribute lookup
# is not charged to the "booster" measurement.
booster = model_lightgbm.booster_

# A single warnings context wraps everything, so its setup/teardown cost is
# not included in either timed region.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")

    # Sanity check (untimed): both APIs must return the same prediction,
    # otherwise the comparison is meaningless. Also warms up both paths.
    np.testing.assert_allclose(
        booster.predict(X_predict),
        model_lightgbm.predict(X_predict),
        rtol=1e-6,
        atol=1e-8,
    )

    # Standard predict
    start = time.perf_counter()
    for _ in range(n_iterations):
        pred = model_lightgbm.predict(X_predict)
    end = time.perf_counter()
    standard_time = end - start

    # Using the booster directly (lower level API)
    start = time.perf_counter()
    for _ in range(n_iterations):
        pred = booster.predict(X_predict)
    end = time.perf_counter()
    booster_time = end - start

print(f"\nLGBMRegressor Performance ({n_iterations} iterations):")
print("="*60)
print(f"Standard predict:           {standard_time:.6f} s ({standard_time/n_iterations*1e6:.2f} µs per pred)")
print(f"Booster direct predict:     {booster_time:.6f} s ({booster_time/n_iterations*1e6:.2f} µs per pred)")
print(f"Speedup (booster vs standard):   {standard_time/booster_time:.2f}x")


LGBMRegressor Performance (1000 iterations):
Standard predict:           0.551846 s (551.85 µs per pred)
Booster direct predict:     0.060219 s (60.22 µs per pred)
Speedup (booster vs standard):   9.16x


In [64]:
# Compare the latency of three XGBoost prediction paths for a single-row
# input: `XGBRegressor.predict`, the same call with feature validation
# disabled, and `inplace_predict` on the underlying booster.
X_predict = X[:1]
n_iterations = 1000

model_xgboost = XGBRegressor(n_estimators=100, random_state=42, verbosity=0)
model_xgboost.fit(X, y)

# Fetch the booster before any timer starts so that `get_booster()` is not
# charged to the "booster" measurement.
booster = model_xgboost.get_booster()

# Sanity check (untimed): the booster path must reproduce the estimator's
# prediction, otherwise the comparison is meaningless. Also warms up both paths.
np.testing.assert_allclose(
    booster.inplace_predict(X_predict),
    model_xgboost.predict(X_predict),
    rtol=1e-5,
    atol=1e-6,
)

# Standard predict
start = time.perf_counter()
for _ in range(n_iterations):
    pred = model_xgboost.predict(X_predict)
end = time.perf_counter()
standard_time = end - start

# Standard predict with feature validation disabled
start = time.perf_counter()
for _ in range(n_iterations):
    pred = model_xgboost.predict(X_predict,  validate_features=False)
end = time.perf_counter()
standard_no_validation_time = end - start

# Using the booster directly (lower level API)
start = time.perf_counter()
for _ in range(n_iterations):
    pred = booster.inplace_predict(X_predict)
end = time.perf_counter()
booster_time = end - start

# Header names the model actually benchmarked in this cell (it previously
# said "LGBMRegressor", carried over from the LightGBM cell).
print(f"\nXGBRegressor Performance ({n_iterations} iterations):")
print("="*60)
print(f"Standard predict:                 {standard_time:.6f} s ({standard_time/n_iterations*1e6:.2f} µs per pred)")
print(f"Standard predict (no validation): {standard_no_validation_time:.6f} s ({standard_no_validation_time/n_iterations*1e6:.2f} µs per pred)")
print(f"Booster direct predict:           {booster_time:.6f} s ({booster_time/n_iterations*1e6:.2f} µs per pred)")
print(f"Speedup (booster vs standard):    {standard_time/booster_time:.2f}x")


LGBMRegressor Performance (1000 iterations):
Standard predict:                 0.410700 s (410.70 µs per pred)
Standard predict (no validation): 0.384469 s (384.47 µs per pred)
Booster direct predict:           0.263982 s (263.98 µs per pred)
Speedup (booster vs standard):    1.56x
