# Profiling

In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'/home/joaquin/Documents/GitHub/skforecast'

In [1]:
# Optional: install the skforecast release reported in the version cell below.
# `%pip` installs into the running kernel's environment, unlike `!pip`.
# %pip install skforecast==0.15.1

In [2]:
import platform
import psutil
import skforecast
import pandas as pd
import numpy as np
import scipy
import sklearn

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor

from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.model_selection import grid_search_forecaster_multiseries
from skforecast.model_selection import bayesian_search_forecaster_multiseries
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.utils import *

from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import make_column_transformer
from skforecast.preprocessing import series_long_to_dict
from skforecast.preprocessing import exog_long_to_dict
from skforecast.datasets import fetch_dataset

%load_ext pyinstrument
%load_ext line_profiler

# System and library information

In [3]:
# Versions
# ==============================================================================
# Each entry is printed as "<library> version: <value>".
library_versions = [
    ("Python", platform.python_version()),
    ("scikit-learn", sklearn.__version__),
    ("skforecast", skforecast.__version__),
    ("pandas", pd.__version__),
    ("numpy", np.__version__),
    ("scipy", scipy.__version__),
    ("psutil", psutil.__version__),
]
for library, version in library_versions:
    print(f"{library} version: {version}")
print("")

# Computer information
# ==============================================================================
# Each entry is printed as "<label>: <value>".
machine_facts = [
    ("Computer network name", platform.node()),
    ("Machine type", platform.machine()),
    ("Processor type", platform.processor()),
    ("Platform type", platform.platform()),
    ("Operating system", platform.system()),
    ("Operating system release", platform.release()),
    ("Operating system version", platform.version()),
    ("Number of physical cores", psutil.cpu_count(logical=False)),
    ("Number of logical cores", psutil.cpu_count(logical=True)),
]
for label, value in machine_facts:
    print(f"{label}: {value}")

Python version: 3.12.9
scikit-learn version: 1.6.1
skforecast version: 0.15.1
pandas version: 2.2.3
numpy version: 2.2.5
scipy version: 1.15.2
psutil version: 5.9.0

Computer network name: joaquin-HP-ProBook-440-G6
Machine type: x86_64
Processor type: x86_64
Platform type: Linux-6.11.0-21-generic-x86_64-with-glibc2.39
Operating system: Linux
Operating system release: 6.11.0-21-generic
Operating system version: #21~24.04.1-Ubuntu SMP PREEMPT_DYNAMIC Mon Feb 24 16:52:15 UTC 2
Number of physical cores: 4
Number of logical cores: 8


# ForecasterRecursiveMultiSeries

In [4]:
def _calendar_features(index):
    """
    Build the calendar exogenous frame for a datetime index.

    Parameters
    ----------
    index : pandas DatetimeIndex
        Index the returned frame is aligned to.

    Returns
    -------
    features : pandas DataFrame
        Frame indexed by `index` with the columns `day_of_week`,
        `week_of_year` (ISO week, cast to int) and `month`.
    """
    features = pd.DataFrame(index=index)
    features['day_of_week'] = features.index.dayofweek
    features['week_of_year'] = features.index.isocalendar().week.astype(int)
    features['month'] = features.index.month
    return features


# Series: `n_series` hourly series of normally distributed noise
# ==============================================================================
n_series = 600
len_series = (2000, 5000)
rng = np.random.default_rng(321)
series_dict = {}
for idx in range(n_series):
    #n = rng.integers(low=len_series[0], high=len_series[1])
    n = 2000
    series_name = f'series_{idx}'
    noise = rng.normal(loc=20, scale=5, size=n)
    hourly_index = pd.date_range(start='2010-01-01', periods=n, freq='h')
    series_dict[series_name] = pd.Series(
        data=noise, index=hourly_index, name=series_name
    )

# Exogenous features aligned with each training series
# ==============================================================================
exog_dict = {}
rng = np.random.default_rng(321)
for series_name, series in series_dict.items():
    exog = _calendar_features(series.index)
    exog_dict[series_name] = exog

# Exogenous features for the 100 hours that follow each series
# ==============================================================================
exog_dict_prediction = {}
one_hour = pd.Timedelta(hours=1)
for series_name, series in series_dict.items():
    future_index = pd.date_range(
        start=series.index.max() + one_hour, periods=100, freq='h'
    )
    exog = _calendar_features(future_index)
    exog_dict_prediction[series_name] = exog

# Report the overall date coverage of the training and prediction windows
# ==============================================================================
train_start = np.min([s.index.min() for s in series_dict.values()])
train_end = np.max([s.index.max() for s in series_dict.values()])
print(f"Range of dates: {train_start} - {train_end}")

pred_start = np.min([e.index.min() for e in exog_dict_prediction.values()])
pred_end = np.max([e.index.max() for e in exog_dict_prediction.values()])
print(f"Range of dates for prediction: {pred_start} - {pred_end}")

Range of dates: 2010-01-01 00:00:00 - 2010-03-25 07:00:00
Range of dates for prediction: 2010-03-25 08:00:00 - 2010-03-29 11:00:00


In [5]:
# Forecaster
# ==============================================================================
# Same configuration as before, with the LightGBM regressor built on its own line.
regressor = LGBMRegressor(random_state=8520, verbose=-1)
forecaster = ForecasterRecursiveMultiSeries(
    regressor          = regressor,
    lags               = 50,
    transformer_series = StandardScaler(),
    transformer_exog   = StandardScaler(),
    encoding           = "ordinal"
)

In [6]:
# Display the skforecast version in use, for reference next to the timings.
skforecast.__version__

'0.15.1'

In [7]:
%%timeit -r 5
# Time a complete `fit` over every series and its exogenous frame (5 repeats).
forecaster.fit(series=series_dict, exog=exog_dict)

11.6 s ± 491 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)


+ 0.14.0
    + 17.7 s ± 805 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)

+ 0.15.0
    + 17.1 s ± 392 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)
    + 11.6 s ± 491 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)

+ 0.16.0
    + 17.2 s ± 391 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)
    + 11.9 s ± 622 ms per loop (mean ± std. dev. of 5 runs, 1 loop each)

In [8]:
# %%pyinstrument
# forecaster.fit(series=series_dict, exog=exog_dict)

In [9]:
# Profiling fit()
# ==============================================================================
# def funt_to_profile(forecaster, series, exog):
#     forecaster.fit(series=series, exog=exog)

# %lprun -f forecaster.fit funt_to_profile(forecaster, series_dict, exog_dict)

In [10]:
# Profiling _create_train_X_y()
# ==============================================================================
# def funt_to_profile(forecaster, series, exog):
#     forecaster._create_train_X_y(series=series, exog=exog)

# %lprun -f forecaster._create_train_X_y funt_to_profile(forecaster, series_dict, exog_dict)

In [9]:
# Profiling _create_train_X_y_single_series()
# ==============================================================================
forecaster.fit(series=series_dict, exog=exog_dict)
def funt_to_profile(forecaster, series, exog):
    (
    X_train_autoreg,
    X_train_window_features_names_out_,
    X_train_exog,
    y_train
) = forecaster._create_train_X_y_single_series(
        y = series,
        exog = exog,
        ignore_exog = False,
    )

%lprun -f forecaster._create_train_X_y_single_series funt_to_profile(forecaster, series_dict['series_0'], exog_dict['series_0'])

Timer unit: 1e-09 s

Total time: 0.00517557 s
File: /home/joaquin/miniconda3/envs/skforecast_16_py12/lib/python3.12/site-packages/skforecast/recursive/_forecaster_recursive_multiseries.py
Function: _create_train_X_y_single_series at line 836

Line #      Hits         Time  Per Hit   % Time  Line Contents
   836                                               def _create_train_X_y_single_series(
   837                                                   self,
   838                                                   y: pd.Series,
   839                                                   ignore_exog: bool,
   840                                                   exog: pd.DataFrame | None = None
   841                                               ) -> tuple[pd.DataFrame, list[str], pd.DataFrame, pd.Series]:
   842                                                   """
   843                                                   Create training matrices from univariate time series and exogenous
   8

In [11]:
# Profiling predict()
# ==============================================================================
forecaster.fit(series=series_dict, exog=exog_dict)

def funt_to_profile(forecaster, steps, exog):
    forecaster.predict(steps=steps, exog=exog, suppress_warnings=True)

%lprun -f forecaster.predict funt_to_profile(forecaster, 100, exog_dict_prediction)



Timer unit: 1e-07 s

Total time: 1.09465 s
File: c:\Users\jaesc2\GitHub\skforecast\skforecast\recursive\_forecaster_recursive_multiseries.py
Function: predict at line 2595

Line #      Hits         Time  Per Hit   % Time  Line Contents
  2595                                               def predict(
  2596                                                   self,
  2597                                                   steps: int,
  2598                                                   levels: str | list[str] | None = None,
  2599                                                   last_window: pd.DataFrame | None = None,
  2600                                                   exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame] | None = None,
  2602                                                   check_inputs: bool = True
  2603                                               ) -> pd.DataFrame:
  2604                                                   """
  2605         

In [12]:
# Profiling predict_new
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    forecaster.predict_new(steps=steps, exog=exog, suppress_warnings=True)

%lprun -f forecaster.predict_new funt_to_profile(forecaster, 100, exog_dict_prediction)

UsageError: Could not find module forecaster.predict_new.
AttributeError: 'ForecasterRecursiveMultiSeries' object has no attribute 'predict_new'


In [None]:
%%timeit
# Baseline timing: `predict` for 100 steps with the prediction exog, warnings suppressed.
forecaster.predict(steps=100, exog=exog_dict_prediction, suppress_warnings=True)



1.21 s ± 41.2 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%%timeit
# Same arguments as the `predict` timing cell, routed through `predict_new`.
# NOTE(review): `predict_new` was missing in the run that produced the
# AttributeError shown earlier in this notebook — confirm the checkout first.
forecaster.predict_new(steps=100, exog=exog_dict_prediction, suppress_warnings=True)



450 ms ± 15.7 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)




In [None]:
# Equivalence check: `predict_new` must return exactly the same frame as
# `predict`. The guard avoids an AttributeError on builds where `predict_new`
# does not exist; the skip is reported so it cannot pass unnoticed.
if hasattr(forecaster, "predict_new"):
    old = forecaster.predict(steps = 100, exog=exog_dict_prediction)
    new = forecaster.predict_new(steps = 100, exog=exog_dict_prediction)
    pd.testing.assert_frame_equal(old, new)
else:
    print("Skipping equivalence check: this skforecast build does not provide `predict_new`.")

In [13]:
# Profiling _create_predict_inputs()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    """
    Call `forecaster._create_predict_inputs` once for `%lprun`.

    Parameters
    ----------
    forecaster : object
        Forecaster exposing `_create_predict_inputs`.
    steps : int
        Forwarded as `steps`.
    exog : dict
        Forwarded as `exog`. Previously this argument was ignored and the
        global `exog_dict_prediction` was used instead.

    Returns
    -------
    None
    """
    (
        last_window,
        exog_values_dict,
        levels,
        prediction_index
    ) = forecaster._create_predict_inputs(
        steps        = steps,
        #levels       = forecaster.levels,
        #last_window  = forecaster.last_window_,
        exog         = exog,
        check_inputs = True
    )

# Line-profile `_create_predict_inputs` while the wrapper above runs one call (100 steps).
%lprun -f forecaster._create_predict_inputs funt_to_profile(forecaster, 100, exog_dict_prediction)

Timer unit: 1e-07 s

Total time: 0.66522 s
File: c:\Users\jaesc2\GitHub\skforecast\skforecast\recursive\_forecaster_recursive_multiseries.py
Function: _create_predict_inputs at line 2102

Line #      Hits         Time  Per Hit   % Time  Line Contents
  2102                                               def _create_predict_inputs(
  2103                                                   self,
  2104                                                   steps: int,
  2105                                                   levels: str | list[str] | None = None,
  2106                                                   last_window: pd.DataFrame | None = None,
  2107                                                   exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame] | None = None,
  2108                                                   predict_probabilistic: bool = False,
  2109                                                   use_in_sample_residuals: bool = True,
  2110      

In [14]:
# Profiling _create_predict_inputs_old()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    """
    Call `forecaster._create_predict_inputs_old` once for `%lprun`.

    Parameters
    ----------
    forecaster : object
        Forecaster exposing `_create_predict_inputs_old`.
    steps : int
        Forwarded as `steps`.
    exog : dict
        Forwarded as `exog`. Previously this argument was ignored and the
        global `exog_dict_prediction` was used instead.

    Returns
    -------
    None
    """
    (
        last_window,
        exog_values_dict,
        levels,
        prediction_index
    ) = forecaster._create_predict_inputs_old(
        steps        = steps,
        #levels       = forecaster.levels,
        #last_window  = forecaster.last_window_,
        exog         = exog,
        check_inputs = True
    )

# Line-profile `_create_predict_inputs_old` while the wrapper above runs one call (100 steps).
%lprun -f forecaster._create_predict_inputs_old funt_to_profile(forecaster, 100, exog_dict_prediction)

Timer unit: 1e-07 s

Total time: 0.803494 s
File: c:\Users\jaesc2\GitHub\skforecast\skforecast\recursive\_forecaster_recursive_multiseries.py
Function: _create_predict_inputs_old at line 1897

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1897                                               def _create_predict_inputs_old(
  1898                                                   self,
  1899                                                   steps: int,
  1900                                                   levels: str | list[str] | None = None,
  1901                                                   last_window: pd.DataFrame | None = None,
  1902                                                   exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame] | None = None,
  1903                                                   predict_probabilistic: bool = False,
  1904                                                   use_in_sample_residuals: bool = True,
  1

In [24]:
# Profiling check_predict_input()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):

    check_predict_input(
    forecaster_name  = type(forecaster).__name__,
    steps            = steps,
    is_fitted        = forecaster.is_fitted,
    exog_in_         = forecaster.exog_in_,
    index_type_      = forecaster.index_type_,
    index_freq_      = forecaster.index_freq_,
    window_size      = forecaster.window_size,
    last_window      = pd.DataFrame(forecaster.last_window_),
    exog             = exog,
    exog_type_in_    = forecaster.exog_type_in_,
    exog_names_in_   = forecaster.exog_names_in_,
    interval         = None,
    levels           = forecaster.series_names_in_,
    series_names_in_ = forecaster.series_names_in_,
    encoding         = forecaster.encoding
)

%lprun -f check_predict_input funt_to_profile(forecaster, 100, exog_dict_prediction)


Timer unit: 1e-09 s

Total time: 0.973173 s
File: /home/joaquin/Documents/GitHub/skforecast/skforecast/utils/utils.py
Function: check_predict_input at line 762

Line #      Hits         Time  Per Hit   % Time  Line Contents
   762                                           def check_predict_input(
   763                                               forecaster_name: str,
   764                                               steps: int | list[int],
   765                                               is_fitted: bool,
   766                                               exog_in_: bool,
   767                                               index_type_: type,
   768                                               index_freq_: str,
   769                                               window_size: int,
   770                                               last_window: pd.Series | pd.DataFrame | None,
   771                                               last_window_exog: pd.Series | pd.DataFrame | 

In [25]:
# Profiling check_predict_input()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):

    check_predict_input_new(
    forecaster_name  = type(forecaster).__name__,
    steps            = steps,
    is_fitted        = forecaster.is_fitted,
    exog_in_         = forecaster.exog_in_,
    index_type_      = forecaster.index_type_,
    index_freq_      = forecaster.index_freq_,
    window_size      = forecaster.window_size,
    last_window      = pd.DataFrame(forecaster.last_window_),
    exog             = exog,
    exog_type_in_    = forecaster.exog_type_in_,
    exog_names_in_   = forecaster.exog_names_in_,
    interval         = None,
    levels           = forecaster.series_names_in_,
    series_names_in_ = forecaster.series_names_in_,
    encoding         = forecaster.encoding
)

%lprun -f check_predict_input_new funt_to_profile(forecaster, 100, exog_dict_prediction)

Timer unit: 1e-09 s

Total time: 0.531833 s
File: /home/joaquin/Documents/GitHub/skforecast/skforecast/utils/utils.py
Function: check_predict_input_new at line 1205

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1205                                           def check_predict_input_new(
  1206                                               forecaster_name: str,
  1207                                               steps: int | list[int],
  1208                                               is_fitted: bool,
  1209                                               exog_in_: bool,
  1210                                               index_type_: type,
  1211                                               index_freq_: str,
  1212                                               window_size: int,
  1213                                               last_window: pd.Series | pd.DataFrame | None,
  1214                                               last_window_exog: pd.Series | pd.Dat

In [26]:
# Profiling _create_predict_inputs_new_new()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    """
    Call `forecaster._create_predict_inputs_new_new` once for `%lprun`.

    Parameters
    ----------
    forecaster : object
        Forecaster exposing `_create_predict_inputs_new_new`.
    steps : int
        Forwarded as `steps`.
    exog : dict
        Forwarded as `exog`. Previously this argument was ignored and the
        global `exog_dict_prediction` was used instead.

    Returns
    -------
    None
    """
    (
        last_window,
        exog_values_dict,
        levels,
        prediction_index
    ) = forecaster._create_predict_inputs_new_new(
        steps        = steps,
        #levels       = forecaster.levels,
        #last_window  = forecaster.last_window_,
        exog         = exog,
        check_inputs = True
    )

# Line-profile `_create_predict_inputs_new_new` while the wrapper above runs one call (100 steps).
%lprun -f forecaster._create_predict_inputs_new_new funt_to_profile(forecaster, 100, exog_dict_prediction)

Timer unit: 1e-09 s

Total time: 1.56319 s
File: /home/joaquin/Documents/GitHub/skforecast/skforecast/recursive/_forecaster_recursive_multiseries.py
Function: _create_predict_inputs_new_new at line 2325

Line #      Hits         Time  Per Hit   % Time  Line Contents
  2325                                               def _create_predict_inputs_new_new(
  2326                                                   self,
  2327                                                   steps: int,
  2328                                                   levels: str | list[str] | None = None,
  2329                                                   last_window: pd.DataFrame | None = None,
  2330                                                   exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataFrame] | None = None,
  2331                                                   predict_probabilistic: bool = False,
  2332                                                   use_in_sample_residuals: b

In [11]:
# Profiling _backtesting_forecaster_multiseries()
# ==============================================================================
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.model_selection._validation import _backtesting_forecaster_multiseries

cv = TimeSeriesFold(
         steps                 = 50,
         initial_train_size    = 1500,
         refit                 = False,
         fixed_train_size      = False,
         gap                   = 0,
         allow_incomplete_fold = True,
         verbose               = False
     )

def funt_to_profile(forecaster, series, exog, cv):
    
    metric, predictions = _backtesting_forecaster_multiseries(
                          forecaster    = forecaster,
                          series        = series,
                          exog          = exog,
                          cv            = cv,
                          metric        = 'mean_squared_error',
                          n_jobs        = 'auto',
                          verbose       = False,
                          show_progress = True
                      )

%lprun -f _backtesting_forecaster_multiseries funt_to_profile(forecaster, series_dict, exog_dict, cv)

  0%|          | 0/10 [00:00<?, ?it/s]

Timer unit: 1e-09 s

Total time: 80.613 s
File: /home/joaquin/Documents/GitHub/skforecast/skforecast/model_selection/_validation.py
Function: _backtesting_forecaster_multiseries at line 559

Line #      Hits         Time  Per Hit   % Time  Line Contents
   559                                           def _backtesting_forecaster_multiseries(
   560                                               forecaster: object,
   561                                               series: pd.DataFrame | dict[str, pd.Series | pd.DataFrame],
   562                                               cv: TimeSeriesFold,
   563                                               metric: str | Callable | list[str | Callable],
   564                                               levels: str | list[str] | None = None,
   565                                               add_aggregated_metric: bool = True,
   566                                               exog: pd.Series | pd.DataFrame | dict[str, pd.Series | pd.DataF

In [None]:
# Profiling align_series_and_exog_multiseries()
# ==============================================================================
# def funt_to_profile(series_dict, input_series_is_dict, exog_dict):
#     align_series_and_exog_multiseries(
#         series_dict=series_dict,
#         input_series_is_dict=input_series_is_dict,
#         exog_dict = exog_dict,
#     )

# %lprun -f align_series_and_exog_multiseries funt_to_profile(series_dict_train, True, exog_dict_train)

: 

: 

In [14]:
# Profiling check_preprocess_exog_multiseries()
# ==============================================================================
series_indexes = {k: v.index for k, v  in series_dict.items()}
series_col_names = list(series_dict.keys())

def funt_to_profile(input_series_is_dict, series_indexes, series_col_names, exog, exog_dict):
    check_preprocess_exog_multiseries(
        input_series_is_dict = input_series_is_dict,
        series_indexes = series_indexes,
        series_names_in_ = series_col_names,
        exog = exog_dict,
        exog_dict = exog_dict,
    )

%lprun -f check_preprocess_exog_multiseries funt_to_profile(True, series_indexes, series_col_names, exog, exog_dict)

Timer unit: 1e-09 s

Total time: 0.198148 s

Could not find file <string>
Are you sure you are running this program from the same directory
that you ran the profiler from?
Continuing without the function's contents.

Line #      Hits         Time  Per Hit   % Time  Line Contents
     1                                           
     2                                           
     3                                           
     4                                           
     5                                           
     6                                           
     7                                           
     8                                           
     9                                           
    10                                           
    11                                           
    12                                           
    13                                           
    14                                           
    15              