# Profiling main skforecast classes and methods

In [1]:
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
sys.path.insert(1, str(Path.cwd().parent))
str(Path.cwd().parent)

'/home/ubuntu/varios/skforecast'

In [2]:
import platform
import psutil
import skforecast
import pandas as pd
import numpy as np
import scipy
import sklearn

import numpy as np
import pandas as pd
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from lightgbm import LGBMRegressor

from skforecast.ForecasterAutoreg import ForecasterAutoreg
from skforecast.ForecasterAutoregMultiSeries import ForecasterAutoregMultiSeries
from skforecast.model_selection import backtesting_forecaster
from skforecast.model_selection import grid_search_forecaster
from skforecast.model_selection_multiseries import grid_search_forecaster_multiseries
from skforecast.model_selection_multiseries import bayesian_search_forecaster_multiseries
from skforecast.model_selection_multiseries import backtesting_forecaster_multiseries
from skforecast.utils import *
from sklearn.preprocessing import OrdinalEncoder
from sklearn.compose import make_column_transformer
from skforecast.preprocessing import series_long_to_dict
from skforecast.preprocessing import exog_long_to_dict
from skforecast.datasets import fetch_dataset

%load_ext pyinstrument
%load_ext line_profiler

# System and library information

In [3]:
# Versions
# ==============================================================================
# Label -> version string, printed in insertion order.
library_versions = {
    "Python": platform.python_version(),
    "scikit-learn": sklearn.__version__,
    "skforecast": skforecast.__version__,
    "pandas": pd.__version__,
    "numpy": np.__version__,
    "scipy": scipy.__version__,
    "psutil": psutil.__version__,
}
for library_name, library_version in library_versions.items():
    print(f"{library_name} version: {library_version}")
print("")

# Computer information
# ==============================================================================
# Label -> value as reported by `platform` / `psutil`, printed in insertion order.
system_info = {
    "Computer network name": platform.node(),
    "Machine type": platform.machine(),
    "Processor type": platform.processor(),
    "Platform type": platform.platform(),
    "Operating system": platform.system(),
    "Operating system release": platform.release(),
    "Operating system version": platform.version(),
    "Number of physical cores": psutil.cpu_count(logical=False),
    "Number of logical cores": psutil.cpu_count(logical=True),
}
for info_label, info_value in system_info.items():
    print(f"{info_label}: {info_value}")

Python version: 3.11.9
scikit-learn version: 1.3.0
skforecast version: 0.13.0
pandas version: 2.2.2
numpy version: 1.26.4
scipy version: 1.13.1
psutil version: 6.0.0

Computer network name: ip-10-2-1-218
Machine type: x86_64
Processor type: x86_64
Platform type: Linux-5.15.0-1064-aws-x86_64-with-glibc2.31
Operating system: Linux
Operating system release: 5.15.0-1064-aws
Operating system version: #70~20.04.1-Ubuntu SMP Fri Jun 14 15:42:13 UTC 2024
Number of physical cores: 4
Number of logical cores: 8


# ForecasterAutoreg

In [None]:
# Data
# ==============================================================================
# Synthetic hourly series plus a wide block of random exogenous features.
# The target is drawn before the exogenous matrix so that, for the fixed seed,
# the generated values stay the same.
len_series = 1_000
n_exog = 300
rgn = np.random.default_rng(seed=123)

y = pd.Series(
    data=rgn.random(size=len_series),
    index=pd.date_range(start="2000-01-01", periods=len_series, freq="h"),
    name="y",
)
exog = pd.DataFrame(
    data=rgn.random(size=(len_series, n_exog)),
    index=y.index,
    columns=[f"exog_{i}" for i in range(n_exog)],
)

# Same exogenous values, shifted forward by `len_series` hours so that they
# start right after the last timestamp of `y`.
exog_test = exog.copy()
exog_test.index = exog.index + pd.DateOffset(hours=len_series)
exog_test = exog_test.asfreq("h")

for shape_label, shape_obj in (("y", y), ("exog", exog), ("exog_test", exog_test)):
    print(f"Shape of {shape_label}: {shape_obj.shape}")

display(y.head(2), exog.head(2), exog_test.head(2))

In [7]:
# Forecaster
# ==============================================================================
# Single-series forecaster profiled in the cells below: Ridge regressor on
# 24 lags, with StandardScaler applied to both the target and the exogenous
# variables.
forecaster = ForecasterAutoreg(
    regressor=Ridge(),
    lags=24,
    transformer_exog=StandardScaler(),
    transformer_y=StandardScaler(),
)

In [8]:
%%pyinstrument

# Profile one complete fit() call on the synthetic series and exogenous data.
forecaster.fit(y=y, exog=exog)

In [9]:
# Profiling fit()
# ==============================================================================
def funt_to_profile(forecaster, y, exog):
    """Call `forecaster.fit` once; this is the statement executed under `%lprun`."""
    forecaster.fit(y=y, exog=exog)

# Line-by-line timings are collected for `forecaster.fit` only (-f target).
%lprun -f forecaster.fit funt_to_profile(forecaster, y, exog)

Timer unit: 1e-09 s

Total time: 0.103863 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoreg/ForecasterAutoreg.py
Function: fit at line 527

Line #      Hits         Time  Per Hit   % Time  Line Contents
   527                                               def fit(
   528                                                   self,
   529                                                   y: pd.Series,
   530                                                   exog: Optional[Union[pd.Series, pd.DataFrame]]=None,
   531                                                   store_last_window: bool=True,
   532                                                   store_in_sample_residuals: bool=True
   533                                               ) -> None:
   534                                                   """
   535                                                   Training Forecaster.
   536                                           
   537                                 

In [10]:
# Profiling create_train_X_y()
# ==============================================================================
def funt_to_profile(forecaster, y, exog):
    """Call `forecaster.create_train_X_y` once; its return value is discarded."""
    forecaster.create_train_X_y(y=y, exog=exog)

# Line-by-line timings are collected for `forecaster.create_train_X_y` only.
%lprun -f forecaster.create_train_X_y funt_to_profile(forecaster, y, exog)

Timer unit: 1e-09 s

Total time: 0.0239219 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoreg/ForecasterAutoreg.py
Function: create_train_X_y at line 379

Line #      Hits         Time  Per Hit   % Time  Line Contents
   379                                               def create_train_X_y(
   380                                                   self,
   381                                                   y: pd.Series,
   382                                                   exog: Optional[Union[pd.Series, pd.DataFrame]]=None
   383                                               ) -> Tuple[pd.DataFrame, pd.Series]:
   384                                                   """
   385                                                   Create training matrices from univariate time series and exogenous
   386                                                   variables.
   387                                                   
   388                                        

In [11]:
# Profiling predict()
# ==============================================================================
def funt_to_profile(forecaster, exog_test):
    """Predict 100 steps ahead with `exog_test`; the predictions are discarded."""
    forecaster.predict(steps=100, exog=exog_test)

# Line-by-line timings are collected for `forecaster.predict` only (-f target).
%lprun -f forecaster.predict funt_to_profile(forecaster, exog_test)

Timer unit: 1e-09 s

Total time: 0.0438469 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoreg/ForecasterAutoreg.py
Function: predict at line 890

Line #      Hits         Time  Per Hit   % Time  Line Contents
   890                                               def predict(
   891                                                   self,
   892                                                   steps: int,
   893                                                   last_window: Optional[Union[pd.Series, pd.DataFrame]]=None,
   894                                                   exog: Optional[Union[pd.Series, pd.DataFrame]]=None
   895                                               ) -> pd.Series:
   896                                                   """
   897                                                   Predict n steps ahead. It is an recursive process in which, each prediction,
   898                                                   is used as a predictor for th

In [12]:
# Profiling _create_predict_inputs()
# ==============================================================================
# The window stored on the forecaster is passed explicitly as `last_window`.
last_window = forecaster.last_window

def funt_to_profile(forecaster, steps, last_window, exog):
    """Call the private `_create_predict_inputs` once; its result is discarded."""
    forecaster._create_predict_inputs(steps=steps, last_window=last_window, exog=exog)

# Line-by-line timings are collected for `forecaster._create_predict_inputs` only.
%lprun -f forecaster._create_predict_inputs funt_to_profile(forecaster, 100, last_window, exog_test)

Timer unit: 1e-09 s

Total time: 0.0171847 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoreg/ForecasterAutoreg.py
Function: _create_predict_inputs at line 693

Line #      Hits         Time  Per Hit   % Time  Line Contents
   693                                               def _create_predict_inputs(
   694                                                   self,
   695                                                   steps: int,
   696                                                   last_window: Optional[Union[pd.Series, pd.DataFrame]]=None,
   697                                                   exog: Optional[Union[pd.Series, pd.DataFrame]]=None
   698                                               ) -> Tuple[np.ndarray, np.ndarray, pd.Index]:
   699                                                   """
   700                                                   Create inputs needed for the first iteration of the prediction process. 
   701                        

# ForecasterAutoregMultiSeries

In [13]:
# Data
# ==============================================================================
# Loads the 'bdg2_daily' dataset, collapses rare building categories, adds
# calendar features, and builds per-building dictionaries of series and
# exogenous variables split into train / validation / test partitions.
data = fetch_dataset(name='bdg2_daily')

# Usage types found in fewer than 100 buildings (one row per building_id is
# counted) are collected here and replaced by 'Other' further down.
infrequent_types = (
    data
    .drop_duplicates(subset=['building_id'])['primaryspaceusage']
    .value_counts()
    .loc[lambda x: x < 100]
    .index
    .tolist()
)
# Same idea for sub-types, with a threshold of 50 buildings.
infrequent_subtypes = (
    data
    .drop_duplicates(subset=['building_id'])['sub_primaryspaceusage']
    .value_counts()
    .loc[lambda x: x < 50]
    .index
    .tolist()
)

data['primaryspaceusage'] = np.where(
    data['primaryspaceusage'].isin(infrequent_types),
    'Other',
    data['primaryspaceusage']
)
data['sub_primaryspaceusage'] = np.where(
    data['sub_primaryspaceusage'].isin(infrequent_subtypes),
    'Other',
    data['sub_primaryspaceusage']
)

# Calendar features
# ==============================================================================
data['day_of_week'] = data.index.dayofweek
data['week_of_year'] = data.index.isocalendar().week.astype(int)
data['month'] = data.index.month

# Cyclical encoding of calendar features
# ==============================================================================
# NOTE(review): ISO week numbers can reach 53 while the period used for
# week_of_year is 52, so week 53 maps slightly past a full cycle — confirm
# this is intended.
data['sin_day_of_week'] = np.sin(2*np.pi*data['day_of_week']/7)
data['cos_day_of_week'] = np.cos(2*np.pi*data['day_of_week']/7)
data['sin_week_of_year'] = np.sin(2*np.pi*data['week_of_year']/52)
data['cos_week_of_year'] = np.cos(2*np.pi*data['week_of_year']/52)
data['sin_month'] = np.sin(2*np.pi*data['month']/12)
data['cos_month'] = np.cos(2*np.pi*data['month']/12)

# Transformer: ordinal encoding
# ==============================================================================
# A ColumnTransformer is used to transform categorical (not numerical) features
# using ordinal encoding. The remaining columns are passed through untouched.
# Missing values are encoded as NaN, and categories not seen during fitting are
# also encoded as NaN (`encoded_missing_value` / `unknown_value` below).
categorical_features = ['primaryspaceusage', 'sub_primaryspaceusage', 'timezone']
transformer_exog = make_column_transformer(
                       (
                           OrdinalEncoder(
                               dtype=float,
                               handle_unknown="use_encoded_value",
                               unknown_value=np.nan,
                               encoded_missing_value=np.nan
                           ),
                           categorical_features
                       ),
                       remainder="passthrough",
                       verbose_feature_names_out=False,
                   ).set_output(transform="pandas")
# NOTE(review): bare expression in the middle of the cell; only the last
# expression of a cell is rendered, so this line displays nothing.
transformer_exog

# Exogenous features selected for modeling
# ==============================================================================
exog_features = [
    "primaryspaceusage",
    "sub_primaryspaceusage",
    "timezone",
    "sqm",
    "airTemperature",
    "cloudCoverage",
    "dewTemperature",
    "precipDepth1HR",
    "precipDepth6HR",
    "seaLvlPressure",
    "windDirection",
    "windSpeed",
    "sin_day_of_week",
    "cos_day_of_week",
    "sin_week_of_year",
    "cos_week_of_year",
    "sin_month",
    "cos_month",
]

# Transform series and exog to dictionaries
# ==============================================================================
# Both helpers receive the long-format frame with 'timestamp' restored as a
# column and are keyed by 'building_id'.
series_dict = series_long_to_dict(
    data      = data.reset_index(),
    series_id = 'building_id',
    index     = 'timestamp',
    values    = 'meter_reading',
    freq      = 'D'
)

exog_dict = exog_long_to_dict(
    data      = data[exog_features + ['building_id']].reset_index(),
    series_id = 'building_id',
    index     = 'timestamp',
    freq      = 'D'
)

# NOTE(review): this sort runs after series_dict / exog_dict were built, and
# the partitions below are sliced from those dictionaries, not from `data`.
data = data.sort_index()
# Partition boundaries. Label-based .loc slices include both endpoints;
# presumably the 23:59 cut-offs keep daily timestamps from appearing in two
# partitions — TODO confirm.
end_train = '2017-08-31 23:59:00'
end_validation = '2017-10-31 23:59:00'
series_dict_train = {k: v.loc[: end_train,] for k, v in series_dict.items()}
series_dict_valid = {k: v.loc[end_train: end_validation,] for k, v in series_dict.items()}
series_dict_test = {k: v.loc[end_validation:,] for k, v in series_dict.items()}
exog_dict_train = {k: v.loc[: end_train,] for k, v in exog_dict.items()}
exog_dict_valid = {k: v.loc[end_train: end_validation,] for k, v in exog_dict.items()}
exog_dict_test = {k: v.loc[end_validation:,] for k, v in exog_dict.items()}



bdg2_daily
----------
Daily energy consumption data from the The Building Data Genome Project 2 with
building metadata and weather data. https://github.com/buds-lab/building-data-
genome-project-2
Miller, C., Kathirgamanathan, A., Picchetti, B. et al. The Building Data Genome
Project 2, energy meter data from the ASHRAE Great Energy Predictor III
competition. Sci Data 7, 368 (2020). https://doi.org/10.1038/s41597-020-00712-x
Shape of the dataset: (1153518, 17)


In [14]:
# Forecaster
# ==============================================================================
# Multi-series forecaster profiled in the cells below: LightGBM regressor on
# 14 lags, no series transformer, the ordinal-encoding `transformer_exog`
# defined above, and the categorical column names forwarded to the regressor's
# fit through `fit_kwargs`.
forecaster = ForecasterAutoregMultiSeries(
    regressor=LGBMRegressor(random_state=8520, verbose=-1),
    lags=14,
    transformer_series=None,
    transformer_exog=transformer_exog,
    fit_kwargs={'categorical_feature': categorical_features},
    encoding="ordinal"
)

In [15]:
%%pyinstrument

# Profile one complete fit() call on the training partition of all series.
forecaster.fit(series=series_dict_train, exog=exog_dict_train)



In [16]:
# Profiling fit()
# ==============================================================================
def funt_to_profile(forecaster, series, exog):
    """Call `forecaster.fit` once; this is the statement executed under `%lprun`."""
    forecaster.fit(series=series, exog=exog)

# Line-by-line timings are collected for `forecaster.fit` only (-f target).
%lprun -f forecaster.fit funt_to_profile(forecaster, series_dict_train, exog_dict_train)



Timer unit: 1e-09 s

Total time: 17.3253 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoregMultiSeries/ForecasterAutoregMultiSeries.py
Function: fit at line 1052

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1052                                               def fit(
  1053                                                   self,
  1054                                                   series: Union[pd.DataFrame, dict],
  1055                                                   exog: Optional[Union[pd.Series, pd.DataFrame, dict]]=None,
  1056                                                   store_last_window: Union[bool, list]=True,
  1057                                                   store_in_sample_residuals: bool=True,
  1059                                               ) -> None:
  1060                                                   """
  1061                                                   Training Forecaster. See Notes section for more

In [17]:
# Profiling _create_train_X_y()
# ==============================================================================
def funt_to_profile(forecaster, series, exog):
    """Call the private `_create_train_X_y` once; its result is discarded."""
    forecaster._create_train_X_y(series=series, exog=exog)

# Line-by-line timings are collected for `forecaster._create_train_X_y` only.
%lprun -f forecaster._create_train_X_y funt_to_profile(forecaster, series_dict_train, exog_dict_train)



Timer unit: 1e-09 s

Total time: 8.20557 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoregMultiSeries/ForecasterAutoregMultiSeries.py
Function: _create_train_X_y at line 595

Line #      Hits         Time  Per Hit   % Time  Line Contents
   595                                               def _create_train_X_y(
   596                                                   self,
   597                                                   series: Union[pd.DataFrame, dict],
   598                                                   exog: Optional[Union[pd.Series, pd.DataFrame, dict]]=None,
   599                                                   store_last_window: Union[bool, list]=True,
   600                                               ) -> Tuple[pd.DataFrame, pd.Series, dict, list, list, list, dict, dict]:
   601                                                   """
   602                                                   Create training matrices from multiple time series an

In [18]:
# Profiling predict()
# ==============================================================================
def funt_to_profile(forecaster, steps, exog):
    """Predict `steps` ahead with warnings suppressed; predictions are discarded."""
    forecaster.predict(steps=steps, exog=exog, suppress_warnings=True)

# Line-by-line timings are collected for `forecaster.predict` only; 7 steps are
# predicted using the validation exogenous data.
%lprun -f forecaster.predict funt_to_profile(forecaster, 7, exog_dict_valid)

Timer unit: 1e-09 s

Total time: 36.6131 s
File: /home/ubuntu/varios/skforecast/skforecast/ForecasterAutoregMultiSeries/ForecasterAutoregMultiSeries.py
Function: predict at line 1537

Line #      Hits         Time  Per Hit   % Time  Line Contents
  1537                                               def predict(
  1538                                                   self,
  1539                                                   steps: int,
  1540                                                   levels: Optional[Union[str, list]]=None,
  1541                                                   last_window: Optional[pd.DataFrame]=None,
  1542                                                   exog: Optional[Union[pd.Series, pd.DataFrame, dict]]=None,
  1544                                               ) -> pd.DataFrame:
  1545                                                   """
  1546                                                   Predict n steps ahead. It is an recursive process in

In [19]:
# Functions to profile:
# ==============================================================================
# check_preprocess_exog_multiseries
# align_series_and_exog_multiseries
# _create_train_X_y_single_series
# _create_predict_inputs
# _recursive_predict

In [20]:
# Profiling align_series_and_exog_multiseries()
# ==============================================================================
def funt_to_profile(series_dict, input_series_is_dict, exog_dict):
    """Call `align_series_and_exog_multiseries` once; its result is discarded."""
    align_series_and_exog_multiseries(
        series_dict=series_dict,
        input_series_is_dict=input_series_is_dict,
        exog_dict = exog_dict,
    )

# Line-by-line timings are collected for the utility function itself, using
# the training partitions as input.
%lprun -f align_series_and_exog_multiseries funt_to_profile(series_dict_train, True, exog_dict_train)

Timer unit: 1e-09 s

Total time: 2.26484 s
File: /home/ubuntu/varios/skforecast/skforecast/utils/utils.py
Function: align_series_and_exog_multiseries at line 2532

Line #      Hits         Time  Per Hit   % Time  Line Contents
  2532                                           def align_series_and_exog_multiseries(
  2533                                               series_dict: dict,
  2534                                               input_series_is_dict: bool,
  2535                                               exog_dict: dict=None
  2536                                           ) -> Tuple[Union[pd.Series, pd.DataFrame], Union[pd.Series, pd.DataFrame]]:
  2537                                               """
  2538                                               Align series and exog according to their index. If needed, reindexing is
  2539                                               applied. Heading and trailing NaNs are removed from all series in 
  2540                        

In [23]:
# Profiling check_preprocess_exog_multiseries()
# ==============================================================================
series_indexes = {k: v.index for k, v  in series_dict_train.items()}
series_col_names = list(series_dict_train.keys())

def funt_to_profile(input_series_is_dict, series_indexes, series_col_names, exog, exog_dict):
    check_preprocess_exog_multiseries(
        input_series_is_dict = input_series_is_dict,
        series_indexes = series_indexes,
        series_col_names = series_col_names,
        exog = exog_dict_train,
        exog_dict = exog_dict_train,
    )

%lprun -f check_preprocess_exog_multiseries funt_to_profile(True, series_indexes, series_col_names, exog, exog_dict)

Timer unit: 1e-09 s

Total time: 0.911268 s
File: /home/ubuntu/varios/skforecast/skforecast/utils/utils.py
Function: check_preprocess_exog_multiseries at line 2361

Line #      Hits         Time  Per Hit   % Time  Line Contents
  2361                                           def check_preprocess_exog_multiseries(
  2362                                               input_series_is_dict: bool,
  2363                                               series_indexes: dict,
  2364                                               series_col_names: list,
  2365                                               exog: Union[pd.Series, pd.DataFrame, dict],
  2366                                               exog_dict: dict,
  2367                                           ) -> Tuple[dict, list]:
  2368                                               """
  2369                                               Check and preprocess `exog` argument in `ForecasterAutoregMultiSeries` and
  2370                    

In [None]:
import pandas as pd
import numpy as np
import time

def fun_original():
    pass

def fun_optimized_1():
    pass

start_time = time.time()
try:
    fun_original()
except TypeError as e:
    end_time = time.time()
    print(e)
finally:
    end_time = time.time()
print(f"Original code execution time: {end_time - start_time} seconds")

start_time = time.time()
try:
    fun_optimized_1()
except TypeError as e:
    end_time = time.time()
    print(e)
finally:
    end_time = time.time()
print(f"Optimized code 1 execution time: {end_time - start_time} seconds")


Original code execution time: 9.608268737792969e-05 seconds
Optimized code 1 execution time: 7.390975952148438e-05 seconds
