In [1]:
# Enable IPython's autoreload extension; with mode 2, edits made to imported
# modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
import sys
from pathlib import Path
# Make the parent of the current working directory importable so the local
# checkout of the package is the one that gets imported.
path = str(Path.cwd().parent)
# Note: this prints an absolute local path, which ends up in the saved output.
print(path)
# Drop any existing occurrence before inserting, so that re-running this cell
# keeps exactly one entry (at position 1) instead of stacking duplicates.
sys.path[:] = [entry for entry in sys.path if entry != path]
sys.path.insert(1, path)

c:\Users\jaesc2\GitHub\skforecast


In [2]:
# Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from lightgbm import LGBMRegressor

from skforecast.datasets import fetch_dataset
from skforecast.preprocessing import RollingFeatures
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.model_selection import TimeSeriesFold
from skforecast.model_selection import backtesting_forecaster_multiseries
from skforecast.model_selection import grid_search_forecaster_multiseries
from skforecast.model_selection import bayesian_search_forecaster_multiseries

In [3]:
# Load the dataset
# ==============================================================================
data = fetch_dataset(name="items_sales")

# Train / test partition (no validation set is created here)
# ==============================================================================
# Both slices use the same cut-off label; in the recorded output the training
# set ends on 2014-07-15 and the test set starts on 2014-07-16.
end_train = '2014-07-15 23:59:00'
data_train = data.loc[:end_train].copy()
data_test = data.loc[end_train:].copy()

train_summary = (
    f"Train dates : {data_train.index.min()} --- {data_train.index.max()}   "
    f"(n={len(data_train)})"
)
test_summary = (
    f"Test dates  : {data_test.index.min()} --- {data_test.index.max()}   "
    f"(n={len(data_test)})"
)
print(train_summary, test_summary, sep="\n")
data.head()

items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)
Train dates : 2012-01-01 00:00:00 --- 2014-07-15 00:00:00   (n=927)
Test dates  : 2014-07-16 00:00:00 --- 2015-01-01 00:00:00   (n=170)


Unnamed: 0_level_0,item_1,item_2,item_3
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2012-01-01,8.253175,21.047727,19.429739
2012-01-02,22.777826,26.578125,28.009863
2012-01-03,27.549099,31.751042,32.078922
2012-01-04,25.895533,24.567708,27.252276
2012-01-05,21.379238,18.191667,20.357737


In [4]:
# Build a ForecasterRecursiveMultiSeries and inspect its differentiation state
# ==============================================================================
# Two rolling means, one over 24 observations and one over 48.
rolling_means = RollingFeatures(stats=['mean', 'mean'], window_sizes=[24, 48])

forecaster = ForecasterRecursiveMultiSeries(
    regressor=LGBMRegressor(random_state=123, verbose=-1),
    lags=24,
    window_features=rolling_means,
    encoding='ordinal',
    transformer_series=None,
    transformer_exog=None,
    weight_func=None,
    series_weights=None,
    differentiation=1,
    dropna_from_series=False,
    fit_kwargs=None,
    forecaster_id=None,
)

# The forecaster is not fitted in this cell (the call below is left commented
# out), so the printed values are those of the freshly constructed object.
# forecaster.fit(series=data_train)
print(
    forecaster.differentiation,
    forecaster.differentiation_max,
    forecaster.differentiator,
    forecaster.differentiator_,
    sep="\n",
)

1
1
TimeSeriesDifferentiator(order=1, window_size=49)
None


In [5]:
# Create df with categoricals
# ==============================================================================
data_train_cat = data_train.copy()
data_train_cat['item_id'] = np.array(['A', 'B'] * int(len(data_train) / 2) + ['A'])
data_train_cat['item_id'] = data_train_cat['item_id'].astype('category')
data_train_cat

Unnamed: 0_level_0,item_1,item_2,item_3,item_id
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2012-01-01,8.253175,21.047727,19.429739,A
2012-01-02,22.777826,26.578125,28.009863,B
2012-01-03,27.549099,31.751042,32.078922,A
2012-01-04,25.895533,24.567708,27.252276,B
2012-01-05,21.379238,18.191667,20.357737,A
...,...,...,...,...
2014-07-11,25.662128,11.002083,10.396751,A
2014-07-12,23.773923,11.008333,16.139173,B
2014-07-13,22.609388,8.100000,13.028927,A
2014-07-14,23.307307,10.895833,9.315334,B


In [6]:
# Column dtypes of the frame with the categorical column; the recorded output
# reports `item_id` as `category` and the item columns as float64.
data_train_cat.dtypes

item_1      float64
item_2      float64
item_3      float64
item_id    category
dtype: object

In [7]:
# Check which dtypes an empty DataFrame ends up with when it is shaped via
# `reindex_like`: the recorded output shows every column as float64, i.e. the
# `category` dtype of `item_id` is not carried over.
pd.DataFrame().reindex_like(data_train_cat).dtypes

item_1     float64
item_2     float64
item_3     float64
item_id    float64
dtype: object

In [10]:
# Column-name -> dtype mapping of `data_train_cat`; the same expression is
# passed to `DataFrame.astype` in a later cell.
data_train_cat.dtypes.to_dict()

{'item_1': dtype('float64'),
 'item_2': dtype('float64'),
 'item_3': dtype('float64'),
 'item_id': CategoricalDtype(categories=['A', 'B'], ordered=False, categories_dtype=object)}

In [12]:
# Build an empty frame with the same index and columns, then cast it with the
# dtype mapping of `data_train_cat`; per the recorded output this keeps
# `item_id` as `category`.
pd.DataFrame(index=data_train_cat.index, columns=data_train_cat.columns).astype(data_train_cat.dtypes.to_dict()).dtypes

item_1      float64
item_2      float64
item_3      float64
item_id    category
dtype: object

In [21]:
from copy import copy

# Shallow-copy None. The cell records no output, which is consistent with the
# call returning None.
copy(None)

In [7]:
from __future__ import annotations

def sum(a: int | list | None = None) -> int | list | None:
    """
    Return the argument unchanged.

    Identity function annotated with `X | Y` union types. Note that the name
    shadows the builtin `sum` in the namespace where it is defined.

    Parameters
    ----------
    a : int, list, None, default None
        Value to hand back.

    Returns
    -------
    int, list, None
        The very same object that was passed as `a`.
    """
    return a

In [11]:
import numpy as np

# `np.max` cannot order an int against None, so this call raises TypeError.
# The error is caught and printed so the demonstration stays visible without
# stopping a top-to-bottom run of the notebook.
np_max_error = None
try:
    np.max([5, None])
except TypeError as exc:
    np_max_error = f"TypeError: {exc}"
    print(np_max_error)

TypeError: '>=' not supported between instances of 'int' and 'NoneType'

In [None]:
# `deepcopy` is imported here because the notebook only imports `copy` from
# the `copy` module; without this line the cell fails on a fresh kernel.
from copy import deepcopy

# Overwrite the entries of `differentiator_` with independent (deep) copies of
# the values in `diff`; keys that are absent from `diff` ('l3') stay as they are.
diff = {'l1': [1], 'l2': [2]}
differentiator_ = {'l1': [1], 'l2': [1], 'l3': None}
differentiator_.update(
    {k: deepcopy(v) for k, v in diff.items()}
)
differentiator_

{'l1': [1], 'l2': [2], 'l3': None}

In [28]:
# Mutate the list stored under 'l1' in place. An in-place change is what would
# leak into `diff` if both dicts shared the same list object; binding a
# brand-new list to the key could never affect `diff`, whatever kind of copy
# was made.
differentiator_['l1'][0] = 1000
differentiator_

{'l1': [1000], 'l2': [2], 'l3': None}

In [29]:
# Display `diff` to check whether changing `differentiator_` in the previous
# cell affected it; the recorded output shows it unchanged.
diff

{'l1': [1], 'l2': [2]}

In [4]:
# Libraries
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from scipy.stats import skew

from skforecast.datasets import fetch_dataset
from skforecast.recursive import ForecasterRecursive
from skforecast.recursive import ForecasterRecursiveMultiSeries
from skforecast.utils import save_forecaster
from skforecast.utils import load_forecaster


# Load the `items_sales` dataset; this rebinds the `data` name already used
# earlier in the notebook.
data = fetch_dataset(name="items_sales")


def custom_weights_item_1(index):
    """
    Weight 0 for dates from 2012-01-01 to 2012-06-01 (both included),
    weight 1 for every other date.

    `index` must support element-wise comparison with date strings
    (e.g. a pandas DatetimeIndex). Returns the array built by `np.where`.
    """
    on_or_after_start = index >= '2012-01-01'
    on_or_before_end = index <= '2012-06-01'
    inside_window = on_or_after_start & on_or_before_end

    return np.where(inside_window, 0, 1)

def custom_weights_item_2(index):
    """
    Weight 0 for dates from 2012-04-01 to 2013-01-01 (both included),
    weight 1 for every other date.

    `index` must support element-wise comparison with date strings
    (e.g. a pandas DatetimeIndex). Returns the array built by `np.where`.
    """
    on_or_after_start = index >= '2012-04-01'
    on_or_before_end = index <= '2013-01-01'
    inside_window = on_or_after_start & on_or_before_end

    return np.where(inside_window, 0, 1)

def custom_weights_item_3(index):
    """
    Weight 0 for dates from 2012-06-01 to 2013-01-01 (both included),
    weight 1 for every other date.

    `index` must support element-wise comparison with date strings
    (e.g. a pandas DatetimeIndex). Returns the array built by `np.where`.
    """
    on_or_after_start = index >= '2012-06-01'
    on_or_before_end = index <= '2013-01-01'
    inside_window = on_or_after_start & on_or_before_end

    return np.where(inside_window, 0, 1)


# Per-series weight functions: only item_1 and item_2 get an entry, item_3 has
# none.
weight_func_dict = dict(
    item_1=custom_weights_item_1,
    item_2=custom_weights_item_2,
)

# Multi-series forecaster that uses the weight functions above, trained on the
# full dataset.
forecaster = ForecasterRecursiveMultiSeries(
    regressor=RandomForestRegressor(random_state=123),
    lags=3,
    encoding='ordinal',
    weight_func=weight_func_dict,
)
forecaster.fit(series=data)
forecaster

# Persist the fitted forecaster, asking `save_forecaster` to store the custom
# functions as well.
save_forecaster(
    forecaster,
    file_name='forecaster_multiseries_custom_features.joblib',
    save_custom_functions=True,
    verbose=False,
)



items_sales
-----------
Simulated time series for the sales of 3 different items.
Simulated data.
Shape of the dataset: (1097, 3)




In [10]:
# Inspect the forecaster's `weight_func_` attribute after `fit`; the recorded
# output starts with the `item_1` / `item_2` functions passed via `weight_func`.
forecaster.weight_func_

{'item_1': <function __main__.custom_weights_item_1(index)>,
 'item_2': <function __main__.custom_weights_item_2(index)>,
 ForecasterRecursiveMultiSeries 
 Regressor: RandomForestRegressor 
 Lags: [1 2 3] 
 Window features: None 
 Window size: 3 
 Series encoding: ordinal 
 Series names (levels): item_1, item_2, item_3 
 Exogenous included: False 
 Exogenous names: None 
 Transformer for series: None 
 Transformer for exog: None 
 Weight function included: True 
 Series weights: None 
 Differentiation order: None 
 Training range: 
     'item_1': ['2012-01-01', '2015-01-01'], 'item_2': ['2012-01-01', '2015-01-01'],
     'item_3': ['2012-01-01', '2015-01-01'] 
 Training index type: DatetimeIndex 
 Training index frequency: D 
 Regressor parameters: 
     {'bootstrap': True, 'ccp_alpha': 0.0, 'criterion': 'squared_error', 'max_depth':
     None, 'max_features': 1.0, 'max_leaf_nodes': None, 'max_samples': None,
     'min_impurity_decrease': 0.0, 'min_samples_leaf': 1, 'min_samples_split':

In [14]:
# Dump the raw bytecode (`__code__.co_code`) of the forecaster's private
# `_weight_func_all_1` callable.
# NOTE(review): presumably this is the all-ones weight function applied to
# series without a custom function (item_3 here) - confirm against skforecast.
forecaster._weight_func_all_1.__code__.co_code

b'\x97\x00t\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00j\x01\x00\x00\x00\x00\x00\x00\x00\x00t\x05\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00|\x01\xa6\x01\x00\x00\xab\x01\x00\x00\x00\x00\x00\x00\x00\x00t\x06\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xac\x01\xa6\x02\x00\x00\xab\x02\x00\x00\x00\x00\x00\x00\x00\x00}\x02|\x02S\x00'