In [1]:
import pickle


with open("/path/to/BasicModels/LSTM/pkl_files/store_series_dict.pkl","rb") as file:
    store_series_dict=pickle.load(file)

In [None]:
from darts.dataprocessing.transformers import Scaler, StaticCovariatesTransformer
import numpy as np


Splitting my series to train, validation and test, fitting the the sacleres

In [3]:
from darts import TimeSeries
from darts.dataprocessing.transformers import FittableDataTransformer

class DualScaler(FittableDataTransformer):
    """
    A simple composite transformer applying:
    - `ts_scaler` on the sequential part of a TimeSeries
    - `static_scaler` on its static covariates
    """

    def __init__(self, ts_scaler, static_scaler, name="DualScaler"):
        super().__init__(name=name)
        self._ts_scaler = ts_scaler
        self._static_scaler = static_scaler

    def ts_fit(self, series: TimeSeries, **kwargs):
        """
        If the scalers are already fitted, we can simply pass here.
        Otherwise, you could optionally call self._ts_scaler.fit(series), etc.
        """
        self._ts_scaler.fit(series)
        if series[0].static_covariates is not None:
            self._static_scaler.fit(series)
        return self

    def ts_transform(self, series: TimeSeries, in_place: bool = False, **kwargs) -> TimeSeries:
        """
        Apply ts_scaler to the time series data, and static_scaler to the static covariates.
        """
        #s = series if in_place else series.copy()

        # 1) Transform the sequential (time-indexed) part
        series = self._ts_scaler.transform(series)

        # 2) Transform the static covariates
        if series[0].static_covariates is not None:
            series = self._static_scaler.transform(series)

        return series

    def ts_inverse_transform(self, series: TimeSeries, in_place: bool = False, **kwargs) -> TimeSeries:
        """
        Inverse-transform the sequential data and static covariates.
        """
        #s = series if in_place else series.copy()

        # 1) Invert the sequential part
        series = self._ts_scaler.inverse_transform(series)

        # 2) Invert the static covariates
        if series[0].static_covariates is not None:
            series = self._static_scaler.inverse_transform(series)

        return series

In [4]:
store_scalers = {}

train_series_store = {}
val_series_store = {}
forecast_series_store = {}
test_series_store = {}
future_covs_store = {}
series_store = {}

skipped_count = 0
MIN_TRAIN_LENGTH = 150# for example

for store_id, product_series_list in store_series_dict.items():
    # Preprocess data 
    # Split into train/val (last 28 days for validation)
    train_series = []
    val_series = []
    test_series = []
    forecast_series = []
    future_covariates = []
    series = []

    for target, covs in product_series_list:
        if len(target) < MIN_TRAIN_LENGTH:
            skipped_count+=1
            continue
        train_target = target[:-112].astype(np.float32)
        val_target = target[-112:-28].astype(np.float32)
        test_target = target[-28:].astype(np.float32)  
        forecast_target = target[:-28].astype(np.float32)
        series_target = target.astype(np.float32)

        train_series.append(train_target)
        val_series.append(val_target)
        test_series.append(test_target)
        forecast_series.append(forecast_target)
        future_covariates.append(covs.astype(np.float32))
        series.append(series_target)

    train_series_store[store_id] = train_series
    val_series_store[store_id]= val_series
    forecast_series_store[store_id] = forecast_series
    test_series_store[store_id] = test_series
    future_covs_store[store_id] = future_covariates
    series_store[store_id] = series
    # Scale target and covariates
    scaler_target = Scaler()
    scaler_covs = Scaler()
    static_scaler = StaticCovariatesTransformer()

    dual_transformer = DualScaler(
    ts_scaler=scaler_target,   # your main time-series scaler
    static_scaler=static_scaler  # your static covariates scaler
)
    
    
    dual_transformer.ts_fit(train_series)
    #val_series_scaled = scaler_target.transform(val_series)

    scaler_covs.fit(future_covariates)

    #val_series_scaled = static_scaler.transform(val_series_scaled)
    # Store scalers for inverse transformation later
    store_scalers[store_id] = {
        'dual_target': dual_transformer,
        'covariates': scaler_covs,
    }

# 1st Model

Only training for CA_2

In [7]:
from darts.models import LightGBMModel

model_1 = LightGBMModel(
    # Choose your lags (number of past timesteps to use). For example:
        lags=28,  # or range(1,29) if you want t-1 to t-28
        #lags_past_covariates=28,
        lags_future_covariates=list(range(-28,0)),
        # All LightGBM hyperparams:
        boosting_type="gbdt",
        objective="tweedie",
        tweedie_variance_power=1.1,
        metric="rmse",
        n_jobs=-1,
        random_state=42,  # "seed" is deprecated in newer LightGBM; use random_state
        learning_rate=0.2,
        bagging_fraction=0.85,
        bagging_freq=1,
        colsample_bytree=0.85,    # or "feature_fraction=0.85" is also valid
        colsample_bynode=0.85,    # or "feature_fraction_bynode=0.85"
        lambda_l1=0.5,
        lambda_l2=0.5,
        verbose = -1,
)

In [None]:
train_series_ca2 = store_scalers["CA_2"]['dual_target'].ts_transform(train_series_store["CA_2"])
future_covs_ca2 = store_scalers["CA_2"]['covariates'].transform(future_covs_store["CA_2"])

In [9]:
model_1.fit(
        series=train_series_ca2,
        future_covariates=future_covs_ca2,
        #val_series=val_series_store["CA_2"],
        #val_future_covariates=future_covs_store["CA_2"],
    )

LightGBMModel(lags=28, lags_past_covariates=None, lags_future_covariates=[-28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1], output_chunk_length=1, output_chunk_shift=0, add_encoders=None, likelihood=None, quantiles=None, random_state=42, multi_models=True, use_static_covariates=True, categorical_past_covariates=None, categorical_future_covariates=None, categorical_static_covariates=None, boosting_type=gbdt, objective=tweedie, tweedie_variance_power=1.1, metric=rmse, n_jobs=-1, learning_rate=0.2, bagging_fraction=0.85, bagging_freq=1, colsample_bytree=0.85, colsample_bynode=0.85, lambda_l1=0.5, lambda_l2=0.5, verbose=-1)

In [10]:
forecast_series_ca2 = store_scalers["CA_2"]['dual_target'].ts_transform(forecast_series_store["CA_2"])

In [11]:
forcasts = model_1.predict(
                n=28,
                series=forecast_series_ca2,
                future_covariates=future_covs_ca2
)

In [12]:
forecast_inverse = store_scalers['CA_2']['dual_target'].ts_inverse_transform(forcasts)

In [13]:
from darts.metrics import rmse
rmse_list = rmse(test_series_store['CA_2'],forecast_inverse)

In [14]:
print(f"Mean RMSE -> {np.mean(rmse_list)}")

Mean RMSE -> 1.6370193080864468


# 2nd Model


In [26]:
from darts.models import LightGBMModel

model_2 = LightGBMModel(
        lags=14,  # or range(1,29) if you want t-1 to t-28
        lags_future_covariates=list(range(-14,0)),
        n_jobs=-1,
        random_state=42,  # "seed" is deprecated in newer LightGBM; use random_state
        verbose = -1,
)

In [27]:
model_2.fit(
        series=train_series_ca2,
        future_covariates=future_covs_ca2,
        #val_series=val_series_store["CA_2"],
        #val_future_covariates=future_covs_store["CA_2"],
    )

LightGBMModel(lags=14, lags_past_covariates=None, lags_future_covariates=[-14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1], output_chunk_length=1, output_chunk_shift=0, add_encoders=None, likelihood=None, quantiles=None, random_state=42, multi_models=True, use_static_covariates=True, categorical_past_covariates=None, categorical_future_covariates=None, categorical_static_covariates=None, n_jobs=-1, verbose=-1)

In [28]:
forcasts = model_2.predict(
                n=28,
                series=forecast_series_ca2,
                future_covariates=future_covs_ca2
)

In [29]:
forecast_inverse = store_scalers['CA_2']['target'].inverse_transform(forcasts)

In [30]:
rmse_list = rmse(test_series_store['CA_2'],forecast_inverse)
print(f"Mean RMSE -> {np.mean(rmse_list)}")

Mean RMSE -> 1.6506416290885004


# 3rd Model without any Scaling

In [6]:
from darts.models import LightGBMModel

model_3 = LightGBMModel(
    # Choose your lags (number of past timesteps to use). For example:
        lags=28,  # or range(1,29) if you want t-1 to t-28
        #lags_past_covariates=28,
        lags_future_covariates=list(range(-28,0)),
        # All LightGBM hyperparams:
        boosting_type="gbdt",
        objective="tweedie",
        tweedie_variance_power=1.1,
        metric="rmse",
        n_jobs=-1,
        random_state=42,  # "seed" is deprecated in newer LightGBM; use random_state
        learning_rate=0.2,
        bagging_fraction=0.85,
        bagging_freq=1,
        colsample_bytree=0.85,    # or "feature_fraction=0.85" is also valid
        colsample_bynode=0.85,    # or "feature_fraction_bynode=0.85"
        lambda_l1=0.5,
        lambda_l2=0.5,
        verbose = -1,
        categorical_static_covariates= ["item_id", "dept_id", "cat_id"]
        
)

In [7]:
train_series_ca2 = store_scalers["CA_2"]['static'].transform(train_series_store["CA_2"])
#future_covs_ca2 = store_scalers["CA_2"]['covariates'].transform(future_covs_store["CA_2"])

In [8]:
model_3.fit(
        series=train_series_ca2,
        future_covariates=future_covs_store["CA_2"],
        #val_series=val_series_store["CA_2"],
        #val_future_covariates=future_covs_store["CA_2"],
    )

LightGBMModel(lags=28, lags_past_covariates=None, lags_future_covariates=[-28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1], output_chunk_length=1, output_chunk_shift=0, add_encoders=None, likelihood=None, quantiles=None, random_state=42, multi_models=True, use_static_covariates=True, categorical_past_covariates=None, categorical_future_covariates=None, categorical_static_covariates=['item_id', 'dept_id', 'cat_id'], boosting_type=gbdt, objective=tweedie, tweedie_variance_power=1.1, metric=rmse, n_jobs=-1, learning_rate=0.2, bagging_fraction=0.85, bagging_freq=1, colsample_bytree=0.85, colsample_bynode=0.85, lambda_l1=0.5, lambda_l2=0.5, verbose=-1)

In [10]:
forecast_series_ca2 = store_scalers["CA_2"]['static'].transform(forecast_series_store["CA_2"])

In [11]:
forcasts = model_3.predict(
                n=28,
                series=forecast_series_ca2,
                future_covariates=future_covs_store["CA_2"],
)

In [12]:
from darts.metrics import rmse
rmse_list = rmse(test_series_store['CA_2'],forcasts)
print(f"Mean RMSE -> {np.mean(rmse_list)}")

Mean RMSE -> 1.632618284239394


# 4th Model using categorical with Scaling 

In [5]:
from darts.models import LightGBMModel
from darts.metrics import mse
model_4 = LightGBMModel(
    # Choose your lags (number of past timesteps to use). For example:
        lags=28,  # or range(1,29) if you want t-1 to t-28
        #lags_past_covariates=28,
        lags_future_covariates=list(range(-28,0)),
        # All LightGBM hyperparams:
        boosting_type="gbdt",
        objective="tweedie",
        tweedie_variance_power=1.1,
        metric='mse',
        n_jobs=-1,
        random_state=42,  # "seed" is deprecated in newer LightGBM; use random_state
        learning_rate=0.2,
        bagging_fraction=0.85,
        bagging_freq=1,
        colsample_bytree=0.85,    # or "feature_fraction=0.85" is also valid
        colsample_bynode=0.85,    # or "feature_fraction_bynode=0.85"
        lambda_l1=0.5,
        lambda_l2=0.5,
        verbose = -1,
        categorical_future_covariates = [#"tm_d","tm_m","tm_y", "tm_w_end", "tm_dw", "tm_wm",
                                    "event_name_1_enc", "event_type_1_enc", "event_name_2_enc", "event_type_2_enc"],
        categorical_static_covariates= ["item_id", "dept_id", "cat_id"]
        
)

In [6]:
train_series_ca2 = store_scalers["CA_2"]['dual_target'].ts_transform(train_series_store["CA_2"])
future_covs_ca2 = store_scalers["CA_2"]['covariates'].transform(future_covs_store["CA_2"])

In [7]:
model_4.fit(
        series=train_series_ca2,
        future_covariates=future_covs_ca2,
        #val_series=val_series_store["CA_2"],
        #val_future_covariates=future_covs_store["CA_2"],
    )

LightGBMModel(lags=28, lags_past_covariates=None, lags_future_covariates=[-28, -27, -26, -25, -24, -23, -22, -21, -20, -19, -18, -17, -16, -15, -14, -13, -12, -11, -10, -9, -8, -7, -6, -5, -4, -3, -2, -1], output_chunk_length=1, output_chunk_shift=0, add_encoders=None, likelihood=None, quantiles=None, random_state=42, multi_models=True, use_static_covariates=True, categorical_past_covariates=None, categorical_future_covariates=['event_name_1_enc', 'event_type_1_enc', 'event_name_2_enc', 'event_type_2_enc'], categorical_static_covariates=['item_id', 'dept_id', 'cat_id'], boosting_type=gbdt, objective=tweedie, tweedie_variance_power=1.1, metric=mse, n_jobs=-1, learning_rate=0.2, bagging_fraction=0.85, bagging_freq=1, colsample_bytree=0.85, colsample_bynode=0.85, lambda_l1=0.5, lambda_l2=0.5, verbose=-1)

In [8]:
forecast_series_ca2 = store_scalers["CA_2"]['dual_target'].ts_transform(forecast_series_store["CA_2"])

In [9]:
forcasts = model_4.predict(
                n=28,
                series=forecast_series_ca2,
                future_covariates=future_covs_ca2
)

In [17]:
from darts.metrics import rmse,mae

In [11]:
forecast_inverse = store_scalers['CA_2']['dual_target'].ts_inverse_transform(forcasts)

In [12]:


rmse_list = rmse(test_series_store['CA_2'],forecast_inverse)
print(f"Mean RMSE -> {np.mean(rmse_list)}")

Mean RMSE -> 1.631751335216687


In [18]:
mae_list = mae(test_series_store['CA_2'],forecast_inverse)
print(f"Mean mae_list -> {np.mean(mae_list)}")

Mean mae_list -> 1.265607309626463


Lets implement backtest on model4

In [13]:
covar_t = store_scalers["CA_2"]["dual_target"]
print("Covariates Transformer:", covar_t)
print("Type:", type(covar_t))


Covariates Transformer: DualScaler
Type: <class '__main__.DualScaler'>


In [14]:

dual_transformer = store_scalers["CA_2"]["dual_target"]
print(dual_transformer._fit_called)  # if needed

covar_scaler = store_scalers["CA_2"]["dual_target"]
print(covar_scaler._fitted_params    )   # if needed

False
None


In [22]:



scaled_error = model_4.backtest(
    series=store_scalers["CA_2"]["dual_target"].ts_transform(series_store['CA_2']),                # scaled target
    future_covariates=store_scalers["CA_2"]["covariates"].transform(future_covs_store['CA_2']), # if future covariates
    forecast_horizon=28,
    start=0.8,
    retrain=False,          # Use True if you want to re-fit the model and the transformers at each step
    metric=mae,
    last_points_only=False,
    reduction=np.mean,
    verbose=True
)

print("MAE on scaled data:", scaled_error)





historical forecasts:   0%|          | 0/3047 [00:00<?, ?it/s]

MAE on scaled data: [np.float64(0.07060665527091402), np.float64(0.10501910723958015), np.float64(0.1022704458602972), np.float64(0.046927218882032835), np.float64(0.09431467914832695), np.float64(0.04995073911470467), np.float64(0.12313058657850895), np.float64(0.08205713106657776), np.float64(0.05787099458112686), np.float64(0.04581162061648535), np.float64(0.1639611116191664), np.float64(0.08669928448140332), np.float64(0.040488201713975505), np.float64(0.04254107908036907), np.float64(0.08698358045041735), np.float64(0.11128669765477422), np.float64(0.06342136716039748), np.float64(0.09315238243492137), np.float64(0.0489107042909979), np.float64(0.045069906091840674), np.float64(0.05937334272671016), np.float64(0.12516218327967005), np.float64(0.07231004960243126), np.float64(0.086333359764602), np.float64(0.05594300995799841), np.float64(0.05007533822873785), np.float64(0.027830579696027948), np.float64(0.027708270496696632), np.float64(0.02309060226925481), np.float64(0.092528513

In [None]:
inversed_error_list = store_scalers['CA_2']['dual_target'].ts_inverse_transform(scaled_error)

TypeError: 'numpy.float64' object is not iterable

: 