In [None]:
import sys
import os
sys.path.append("../")

In [None]:
from fastai import *
from fastai.tabular import *
from fastai.tabular.all import *

In [None]:
import warnings
import time
warnings.simplefilter(action='ignore', category=FutureWarning)

import pandas as pd
import numpy as np
import optuna

from ngboost.scores import CRPScore, LogScore
from ngboost.learners import default_tree_learner

from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn import model_selection
from sklearn.metrics import mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, mean_squared_log_error



In [None]:
from uncertainty_estimation.uncertainty_estimation_models import Model, XGBoost, CQR, LightGBM, LSF, NGBoost, TFTPytorchFC, PGBM, LightGBMQuantileRegressor
from uncertainty_estimation.constants import DistEnum, PredEnum

Using /root/.cache/torch_extensions/py39_cu117 as PyTorch extensions root...
Creating extension directory /root/.cache/torch_extensions/py39_cu117/split_decision...
Detected CUDA files, patching ldflags
Emitting ninja build file /root/.cache/torch_extensions/py39_cu117/split_decision/build.ninja...
Building extension module split_decision...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
Loading extension module split_decision...
  warn(f"Failed to load image Python extension: {e}")
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(


## 1 Data preparation

In [None]:
import os

ue_dir_path = os.path.dirname(os.path.dirname(os.getcwd()))
df_path = os.path.join(ue_dir_path, 'datasets', 'favorita_full_df.pickle')

df = pd.read_pickle(df_path)

## REMOVE BEFORE PUBLISHING

In [None]:
df = df.iloc[:10000, :]

In [None]:
df.head(1000)

Unnamed: 0,date,entity,store_nbr,item_nbr,unit_sales,onpromotion,Holiday,family,class,perishable,city,state,type,cluster,dcoilwtico,transactions,promo_missing,unit_saleslag16,unit_saleslag17,unit_saleslag18,unit_saleslag19,unit_saleslag20,unit_saleslag21,unit_saleslag22,unit_saleslag30,unit_saleslag60,mean_7,std_7,mean_30,std_30,mean_60,std_60,ramp_upHoliday,ramp_downHoliday,ramp_uponpromotion,ramp_downonpromotion,dcoilwtico_na,transactions_na,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
0,2016-07-15,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.930000,1275.0,1,2.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,9.0,1.000000,1.527525,1.566667,1.924136,1.966667,2.497230,0.0,0.0,0.370408,0.370408,False,False,2016,7,28,15,4,197,False,False,False,False,False,False,1.468541e+09
1,2016-07-16,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,1219.0,1,1.0,2.0,0.0,0.0,4.0,0.0,0.0,2.0,1.0,1.000000,1.527525,1.533333,1.925032,1.950000,2.500339,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,16,5,198,False,False,False,False,False,False,1.468627e+09
2,2016-07-17,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,885.0,1,1.0,1.0,2.0,0.0,0.0,4.0,0.0,3.0,1.0,1.142857,1.463850,1.533333,1.925032,1.950000,2.500339,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,17,6,199,False,False,False,False,False,False,1.468714e+09
3,2016-07-18,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.230000,1445.0,1,2.0,1.0,1.0,2.0,0.0,0.0,4.0,5.0,3.0,1.428571,1.397276,1.433333,1.813424,1.866667,2.410898,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,18,0,200,False,False,False,False,False,False,1.468800e+09
4,2016-07-19,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,44.639999,1360.0,1,0.0,2.0,1.0,1.0,2.0,0.0,0.0,4.0,1.0,0.857143,0.899735,1.433333,1.813424,1.850000,2.420429,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,19,1,201,False,False,False,False,False,False,1.468886e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,2017-02-01,13669,15,1004550,2.0,0,0,22,1318,0,9,7,2,14,53.900002,1330.0,0,0.0,1.0,6.0,2.0,0.0,1.0,1.0,3.0,6.0,1.571429,2.070197,2.600000,3.091702,1.900000,2.461225,0.0,0.0,0.171429,0.282313,False,False,2017,2,5,1,2,32,False,True,False,False,False,False,1.485907e+09
996,2017-02-02,13669,15,1004550,0.0,1,0,22,1318,0,9,7,2,14,53.549999,1418.0,1,0.0,0.0,1.0,6.0,2.0,0.0,1.0,2.0,0.0,1.428571,2.149197,2.566667,3.114851,1.883333,2.470790,0.0,0.0,0.056122,0.154762,False,False,2017,2,5,2,3,33,False,False,False,False,False,False,1.485994e+09
997,2017-02-03,13669,15,1004550,3.0,0,0,22,1318,0,9,7,2,14,53.810001,1287.0,0,5.0,0.0,0.0,1.0,6.0,2.0,0.0,1.0,0.0,2.000000,2.516611,2.733333,3.106537,1.933333,2.503331,0.0,0.0,0.071429,0.254762,False,False,2017,2,5,3,4,34,False,False,False,False,False,False,1.486080e+09
998,2017-02-04,13669,15,1004550,5.0,0,0,22,1318,0,9,7,2,14,,1372.0,0,1.0,5.0,0.0,0.0,1.0,6.0,2.0,0.0,0.0,2.142857,2.410295,2.766667,3.081461,1.933333,2.503331,0.0,0.0,0.100000,0.159524,True,False,2017,2,5,4,5,35,False,False,False,False,False,False,1.486166e+09


After the data is loaded the train data is sorted.

In [None]:
df = df.sort_values(by=['entity','date'])
df = df.reset_index(drop=True)

In [None]:
df['unit_sales'] = df['unit_sales'].astype('float64')

Impute for missing values

In [None]:
import feature_engine.imputation as fe

variables_to_impute = ['dcoilwtico', 'transactions']

add_missing_indicator = fe.AddMissingIndicator(variables=variables_to_impute)
df = add_missing_indicator.fit_transform(df)

binary_na_variables_imputed = [var + '_na' for var in variables_to_impute]

df[binary_na_variables_imputed] = df[binary_na_variables_imputed].astype('bool')

In [None]:
display(df.head(1000))

date,entity,store_nbr,item_nbr,unit_sales,onpromotion,Holiday,family,class,perishable,city,state,type,cluster,dcoilwtico,transactions,promo_missing,unit_saleslag16,unit_saleslag17,unit_saleslag18,unit_saleslag19,unit_saleslag20,unit_saleslag21,unit_saleslag22,unit_saleslag30,unit_saleslag60,mean_7,std_7,mean_30,std_30,mean_60,std_60,ramp_upHoliday,ramp_downHoliday,ramp_uponpromotion,ramp_downonpromotion,dcoilwtico_na,transactions_na,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
2016-07-15T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.93,1275.0,1,2.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,9.0,1.0,1.5275252316519463,1.5666666666666669,1.9241358711314325,1.9666666666666663,2.497230103953257,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,28,15,4,197,False,False,False,False,False,False,1468540800.0
2016-07-16T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,1219.0,1,1.0,2.0,0.0,0.0,4.0,0.0,0.0,2.0,1.0,1.0,1.5275252316519463,1.5333333333333334,1.9250317208910572,1.95,2.500338960072061,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2016,7,28,16,5,198,False,False,False,False,False,False,1468627200.0
2016-07-17T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,885.0,1,1.0,1.0,2.0,0.0,0.0,4.0,0.0,3.0,1.0,1.1428571428571428,1.4638501094227994,1.5333333333333334,1.9250317208910572,1.95,2.500338960072061,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2016,7,28,17,6,199,False,False,False,False,False,False,1468713600.0
2016-07-18T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.23,1445.0,1,2.0,1.0,1.0,2.0,0.0,0.0,4.0,5.0,3.0,1.4285714285714286,1.3972762620115435,1.4333333333333331,1.8134237638032764,1.8666666666666667,2.4108980439933734,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,29,18,0,200,False,False,False,False,False,False,1468800000.0
2016-07-19T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,44.64,1360.0,1,0.0,2.0,1.0,1.0,2.0,0.0,0.0,4.0,1.0,0.8571428571428571,0.8997354108424369,1.4333333333333331,1.8134237638032764,1.85,2.4204285935080145,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,29,19,1,201,False,False,False,False,False,False,1468886400.0
2016-07-20T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,44.96,1342.0,1,1.0,0.0,2.0,1.0,1.0,2.0,0.0,0.0,0.0,1.0,0.8164965809277255,1.4333333333333331,1.8134237638032764,1.7666666666666666,2.360455763709496,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,29,20,2,202,False,False,False,False,False,False,1468972800.0
2016-07-21T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,43.96,1292.0,1,1.0,1.0,0.0,2.0,1.0,1.0,2.0,0.0,0.0,1.1428571428571428,0.6900655593423536,1.4333333333333331,1.8134237638032764,1.7333333333333334,2.356862800395264,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,29,21,3,203,False,False,False,False,False,False,1469059200.0
2016-07-22T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,43.41,1336.0,1,0.0,1.0,1.0,0.0,2.0,1.0,1.0,1.0,1.0,0.8571428571428571,0.6900655593423538,1.3,1.7645943169602127,1.6833333333333331,2.361353150328212,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2016,7,29,22,4,204,False,False,False,False,False,False,1469145600.0
2016-07-23T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,1262.0,1,0.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,0.0,0.7142857142857143,0.7559289460184541,1.3,1.7645943169602127,1.6166666666666667,2.3512828569626256,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2016,7,29,23,5,205,False,False,False,False,False,False,1469232000.0
2016-07-24T00:00:00.000+0000,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,967.0,1,0.0,0.0,0.0,1.0,1.0,0.0,2.0,0.0,1.0,0.5714285714285714,0.7867957924694428,1.3,1.7645943169602127,1.5833333333333333,2.3599171679421342,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2016,7,29,24,6,206,False,False,False,False,False,False,1469318400.0


More time features are added by add_datepart.

In [None]:
add_datepart(df, "date", drop=False)

Unnamed: 0,date,entity,store_nbr,item_nbr,unit_sales,onpromotion,Holiday,family,class,perishable,city,state,type,cluster,dcoilwtico,transactions,promo_missing,unit_saleslag16,unit_saleslag17,unit_saleslag18,unit_saleslag19,unit_saleslag20,unit_saleslag21,unit_saleslag22,unit_saleslag30,unit_saleslag60,mean_7,std_7,mean_30,std_30,mean_60,std_60,ramp_upHoliday,ramp_downHoliday,ramp_uponpromotion,ramp_downonpromotion,dcoilwtico_na,transactions_na,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
0,2016-07-15,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.930000,1275.0,1,2.0,0.0,0.0,4.0,0.0,0.0,1.0,0.0,9.0,1.000000,1.527525,1.566667,1.924136,1.966667,2.497230,0.0,0.0,0.370408,0.370408,False,False,2016,7,28,15,4,197,False,False,False,False,False,False,1.468541e+09
1,2016-07-16,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,1219.0,1,1.0,2.0,0.0,0.0,4.0,0.0,0.0,2.0,1.0,1.000000,1.527525,1.533333,1.925032,1.950000,2.500339,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,16,5,198,False,False,False,False,False,False,1.468627e+09
2,2016-07-17,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,,885.0,1,1.0,1.0,2.0,0.0,0.0,4.0,0.0,3.0,1.0,1.142857,1.463850,1.533333,1.925032,1.950000,2.500339,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,17,6,199,False,False,False,False,False,False,1.468714e+09
3,2016-07-18,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,45.230000,1445.0,1,2.0,1.0,1.0,2.0,0.0,0.0,4.0,5.0,3.0,1.428571,1.397276,1.433333,1.813424,1.866667,2.410898,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,18,0,200,False,False,False,False,False,False,1.468800e+09
4,2016-07-19,13667,15,1001305,0.0,1,0,12,1016,0,9,7,2,14,44.639999,1360.0,1,0.0,2.0,1.0,1.0,2.0,0.0,0.0,4.0,1.0,0.857143,0.899735,1.433333,1.813424,1.850000,2.420429,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,19,1,201,False,False,False,False,False,False,1.468886e+09
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2944544,2017-08-11,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000057,0.216667,0.845560,0.0,0.0,0.297619,0.370408,True,True,2017,8,32,11,4,223,False,False,False,False,False,False,1.502410e+09
2944545,2017-08-12,174677,9,996122,1.0,1,0,3,1124,0,18,12,1,5,,,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000057,0.216667,0.845560,0.0,0.0,0.261905,0.370408,True,True,2017,8,32,12,5,224,False,False,False,False,False,False,1.502496e+09
2944546,2017-08-13,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000057,0.200000,0.839693,0.0,0.0,0.214286,0.370408,True,True,2017,8,32,13,6,225,False,False,False,False,False,False,1.502582e+09
2944547,2017-08-14,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,,,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000,0.000000,0.000000,0.000057,0.183333,0.833446,0.0,0.0,0.142857,0.370408,True,True,2017,8,33,14,0,226,False,False,False,False,False,False,1.502669e+09


In [None]:
cat_vars = ['store_nbr', 'item_nbr', 'onpromotion', 'entity', 'Holiday', 'family', 'class', 'perishable',
       'city', 'state', 'type', 'cluster', 'dcoilwtico_na', 'transactions_na', 'Year', 'Month', 'Week', 'Day', 'Dayofweek',
       'Dayofyear', 'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start']
cont_vars = ['dcoilwtico', 'transactions',
       'unit_saleslag16', 'unit_saleslag17', 'unit_saleslag18',
       'unit_saleslag19', 'unit_saleslag20', 'unit_saleslag21',
       'unit_saleslag22', 'unit_saleslag30', 'unit_saleslag60', 'mean_7',
       'std_7', 'mean_30', 'std_30', 'mean_60', 'std_60', 'ramp_upHoliday',
       'ramp_downHoliday', 'ramp_uponpromotion', 'ramp_downonpromotion']

df[cat_vars] = df[cat_vars].astype('object')

## 2 Vectorizer

In [None]:
from pandas.api.types import is_numeric_dtype, is_categorical_dtype
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.pipeline import FeatureUnion, Pipeline

import category_encoders as ce

class CategoricalSelector(BaseEstimator, TransformerMixin):
    def __init__(self, field):
        self.field = field

    def fit(self, x, y=None):
        return self

    def transform(self, dataframe):
        dt = dataframe[self.field].dtype
        return dataframe[[self.field]]


class ItemSelector(BaseEstimator, TransformerMixin):
    def __init__(self, field):
        self.field = field

    def fit(self, x, y=None):
        return self

    def transform(self, dataframe):
        dt = dataframe[self.field].dtype
        if is_categorical_dtype(dt):
            return dataframe[self.field].cat.codes[:, None]
        elif is_numeric_dtype(dt):
            return dataframe[self.field][:, None]
        else:
            return dataframe[self.field]

def create_feature_vectorizer_without_nan():
    vectorizer_tree = FeatureUnion([
        ('store_nbr',
         Pipeline([('select', CategoricalSelector('store_nbr')),
                   ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('item_nbr',
         Pipeline([('select', CategoricalSelector('item_nbr')),
                   ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('onpromotion',
         Pipeline([
             ('select', CategoricalSelector('onpromotion')),
             ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('entity',
         Pipeline(
             [('select', CategoricalSelector('entity')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Holiday',
         Pipeline(
             [('select', CategoricalSelector('Holiday')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
      
        ('family',
         Pipeline(
             [('select', CategoricalSelector('family')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('class',
         Pipeline(
             [('select', CategoricalSelector('class')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('perishable',
         Pipeline(
             [('select', CategoricalSelector('perishable')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('city',
         Pipeline(
             [('select', CategoricalSelector('city')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('state',
         Pipeline(
             [('select', CategoricalSelector('state')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('type',
         Pipeline(
             [('select', CategoricalSelector('type')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('cluster',
         Pipeline(
             [('select', CategoricalSelector('cluster')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Year',
         Pipeline(
             [('select', CategoricalSelector('Year')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Month',
         Pipeline(
             [('select', CategoricalSelector('Month')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Week',
         Pipeline(
             [('select', CategoricalSelector('Week')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Day',
         Pipeline(
             [('select', CategoricalSelector('Day')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Dayofweek',
         Pipeline(
             [('select', CategoricalSelector('Dayofweek')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Dayofyear',
         Pipeline(
             [('select', CategoricalSelector('Dayofyear')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_month_end',
         Pipeline(
             [('select', CategoricalSelector('Is_month_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_month_start',
         Pipeline(
             [('select', CategoricalSelector('Is_month_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_quarter_end',
         Pipeline(
             [('select', CategoricalSelector('Is_quarter_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_quarter_start',
         Pipeline(
             [('select', CategoricalSelector('Is_quarter_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_year_end',
         Pipeline(
             [('select', CategoricalSelector('Is_year_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('Is_year_start',
         Pipeline(
             [('select', CategoricalSelector('Is_year_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        
        ('dcoilwtico_na',
         Pipeline(
             [('select', CategoricalSelector('dcoilwtico_na')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
        ('transactions_na',
         Pipeline(
             [('select', CategoricalSelector('transactions_na')),
              ('oe', ce.OrdinalEncoder(handle_missing="value", handle_unknown='value'))])),
                                       
       # cont columns
        ('dcoilwtico',Pipeline([('select', ItemSelector('dcoilwtico')),
                                ('fe', fe.ArbitraryNumberImputer(arbitrary_number=-1))])),
        ('transactions',Pipeline([('select', ItemSelector('transactions')),
                                ('fe', fe.ArbitraryNumberImputer(arbitrary_number=-1))])),
        ('unit_saleslag16',Pipeline([('select', ItemSelector('unit_saleslag16'))])),
        ('unit_saleslag17',Pipeline([('select', ItemSelector('unit_saleslag17'))])),
        ('unit_saleslag18',Pipeline([('select', ItemSelector('unit_saleslag18'))])),
        ('unit_saleslag19',Pipeline([('select', ItemSelector('unit_saleslag19'))])),
        ('unit_saleslag20',Pipeline([('select', ItemSelector('unit_saleslag20'))])),
        ('unit_saleslag21',Pipeline([('select', ItemSelector('unit_saleslag21'))])),
        ('unit_saleslag22',Pipeline([('select', ItemSelector('unit_saleslag22'))])),
        ('unit_saleslag30',Pipeline([('select', ItemSelector('unit_saleslag30'))])),
        ('unit_saleslag60',Pipeline([('select', ItemSelector('unit_saleslag60'))])),
        ('ramp_upHoliday',Pipeline([('select', ItemSelector('ramp_upHoliday'))])),
        ('ramp_downHoliday',Pipeline([('select', ItemSelector('ramp_downHoliday'))])),
        ('ramp_uponpromotion',Pipeline([('select', ItemSelector('ramp_uponpromotion'))])),
        ('ramp_downonpromotion',Pipeline([('select', ItemSelector('ramp_downonpromotion'))])),
        ('mean_7',Pipeline([('select', ItemSelector('mean_7'))])),
        ('std_7',Pipeline([('select', ItemSelector('std_7'))])),
        ('mean_30',Pipeline([('select', ItemSelector('mean_30'))])),
        ('std_30',Pipeline([('select', ItemSelector('std_30'))])),
        ('mean_60',Pipeline([('select', ItemSelector('mean_60'))])),
        ('std_60',Pipeline([('select', ItemSelector('std_60'))])),
    ], n_jobs=1)

    return vectorizer_tree

def create_feature_vectorizer_with_nan():
    vectorizer_tree = FeatureUnion([
        ('store_nbr',
         Pipeline([('select', CategoricalSelector('store_nbr')),
                   ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('item_nbr',
         Pipeline([('select', CategoricalSelector('item_nbr')),
                   ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('onpromotion',
         Pipeline([
             ('select', CategoricalSelector('onpromotion')),
             ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('entity',
         Pipeline(
             [('select', CategoricalSelector('entity')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Holiday',
         Pipeline(
             [('select', CategoricalSelector('Holiday')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('family',
         Pipeline(
             [('select', CategoricalSelector('family')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('class',
         Pipeline(
             [('select', CategoricalSelector('class')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('perishable',
         Pipeline(
             [('select', CategoricalSelector('perishable')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('city',
         Pipeline(
             [('select', CategoricalSelector('city')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('state',
         Pipeline(
             [('select', CategoricalSelector('state')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('type',
         Pipeline(
             [('select', CategoricalSelector('type')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('cluster',
         Pipeline(
             [('select', CategoricalSelector('cluster')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Year',
         Pipeline(
             [('select', CategoricalSelector('Year')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Month',
         Pipeline(
             [('select', CategoricalSelector('Month')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Week',
         Pipeline(
             [('select', CategoricalSelector('Week')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Day',
         Pipeline(
             [('select', CategoricalSelector('Day')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Dayofweek',
         Pipeline(
             [('select', CategoricalSelector('Dayofweek')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Dayofyear',
         Pipeline(
             [('select', CategoricalSelector('Dayofyear')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_month_end',
         Pipeline(
             [('select', CategoricalSelector('Is_month_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_month_start',
         Pipeline(
             [('select', CategoricalSelector('Is_month_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_quarter_end',
         Pipeline(
             [('select', CategoricalSelector('Is_quarter_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_quarter_start',
         Pipeline(
             [('select', CategoricalSelector('Is_quarter_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_year_end',
         Pipeline(
             [('select', CategoricalSelector('Is_year_end')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
        ('Is_year_start',
         Pipeline(
             [('select', CategoricalSelector('Is_year_start')),
              ('oe', ce.OrdinalEncoder(handle_missing="return_nan", handle_unknown='return_nan'))])),
                                       
       # cont columns
        ('dcoilwtico',Pipeline([('select', ItemSelector('dcoilwtico'))])),
        ('transactions',Pipeline([('select', ItemSelector('transactions'))])),
        ('unit_saleslag16',Pipeline([('select', ItemSelector('unit_saleslag16'))])),
        ('unit_saleslag17',Pipeline([('select', ItemSelector('unit_saleslag17'))])),
        ('unit_saleslag18',Pipeline([('select', ItemSelector('unit_saleslag18'))])),
        ('unit_saleslag19',Pipeline([('select', ItemSelector('unit_saleslag19'))])),
        ('unit_saleslag20',Pipeline([('select', ItemSelector('unit_saleslag20'))])),
        ('unit_saleslag21',Pipeline([('select', ItemSelector('unit_saleslag21'))])),
        ('unit_saleslag22',Pipeline([('select', ItemSelector('unit_saleslag22'))])),
        ('unit_saleslag30',Pipeline([('select', ItemSelector('unit_saleslag30'))])),
        ('unit_saleslag60',Pipeline([('select', ItemSelector('unit_saleslag60'))])),
        ('ramp_upHoliday',Pipeline([('select', ItemSelector('ramp_upHoliday'))])),
        ('ramp_downHoliday',Pipeline([('select', ItemSelector('ramp_downHoliday'))])),
        ('ramp_uponpromotion',Pipeline([('select', ItemSelector('ramp_uponpromotion'))])),
        ('ramp_downonpromotion',Pipeline([('select', ItemSelector('ramp_downonpromotion'))])),
        ('mean_7',Pipeline([('select', ItemSelector('mean_7'))])),
        ('std_7',Pipeline([('select', ItemSelector('std_7'))])),
        ('mean_30',Pipeline([('select', ItemSelector('mean_30'))])),
        ('std_30',Pipeline([('select', ItemSelector('std_30'))])),
        ('mean_60',Pipeline([('select', ItemSelector('mean_60'))])),
        ('std_60',Pipeline([('select', ItemSelector('std_60'))])),
    ], n_jobs=1)

    return vectorizer_tree

In [None]:
vectorizer_without_nan = create_feature_vectorizer_without_nan()
vectorizer_with_nan = create_feature_vectorizer_with_nan()

In [None]:
print(vectorizer_without_nan.fit_transform(df).shape)
print(vectorizer_with_nan.fit_transform(df).shape)

(2944549, 47)
(2944549, 45)


## 3 Model Application

In [None]:
TARGET = 'unit_sales'
target_transformer = "log1p" #1st place winning solution transformed also in log1p space
num_entities = df.entity.nunique() 
forecast_horizon = 16
group_ids = 'entity'

Splitting last 16 days for the test dataset and 16 days more for the validation dataset.

In [None]:
train_val_df = df.loc[df.date<"20170715"]
valid_df = df.loc[(df.date<"20170731") & (df.date>="20170715")]
train_df = df.loc[(df.date<"20170731")]
test_df = df.loc[df.date>="20170731"]

## 3.1 LightGBM

In [None]:
lightgbm_params = {
    "boosting_type": 'gbdt',
    "objective": 'regression',
    "n_jobs": -1, # num of cores available
    "min_split_gain": 0.0,
    "min_data_in_leaf": 1,
    "max_bin": 1024,
    "num_leaves": 64, # "max_leaves": 64,
    "max_depth": -1,
    "learning_rate": 0.1,
    "n_estimators": 1000,
    "feature_fraction": 0.7,
    "bagging_fraction": 0.7,
    "bagging_freq": 1,
    "seed": 1,
    "lambda": 1,
}

early_stopping_round = 20

start_time = time.perf_counter()

# fitting model on train set with early stopping on valid set
lightgbm_reg = LightGBM(vectorizer_with_nan, target_transformer=target_transformer)
lightgbm_fit_params = {**lightgbm_params, "early_stopping_round": early_stopping_round}
lightgbm_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=valid_df[TARGET], params=lightgbm_fit_params, verbose=True)
lightgbm_best_iteration = lightgbm_reg.best_iteration
print("Early stopping performed. Best iteration:", lightgbm_best_iteration)

# fitting model on train+val set with best_iteration
lightgbm_full_train_reg = LightGBM(vectorizer_with_nan, target_transformer=target_transformer)
lightgbm_full_train_params = {**lightgbm_params, "n_estimators": lightgbm_best_iteration}
lightgbm_full_train_reg.fit(train_df, TARGET, params=lightgbm_full_train_params, verbose=True)

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)

# predicting on test set with our fully trained model
lightgbm_pred = lightgbm_full_train_reg.predict(test_df)
lightgbm_metrics = lightgbm_full_train_reg.metrics(test_df[TARGET], lightgbm_pred)
lightgbm_metrics['time'] = full_time
lightgbm_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(lightgbm_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

[1]	valid_0's l2: 0.625509
[2]	valid_0's l2: 0.542085
[3]	valid_0's l2: 0.481812
[4]	valid_0's l2: 0.422787
[5]	valid_0's l2: 0.375107
[6]	valid_0's l2: 0.340915
[7]	valid_0's l2: 0.310631
[8]	valid_0's l2: 0.287077
[9]	valid_0's l2: 0.270754
[10]	valid_0's l2: 0.252195
[11]	valid_0's l2: 0.237963
[12]	valid_0's l2: 0.228619
[13]	valid_0's l2: 0.219683
[14]	valid_0's l2: 0.213411
[15]	valid_0's l2: 0.206719
[16]	valid_0's l2: 0.200759
[17]	valid_0's l2: 0.196439
[18]	valid_0's l2: 0.191201
[19]	valid_0's l2: 0.186941
[20]	valid_0's l2: 0.185299
[21]	valid_0's l2: 0.183451
[22]	valid_0's l2: 0.183426
[23]	valid_0's l2: 0.182801
[24]	valid_0's l2: 0.1816
[25]	valid_0's l2: 0.182236
[26]	valid_0's l2: 0.182239
[27]	valid_0's l2: 0.181268
[28]	valid_0's l2: 0.18032
[29]	valid_0's l2: 0.179405
[30]	valid_0's l2: 0.179229
[31]	valid_0's l2: 0.178696
[32]	valid_0's l2: 0.178264
[33]	valid_0's l2: 0.178646
[34]	valid_0's l2: 0.177428
[35]	valid_0's l2: 0.178491
[36]	valid_0's l2: 0.178259
[37]

In [None]:
lightgbm_metrics

Out[21]: {'mse': 9.744667586749113,
 'mae': 1.4488742733934084,
 'rmse': 3.1216450129297395,
 'mape': 299570476847171.8,
 'rmspe': 1528586428276296.5,
 'time': 22.59,
 'rmsle': 0.49697262726339236}

##3.2 Bootstrapping (Data Sampling)

##3.3 Quantile Regression

In [None]:
lightgbm_quant_params = {
    "boosting_type": 'gbdt',
    "objective": 'quantile',
    "n_jobs": -1, # num of cores available
    "min_split_gain": 0.0,
    "min_data_in_leaf": 1,
    "max_bin": 1024,
    "num_leaves": 64, # "max_leaves": 64,
    "max_depth": -1,
    "learning_rate": 0.1,
    "n_estimators": 1000,
    "feature_fraction": 0.7,
    "bagging_fraction": 0.7,
    "bagging_freq": 1, ########IT SHOULD BE SET IN EVERY TS NOTEBOOK LIKE REPL, TURNO, FAVORI, ROSSM FOR LGB NGB and PGB###################
    "seed": 1,
    "lambda": 1
}

early_stopping_round = 20
quantiles = [0.1, 0.5, 0.9]

start_time = time.perf_counter()

# fitting model on train set with early stopping on valid set
lightgbm_quant_reg = LightGBMQuantileRegressor(vectorizer_with_nan, target_transformer=target_transformer)
lightgbm_quant_fit_params = {**lightgbm_quant_params, "early_stopping_round": early_stopping_round}
lightgbm_quant_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=valid_df[TARGET], params=lightgbm_quant_fit_params, quantiles=quantiles, verbose=True)
lightgbm_quant_best_iteration = int(np.mean(list(lightgbm_quant_reg.best_iterations.values())))
print("Early stopping performed. Best iteration:", lightgbm_quant_best_iteration)

 # fitting model on train+val set with best_iteration
lightgbm_full_train_quant_reg = LightGBMQuantileRegressor(vectorizer_with_nan, target_transformer=target_transformer)
lightgbm_quant_full_train_params = {**lightgbm_quant_params, "n_estimators": lightgbm_quant_best_iteration}
lightgbm_full_train_quant_reg.fit(train_df, TARGET, params=lightgbm_quant_full_train_params, quantiles=quantiles, verbose=True)
lightgbm_full_train_quant_reg.best_iterations = lightgbm_quant_reg.best_iterations

# predicting on test set with our fully trained model
lightgbm_quant_pred = lightgbm_full_train_quant_reg.predict(test_df)
lightgbm_quant_metrics = lightgbm_full_train_quant_reg.metrics(test_df[TARGET], lightgbm_quant_pred, confidence_interval_quantiles = [0.1, 0.9])

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
lightgbm_quant_metrics['time'] = full_time
lightgbm_quant_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(lightgbm_quant_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

[1]	valid_0's quantile: 0.0931991
[2]	valid_0's quantile: 0.0902572
[3]	valid_0's quantile: 0.0867108
[4]	valid_0's quantile: 0.0829177
[5]	valid_0's quantile: 0.0799785
[6]	valid_0's quantile: 0.0768833
[7]	valid_0's quantile: 0.0741146
[8]	valid_0's quantile: 0.0716478
[9]	valid_0's quantile: 0.0693986
[10]	valid_0's quantile: 0.067457
[11]	valid_0's quantile: 0.0656036
[12]	valid_0's quantile: 0.0639623
[13]	valid_0's quantile: 0.0625609
[14]	valid_0's quantile: 0.0611839
[15]	valid_0's quantile: 0.0600009
[16]	valid_0's quantile: 0.0589267
[17]	valid_0's quantile: 0.057978
[18]	valid_0's quantile: 0.0569821
[19]	valid_0's quantile: 0.0561747
[20]	valid_0's quantile: 0.0554884
[21]	valid_0's quantile: 0.0569074
[22]	valid_0's quantile: 0.0569081
[23]	valid_0's quantile: 0.0563937
[24]	valid_0's quantile: 0.0562239
[25]	valid_0's quantile: 0.0561608
[26]	valid_0's quantile: 0.0567022
[27]	valid_0's quantile: 0.0559221
[28]	valid_0's quantile: 0.0552548
[29]	valid_0's quantile: 0.0549

In [None]:
lightgbm_quant_metrics

Out[23]: {'mse': 10.467355762198327,
 'mae': 1.4338704623754797,
 'rmse': 3.2353293127900176,
 'mape': 263500096209026.03,
 'rmspe': 1672063133644776.2,
 'avg_interval_length': 3.558809584072579,
 'sharpness': 3.558809584072579,
 'coverage': 0.7525,
 'time': 39.77,
 'rmsle': 0.5040003637263841}

## 3.4 NGBoost

### 3.4.1 NGBoost with NLL

In [None]:
from ngboost.scores import LogScore, CRPScore
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor

ngboost_base_params = {
    'boosting': 'rf',
    'n_estimators': 1,
    'bagging_fraction': 0.99, #is not allowed to be 1.0 for RF
    'bagging_freq': 1 # is not allowed to be 0 for RF
}

learner = LGBMRegressor(**ngboost_base_params)

ngboost_nll_params = {
    'Score':LogScore, #CRPScore,
    'Base':learner, #default_tree_learner,
    'natural_gradient':True,
    "learning_rate": 0.1,
    "n_estimators": 1000,
    "col_sample": 0.7, # = feature_fraction
    "minibatch_frac": 0.7, # = bagging_fraction   
    "random_state": 1, # = seed
} 

early_stopping_round = 20
quantiles = [0.05, 0.1, 0.5, 0.9, 0.95]

start_time = time.perf_counter()

# fitting model on train set with early stopping on valid set
ngboost_nll_early_stopping_params = {**ngboost_nll_params, "early_stopping_rounds": early_stopping_round}
ngboost_nll_reg = NGBoost(vectorizer_without_nan, target_transformer=target_transformer, distribution=DistEnum.NORMAL, **ngboost_nll_early_stopping_params)
ngboost_nll_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=np.array(valid_df[TARGET]), verbose=True)
ngboost_nll_best_iteration = ngboost_nll_reg.best_iteration
print("Early stopping performed. Best iteration:", ngboost_nll_best_iteration)

# fitting model on train+val set with best_iteration
ngboost_nll_full_train_params = {**ngboost_nll_params, "n_estimators": ngboost_nll_best_iteration}
ngboost_nll_full_train_reg = NGBoost(vectorizer_without_nan, target_transformer=target_transformer, distribution=DistEnum.NORMAL, **ngboost_nll_full_train_params)
ngboost_nll_full_train_reg.fit(train_df, TARGET, verbose=True)

# predicting on test set with our fully trained model
ngboost_nll_pred = ngboost_nll_full_train_reg.predict(test_df, quantiles=quantiles, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES, PredEnum.SAMPLES, PredEnum.DISTRIBUTION_PARAMS], sample_size=400)

ngboost_nll_metrics = ngboost_nll_full_train_reg.metrics(np.array(test_df[TARGET]), ngboost_nll_pred, confidence_interval_quantiles=[0.1,0.9])

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
ngboost_nll_metrics['time'] = full_time
ngboost_nll_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(ngboost_nll_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

[iter 0] loss=1.3359 val_loss=1.2781 scale=1.0000 norm=0.9415
[LightGBM


agging_fraction is set=0.99, subsample=1.0 will be ignored. Current value: bagging_fraction=0.99
== Early stopping achieved.
== Best iteration / VAL28 (val_loss=0.4152)
Elapsed time for fitting NGBoost model: 304.07 s
Early stopping performed. Best iteration: 28
[iter 0] loss=1.3314 val_loss=0.0000 scale=1.0000 norm=0.9382
Elapsed time for fitting NGBoost model: 178.14 s
  nll_temp = torch.tensor([-dist[i].log_prob(torch.tensor(y_test[i])) for i in range(len(dist))])


In [None]:
ngboost_nll_metrics

Out[63]: {'mse': 10.038652982055439,
 'mae': 1.4261140214917156,
 'rmse': 3.168383338874171,
 'mape': 235535771319092.97,
 'rmspe': 842913698659230.9,
 'avg_interval_length': 3.2823714555301238,
 'sharpness': 3.2823714555301238,
 'coverage': 0.7875,
 'crps': 1.0863799344855578,
 'nll_from_samples': 12.274641166308252,
 'nll': 2.1313568753820213,
 'time': 486.9,
 'rmsle': 0.5019045654594531}

### 3.4.2 NGBoost with CRPS

In [None]:
from ngboost.scores import LogScore, CRPScore
from sklearn.tree import DecisionTreeRegressor
from lightgbm import LGBMRegressor

ngboost_crps_params = {
    'Score': CRPScore,
    'Base':learner, #default_tree_learner,
    'natural_gradient':True,
    "n_estimators": 1000,
    "col_sample": 0.7, # = feature_fraction
    "minibatch_frac": 0.7, # = bagging_fraction   
    "random_state": 1, # = seed
} 

early_stopping_round = 20
quantiles = [0.05, 0.1, 0.5, 0.9, 0.95]

start_time = time.perf_counter()

# fitting model on train set with early stopping on valid set
ngboost_crps_early_stopping_params = {**ngboost_crps_params, "early_stopping_rounds": early_stopping_round}
ngboost_crps_reg = NGBoost(vectorizer_without_nan, target_transformer=target_transformer, distribution=DistEnum.NORMAL, **ngboost_crps_early_stopping_params)
ngboost_crps_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=np.array(valid_df[TARGET]), verbose=True)
ngboost_crps_best_iteration = ngboost_crps_reg.best_iteration
print("Early stopping performed. Best iteration:", ngboost_crps_best_iteration)

# fitting model on train+val set with best_iteration
ngboost_crps_full_train_params = {**ngboost_crps_params, "n_estimators": ngboost_crpst_best_iteration}
ngboost_crps_full_train_reg = NGBoost(vectorizer_without_nan, target_transformer=target_transformer, distribution=DistEnum.NORMAL, **ngboost_crps_full_train_params)
ngboost_crps_full_train_reg.fit(train_df, TARGET, verbose=True)

# predicting on test set with our fully trained model
ngboost_crps_pred = ngboost_crps_full_train_reg.predict(test_df, quantiles=quantiles, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES, PredEnum.SAMPLES, PredEnum.DISTRIBUTION_PARAMS], sample_size=400)

ngboost_crps_metrics = ngboost_crps_full_train_reg.metrics(np.array(test_df[TARGET]), ngboost_crps_pred, confidence_interval_quantiles=[0.1,0.9])

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
ngboost_crps_metrics['time'] = full_time
ngboost_crps_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(ngboost_crps_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

[iter 0] loss=0.5259 val_loss=0.5288 scale=1.0000 norm=1.2463
[LightGBM


  self.scale = np.exp(params[1])
  self.var = self.scale**2
  Z = (Y - self.loc) / self.scale
  return np.exp(-x**2/2.0) / _norm_pdf_C
  return self.scale * (
  self.scale = np.exp(params[1])
  Z = (Y - self.loc) / self.scale
  return np.exp(-x**2/2.0) / _norm_pdf_C
  return self.scale * (
  self.var = self.scale**2
  self.scale = np.exp(params[1])
  Z = (Y - self.loc) / self.scale
  return np.exp(-x**2/2.0) / _norm_pdf_C
  return self.scale * (
  self.var = self.scale**2
  self.scale = np.exp(params[1])
  self.var = self.scale**2
  Z = (Y - self.loc) / self.scale
  return np.exp(-x**2/2.0) / _norm_pdf_C
  return self.scale * (
  Z = (Y - self.loc) / self.scale
  self.scale = np.exp(params[1])
  self.var = self.scale**2
  Z = (Y - self.loc) / self.scale
  return np.exp(-x**2/2.0) / _norm_pdf_C
  return self.scale * (
  self.scale = np.exp(params[1])
  self.var = self.scale**2
  Z = (Y - self.loc) / self.scale
  Z

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925240>[0m in [0;36m<cell line: 28>[0;34m()[0m
[1;32m     26[0m [0;34m[0m[0m
[1;32m     27[0m [0;31m# fitting model on train+val set with best_iteration[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0;32m---> 28[0;31m [0mngboost_crps_full_train_params[0m [0;34m=[0m [0;34m{[0m[0;34m**[0m[0mngboost_crps_params[0m[0;34m,[0m [0;34m"n_estimators"[0m[0;34m:[0m [0mngboos_crpst_best_iteration[0m[0;34m}[0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m     29[0m [0mngboost_crps_full_train_reg[0m [0;34m=[0m [0mNGBoost[0m[0;34m([0m[0mvectorizer_without_nan[0m[0;34m,[0m [0mtarget_transformer[0m[0;34m=[0m[0mtarget_transformer[0m[0;34m,[0m [0mdistribution[0m[0;34m=[0m[0mDistEnum[0m[0;34m.[0m[0mNORMAL[0m[0;34m,[0m [0;34m**[0m[0mngboost_crps_full_tra

In [None]:
ngboost_crps_metrics

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925241>[0m in [0;36m<cell line: 1>[0;34m()[0m
[0;32m----> 1[0;31m [0mngboost_crps_metrics[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31mNameError[0m: name 'ngboost_crps_metrics' is not defined

## 3.5 PGBM

### 3.5.1 PGBM normal

In [None]:
pgbm_normal_params = {
    'derivatives': 'exact',
    'distribution': 'normal',
    'device': 'gpu',
    'gpu_device_id': 0,
    "n_jobs": -1, # num of cores available
    "min_split_gain": 0.0,
    "min_data_in_leaf": 1,
    "max_bin": 1024,
    "max_leaves": 64,
    "max_depth": -1,
    "learning_rate": 0.1,
    "n_estimators": 1000,
    "feature_fraction": 0.7,
    "bagging_fraction": 0.7,
    "seed": 1,
    "lambda": 1,
}

early_stopping_round = 20
quantiles = [0.05, 0.1, 0.5, 0.9, 0.95]

start_time = time.perf_counter()
    
# fitting model on train set with early stopping on valid set
pgbm_normal_fit_params = {**pgbm_normal_params, "early_stopping_round": early_stopping_round}
pgbm_normal_reg = PGBM(vectorizer_without_nan, target_transformer=target_transformer)
pgbm_normal_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=np.array(valid_df[TARGET]), params=pgbm_normal_fit_params, apply_optimize_distribution=False, verbose=True)
pgbm_normal_best_iteration = pgbm_normal_reg.best_iteration
print("Early stopping performed. Best iteration:", pgbm_normal_best_iteration)

# fitting model on train+val set with best_iteration
pgbm_normal_full_fit_params = {**pgbm_normal_params, "n_estimators": pgbm_normal_best_iteration}
pgbm_normal_full_train_reg = PGBM(vectorizer_without_nan, target_transformer=target_transformer)
pgbm_normal_full_train_reg.fit(train_df, TARGET, params=pgbm_normal_full_fit_params, apply_optimize_distribution=False, verbose=True)

# predicting on test set with our fully trained model
pgbm_normal_pred = pgbm_normal_full_train_reg.predict(test_df, quantiles=quantiles, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES, PredEnum.SAMPLES, PredEnum.DISTRIBUTION_PARAMS], sample_size=300)

pgbm_normal_metrics = pgbm_normal_full_train_reg.metrics(np.array(test_df[TARGET]), pgbm_normal_pred, confidence_interval_quantiles=[0.1,0.9])

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
pgbm_normal_metrics['time'] = full_time
pgbm_normal_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(pgbm_normal_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

Training on GPU
Estimator 0/1000, Train metric: 0.8545, Validation metric: 0.7618
Estimator 1/1000, Train metric: 0.8134, Validation metric: 0.7336
Estimator 2/1000, Train metric: 0.7630, Validation metric: 0.6859
Estimator 3/1000, Train metric: 0.7181, Validation metric: 0.6423
Estimator 4/1000, Train metric: 0.6917, Validation metric: 0.6240
Estimator 5/1000, Train metric: 0.6691, Validation metric: 0.6092
Estimator 6/1000, Train metric: 0.6372, Validation metric: 0.5766
Estimator 7/1000, Train metric: 0.6199, Validation metric: 0.5659
Estimator 8/1000, Train metric: 0.6050, Validation metric: 0.5574
Estimator 9/1000, Train metric: 0.5813, Validation metric: 0.5337
Estimator 10/1000, Train metric: 0.5687, Validation metric: 0.5272
Estimator 11/1000, Train metric: 0.5487, Validation metric: 0.5094
Estimator 12/1000, Train metric: 0.5382, Validation metric: 0.5042
Estimator 13/1000, Train metric: 0.5216, Validation metric: 0.4888
Estimator 14/1000, Train metric: 0.5070, Validation metr

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925243>[0m in [0;36m<cell line: 44>[0;34m()[0m
[1;32m     42[0m [0mend_time[0m [0;34m=[0m [0mtime[0m[0;34m.[0m[0mperf_counter[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[1;32m     43[0m [0mfull_time[0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0mround[0m[0;34m([0m[0mend_time[0m [0;34m-[0m [0mstart_time[0m[0;34m,[0m [0;36m2[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;32m---> 44[0;31m [0mpgbm_normal_metrics[0m[0;34m[[0m[0;34m'time'[0m[0;34m][0m [0;34m=[0m [0mfull_time[0m[0;34m[0m[0;34m[0m[0m
[0m[1;32m     45[0m [0mpgbm_normal_metrics[0m[0;34m[[0m[0;34m'rmsle'[0m[0;34m][0m [0;34m=[0m [0mnp[0m[0;34m.[0m[0msqrt[0m[0;34m([0m[0;34m([0m[0;34m([0m [0mnp[0m[0;34m.[0m[0mlog1p[0m[0;34m([0m[0mtest_df[0m[0;34m[[

In [None]:
pgbm_normal_metrics

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925245>[0m in [0;36m<cell line: 1>[0;34m()[0m
[0;32m----> 1[0;31m [0mpgbm_normal_metrics[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31mNameError[0m: name 'pgbm_normal_metrics' is not defined

### 3.5.2 PGBM best dist.

In [None]:
pgbm_dist_params = {
    'derivatives': 'exact',
    'distribution': 'normal',
    'device': 'gpu',
    'gpu_device_id': 0,
    "n_jobs": -1, # num of cores available
    "min_split_gain": 0.0,
    "min_data_in_leaf": 1,
    "max_bin": 1024,
    "max_leaves": 64,
    "max_depth": -1,
    "learning_rate": 0.1,
    "n_estimators": 1000,
    "feature_fraction": 0.7,
    "bagging_fraction": 0.7,
    "seed": 1,
    "lambda": 1,
}

early_stopping_round = 20
quantiles = [0.05, 0.1, 0.5, 0.9, 0.95]

start_time = time.perf_counter()
    
# fitting model on train set with early stopping on valid set
pgbm_dist_fit_params = {**pgbm_dist_params, "early_stopping_round": early_stopping_round}
pgbm_dist_reg = PGBM(vectorizer_without_nan, target_transformer=target_transformer)
pgbm_dist_reg.fit(train_val_df, TARGET, X_val=valid_df, y_val=np.array(valid_df[TARGET]), params=pgbm_dist_fit_params, apply_optimize_distribution=True, verbose=True)
pgbm_dist_best_iteration = pgbm_dist_reg.best_iteration
print("Early stopping performed. Best iteration:", pgbm_dist_best_iteration)
pgbm_dist_best_distribution = pgbm_dist_reg.model.distribution
pgbm_dist_best_tree_correlation = pgbm_dist_reg.model.tree_correlation

# fitting model on train+val set with best_iteration
pgbm_dist_full_fit_params = {**pgbm_dist_params, "n_estimators": pgbm_dist_best_iteration}
pgbm_dist_full_train_reg = PGBM(vectorizer_without_nan, target_transformer=target_transformer)
pgbm_dist_full_train_reg.fit(train_df, TARGET, params=pgbm_dist_full_fit_params, apply_optimize_distribution=False, verbose=True)

# predicting on test set with our fully trained model
pgbm_dist_full_train_reg.model.distribution = pgbm_dist_best_distribution
pgbm_dist_full_train_reg.model.tree_correlation = pgbm_dist_best_tree_correlation
pgbm_dist_pred = pgbm_dist_full_train_reg.predict(test_df, quantiles=quantiles, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES, PredEnum.SAMPLES, PredEnum.DISTRIBUTION_PARAMS], sample_size=300)

pgbm_dist_metrics = pgbm_dist_full_train_reg.metrics(np.array(test_df[TARGET]), pgbm_dist_pred, confidence_interval_quantiles=[0.1,0.9])

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
pgbm_dist_metrics['time'] = full_time
pgbm_dist_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(pgbm_dist_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

Training on GPU
Estimator 0/1000, Train metric: 0.8545, Validation metric: 0.7618
Estimator 1/1000, Train metric: 0.8134, Validation metric: 0.7336
Estimator 2/1000, Train metric: 0.7630, Validation metric: 0.6859
Estimator 3/1000, Train metric: 0.7181, Validation metric: 0.6423
Estimator 4/1000, Train metric: 0.6917, Validation metric: 0.6240
Estimator 5/1000, Train metric: 0.6691, Validation metric: 0.6092
Estimator 6/1000, Train metric: 0.6372, Validation metric: 0.5766
Estimator 7/1000, Train metric: 0.6199, Validation metric: 0.5659
Estimator 8/1000, Train metric: 0.6050, Validation metric: 0.5574
Estimator 9/1000, Train metric: 0.5813, Validation metric: 0.5337
Estimator 10/1000, Train metric: 0.5687, Validation metric: 0.5272
Estimator 11/1000, Train metric: 0.5487, Validation metric: 0.5095
Estimator 12/1000, Train metric: 0.5382, Validation metric: 0.5042
Estimator 13/1000, Train metric: 0.5215, Validation metric: 0.4886
Estimator 14/1000, Train metric: 0.5070, Validation metr

In [None]:
pgbm_dist_metrics

Out[31]: {'mse': 9.824390287359057,
 'mae': 1.4190316974752932,
 'rmse': 3.1343883434187054,
 'mape': 311193145519636.9,
 'rmspe': 1202240175802866.8,
 'avg_interval_length': 4.181321862150668,
 'sharpness': 4.181321862150668,
 'coverage': 0.53,
 'crps': 373377.4313427603,
 'nll_from_samples': 1.4241702424190785,
 'nll': nan,
 'time': 38.5,
 'rmsle': 0.4823307999100894}

## 3.6 LSF

In [None]:
lsf_params = {'min_bin_size': np.log(len(train_df))**2}

base_estimator = lightgbm_full_train_reg.model
quantiles = [0.05, 0.1, 0.5, 0.9, 0.95]

start_time = time.perf_counter()
    
# fitting model on train set with early stopping on valid set
# since we cannot apply validation we are only training on the full train dataset once
lsf_reg = LSF(vectorizer_with_nan, target_transformer=target_transformer, base_model=base_estimator, model_trained=True, **lsf_params)
lsf_reg.fit(train_df, TARGET, verbose=True)

# predicting on test set with our fully trained model
lsf_pred = lsf_reg.predict(test_df, quantiles=[0.05, 0.1, 0.5, 0.9, 0.95], prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES, PredEnum.SAMPLES])
lsf_metrics = lsf_reg.metrics(np.array(test_df[TARGET]), lsf_pred, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES], confidence_interval_quantiles=[0.1,0.9])
lsf_metrics['rmsle'] = np.sqrt((( np.log1p(test_df[TARGET]) - np.log1p(lsf_pred[PredEnum.POINT_ESTIMATES]) )**2).mean())

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
lsf_metrics['time'] = full_time

Elapsed time for fitting LSF model: 7.26 s


[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925250>[0m in [0;36m<cell line: 14>[0;34m()[0m
[1;32m     12[0m [0;34m[0m[0m
[1;32m     13[0m [0;31m# predicting on test set with our fully trained model[0m[0;34m[0m[0;34m[0m[0;34m[0m[0m
[0;32m---> 14[0;31m [0mlsf_pred[0m [0;34m=[0m [0mlsf_reg[0m[0;34m.[0m[0mpredict[0m[0;34m([0m[0msmall_test_df[0m[0;34m,[0m [0mquantiles[0m[0;34m=[0m[0;34m[[0m[0;36m0.05[0m[0;34m,[0m [0;36m0.1[0m[0;34m,[0m [0;36m0.5[0m[0;34m,[0m [0;36m0.9[0m[0;34m,[0m [0;36m0.95[0m[0;34m][0m[0;34m,[0m [0mprediction_types[0m[0;34m=[0m[0;34m[[0m[0mPredEnum[0m[0;34m.[0m[0mPOINT_ESTIMATES[0m[0;34m,[0m [0mPredEnum[0m[0;34m.[0m[0mQUANTILES[0m[0;34m,[0m [0mPredEnum[0m[0;34m.[0m[0mSAMPLES[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m


In [None]:
lsf_metrics

[0;31m---------------------------------------------------------------------------[0m
[0;31mNameError[0m                                 Traceback (most recent call last)
[0;32m<command-228038575925251>[0m in [0;36m<cell line: 1>[0;34m()[0m
[0;32m----> 1[0;31m [0mlsf_metrics[0m[0;34m[0m[0;34m[0m[0m
[0m
[0;31mNameError[0m: name 'lsf_metrics' is not defined

## 3.7 TFT in PytorchFC

### 3.7.1 Prepare Splits for NN Model

For this Neural Network Model the target it not only inferred by the features in the same row, but also by features of "previous" rows as they will be encoded to better predict the upcoming values.
We do not only need to provide the corresponding rows for the target in our forecast horizon , but also the previous features and target values in lookback length in order to predict. This means that we do not split our dataframe as for tabular data.
We create a `full_train_df`, that contains information from start until holdout DATE (train_val split will be done internally in fit method) and a `full_test_df` which contains information from start until end of holdout DATE.

In [None]:
df[cat_vars].astype('category')
full_train_df = df[df['date'] < "2017-07-31"].sort_values(['entity', 'date'])
full_test_df = df.copy()
display(full_train_df.tail())

date,entity,store_nbr,item_nbr,unit_sales,onpromotion,Holiday,family,class,perishable,city,state,type,cluster,dcoilwtico,transactions,promo_missing,unit_saleslag16,unit_saleslag17,unit_saleslag18,unit_saleslag19,unit_saleslag20,unit_saleslag21,unit_saleslag22,unit_saleslag30,unit_saleslag60,mean_7,std_7,mean_30,std_30,mean_60,std_60,ramp_upHoliday,ramp_downHoliday,ramp_uponpromotion,ramp_downonpromotion,dcoilwtico_na,transactions_na,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed
2017-07-26T00:00:00.000+0000,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,48.58,1720.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0333333333333333,0.1825741947637432,0.35,0.9356408113901524,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2017,7,30,26,2,207,False,False,False,False,False,False,1501027200.0
2017-07-27T00:00:00.000+0000,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,49.05,1717.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0333333333333333,0.1825741947637432,0.35,0.9356408113901524,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2017,7,30,27,3,208,False,False,False,False,False,False,1501113600.0
2017-07-28T00:00:00.000+0000,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,49.72,1790.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0333333333333333,0.1825741947637432,0.35,0.9356408113901524,0.0,0.0,0.370408163265306,0.370408163265306,False,False,2017,7,30,28,4,209,False,False,False,False,False,False,1501200000.0
2017-07-29T00:00:00.000+0000,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,,2113.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0333333333333333,0.1825741947637432,0.3166666666666666,0.9111673764055632,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2017,7,30,29,5,210,False,False,False,False,False,False,1501286400.0
2017-07-30T00:00:00.000+0000,174677,9,996122,0.0,1,0,3,1124,0,18,12,1,5,,1884.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0333333333333333,0.1825741947637432,0.3166666666666666,0.9111673764055632,0.0,0.0,0.370408163265306,0.370408163265306,True,False,2017,7,30,30,6,211,False,False,False,False,False,False,1501372800.0


Does not allow for missing values: 664242 (31.37%) of dcoilwtico values were found to be NA or infinite (even after encoding). Possible strategies to fix the issue are (a) dropping the variable dcoilwtico, (b) using `NaNLabelEncoder(add_nan=True)` for categorical variables, (c) filling missing values and/or (d) optionally adding a variable indicating filled values

In [None]:
number_imputer = fe.ArbitraryNumberImputer(arbitrary_number=-1)
full_train_df = number_imputer.fit_transform(full_train_df)
full_test_df = number_imputer.fit_transform(full_test_df)

Rename type column because its name caused issues

In [None]:
full_train_df['store_type'] = full_train_df['type']
full_test_df['store_type'] = full_test_df['type']

### 3.7.2 Start Training NN Model

In [None]:
from pytorch_lightning import Trainer as Lightning_Trainer
from pytorch_lightning.callbacks import EarlyStopping
from pytorch_forecasting.data.encoders import TorchNormalizer, NaNLabelEncoder

In [None]:
tft_pytorch_params = {
    'hidden_size': 240, #default 16 paper 240 for Favorita
    'lstm_layers': 2, #default "mostly optimal"
    'dropout': 0.1, #default and paper
    'attention_head_size': 4, #paper used 4
    'learning_rate': 0.001, #default and paper
    'log_interval': -1,
    'log_val_interval': -1,
    'reduce_on_plateau_patience': 1000,  # default learning rate reduction by factor 10
}

trainer_params = {'max_epochs': 1,
                  'accelerator': 'gpu',
                  'devices': 1,
                  'limit_train_batches': 100, # How much of training dataset to check
                  'gradient_clip_algorithm': 'norm', # gradient clipping by norm,
                  'gradient_clip_val': 100 # according to paper max gradient norm 100 
                 }

params_dataloader = {
    'num_workers': 8,
    'batch_size': 128
}

static_cat_vars = ['store_nbr', 'item_nbr', 'entity', 'family', 'class', 'perishable',
       'city', 'state', 'store_type', 'cluster'] 
dynamic_cat_vars = ['onpromotion', 'Holiday', 'Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start', 'Is_year_end', 'Is_year_start', 'dcoilwtico_na', 'transactions_na']
cont_vars = ['dcoilwtico', 'transactions','ramp_upHoliday',
       'ramp_downHoliday', 'ramp_uponpromotion', 'ramp_downonpromotion']

# bring categorical features to type 'str'
full_train_df[static_cat_vars+dynamic_cat_vars] = full_train_df[static_cat_vars+dynamic_cat_vars].astype(str)

full_test_df[static_cat_vars+dynamic_cat_vars] = full_test_df[static_cat_vars+dynamic_cat_vars].astype(str)

# add_time_idx_to_df()
full_train_df = TFTPytorchFC.add_time_idx_to_df(X=full_train_df, group_ids=group_ids)
full_test_df = TFTPytorchFC.add_time_idx_to_df(X=full_test_df, group_ids=group_ids)

# obtain_y_test_out_of_X_test()
tft_y_test = TFTPytorchFC.obtain_y_test_out_of_X_test(X_test=full_test_df, forecast_horizon=forecast_horizon, time_idx="time_index_tft", target=TARGET, group_ids=group_ids)

Time index called "time_index_tft" added to provided dataframe
Time index called "time_index_tft" added to provided dataframe


In [None]:
quantiles = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95]

start_time = time.perf_counter()

lookback = forecast_horizon*3

trainer_params['callbacks'] = EarlyStopping(monitor="val_loss", patience=10, mode="min")
lightning_trainer = Lightning_Trainer(**trainer_params)

tft_reg = TFTPytorchFC(lookback=lookback, forecast_horizon=forecast_horizon, time_idx="time_index_tft", group_ids=[group_ids], static_categoricals=static_cat_vars, time_varying_known_categoricals=dynamic_cat_vars, time_varying_known_reals=cont_vars, time_varying_unknown_reals = [TARGET], quantiles = [0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95])

tft_model_trained = tft_reg.fit(full_train_df, TARGET, lightning_trainer=lightning_trainer, params_tft=tft_pytorch_params, params_dataloader=params_dataloader, params_dataset_creation={}, verbose = True)

tft_pred = tft_reg.predict(full_test_df, prediction_types=[PredEnum.POINT_ESTIMATES, PredEnum.QUANTILES])
tft_metrics = tft_reg.metrics(tft_y_test, tft_pred, confidence_interval_quantiles=[0.1,0.9])
tft_metrics['rmsle'] = np.sqrt((( np.log1p(np.reshape(full_test_df[TARGET], newshape=(-1))) - np.log1p(np.reshape(tft_pred[PredEnum.POINT_ESTIMATES], newshape=(-1))) )**2).mean())

end_time = time.perf_counter()
full_time = np.round(end_time - start_time, 2)
lsf_metrics['time'] = full_time

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(
  rank_zero_warn(
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name                               | Type                            | Params
----------------------------------------------------------------------------------------
0  | loss                               | QuantileLoss                    | 0     
1  | logging_metrics                    | ModuleList                      | 0     
2  | input_embeddings                   | MultiEmbedding                  | 804 K 
3  | prescalers                         | ModuleDict                      | 128   
4  | static_variable_selection          | VariableSelectionNetwork        | 10.0 K
5  | encoder_variable_selection         | VariableSelectionNetwork        | 66.1 K
6  | decoder_variable_selection         | VariableSelectionNetwork        | 59.8 K
7  | static_cont

Sanity Checking: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

`Trainer.fit` stopped: `max_epochs=1` reached.
Elapsed time for fitting TFTPytorchFC model: 516.55 s


[0;31m---------------------------------------------------------------------------[0m
[0;31mValueError[0m                                Traceback (most recent call last)
[0;32m<command-228038575925263>[0m in [0;36m<cell line: 16>[0;34m()[0m
[1;32m     14[0m [0mtft_pred[0m [0;34m=[0m [0mtft_reg[0m[0;34m.[0m[0mpredict[0m[0;34m([0m[0mfull_test_df[0m[0;34m,[0m [0mprediction_types[0m[0;34m=[0m[0;34m[[0m[0mPredEnum[0m[0;34m.[0m[0mPOINT_ESTIMATES[0m[0;34m,[0m [0mPredEnum[0m[0;34m.[0m[0mQUANTILES[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[1;32m     15[0m [0mtft_metrics[0m [0;34m=[0m [0mtft_reg[0m[0;34m.[0m[0mmetrics[0m[0;34m([0m[0mtft_y_test[0m[0;34m,[0m [0mtft_pred[0m[0;34m,[0m [0mconfidence_interval_quantiles[0m[0;34m=[0m[0;34m[[0m[0;36m0.1[0m[0;34m,[0m[0;36m0.9[0m[0;34m][0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;32m---> 16[0;31m [0mtft_metrics[0m[0;34m[[0m[0;34m'rmsle'[0m[0;34m][0m [0;

In [None]:
tft_y_test

Out[58]: array([[ 0.,  4.,  2., ...,  1.,  1.,  0.],
       [ 3.,  4.,  3., ...,  3.,  2.,  1.],
       [ 3.,  5.,  1., ..., 14.,  3.,  7.],
       ...,
       [ 1.,  1.,  0., ...,  0.,  0.,  3.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [None]:
tft_pred[PredEnum.POINT_ESTIMATES].shape

Out[59]: (7417, 16)

In [None]:
full_train_df

Unnamed: 0,date,entity,store_nbr,item_nbr,unit_sales,onpromotion,Holiday,family,class,perishable,city,state,type,cluster,dcoilwtico,transactions,promo_missing,unit_saleslag16,unit_saleslag17,unit_saleslag18,unit_saleslag19,unit_saleslag20,unit_saleslag21,unit_saleslag22,unit_saleslag30,unit_saleslag60,mean_7,std_7,mean_30,std_30,mean_60,std_60,ramp_upHoliday,ramp_downHoliday,ramp_uponpromotion,ramp_downonpromotion,dcoilwtico_na,transactions_na,Year,Month,Week,Day,Dayofweek,Dayofyear,Is_month_end,Is_month_start,Is_quarter_end,Is_quarter_start,Is_year_end,Is_year_start,Elapsed,store_type,time_index_tft
1322407,2016-07-15,104367,41,1001305,0.0,1,0,12,1016,0,13,4,3,3,45.930000,995.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.923732e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2016,7,28,15,4,197,False,False,False,False,False,False,1.468541e+09,3,0
1322408,2016-07-16,104367,41,1001305,0.0,1,0,12,1016,0,13,4,3,3,-1.000000,1287.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.923732e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,16,5,198,False,False,False,False,False,False,1.468627e+09,3,1
1322409,2016-07-17,104367,41,1001305,0.0,1,0,12,1016,0,13,4,3,3,-1.000000,1376.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.923732e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,True,False,2016,7,28,17,6,199,False,False,False,False,False,False,1.468714e+09,3,2
1322410,2016-07-18,104367,41,1001305,0.0,1,0,12,1016,0,13,4,3,3,45.230000,1015.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.923732e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,18,0,200,False,False,False,False,False,False,1.468800e+09,3,3
1322411,2016-07-19,104367,41,1001305,0.0,1,0,12,1016,0,13,4,3,3,44.639999,960.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.923732e-08,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2016,7,29,19,1,201,False,False,False,False,False,False,1.468886e+09,3,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1322386,2017-07-26,94740,38,996613,0.0,1,0,24,2372,1,12,8,3,3,48.580002,1405.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2017,7,30,26,2,207,False,False,False,False,False,False,1.501027e+09,3,376
1322387,2017-07-27,94740,38,996613,0.0,1,0,24,2372,1,12,8,3,3,49.049999,1289.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2017,7,30,27,3,208,False,False,False,False,False,False,1.501114e+09,3,377
1322388,2017-07-28,94740,38,996613,0.0,1,0,24,2372,1,12,8,3,3,49.720001,1696.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,False,False,2017,7,30,28,4,209,False,False,False,False,False,False,1.501200e+09,3,378
1322389,2017-07-29,94740,38,996613,0.0,1,0,24,2372,1,12,8,3,3,-1.000000,1968.0,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.000000e+00,0.0,0.0,0.0,0.0,0.0,0.0,0.370408,0.370408,True,False,2017,7,30,29,5,210,False,False,False,False,False,False,1.501286e+09,3,379
