In [1]:
import pandas as pd
import numpy as np
#from tqdm import tqdm_notebook as tqdm
from tqdm import tqdm
import time
from time import sleep

def get_distance_from_paydays(date):
    """Return the number of days from ``date`` to the next payday marker.

    The markers are the 15th and the last day of the month:

    * before the 15th -> days remaining until the 15th;
    * on or after the 15th -> days remaining until the end of the month
      (so the 15th itself yields ``daysinmonth - 15`` and the last day yields 0).

    Args:
        date: object exposing ``day`` and ``daysinmonth`` (e.g. ``pd.Timestamp``).

    Returns:
        Non-negative day count as described above.
    """
    if date.day < 15:
        return 15 - date.day
    return date.daysinmonth - date.day

def std(x):
    """Standard deviation of ``x`` as computed by ``np.std`` with its default arguments."""
    return np.std(x)


In [None]:
# Load the pre-converted train and test extracts (paths are relative to the notebook's working directory).
df_train, df_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../../dataset/train/train_converted.csv',
        '../../dataset/test/test_converted.csv',
    )
)


In [None]:
df_train.head(2)

In [None]:
df_test.head(2)

In [None]:
# Collapse both frames to one row per (model, point of sale, range, week, week date)
# by summing Demanda, then replace empty strings / +-inf / NaN with 0.
group_keys = ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA','Z_WEEK','Z_WEEK_DATE']
invalid_values = ['', np.inf, -np.inf, np.nan]

df_train = df_train[group_keys + ['Demanda']].groupby(group_keys).sum().reset_index()
df_test = df_test[group_keys + ['Demanda']].groupby(group_keys).sum().reset_index()

for frame in (df_train, df_test):
    frame.replace(invalid_values, 0, inplace=True)


In [None]:


print('Creating date_block_num ...')
N_submission = df_test.shape[0]
N_sales      = df_train.shape[0]

print(df_train.shape,df_test.shape)

# Stack train on top of test so both share a single week -> integer index mapping.
# Row labels are deliberately kept (no ignore_index): df_test keeps its own
# 0..N-1 labels, which the submission cell relies on when joining predictions.
df_auxiliar = pd.concat([df_train, df_test])
df_auxiliar = df_auxiliar.replace([np.inf, -np.inf, np.nan], 0)

# Number the weeks CHRONOLOGICALLY. Enumerating `unique()` would follow the order
# of first appearance in the (group-sorted) rows, which is only chronological
# when the very first series happens to contain every week in order.
week_start = (
    df_auxiliar[['Z_WEEK']]
    .assign(week_date=pd.to_datetime(df_auxiliar['Z_WEEK_DATE']).to_numpy())
    .groupby('Z_WEEK')['week_date']
    .min()
    .sort_values(kind='mergesort')  # stable sort: deterministic on ties
)
dates = week_start.index.to_numpy()
dict_dates = {week: idx for idx, week in enumerate(dates)}

# `.map` is an exact label lookup (every Z_WEEK is a key of dict_dates).
df_auxiliar['date_block_num'] = df_auxiliar['Z_WEEK'].map(dict_dates)

# Split back by position; .copy() gives independent frames so later column
# assignments do not write into views of df_auxiliar (SettingWithCopyWarning).
df_train = df_auxiliar.iloc[:N_sales].copy()
df_test = df_auxiliar.iloc[N_sales:].copy()
print(df_train.shape,df_test.shape)

df_train = df_train.replace(['',np.inf, -np.inf, np.nan],0)
df_test = df_test.replace(['',np.inf, -np.inf, np.nan],0)

print('Creating date_block_num completed!')


print('Preprocessing TRAINING DATASET ...')

# Work on an independent frame so the assignments below never target a slice.
df_train = df_train.copy()
df_train['Z_WEEK_DATE'] = pd.to_datetime(df_train['Z_WEEK_DATE'])
df_train['days_from_payday'] = df_train['Z_WEEK_DATE'].apply(get_distance_from_paydays)

statistics_columns = [ ]  # names of the aggregate features merged into df_train
unique_columns = [ ]      # [group keys, feature name] of groupings skipped as (almost) one row per group

# Every grouping is combined with Z_WEEK, i.e. statistics are per week and per group.
groupings = [
    ['Z_MODELO'],
    ['Z_PUNTO_VENTA'],
    ['Z_GAMA'],
    ['Z_MODELO','Z_PUNTO_VENTA'],
    ['Z_MODELO','Z_GAMA'],
    ['Z_PUNTO_VENTA','Z_GAMA'],
    ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA'],
]

# statistic name -> reducer applied to the grouped Demanda series
# (std uses ddof=0, i.e. the population standard deviation).
aggregations = {
    'mean': lambda grouped: grouped.mean(),
    'std': lambda grouped: grouped.std(ddof=0),
    'max': lambda grouped: grouped.max(),
    'min': lambda grouped: grouped.min(),
}
statistics = ['mean','std','max','min']

# NOTE(review): these features are aggregates of the target within the SAME week,
# so they are not available for future weeks and may leak the target — confirm
# that using them as "known" inputs is intended.
for column_names in tqdm(groupings, position=0, desc="i", colour='green', ncols=80):
    group_keys = ["Z_WEEK"] + column_names
    # A fresh inner bar per outer step: tqdm advances itself while iterating,
    # so no manual update()/reset()/sleep() calls are needed.
    for statistic in tqdm(statistics, position=1, desc="j", colour='red', ncols=80, leave=False):
        new_column_name = statistic+'_sales_by_'+'_'.join(column_names)
        df_agg = aggregations[statistic](df_train.groupby(group_keys, observed=True).Demanda)

        # Skip groupings with at least 70% as many groups as rows: the statistic
        # would be (almost) a copy of Demanda itself.
        if df_agg.shape[0] >= df_train.shape[0]*0.7:
            unique_columns.append([list(group_keys), new_column_name])
            continue

        df_agg = df_agg.rename(new_column_name).reset_index()
        df_train = df_train.merge(df_agg, on=group_keys, how='left')
        statistics_columns.append(new_column_name)

# Calendar features, stored as string categories.
df_train['dayofweek'] = df_train['Z_WEEK_DATE'].dt.dayofweek.astype('str').astype('category')
df_train['month'] = df_train['Z_WEEK_DATE'].dt.month.astype('str').astype('category')
df_train['dayofyear'] = df_train['Z_WEEK_DATE'].dt.dayofyear.astype('str').astype('category')

# From here on the week is identified by date_block_num only.
df_train = df_train.drop(columns=['Z_WEEK_DATE', 'Z_WEEK'])

print('Preprocessing TRAINING DATASET COMPLETED!')
print('Preprocessing TESTING DATASET ...')

# Inverse mapping (week index -> Z_WEEK label).
inv_dict_dates = {v: k for k, v in dict_dates.items()}

# Keep only the columns used downstream. .copy() makes the result an independent
# frame, so the feature assignments below do not target a column subset view.
df_test = df_test[['date_block_num','Z_MODELO','Z_PUNTO_VENTA','Z_GAMA',"Demanda","Z_WEEK_DATE"]].copy()
df_test['Z_WEEK_DATE'] = pd.to_datetime(df_test['Z_WEEK_DATE'])

# Calendar features, stored as string categories (same encoding as the training frame).
df_test['dayofweek'] = df_test['Z_WEEK_DATE'].dt.dayofweek.astype('str').astype('category')
df_test['month'] = df_test['Z_WEEK_DATE'].dt.month.astype('str').astype('category')
df_test['dayofyear'] = df_test['Z_WEEK_DATE'].dt.dayofyear.astype('str').astype('category')

# Computed once, after the column selection (an earlier computation would be
# discarded by that selection).
df_test['days_from_payday'] = df_test['Z_WEEK_DATE'].apply(get_distance_from_paydays)
df_test = df_test.drop(columns=['Z_WEEK_DATE'])

print('Preprocessing TESTING DATASET COMPLETED!')


In [None]:
# Population standard deviation (ddof=0) of Demanda per week index, model and range.
df_train.groupby(['date_block_num', 'Z_MODELO', 'Z_GAMA'], observed=True)['Demanda'].std(ddof=0)

In [None]:
unique_columns

In [None]:
# Column dtypes and non-null counts. `show_counts` replaces the `null_counts`
# keyword, which pandas deprecated and later removed.
df_train.info(verbose=True, show_counts=True)

In [None]:
# Shape, column names and the first two rows of the engineered training frame.
print(df_train.shape)
print(df_train.columns.tolist())
df_train.head(2)

In [None]:
df_train.shape

In [None]:
# Number of aggregate features created on train, then shape, columns and first rows of the test frame.
print('statistics_columns : ',len(statistics_columns))
print(df_test.shape)
print(df_test.columns.tolist())
df_test.head(2)

In [None]:
# Rows of model MOD_10 at week index 0.
is_mod10_week0 = (df_train['Z_MODELO'] == 'MOD_10') & (df_train['date_block_num'] == 0)
df_train.loc[is_mod10_week0, ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA','date_block_num','Demanda']]

In [None]:
df_train["Z_MODELO"].value_counts()

In [None]:
df_train["Z_GAMA"].value_counts()

In [None]:
# Number of distinct ranges (Z_GAMA) seen for each of the 10 most frequent models, plus MOD_102.
models_to_check = df_train["Z_MODELO"].value_counts().index[:10].tolist() + ['MOD_102']
for z_modelo in models_to_check:
    aux = df_train.loc[df_train['Z_MODELO'] == z_modelo, 'Z_GAMA'].value_counts()
    print('Z_MODELO = ',z_modelo,' # GAMAS =',len(aux))

In [None]:
# Number of distinct points of sale seen for each of the 10 most frequent models, plus MOD_102.
models_to_check = df_train["Z_MODELO"].value_counts().index[:10].tolist() + ['MOD_102']
for z_modelo in models_to_check:
    aux = df_train.loc[df_train['Z_MODELO'] == z_modelo, 'Z_PUNTO_VENTA'].value_counts()
    print('Z_MODELO = ',z_modelo,' # Z_PUNTO_VENTA =',len(aux))

In [None]:
# How many rows of model MOD_10 fall in each range (Z_GAMA).
df_train.loc[df_train['Z_MODELO'] == 'MOD_10', 'Z_GAMA'].value_counts()

In [None]:
# Count rows per (model, range) pair by concatenating the two labels into one key.
# NOTE(review): the labels are joined without a separator, so distinct pairs could
# in principle collide — confirm this cannot happen with the actual label formats.
aux = df_train["Z_MODELO"].astype(str) + df_train["Z_GAMA"]
aux.value_counts()

In [None]:
# History of model MOD_10 at point of sale PVENT_1.
is_mod10_pvent1 = (df_train['Z_MODELO'] == 'MOD_10') & (df_train['Z_PUNTO_VENTA'] == 'PVENT_1')
df_train.loc[is_mod10_pvent1, ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA','date_block_num','Demanda']]

In [None]:
# Model MOD_10 at point of sale PVENT_1, restricted to week index 10.
is_mod10_pvent1_week10 = (
    (df_train['Z_MODELO'] == 'MOD_10')
    & (df_train['Z_PUNTO_VENTA'] == 'PVENT_1')
    & (df_train['date_block_num'] == 10)
)
df_train.loc[is_mod10_pvent1_week10, ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA','date_block_num','Demanda']]

In [None]:
# History of model MOD_10 at point of sale PVENT_2.
is_mod10_pvent2 = (df_train['Z_MODELO'] == 'MOD_10') & (df_train['Z_PUNTO_VENTA'] == 'PVENT_2')
df_train.loc[is_mod10_pvent2, ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA','date_block_num','Demanda']]

In [None]:
import matplotlib.pyplot as plt

# Raw training demand: histogram (left) and horizontal box plot (right).
fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(20, 5))

ax_hist.hist(df_train['Demanda'], bins=100)
ax_hist.set_title('train demanda hist')

ax_box.boxplot(df_train['Demanda'], vert=False)
ax_box.set_title('train demanda boxplot')

fig.suptitle('TRAIN Demanda distribution')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Same two panels on log10(Demanda + 0.1); the 0.1 offset keeps zero demand finite.
log_demand = np.log10(df_train['Demanda'] + 0.1)

fig, (ax_hist, ax_box) = plt.subplots(1, 2, figsize=(20, 5))

ax_hist.hist(log_demand, bins=100)
ax_hist.set_title('train demanda hist')

ax_box.boxplot(log_demand, vert=False)
ax_box.set_title('train demanda boxplot')

fig.suptitle('TRAIN Demanda distribution')
plt.show()

In [None]:
import warnings
import numpy as np
import pandas as pd
import copy
from pathlib import Path
import torch
import torch.nn as nn

import pytorch_lightning as pl
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor
from pytorch_lightning.loggers import TensorBoardLogger

import pytorch_forecasting
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
from pytorch_forecasting.data import GroupNormalizer,EncoderNormalizer

from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
"""Point metrics for forecasting a single point per time step."""
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from pytorch_forecasting.metrics import QuantileLoss
from pytorch_forecasting.metrics import SMAPE, MAE,RMSE

import scipy.stats
import torch
import torch.nn.functional as F
from torch.nn.utils import rnn

from pytorch_forecasting.metrics import MultiHorizonMetric
from pytorch_forecasting.utils import create_mask, unpack_sequence, unsqueeze_like

from typing import Any, Callable, Dict, List, Optional, Tuple, Union



import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")

import os
# configure network and trainer
pl.seed_everything(42)

os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [None]:
# Forecast horizon: number of distinct week indices present in the test frame.
prediction_steps = df_test['date_block_num'].nunique()
prediction_steps

In [None]:
df_train['date_block_num'].nunique()

In [None]:
#'day_of_week', 'day', 'is_month_end', 'day_of_year',  'is_quarter_start', 'year', 'month', 'is_year_start', 'is_month_start', 
# 'I103','S103', 'C101','I100' , 'C100', 'ID', 'I102','S102',, 'S101', 'S100', 'item_id', 'date_block_num', 'I101'
# Forecast horizon (weeks in the test set) and maximum history length fed to the encoder.
max_prediction_length = prediction_steps
max_encoder_length = 40

# The last `max_prediction_length` week indices of df_train are kept out of the training set.
training_cutoff = df_train['date_block_num'].max() - max_prediction_length

# One time series per (model, point of sale, range).
series_id_columns = ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA']

# Every categorical input gets a NaN-aware label encoder (add_nan=True).
encoded_columns = ["Z_GAMA", "Z_MODELO", "Z_PUNTO_VENTA", "dayofweek", "month", "dayofyear"]

# NOTE(review): the calendar features and days_from_payday are declared
# "unknown" although they derive from the date alone, while the per-week demand
# statistics are declared "known"; the target itself is not listed among the
# unknown reals. Confirm that this split is intentional.
training = TimeSeriesDataSet(
    df_train[df_train['date_block_num'] <= training_cutoff],
    time_idx='date_block_num',
    target="Demanda",
    group_ids=list(series_id_columns),
    min_encoder_length=max_encoder_length // 2,
    max_encoder_length=max_encoder_length,
    min_prediction_length=1,
    max_prediction_length=max_prediction_length,
    static_categoricals=list(series_id_columns),
    time_varying_unknown_categoricals=["month", "dayofweek", "dayofyear"],
    time_varying_unknown_reals=["date_block_num", 'days_from_payday'],
    time_varying_known_categoricals=[],
    time_varying_known_reals=statistics_columns,
    categorical_encoders={
        column: pytorch_forecasting.data.encoders.NaNLabelEncoder(add_nan=True)
        for column in encoded_columns
    },
    add_relative_time_idx=True,
    add_target_scales=True,
    add_encoder_length=True,
    allow_missing_timesteps=True,
)

In [None]:
# Validation set: built from the FULL df_train, reusing the configuration of `training`.
# NOTE(review): predict=True presumably restricts each series to its final
# prediction window — confirm against the pytorch-forecasting documentation.
validation = TimeSeriesDataSet.from_dataset(training, df_train, predict=True, stop_randomization=True)

In [None]:
# Training batches of 128 samples; validation uses batches 10x larger.
batch_size = 128

train_dataloader = training.to_dataloader(
    train=True,
    batch_size=batch_size,
    num_workers=0,
)
val_dataloader = validation.to_dataloader(
    train=False,
    batch_size=10 * batch_size,
    num_workers=0,
)

In [None]:
import pytorch_lightning as pl
from pytorch_forecasting.metrics import QuantileLoss

import torch

# Reference scores of pytorch-forecasting's `Baseline` model on the validation
# loader (per the original note: it predicts the last available value from the history).
actuals = torch.cat([target for _, (target, _weight) in val_dataloader])
baseline_predictions = Baseline().predict(val_dataloader)

# Mean absolute error (the variable keeps its historical name `mase_val`).
mase_val = torch.mean(torch.abs(actuals - baseline_predictions)).item()

# Root mean squared error; stays a tensor, exactly as it is printed below.
criterion = nn.MSELoss()
rmse_val = criterion(actuals, baseline_predictions).sqrt()

print('baseline - mase_val = ',mase_val)
print('baseline - rmse_val = ',rmse_val)

In [None]:
# Hyper-parameter search with Optuna (toggle: set the condition to False to skip it).
if True:
    from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters

    # This cell runs BEFORE the cell that defines the notebook's TweedieLoss, so on
    # a fresh kernel the name does not exist yet. Fall back to the library class,
    # which recent pytorch-forecasting releases ship under the same name.
    try:
        TweedieLoss
    except NameError:
        from pytorch_forecasting.metrics import TweedieLoss

    # create study
    study = optimize_hyperparameters(
        train_dataloader,
        val_dataloader,
        model_path="optuna_test",
        n_trials=50,
        max_epochs=20,
        gradient_clip_val_range=(0.01, 1.0),
        hidden_size_range=(8, 64),
        hidden_continuous_size_range=(8, 64),
        attention_head_size_range=(1, 4),
        learning_rate_range=(0.001, 0.1),
        dropout_range=(0.1, 0.3),
        trainer_kwargs=dict(limit_train_batches=30, log_every_n_steps=15, gpus=1),
        reduce_on_plateau_patience=4,
        use_learning_rate_finder=False,  # use Optuna to find ideal learning rate or use in-built learning rate finder
        timeout=4200,
        loss=TweedieLoss()
    )

    # show best hyperparameters
    print(study.best_trial.params)

In [None]:
#Early Stopping 
MIN_DELTA  = 1e-4
PATIENCE   = 20

#PL Trainer
MAX_EPOCHS = 500
GPUS = 1

if False:
    #study = {'gradient_clip_val': 0.5899880996240897, 'hidden_size': 24, 'dropout': 0.18045896986283255, 
    # 'hidden_continuous_size': 11, 'attention_head_size': 2, 'learning_rate': 0.012427775478680268}
    
    study = {'gradient_clip_val': 0.3468483254885978, 'hidden_size': 32, 'dropout': 0.11031796015695508, 
             'hidden_continuous_size': 19, 'attention_head_size': 15, 'learning_rate': 0.04289257757804779}
    
    GRADIENT_CLIP_VAL=study['gradient_clip_val']
    LIMIT_TRAIN_BATCHES=30

    #Fusion Transformer
    LR = study['learning_rate']
    HIDDEN_SIZE = study['hidden_size']
    DROPOUT = study['dropout']
    ATTENTION_HEAD_SIZE = study['attention_head_size']
    HIDDEN_CONTINUOUS_SIZE = study['hidden_continuous_size']
else:
    GRADIENT_CLIP_VAL=study.best_trial.params['gradient_clip_val']
    LIMIT_TRAIN_BATCHES=30

    #Fusion Transformer
    LR = study.best_trial.params['learning_rate']
    HIDDEN_SIZE = study.best_trial.params['hidden_size']
    DROPOUT = study.best_trial.params['dropout']
    ATTENTION_HEAD_SIZE = study.best_trial.params['attention_head_size']
    HIDDEN_CONTINUOUS_SIZE = study.best_trial.params['hidden_continuous_size']

OUTPUT_SIZE= 1
REDUCE_ON_PLATEAU_PATIENCE=5

In [None]:
ATTENTION_HEAD_SIZE

In [None]:
max_prediction_length

In [None]:


# Weighted sum of two metrics: SMAPE plus a small (1e-4) MAE term.
# It is not referenced by any other cell visible in this notebook.
composite_metric = SMAPE() + 1e-4 * MAE()

In [None]:
class RMSE2(MultiHorizonMetric):
    """
    Root mean square error.

    ``loss`` yields the element-wise squared error ``(y_pred - target) ** 2``;
    the ``"sqrt-mean"`` reduction is handed to ``MultiHorizonMetric``.
    """

    def __init__(self, reduction="sqrt-mean", **kwargs):
        """Forward ``reduction`` (default ``"sqrt-mean"``) and any extra kwargs to the base metric."""
        super().__init__(reduction=reduction, **kwargs)

    def loss(self, y_pred: Dict[str, torch.Tensor], target):
        """Return the squared difference between the point prediction and ``target``."""
        residual = self.to_prediction(y_pred) - target
        return residual ** 2

In [None]:
RMSE

In [None]:
class TweedieLoss(MultiHorizonMetric):
    """
    Tweedie loss

    Tweedie regression with log-link. It might be useful, e.g., for modeling total
    loss in insurance, or for any target that might be tweedie-distributed.

    The network output is treated as the logarithm of the predicted rate:
    ``to_prediction`` exponentiates it and ``loss`` works on the log scale.
    """

    def __init__(self, reduction="mean", p: float = 1.5, **kwargs):
        """
        Args:
            p (float, optional): tweedie variance power, ``1 <= p < 2``. Close to
                ``2`` shifts to Gamma distribution and close to ``1`` shifts to
                Poisson distribution. Defaults to 1.5.
            reduction (str, optional): How to reduce the loss. Defaults to "mean".
        """
        super().__init__(reduction=reduction, **kwargs)
        # Half-open interval: p == 2 is rejected, p == 1 is accepted.
        assert 1 <= p < 2, "p must be in range [1, 2)"
        self.p = p

    def to_prediction(self, out: Dict[str, torch.Tensor]):
        """Convert the network output (log-rate) into the predicted rate."""
        rate = torch.exp(super().to_prediction(out))
        return rate

    def loss(self, y_pred, y_true):
        """Element-wise Tweedie loss between the log-rate prediction and ``y_true``."""
        log_rate = super().to_prediction(y_pred)
        if self.p == 1:
            # The general formula divides by (1 - p). At p == 1 use its Poisson
            # limit (up to a term that does not depend on the prediction).
            return torch.exp(log_rate) - y_true * log_rate
        a = y_true * torch.exp(log_rate * (1 - self.p)) / (1 - self.p)
        b = torch.exp(log_rate * (2 - self.p)) / (2 - self.p)
        loss = -a + b

        return loss

In [None]:
from pytorch_lightning.callbacks import EarlyStopping, LearningRateMonitor

# Callbacks: early stopping on the validation loss, plus learning-rate logging.
early_stop_callback = EarlyStopping(
    monitor="val_loss",
    mode="min",
    min_delta=MIN_DELTA,
    patience=PATIENCE,
    verbose=False,
)
lr_logger = LearningRateMonitor()
training_callbacks = [lr_logger, early_stop_callback]

trainer = pl.Trainer(
    max_epochs=MAX_EPOCHS,
    gpus=GPUS,
    weights_summary="top",
    gradient_clip_val=GRADIENT_CLIP_VAL,
    limit_train_batches=LIMIT_TRAIN_BATCHES,  # caps the number of training batches per epoch
    callbacks=training_callbacks,
    log_every_n_steps=10,
)

# Model hyper-parameters; OUTPUT_SIZE is 1 (a single point forecast) and the
# loss is the TweedieLoss defined in this notebook.
tft_settings = dict(
    learning_rate=LR,
    hidden_size=HIDDEN_SIZE,
    attention_head_size=ATTENTION_HEAD_SIZE,
    dropout=DROPOUT,
    hidden_continuous_size=HIDDEN_CONTINUOUS_SIZE,
    output_size=OUTPUT_SIZE,
    loss=TweedieLoss(),
    log_interval=10,
    reduce_on_plateau_patience=REDUCE_ON_PLATEAU_PATIENCE,
)
tft = TemporalFusionTransformer.from_dataset(training, **tft_settings)
print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")

In [None]:
# fit network
trainer.fit(
    tft,
    train_dataloaders=train_dataloader,
    val_dataloaders=val_dataloader,
)

In [None]:
# load the best model according to the validation loss
# (given that we use early stopping, this is not necessarily the last epoch)
best_model_path = trainer.checkpoint_callback.best_model_path
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
print(best_model_path)


In [None]:
# raw predictions are a dictionary from which all kind of information including quantiles can be extracted
raw_predictions, x = best_tft.predict(val_dataloader, mode="raw", return_x=True)
print('hola')

In [None]:
for idx in range(5):  # plot the first 5 validation examples
    best_tft.plot_prediction(x, raw_predictions, idx=idx, add_loss_to_title=True);

In [None]:
# Point predictions on the validation loader, then predicted vs. actual values
# broken down by input variable and plotted.
predictions, x = best_tft.predict(val_dataloader, return_x=True)
predictions_vs_actuals = best_tft.calculate_prediction_actual_by_variable(x, predictions)
best_tft.plot_prediction_actual_by_variable(predictions_vs_actuals);

In [None]:
# calcualte root mean squared error on validation set
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])
val_predictions = best_tft.predict(val_dataloader)

In [None]:
# Root mean squared error of the model on the validation set (sqrt of the MSE).
criterion = nn.MSELoss()
torch.sqrt(criterion(actuals,val_predictions))

In [None]:
# Show the first validation series: actual values next to the model's predictions.
print(actuals[0],val_predictions[0])


In [None]:
print('max_prediction_length:',max_prediction_length)
print('max_encoder_length   :',max_encoder_length)

In [None]:
# select last 30 days from data (max_encoder_length is 24)
encoder_data = df_train[lambda x: x.date_block_num > x.date_block_num.max() - max_encoder_length]

print(encoder_data['date_block_num'].min(),encoder_data['date_block_num'].max())
#print(encoder_data['DATE'].min(),encoder_data['DATE'].max())
encoder_data

In [None]:
df_test

In [None]:
df_test

In [None]:
df_train.columns

In [None]:
max_prediction_length

In [None]:

# Columns carried over from history into the future (decoder) rows: the target
# plus every aggregate feature that preprocessing actually created. Those are
# named '<statistic>_sales_by_<keys>' and listed in `statistics_columns`; no
# 'average_sales_by_*' columns exist in df_train.
carry_columns = ['date_block_num','Z_MODELO','Z_PUNTO_VENTA','Z_GAMA',"Demanda"] + statistics_columns

# For each test week, take the training rows from `max_prediction_length` weeks
# earlier and shift their week index forward so they line up with the test week.
# .copy() makes `last_data` independent of df_train before it is modified.
source_weeks = [idx - max_prediction_length for idx in df_test['date_block_num'].unique()]
last_data = df_train[df_train['date_block_num'].isin(source_weeks)].copy()
last_data['date_block_num'] = last_data['date_block_num'] + max_prediction_length

# Test rows keep their own calendar features; Demanda and the statistics come
# from the shifted history (missing where the series has no such history row).
decoder_data = pd.merge(
    df_test[[col for col in df_test.columns if 'Demanda' not in col]],
    last_data[carry_columns],
    on=['date_block_num', 'Z_MODELO', 'Z_PUNTO_VENTA', 'Z_GAMA'],
    how='left',
)

# Missing / infinite values become 0 (new frames, nothing is modified in place).
encoder_data = encoder_data.replace([np.inf, -np.inf, np.nan], 0)
decoder_data = decoder_data.replace([np.inf, -np.inf, np.nan], 0)

# combine encoder and decoder data
new_prediction_data = pd.concat([encoder_data, decoder_data], ignore_index=True)

In [None]:
# Shifted history rows: series keys, target and the aggregate features that
# exist in df_train (listed in `statistics_columns`).
last_data[['date_block_num','Z_MODELO','Z_PUNTO_VENTA','Z_GAMA',"Demanda"] + statistics_columns]

In [None]:
# Aggregate features carried into the decoder rows. No 'average_sales_by_Z_WEEK'
# column is ever created, so show the features listed in `statistics_columns`.
decoder_data[statistics_columns]

In [None]:
encoder_data

In [None]:
import matplotlib.pyplot as plt

# Demand values of the encoder window, one marker per row in frame order.
aux = encoder_data['Demanda'].values

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Demand values carried into the decoder rows, one marker per row in frame order.
aux = decoder_data['Demanda'].values

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
decoder_data

In [None]:
new_prediction_data

In [None]:
# Raw forecast for the combined encoder + decoder frame, with the matching inputs.
new_raw_predictions, new_x = best_tft.predict(new_prediction_data, mode="raw", return_x=True)

for idx in range(10):  # plot 10 examples
    best_tft.plot_prediction(new_x, new_raw_predictions, idx=idx, show_future_observed=False);

In [None]:
#new_raw_predictions.shape
#torch.Size([47173, 10])

In [None]:
new_raw_predictions

In [None]:
# Interpretation of the raw forecast output, summed over samples (reduction="sum"), then plotted.
interpretation = best_tft.interpret_output(new_raw_predictions, reduction="sum")
best_tft.plot_interpretation(interpretation)

In [None]:
df_test.head()

In [None]:
# Point forecasts (mode="prediction"). NOTE: this rebinds `new_raw_predictions`,
# which until now held the mode="raw" output.
new_raw_predictions = best_tft.predict(new_prediction_data, mode="prediction", return_x=False)


In [None]:
# Reshape the forecast to long format: after the transpose each row is a forecast
# step and each column a series; `melt` yields one row per (step, series).
# NOTE(review): presumably `new_raw_predictions` is (n_series, horizon) — confirm.
predictions = pd.DataFrame(new_raw_predictions.numpy()).T
predictions['date_block_num'] = sorted(df_test['date_block_num'].unique())
predictions = pd.melt(predictions, id_vars=['date_block_num'])
predictions = predictions.sort_values(['date_block_num', 'variable']).reset_index(drop=True)

# Put the test rows in the same (week, series) order before joining by position.
# The sorted frame must be KEPT: sort_values returns a new frame and leaves
# df_test untouched.
# NOTE(review): this assumes the model emits series in the sorted order of
# (Z_MODELO, Z_PUNTO_VENTA, Z_GAMA) and that every series occurs in every test
# week — verify, e.g. via predict(..., return_index=True).
df_test_sorted = (
    df_test
    .sort_values(['date_block_num', 'Z_MODELO', 'Z_PUNTO_VENTA', 'Z_GAMA'])
    .reset_index(drop=True)
)
assert len(predictions) == len(df_test_sorted), (
    f"{len(predictions)} predictions for {len(df_test_sorted)} test rows: "
    "a positional join would misalign them"
)
df_test2 = df_test_sorted.join(predictions['value'])

In [None]:
df_test2.head()

In [None]:
import pickle

# Pickled object used later to map encoded labels back to their original values.
# SECURITY NOTE: unpickling can execute arbitrary code; only load this file if it
# comes from a trusted source (despite the .txt extension it is a binary pickle).
reverse_mapping_file = '../../utils/reverse_dict_mapping_list.txt'

with open(reverse_mapping_file, 'rb') as f:
    reverse_mapping = pickle.load( f)

In [None]:
#reverse_mapping#

In [None]:
#descriptive_columns = ['Z_MODELO','Z_PUNTO_VENTA','Z_GAMA']
descriptive_columns = ['Z_MARCA', 'Z_GAMA', 'Z_MODELO',
                       'Z_DEPARTAMENTO', 'Z_PUNTO_VENTA']
i=0
for column in descriptive_columns:
    if column in df_test2.columns:
        df_test2[column] = df_test2[column].map(reverse_mapping[i])
    i+=1

In [None]:
df_test2.head()

In [None]:

# Invert the week -> index mapping and restore the original Z_WEEK label of each row.
inv_dict_dates = {v: k for k, v in dict_dates.items()}
df_test2['Z_WEEK'] = df_test2['date_block_num'].map(inv_dict_dates)

In [None]:
# Submission key: model | point of sale | range | week, joined with '|'.
# NOTE(review): relies on all four columns holding strings — confirm.
df_test2['ID'] = df_test2['Z_MODELO'] + '|' + df_test2['Z_PUNTO_VENTA'] + '|' + df_test2['Z_GAMA'] + '|' + df_test2['Z_WEEK']

In [None]:
# Overwrite Demanda with the model forecast and keep only the submission columns.
df_test2['Demanda'] = df_test2['value']
submission = df_test2[['ID','Demanda']]#.groupby('ID').sum().reset_index()

In [None]:
submission

In [None]:
#submission['Demanda'] = 0.9#submission['value']

# Write the submission as a comma-separated file without the index column.
submission[['ID', 'Demanda']].to_csv('../../results/Submission_tft_v2_.csv', index = False, sep = ',')

In [None]:
submission.boxplot(['Demanda'])

In [None]:
import matplotlib.pyplot as plt

# Training demand, one marker per row in frame order.
aux = df_train['Demanda'].values

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Demanda column of the test frame, one marker per row in frame order.
aux = df_test['Demanda'].values

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
import matplotlib.pyplot as plt

# Forecast values written to the submission, one marker per row in frame order.
aux = submission['Demanda'].values

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
aux[aux>=300]

In [None]:
# Earlier submission used as a reference: box plot, then a scatter of its values.
df_baseline = pd.read_csv('../../results/Submission_28.csv')
import matplotlib.pyplot as plt

aux = df_baseline['Demanda']

fig, ax = plt.subplots(figsize=(10, 10))
df_baseline.boxplot(['Demanda'], ax=ax)

fig, ax = plt.subplots(figsize=(20, 5))
ax.plot(aux, 'o')
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error

# Root mean squared difference between this submission and the earlier one.
# NOTE(review): `y_actual` holds a previous SUBMISSION, not ground truth, and the
# two frames are compared row by row — confirm that both files list the same IDs
# in the same order.
y_actual = df_baseline['Demanda']
y_predicted = submission['Demanda'].values

# sqrt of the MSE works on every scikit-learn version; the `squared=False`
# shortcut is deprecated/removed in recent releases.
rms = np.sqrt(mean_squared_error(y_actual, y_predicted))
rms