In [None]:
#%pip install jupyter_tensorboard

In [None]:
import seaborn as sns  #Visualization
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import matplotlib


import pandas as pd   #preprocessing
import numpy as np
import math
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler


import torch          #modelling
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict


from sklearn.metrics import mean_squared_error, mean_absolute_error

#import operator
import time
import os

from IPython.utils import io
import warnings
warnings.filterwarnings("ignore", ".*does not have many workers.*")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=UserWarning) 


import tensorboard
%reload_ext tensorboard


def mean_absolute_scaled_error(y_true, y_pred, y_train):
    """Return the mean absolute scaled error (MASE) of a forecast.

    The absolute forecast errors are divided by the mean absolute error of a
    one-step naive forecast on ``y_train`` (each value predicted by its
    predecessor) and then averaged.

    Args:
        y_true: Observed values of the forecast horizon (array-like).
        y_pred: Forecast values, same length as ``y_true`` (array-like).
        y_train: In-sample series used for the naive-forecast scale; it needs
            at least two observations.

    Returns:
        The MASE as a float. The result is ``inf``/``nan`` when ``y_train`` is
        constant, because the scale is then zero.
    """
    # Work on plain arrays so pandas inputs are compared by position; two
    # Series with different indexes would otherwise be aligned by label and
    # produce NaN.
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    y_train = np.asarray(y_train, dtype=float)

    e_t = y_true - y_pred
    # Mean absolute error of the naive forecast y_train[t-1] -> y_train[t].
    scale = np.mean(np.abs(y_train[1:] - y_train[:-1]))
    return np.mean(np.abs(e_t / scale))



In [None]:
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Global seaborn theme applied to every figure of the notebook.
sns.set(style='whitegrid',palette='muted',font_scale=1.2)

# Default figure size as (width, height).
rcParams['figure.figsize']=12,8

# Register tqdm's progress-bar integration with pandas.
tqdm.pandas()

In [None]:
# Upper bound on training epochs (passed to the trainer as max_epochs).
N_EPOCHS = 220

# Epoch numbers 14, 19, 24, ... below N_EPOCHS; the experiment cells reload
# the checkpoint of each of these epochs for evaluation.
arr = np.arange(14, N_EPOCHS, 5)
# Convert the elements of the array to strings and create a list
checkpoints = [str(x) for x in arr]

BATCH_SIZE = 7          # batch size of the training DataLoader
SEQUENCE_LENGTH = 15    # number of consecutive rows in one input window
N_HIDDEN = 32           # default LSTM hidden size
N_LAYERS = 1            # default number of stacked LSTM layers
PATIENCE = 500          # EarlyStopping patience (larger than N_EPOCHS)
LEARNING = 0.005        # AdamW learning rate

# Name of the column that is forecast.
TGT = 'Sales'

# Switches: each flag enables the experiment cell that tests it.
all_features = False
univariate = False

corr_train = False

Boruta_GB = False
Boruta_RF = False

BorutaSHAP_GB = False
BorutaSHAP_RF = False

IMV_Tensor = False 
IMV_Full = False 

LIME_train = False

# NOTE(review): the two SHAP flags are not referenced in the visible part of
# the notebook; presumably they are used by later cells.
SHAP_insta = False
SHAP_avrag = False 

In [None]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset over ``(sequence, label)`` pairs.

    The base class is referenced by its fully qualified name because this
    class reuses the name ``Dataset``: inheriting from the bare name would make
    a re-run of the cell subclass the previous definition of this class
    instead of the PyTorch base class.

    Args:
        sequences: Indexable collection of ``(sequence, label)`` tuples, where
            ``sequence`` offers ``to_numpy()`` (e.g. a ``pandas.DataFrame``
            window) and ``label`` is a scalar.
    """

    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        """Return the number of ``(sequence, label)`` pairs."""
        return len(self.sequences)

    def __getitem__(self, idx):
        """Return pair ``idx`` as a dict with float32 ``sequence`` and ``label`` tensors."""
        sequence, label = self.sequences[idx]

        return dict(
            sequence=torch.Tensor(sequence.to_numpy()),
            label=torch.tensor(label).float(),
        )

class DataModule(pl.LightningDataModule):
    """Lightning data module serving the training and validation sequences.

    Args:
        train_seqeunces: ``(sequence, label)`` pairs used for training. The
            misspelled parameter name is kept so that existing keyword callers
            keep working.
        test_sequences: ``(sequence, label)`` pairs used for validation.
        batch_size: Batch size of the training loader.
    """

    def __init__(self, train_seqeunces,test_sequences, batch_size=8):
        super().__init__()
        # Store the argument itself (note its spelling); reading a
        # module-level ``train_sequences`` here would silently ignore it.
        self.train_sequences = train_seqeunces
        self.test_sequences = test_sequences
        self.batch_size = batch_size

    def setup(self,stage=None):
        """Wrap both sequence lists in ``Dataset`` objects."""
        self.train_dataset = Dataset(self.train_sequences)
        self.test_dataset = Dataset(self.test_sequences)

    def train_dataloader(self):
        """Return the training loader; samples keep their order (no shuffling)."""
        return DataLoader(
            self.train_dataset,
            batch_size = self.batch_size,
            shuffle = False,
            num_workers = 0)

    def val_dataloader(self):
        """Return the validation loader over the test sequences, one sample per batch."""
        return DataLoader(
            self.test_dataset,
            batch_size = 1,
            shuffle = False,
            num_workers = 0)

In [None]:
def features_dataframe(df,features):
    """Build the modelling frame: target column first, then the chosen features.

    Args:
        df: Source frame holding the target column ``TGT`` and every name in
            ``features``.
        features: Column names to keep. The target may be listed as well; it
            is not duplicated.

    Returns:
        A new ``DataFrame`` with a fresh ``0..n-1`` index whose first column is
        the target (named after ``TGT``, i.e. ``'Sales'`` with the current
        setting) followed by the remaining features in the order given.
        Column dtypes of ``df`` are preserved.

    Raises:
        KeyError: If the target or a requested feature is missing from ``df``.
    """
    # dict.fromkeys removes duplicates while keeping order, so the target stays
    # in first position even when it is also listed among the features.
    columns = list(dict.fromkeys([TGT, *features]))

    # Plain column selection replaces a Python-level loop over every row. The
    # index is reset so the result is labelled 0..n-1.
    features_df = df.loc[:, columns].reset_index(drop=True)

    return features_df

def train_test_spliter(ratio,features_df ):
    """Split a frame into a leading training part and a trailing test part.

    Despite its name, ``ratio`` is an absolute row count: the training part
    holds the first ``len(features_df) - ratio`` rows. The test part starts one
    row after the training part ends, so the row at position ``train_size``
    belongs to neither part.

    Returns:
        ``(train_df, test_df, train_size)``.
    """
    total_rows = len(features_df)
    train_size = int(total_rows - ratio)

    head = features_df[:train_size]
    tail = features_df[train_size + 1:]

    return head, tail, train_size

def data_scaler(train_df,test_df):
    """Min-max scale both splits to the range (-1, 1).

    The scaler is fitted on ``train_df`` only and then applied to both frames.

    Returns:
        ``(train_df, test_df, scaler)``: the scaled frames, with the index and
        column labels of their inputs, and the fitted ``MinMaxScaler``.
    """
    scaler = MinMaxScaler(feature_range=(-1,1)).fit(train_df)

    def _scaled_copy(frame):
        # transform() returns a bare array, so the labels are attached again.
        return pd.DataFrame(
            scaler.transform(frame),
            index = frame.index,
            columns = frame.columns)

    scaled_train = _scaled_copy(train_df)
    scaled_test = _scaled_copy(test_df)

    return scaled_train, scaled_test, scaler


def create_sequences (input_data:pd.DataFrame, target_column, sequence_length):
    """Cut a frame into sliding windows paired with the next target value.

    Window ``k`` consists of rows ``k .. k + sequence_length - 1``; its label
    is the value of ``target_column`` in the row directly after the window.

    Returns:
        A list of ``(window, label)`` tuples, empty when the frame has no more
        than ``sequence_length`` rows.
    """
    n_windows = len(input_data) - sequence_length

    return [
        (
            input_data[start:start + sequence_length],
            input_data.iloc[start + sequence_length][target_column],
        )
        for start in range(n_windows)
    ]

def descale(descaler, values):
    """Run ``values`` through ``descaler.inverse_transform`` as a single column.

    The values get a new axis at position 1 before the call; the result is
    flattened back to one dimension.
    """
    column = np.expand_dims(np.array(values), axis=1)
    restored = descaler.inverse_transform(column)

    return restored.flatten()

In [None]:
def def_model():
    """Create and return the ``(PredictionModel, Predictor)`` class pair.

    The classes are defined at call time: the defaults ``N_HIDDEN`` and
    ``N_LAYERS`` are captured when ``def_model`` is called, while ``LEARNING``
    is read when the optimizer is configured.
    """
    class PredictionModel(nn.Module):
        """LSTM followed by a linear layer that emits one value per sequence."""

        def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
            super().__init__()
            self.n_hidden = n_hidden
            self.lstm = nn.LSTM(
                input_size = n_features,
                hidden_size = n_hidden,
                batch_first = True,
                num_layers = n_layers,
                # nn.LSTM applies dropout only between stacked layers; with a
                # single layer a non-zero value has no effect and only
                # triggers a warning.
                dropout = 0.2 if n_layers > 1 else 0.0)
            self.regressor = nn.Linear(n_hidden,1)

        def forward(self,x):
            """Map a batch-first input batch to one prediction per sequence."""
            self.lstm.flatten_parameters()
            _, (hidden, _) = self.lstm(x)
            # Final hidden state of the last LSTM layer.
            out = hidden[-1]
            return self.regressor(out)


    class Predictor(pl.LightningModule):
        """Lightning wrapper: MSE loss, per-epoch loss logging, AdamW optimizer."""

        def __init__(self, n_features: int):
            super().__init__()
            self.model=PredictionModel(n_features)
            self.criterion = nn.MSELoss()

        def forward(self, x, labels= None):
            """Return ``(loss, output)``; ``loss`` is ``0`` when no labels are given."""
            output = self.model(x)
            loss = 0
            if labels is not None:
                # The model emits one column per sample, so the labels get a
                # matching trailing axis.
                loss = self.criterion(output, labels.unsqueeze(dim=1))
            return loss, output

        def _shared_step(self, batch, log_name):
            """Compute the loss of one batch and log its epoch aggregate under ``log_name``."""
            loss, _ = self(batch['sequence'], batch['label'])
            self.log(log_name, loss, prog_bar = False, logger=True, on_step=False, on_epoch=True)
            return loss

        def training_step(self, batch, batch_index):
            return self._shared_step(batch, 'train_loss')

        def validation_step(self, batch, batch_index):
            return self._shared_step(batch, 'val_loss')

        def test_step(self, batch, batch_index):
            # Logged under its own key so test results are not reported as
            # validation loss.
            return self._shared_step(batch, 'test_loss')

        def configure_optimizers(self):
            return optim.AdamW(self.parameters(), lr = LEARNING)
    return PredictionModel, Predictor
        
        
def pep_proc_summarizer(features,df,test_size=105):
    """Run the preprocessing chain for one store: select, split, scale, window.

    Args:
        features: Column names used as model inputs next to the target.
        df: Frame of one store holding the target and all feature columns.
        test_size: Number of trailing rows set aside for testing, passed to
            ``train_test_spliter``. Defaults to 105.

    Returns:
        ``(train_df, train_sequences, test_sequences, train_size,
        train_dataset, test_dataset, scaler)`` where ``train_df`` is the scaled
        training frame and ``scaler`` the scaler fitted on the training split.
    """
    # Frame restricted to the target and the features to be analysed.
    features_df = features_dataframe(df,features)

    # Split into train and test, then min-max scale both parts.
    train_df, test_df, train_size =  train_test_spliter(test_size,features_df)
    train_df, test_df, scaler = data_scaler(train_df,test_df)

    # Sliding windows of SEQUENCE_LENGTH rows, each paired with the next target value.
    train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
    test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)
    train_dataset = Dataset(train_sequences)
    test_dataset = Dataset(test_sequences)

    return (train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler)

def def_trainer(folder_path1,folder_path2,filename,logs,store):
    """Assemble the Lightning trainer for one experiment run.

    Checkpoints are written every 5 epochs to
    ``<folder_path1>/<folder_path2>/Checkpoints`` and TensorBoard logs go to
    ``<folder_path1>/<logs>`` under the run name ``<store>Sales``. Early
    stopping watches ``val_loss`` with a patience of ``PATIENCE``.

    Returns:
        A ``pl.Trainer`` limited to ``N_EPOCHS`` epochs with ``gpus = 0``.
    """
    checkpoint_dir = f'{folder_path1}/{folder_path2}/Checkpoints'
    log_root = f'{folder_path1}/{logs}'

    save_checkpoints = ModelCheckpoint(
        dirpath = checkpoint_dir,
        filename = filename+'_{epoch:02d}',
        save_top_k = len(checkpoints)+2,
        verbose = False,
        monitor='epoch',
        every_n_epochs = 5)
    tensorboard_logger = TensorBoardLogger(log_root, name = f'{store}Sales')
    stop_early = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)

    return pl.Trainer(
        logger = tensorboard_logger,
        callbacks=[stop_early, save_checkpoints],
        max_epochs = N_EPOCHS,
        gpus = 0,)
    
def recal_predict(folder_path1,folder_path2,filename,features,epoch,test_dataset):
    """Forecast the test windows recursively with the checkpoint of one epoch.

    Before each window is fed to the model, column 0 of its most recent rows
    is overwritten with the model's own previous predictions, so the forecast
    does not see observed target values for steps it has already predicted.

    Args:
        folder_path1: Root results folder.
        folder_path2: Experiment sub-folder containing ``Checkpoints``.
        filename: Checkpoint file prefix.
        features: Model input columns; only their count is used.
        epoch: Epoch label of the checkpoint file to load.
        test_dataset: Iterable of dicts with a 2-D ``'sequence'`` tensor whose
            column 0 is the target.

    Returns:
        List of scaled predictions, one float per item of ``test_dataset``.
    """
    trained_model = Predictor.load_from_checkpoint(
            f'{folder_path1}/{folder_path2}/Checkpoints/{filename}_epoch={epoch}.ckpt',
            n_features = len(features))
    # Inference only: make sure dropout is off and no autograd graph is built.
    trained_model.eval()

    predictions = []

    with torch.no_grad():
        for item in test_dataset:
            sequence = item['sequence']

            # At most one earlier prediction per row of the window.
            n_known = min(len(predictions), SEQUENCE_LENGTH)
            if n_known:
                sequence[-n_known:, 0] = torch.tensor(
                    predictions[-n_known:], dtype=sequence.dtype)

            # The model expects a batch axis in front.
            _,output = trained_model(sequence.unsqueeze(dim=0))
            predictions.append(output.item())

    return (predictions)

def descale_(predictions, fitted_scaler=None):
    """Map scaled target predictions back to the original units.

    Args:
        predictions: Scaled predictions of the target (array-like, 1-D).
        fitted_scaler: Fitted scaler exposing ``min_`` and ``scale_`` whose
            column 0 is the target. When omitted, the module-level ``scaler``
            is used, as before.

    Returns:
        1-D float array of descaled predictions.
    """
    if fitted_scaler is None:
        # Backward-compatible fallback to the scaler set by the experiment cell.
        fitted_scaler = scaler

    target_min = fitted_scaler.min_[0]
    target_scale = fitted_scaler.scale_[0]

    # MinMaxScaler computes X_scaled = X * scale_ + min_; this inverts it for
    # the target column directly instead of assigning fitted attributes to a
    # fresh, unfitted scaler object.
    values = np.asarray(predictions, dtype=float)
    predictions_descaled = (values - target_min) / target_scale

    return (predictions_descaled)
    
def calculate_errors(predictions_descaled,df,train_size):
    """Score a forecast against the last observations of the target series.

    The truth is read from ``df`` so the result depends only on the arguments
    (plus the module-level ``TGT``), not on a frame set elsewhere.

    Args:
        predictions_descaled: Forecasts in original units (array-like). They
            are compared with the final ``len(predictions_descaled)`` values
            of ``df[TGT]``.
        df: Frame of a single store containing the target column ``TGT``;
            rows are assumed to be in chronological order.
        train_size: Unused; kept so existing calls keep working.

    Returns:
        ``(mase, rmse)``. The MASE is scaled by the naive one-step error of
        all observations that precede the forecast window.

    Raises:
        ValueError: If ``predictions_descaled`` is empty.
    """
    predictions = np.asarray(predictions_descaled, dtype=float)
    horizon = len(predictions)
    if horizon == 0:
        # A negative-zero slice would select the whole series instead of nothing.
        raise ValueError('predictions_descaled must not be empty')

    # Negative forecasts are clipped to zero before scoring.
    predictions = np.where(predictions<0, 0, predictions)

    truth = df[TGT]
    actual = truth.iloc[-horizon:]
    history = truth.iloc[:-horizon]

    mean_abs_corrf = mean_absolute_scaled_error(actual, predictions, history)
    root_mean_corrf = mean_squared_error(actual, predictions)**(1/2)

    return (mean_abs_corrf,root_mean_corrf)

def mk_dir(folder_path1,folder_path2):
    """Create ``folder_path1/folder_path2/Forecast`` with all missing parents.

    Folders that already exist are left untouched. Any other failure (for
    example missing permissions) raises ``OSError`` instead of being silently
    ignored.
    """
    os.makedirs(f'{folder_path1}/{folder_path2}/Forecast', exist_ok=True)

In [None]:
# Load the prepared Rossmann data and list the stores that are modelled.
# 'Customers' is removed, so it is never available as a model input.
Rossmann = pd.read_csv('01-Data/Rossmann_treated.csv').drop(columns = 'Customers')
Rosmann_stores = [23,64,103,133,135,216,224,256,266,311,355,405,455,487,558,649,672,714,738,742,773,785,
             830,870,880,899,937,978,1011,1069]

# Root folder of all LSTM results, with one sub-folder per store. Existing
# folders are accepted; other errors (e.g. permissions) are not hidden.
folder_path1='02-Results/LSTM'
os.makedirs(folder_path1, exist_ok=True)

for store in tqdm(Rosmann_stores):
    os.makedirs(f'{folder_path1}/Store{store}', exist_ok=True)

## LSTM Using All Features

In [None]:
# Experiment: one LSTM per store trained on every column of the store's data.
if all_features == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        folder_path2=f'Store{store}/All_Features'
        filename=f'Rossmann_All_LSTM'
        logs='logs_AllF'

        mk_dir(folder_path1,folder_path2)

        # All remaining columns (target included) are used as model inputs.
        df = Rossmann_df.drop(columns  = 'Date')
        features = list(df.columns)

        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)


        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the all-features runs (logger folder 02-Results/LSTM/logs_AllF).
%tensorboard --logdir=./02-Results/LSTM/logs_AllF --host localhost --port=3000

## LSTM Univariate

In [None]:
# Experiment: one LSTM per store that sees only the target series itself.
if univariate == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        folder_path2=f'Store{store}/univariate'
        filename=f'Rossmann_univariate_LSTM'
        logs='logs_univariate'

        mk_dir(folder_path1,folder_path2)

        # The target column is the only model input.
        df = Rossmann_df.drop(columns  = 'Date')
        features = [TGT]

        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)


        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the univariate runs (logger folder 02-Results/LSTM/logs_univariate).
%tensorboard --logdir=./02-Results/LSTM/logs_univariate --host localhost --port=3001

## LSTM Correlation

In [None]:
# Reference file, read only to enumerate the threshold columns. The store is
# named explicitly (the last one of the list) instead of relying on a loop
# variable left over from an earlier cell. Column names are assumed to be
# the same for every store -- TODO confirm.
corr_bsline = pd.read_csv(f'02-Results/FS/Store{Rosmann_stores[-1]}/Correlation/correlation_df.csv')
corr_bsline.drop(columns = 'all_features', inplace = True)

# Experiment: one LSTM per store on the features listed in a correlation
# threshold column. Only the last column of the reference file is trained.
if corr_train == True:
    for corr_col in corr_bsline.columns[-1:]:
        for store in tqdm(Rosmann_stores):

            # Feature lists of this store, one column per threshold.
            corr_feat = pd.read_csv(f'02-Results/FS/Store{store}/Correlation/correlation_df.csv')
            corr_feat.drop(columns = 'all_features', inplace = True)

            # Seeds are reset for every store.
            torch.manual_seed(42);
            np.random.seed(42);
            pl.seed_everything(42);

            Rossmann_df = Rossmann[Rossmann['Store'] == store]
            folder_path2=f'Store{store}/Correlation{corr_col}'
            filename=f'Rossmann_{corr_col}_LSTM'
            logs=f'logs_{corr_col}'

            mk_dir(folder_path1,folder_path2)

            df = Rossmann_df.drop(columns  = 'Date')

            # Empty cells of the threshold column are discarded.
            features = list(corr_feat[corr_col].fillna(0))
            features = [x for x in features if x != 0]

            # NOTE: `scaler` must stay a module-level name; descale_() reads it.
            train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

            # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
            PredictionModel, Predictor = def_model()
            data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
            model = Predictor(n_features = len(train_df.columns))

            # Only the training call is timed.
            iteration_start = time.monotonic()

            trainer.fit(model, data_module)

            iteration_end = time.monotonic()

            Error_dic = {}
            MASE=[]
            RMSE=[]

            # Reload each listed checkpoint, forecast the test windows, store
            # the descaled forecast as CSV and collect its error metrics.
            for i in checkpoints:
                predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
                predictions_descaled = descale_(predictions)
                csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
                csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}__epoch{i}.csv')
                mase,rmse=calculate_errors(predictions_descaled,df,train_size)
                MASE.append(mase)
                RMSE.append(rmse)

            # One row per evaluated checkpoint; TIME holds the same training
            # duration in every row.
            Error_dic['MASE'] = MASE
            Error_dic['RMSE'] = RMSE
            Error_dic['TIME'] =iteration_end-iteration_start
            Error_df = pd.DataFrame.from_dict(Error_dic)
            Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors{corr_col}.csv')


In [None]:
%%capture
# One TensorBoard instance per correlation-threshold log folder.
# NOTE(review): the correlation training cell only iterates
# `corr_bsline.columns[-1:]`, so presumably only one of these folders is
# written per run -- confirm.
%tensorboard --logdir=./02-Results/LSTM/logs_correlation_th_=_0.05 --host localhost --port=3002
%tensorboard --logdir=./02-Results/LSTM/logs_correlation_th_=_0.1 --host localhost --port=3003
%tensorboard --logdir=./02-Results/LSTM/logs_correlation_th_=_0.15 --host localhost --port=3004
%tensorboard --logdir=./02-Results/LSTM/logs_correlation_th_=_0.2 --host localhost --port=3005
%tensorboard --logdir=./02-Results/LSTM/logs_correlation_th_=_0.25 --host localhost --port=3006

## LSTM Boruta-GB

In [None]:
# Experiment: one LSTM per store on the features marked important by Boruta-GB.
if Boruta_GB == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/Boruta-GB'
        filename=f'Rossmann_Boruta-GB_LSTM'
        logs='logs_Boruta-GB'

        mk_dir(folder_path1,folder_path2)
        
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Feature names come from the 'important' column; empty cells are discarded.
        BoGB_feat = pd.read_csv(f'02-Results/FS/Store{store}/Boruta-GB/borutaGB_df.csv')
        features = list(BoGB_feat['important'].fillna(0))
        features = [x for x in features if x != 0]
        
        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the Boruta-GB runs (logger folder 02-Results/LSTM/logs_Boruta-GB).
%tensorboard --logdir=./02-Results/LSTM/logs_Boruta-GB --host localhost --port=3007

## LSTM Boruta-RF

In [None]:
# Experiment: one LSTM per store on the features marked important by Boruta-RF.
if Boruta_RF == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/Boruta-RF'
        filename=f'Rossmann_Boruta-RF_LSTM'
        logs='logs_Boruta-RF'

        mk_dir(folder_path1,folder_path2)
        
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Feature names come from the 'important' column; empty cells are discarded.
        BoRF_feat = pd.read_csv(f'02-Results/FS/Store{store}/Boruta-RF/BorutaRF_df.csv')
        features = list(BoRF_feat['important'].fillna(0))
        features = [x for x in features if x != 0]
        
        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the Boruta-RF runs (logger folder 02-Results/LSTM/logs_Boruta-RF).
%tensorboard --logdir=./02-Results/LSTM/logs_Boruta-RF --host localhost --port=3008

## LSTM BorutaSHAP-GB

In [None]:
# Experiment: one LSTM per store on the features marked important by BorutaSHAP-GB.
if BorutaSHAP_GB == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/BorutaSHAP-GB'
        filename=f'Rossmann_BorutaSHAP-GB_LSTM'
        logs='logs_BorutaSHAP-GB'

        mk_dir(folder_path1,folder_path2)
        
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Feature names come from the 'important' column; empty cells are discarded.
        BoSHAPGB_feat = pd.read_csv(f'02-Results/FS/Store{store}/BorutaSHAP-GB/borutaSHAPGB_df.csv')
        features = list(BoSHAPGB_feat['important'].fillna(0))
        features = [x for x in features if x != 0]
        
        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the BorutaSHAP-GB runs (logger folder 02-Results/LSTM/logs_BorutaSHAP-GB).
%tensorboard --logdir=./02-Results/LSTM/logs_BorutaSHAP-GB --host localhost --port=3009

## LSTM BorutaSHAP-RF

In [None]:
# Experiment: one LSTM per store on the features marked important by BorutaSHAP-RF.
if BorutaSHAP_RF == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/BorutaSHAP-RF'
        filename=f'Rossmann_BorutaSHAP-RF_LSTM'
        logs='logs_BorutaSHAP-RF'

        mk_dir(folder_path1,folder_path2)
        
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Feature names come from the 'important' column; empty cells are discarded.
        BoSHAPRF_feat = pd.read_csv(f'02-Results/FS/Store{store}/BorutaSHAP-RF/borutaSHAPRF_df.csv')
        features = list(BoSHAPRF_feat['important'].fillna(0))
        features = [x for x in features if x != 0]
        
        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')


In [None]:
%%capture
# TensorBoard for the BorutaSHAP-RF runs (logger folder 02-Results/LSTM/logs_BorutaSHAP-RF).
%tensorboard --logdir=./02-Results/LSTM/logs_BorutaSHAP-RF --host localhost --port=3010

## LSTM IMV-Tensor

In [None]:
# Experiment: one LSTM per store on the features whose IMV-Tensor importance
# exceeds 0.045.
if IMV_Tensor == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/IMV_Tensor'
        filename=f'Rossmann_IMV_Tensor_LSTM'
        logs='logs_IMV_Tensor'

        mk_dir(folder_path1,folder_path2)
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Keep the rows of the importance table above the threshold.
        IMV_Tensor_feats = pd.read_csv(f'02-Results/FS/Store{store}/IMV-LSTM/IMV_Tensor.csv')
        features = list(IMV_Tensor_feats['features'].loc[IMV_Tensor_feats['Importance'] > 0.045])

        
        # NOTE: `scaler` and `Predictor` must stay module-level names;
        # descale_() and recal_predict() read them.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')
    

In [None]:
%%capture
# TensorBoard for the IMV-Tensor runs (logger folder 02-Results/LSTM/logs_IMV_Tensor).
# NOTE(review): port 6012 breaks the 30xx numbering of the other TensorBoard
# cells -- confirm it is intended.
%tensorboard --logdir=./02-Results/LSTM/logs_IMV_Tensor --host localhost --port=6012

## LSTM IMV-Full

In [None]:
# Experiment: one LSTM per store on the features whose IMV-Full importance
# exceeds 0.06.
if IMV_Full == True:
    for store in tqdm(Rosmann_stores):
        # Seeds are reset for every store.
        torch.manual_seed(42);
        np.random.seed(42);
        pl.seed_everything(42);  

        Rossmann_df = Rossmann[Rossmann['Store'] == store]
        
        folder_path2=f'Store{store}/IMV_Full'
        filename=f'Rossmann_IMV_Full_LSTM'
        logs='logs_IMV_Full'

        mk_dir(folder_path1,folder_path2)
        
        df = Rossmann_df.drop(columns  = 'Date')
        
        # Keep the rows of the importance table above the threshold.
        IMV_Full_feats = pd.read_csv(f'02-Results/FS/Store{store}/IMV-LSTM/IMV_Full.csv')
        features = list(IMV_Full_feats['features'].loc[IMV_Full_feats['Importance'] > 0.06])
        
        # NOTE: `scaler` must stay a module-level name; descale_() reads it.
        train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

        # NOTE: `Predictor` must stay a module-level name; recal_predict() reads it.
        PredictionModel, Predictor = def_model()
        data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
        model = Predictor(n_features = len(train_df.columns))
        

        # Only the training call is timed.
        iteration_start = time.monotonic()

        trainer.fit(model, data_module)
        
        iteration_end = time.monotonic()
        
        Error_dic = {}
        MASE=[]
        RMSE=[]

        # Reload each listed checkpoint, forecast the test windows, store the
        # descaled forecast as CSV and collect its error metrics.
        for i in checkpoints:
            predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
            predictions_descaled = descale_(predictions)
            csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
            csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}_epoch{i}.csv')
            mase,rmse=calculate_errors(predictions_descaled,df,train_size)
            MASE.append(mase)
            RMSE.append(rmse)

        # One row per evaluated checkpoint; TIME holds the same training
        # duration in every row.
        Error_dic['MASE'] = MASE
        Error_dic['RMSE'] = RMSE
        Error_dic['TIME'] =iteration_end-iteration_start
        Error_df = pd.DataFrame.from_dict(Error_dic)
        Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors.csv')
    

In [None]:
%%capture
%tensorboard --logdir=./02-Results/LSTM/logs_LIME-LSTM --host localhost --port=3012

## LSTM LIME-LSTM

In [None]:
# LIME-LSTM experiment: for every LIME threshold and every store, train an LSTM on
# the features whose LIME_value exceeds the threshold, then evaluate each epoch
# listed in `checkpoints` and write the forecasts and error metrics to CSV.
# NOTE(review): every name assigned in this cell is a notebook global. Helpers from
# earlier cells that take few arguments (def_model(), descale_(predictions)) may
# read some of them (e.g. `scaler`) - keep the names unchanged unless confirmed.
if LIME_train:
    LIME_inst_th = [0.007,0.008,0.009,0.01]
    for threshold in LIME_inst_th:
        for store in tqdm(Rosmann_stores):

            # Per-store LIME scores (presumably written by the feature-selection step).
            inst_LSTMLIME = pd.read_csv(f'02-Results/FS/Store{store}/LIME-LSTM/LIME-LSTM.csv')

            # Re-seed before every run so each (threshold, store) run starts from the
            # same RNG state. seed_everything seeds Python's random, NumPy and torch,
            # so separate torch.manual_seed / np.random.seed calls are not needed.
            pl.seed_everything(42)

            Rossmann_df = Rossmann[Rossmann['Store'] == store]
            folder_path2=f'Store{store}/LIME={threshold}'
            filename=f'Rossmann_LIME={threshold}_LSTM'
            logs=f'logs_LIME={threshold}'

            # Presumably creates the output folders used below - TODO confirm.
            mk_dir(folder_path1,folder_path2)

            df = Rossmann_df.drop(columns  = 'Date')

            # Keep only the features whose LIME value is strictly above the threshold.
            features = list(inst_LSTMLIME['Features'].loc[inst_LSTMLIME['LIME_value'] > threshold])

            train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

            PredictionModel, Predictor = def_model()
            data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
            model = Predictor(n_features = len(train_df.columns))

            # Wall-clock duration of trainer.fit only (monotonic clock, in seconds).
            iteration_start = time.monotonic()

            trainer.fit(model, data_module)

            iteration_end = time.monotonic()

            Error_dic = {}
            MASE=[]
            RMSE=[]

            # `checkpoints` holds epoch numbers as strings (defined in the config cell);
            # one forecast CSV and one MASE/RMSE pair is produced per listed epoch.
            for i in checkpoints:
                predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
                predictions_descaled = descale_(predictions)
                csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
                csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}__epoch{i}.csv')
                mase,rmse=calculate_errors(predictions_descaled,df,train_size)
                MASE.append(mase)
                RMSE.append(rmse)

            # TIME is a single scalar, so pandas repeats it on every row of Error_df.
            Error_dic['MASE'] = MASE
            Error_dic['RMSE'] = RMSE
            Error_dic['TIME'] =iteration_end-iteration_start
            Error_df = pd.DataFrame.from_dict(Error_dic)
            Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors{threshold}.csv')


In [None]:
%%capture
%tensorboard --logdir=./02-Results/LSTM/logs_LIME=0.007 --host localhost --port=3013
%tensorboard --logdir=./02-Results/LSTM/logs_LIME=0.008 --host localhost --port=3014
%tensorboard --logdir=./02-Results/LSTM/logs_LIME=0.009 --host localhost --port=3015
%tensorboard --logdir=./02-Results/LSTM/logs_LIME=0.01 --host localhost --port=3016

## LSTM Instance SHAP-LSTM

In [None]:
# Instance-SHAP experiment: for every threshold and every store, train an LSTM on
# the features whose max_shap_value exceeds the threshold, then evaluate each epoch
# listed in `checkpoints` and write the forecasts and error metrics to CSV.
# NOTE(review): every name assigned in this cell is a notebook global. Helpers from
# earlier cells that take few arguments (def_model(), descale_(predictions)) may
# read some of them (e.g. `scaler`) - keep the names unchanged unless confirmed.
if SHAP_insta:
    SHAP_inst_th = [0.05,0.1,0.15,0.2]
    for threshold in SHAP_inst_th:
        for store in tqdm(Rosmann_stores):

            # Per-store instance-level SHAP scores (presumably from the feature-selection step).
            inst_SHAPLSTM = pd.read_csv(f'02-Results/FS/Store{store}/SHAP-LSTM/inst_SHAP_LSTM.csv')

            # Re-seed before every run so each (threshold, store) run starts from the
            # same RNG state. seed_everything seeds Python's random, NumPy and torch,
            # so separate torch.manual_seed / np.random.seed calls are not needed.
            pl.seed_everything(42)

            Rossmann_df = Rossmann[Rossmann['Store'] == store]
            folder_path2=f'Store{store}/instSHAP={threshold}'
            filename=f'Rossmann_instSHAP={threshold}_LSTM'
            logs=f'logs_instSHAP={threshold}'

            # Presumably creates the output folders used below - TODO confirm.
            mk_dir(folder_path1,folder_path2)

            df = Rossmann_df.drop(columns  = 'Date')

            # Keep only the features whose max SHAP value is strictly above the threshold.
            features = list(inst_SHAPLSTM['feature_name'].loc[inst_SHAPLSTM['max_shap_value'] > threshold])

            train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

            PredictionModel, Predictor = def_model()
            data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
            model = Predictor(n_features = len(train_df.columns))

            # Wall-clock duration of trainer.fit only (monotonic clock, in seconds).
            iteration_start = time.monotonic()

            trainer.fit(model, data_module)

            iteration_end = time.monotonic()

            Error_dic = {}
            MASE=[]
            RMSE=[]

            # `checkpoints` holds epoch numbers as strings (defined in the config cell);
            # one forecast CSV and one MASE/RMSE pair is produced per listed epoch.
            for i in checkpoints:
                predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
                predictions_descaled = descale_(predictions)
                csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
                csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}__epoch{i}.csv')
                mase,rmse=calculate_errors(predictions_descaled,df,train_size)
                MASE.append(mase)
                RMSE.append(rmse)

            # TIME is a single scalar, so pandas repeats it on every row of Error_df.
            Error_dic['MASE'] = MASE
            Error_dic['RMSE'] = RMSE
            Error_dic['TIME'] =iteration_end-iteration_start
            Error_df = pd.DataFrame.from_dict(Error_dic)
            Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors{threshold}.csv')


In [None]:
%%capture
%tensorboard --logdir=./02-Results/LSTM/logs_instSHAP=0.05 --host localhost --port=1017
%tensorboard --logdir=./02-Results/LSTM/logs_instSHAP=0.1 --host localhost --port=1018
%tensorboard --logdir=./02-Results/LSTM/logs_instSHAP=0.15 --host localhost --port=1019
%tensorboard --logdir=./02-Results/LSTM/logs_instSHAP=0.2 --host localhost --port=1020

## LSTM Average SHAP-LSTM

In [None]:
# Average-SHAP experiment: for every threshold and every store, train an LSTM on
# the features whose Importance exceeds the threshold, then evaluate each epoch
# listed in `checkpoints` and write the forecasts and error metrics to CSV.
# NOTE(review): every name assigned in this cell is a notebook global. Helpers from
# earlier cells that take few arguments (def_model(), descale_(predictions)) may
# read some of them (e.g. `scaler`) - keep the names unchanged unless confirmed.
if SHAP_avrag:
    SHAP_avg_th = [0.01,0.02,0.03,0.04]
    for threshold in SHAP_avg_th:
        for store in tqdm(Rosmann_stores):

            # Per-store averaged SHAP importances (presumably from the feature-selection step).
            avg_SHAPLSTM = pd.read_csv(f'02-Results/FS/Store{store}/SHAP-LSTM/avg_SHAP_LSTM.csv')

            # Re-seed before every run so each (threshold, store) run starts from the
            # same RNG state. seed_everything seeds Python's random, NumPy and torch,
            # so separate torch.manual_seed / np.random.seed calls are not needed.
            pl.seed_everything(42)

            Rossmann_df = Rossmann[Rossmann['Store'] == store]
            folder_path2=f'Store{store}/avgSHAP={threshold}'
            filename=f'Rossmann_avgSHAP={threshold}_LSTM'
            logs=f'logs_avgSHAP={threshold}'

            # Presumably creates the output folders used below - TODO confirm.
            mk_dir(folder_path1,folder_path2)

            df = Rossmann_df.drop(columns  = 'Date')

            # Feature names are read from the 'Unnamed: 0' column - presumably the
            # index column of the saved CSV (TODO confirm). Keep those whose
            # Importance is strictly above the threshold.
            features = list(avg_SHAPLSTM['Unnamed: 0'].loc[avg_SHAPLSTM['Importance'] > threshold])

            train_df,train_sequences,test_sequences,train_size,train_dataset,test_dataset,scaler = pep_proc_summarizer(features,df)

            PredictionModel, Predictor = def_model()
            data_module = DataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            trainer = def_trainer(folder_path1,folder_path2,filename,logs,store)
            model = Predictor(n_features = len(train_df.columns))

            # Wall-clock duration of trainer.fit only (monotonic clock, in seconds).
            iteration_start = time.monotonic()

            trainer.fit(model, data_module)

            iteration_end = time.monotonic()

            Error_dic = {}
            MASE=[]
            RMSE=[]

            # `checkpoints` holds epoch numbers as strings (defined in the config cell);
            # one forecast CSV and one MASE/RMSE pair is produced per listed epoch.
            for i in checkpoints:
                predictions = recal_predict(folder_path1,folder_path2,filename,train_df.columns,i,test_dataset)
                predictions_descaled = descale_(predictions)
                csv_dataframe = pd.DataFrame(predictions_descaled, columns=[f'Rosmann{store}_epoch={i}'])
                csv_dataframe.to_csv(f'{folder_path1}/{folder_path2}/Forecast/Rossmann{store}__epoch{i}.csv')
                mase,rmse=calculate_errors(predictions_descaled,df,train_size)
                MASE.append(mase)
                RMSE.append(rmse)

            # TIME is a single scalar, so pandas repeats it on every row of Error_df.
            Error_dic['MASE'] = MASE
            Error_dic['RMSE'] = RMSE
            Error_dic['TIME'] =iteration_end-iteration_start
            Error_df = pd.DataFrame.from_dict(Error_dic)
            Error_df.to_csv(f'{folder_path1}/{folder_path2}/Errors{threshold}.csv')


In [None]:
%%capture
%tensorboard --logdir=./02-Results/LSTM/logs_avgSHAP=0.01 --host localhost --port=3021
%tensorboard --logdir=./02-Results/LSTM/logs_avgSHAP=0.02 --host localhost --port=3022
%tensorboard --logdir=./02-Results/LSTM/logs_avgSHAP=0.03 --host localhost --port=3023
%tensorboard --logdir=./02-Results/LSTM/logs_avgSHAP=0.04 --host localhost --port=3024

In [None]:
# Runs TensorBoard's `dev upload` subcommand once (--one_shot) on the
# logs_univariate log directory.
# NOTE(review): `dev upload` presumably sends the logs to the externally hosted
# TensorBoard.dev service - confirm that sharing them is intended and that the
# service and subcommand are still available in the installed TensorBoard version.
# NOTE(review): `dev upload` is a CLI subcommand, while the %tensorboard magic is
# meant to launch a server; verify this cell works, or run it as a shell command.
%tensorboard dev upload --logdir './02-Results/LSTM/logs_univariate' --one_shot