In [None]:
import seaborn as sns  #Visualization
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
import math
import matplotlib


import pandas as pd   #preprocessing
import numpy as np
from tqdm.notebook import tqdm
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from sklearn.preprocessing import MinMaxScaler


import torch          #modelling
import torch.autograd as autograd
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from collections import defaultdict

from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error

from sklearn.metrics import mean_squared_error, mean_absolute_error
from IMV_LSTM import IMVTensorLSTM
from IMV_LSTM import IMVFullLSTM

from boruta import BorutaPy
from BorutaShap import BorutaShap

import shap
import shap.plots

import operator

import time

import os
from IPython.utils import io

import warnings
warnings.filterwarnings("ignore", ".*does not have many workers.*")
warnings.filterwarnings("ignore", category=DeprecationWarning) 
warnings.filterwarnings("ignore", category=UserWarning) 


def mean_absolute_scaled_error(y_true, y_pred, y_train):
    """Return the mean absolute scaled error (MASE) of a forecast.

    The forecast errors ``y_true - y_pred`` are divided by the mean absolute
    error of a one-step naive forecast on ``y_train`` (each value predicted by
    its predecessor), and the mean of the absolute scaled errors is returned.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values; they must support element-wise
        subtraction with each other.
    y_train : sliceable sequence
        In-sample series used to compute the naive-forecast scale.
    """
    forecast_errors = y_true - y_pred
    # MAE of the "previous value" forecast on the training series.
    naive_mae = mean_absolute_error(y_train[1:], y_train[:-1])
    scaled_errors = forecast_errors / naive_mae
    return np.mean(np.abs(scaled_errors))

In [None]:
# Render matplotlib figures inline, at retina resolution.
%matplotlib inline
%config InlineBackend.figure_format='retina'

# Global seaborn theme applied to every figure in the notebook.
sns.set(style='whitegrid',palette='muted',font_scale=1.2)

# Disabled alternative colour palette (kept for reference only).
#HAPPY_COLORS_PALETTE = ['#01BEFE','#FFDD00','#FF7D00','#FF006D','#ADFF02','#8F00FF']

#sns.set_palette(sns.color_palette(HAPPY_COLORS_PALETTE))

# Default figure size (width, height) in inches.
rcParams['figure.figsize']=12,8

# Registers the tqdm progress-bar hooks on pandas objects.
tqdm.pandas()

In [None]:
# --- Training hyper-parameters -------------------------------------------
# Upper bound on training epochs (passed to the Trainer as max_epochs).
N_EPOCHS = 1001
# Mini-batch size used for the training DataLoader.
BATCH_SIZE = 7
# Number of consecutive rows in each input window fed to the LSTM.
SEQUENCE_LENGTH = 15
# N_HIDDEN is not set here: each experiment cell loops over `hidden_values`.
#N_HIDDEN = 32
# Number of stacked LSTM layers.
N_LAYERS = 1
# Early-stopping patience, in epochs without val_loss improvement.
PATIENCE = 50
# Learning rate handed to the AdamW optimizer.
LEARNING = 0.005

# Name of the target column to forecast.
TGT = 'Sales'

# --- Experiment switches ---------------------------------------------------
# Each flag gates one experiment cell of the notebook.
univariate = True
all_features = True

corr_train = True

# NOTE(review): the flags below are not read in the visible part of the
# notebook; presumably they gate later feature-selection cells -- confirm.
Boruta_GB = True
Boruta_RF = True

Boruta_SHAPGB = True
Boruta_SHAPRF = True

LIME_train = True

SHAP_insta = True
SHAP_avrag = True 

IMV_Tens = True 
IMV_Full = True 

In [None]:
# Fix the random seeds so repeated runs are reproducible.
# The experiment cells repeat these calls before each training run.
torch.manual_seed(8)
np.random.seed(8)
# Trailing ';' suppresses the cell's output of the returned seed value.
pl.seed_everything(8);

In [None]:
def features_dataframe(df, corr):
    """Select the model input columns from ``df``.

    The returned frame always has ``'Sales'`` as its first column, followed by
    the columns named in ``corr`` in the order given (duplicates, including a
    repeated ``'Sales'``, are dropped). The index is reset to ``0..n-1`` so the
    result can be sliced positionally.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; must contain ``'Sales'`` and every column in ``corr``.
    corr : iterable of str
        Names of the feature columns to keep.

    Returns
    -------
    pandas.DataFrame
        A new frame; ``df`` itself is not modified.

    Raises
    ------
    KeyError
        If a requested column is missing from ``df``.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order, which
    # keeps 'Sales' in column 0.
    selected = list(dict.fromkeys(['Sales', *corr]))
    # Vectorized column selection replaces the former row-by-row rebuild.
    return df[selected].reset_index(drop=True)

# Splits the data chronologically into a train part and a test part.
def train_test_spliter(ratio,features_df ):
    """Split ``features_df`` positionally into train and test slices.

    Despite its name, ``ratio`` is an absolute row count: the train slice is
    the first ``len(features_df) - ratio`` rows.

    NOTE(review): the test slice starts at ``train_size + 1``, so the row at
    position ``train_size`` ends up in neither slice. Other code in this
    notebook slices ``df[train_size+1:]`` to match, so the gap is preserved
    here -- confirm whether it is intentional.

    Returns
    -------
    tuple
        ``(train_df, test_df, train_size)``.
    """
    n_train = int(len(features_df) - ratio)
    head = features_df[:n_train]
    tail = features_df[n_train + 1:]
    return head, tail, n_train

def data_scaler(train_df,test_df):
    """Scale both frames to the range [-1, 1] with a scaler fitted on train.

    The ``MinMaxScaler`` is fitted on ``train_df`` only and then applied to
    both frames, so the test frame may contain values outside [-1, 1].

    Returns
    -------
    tuple
        ``(scaled_train_df, scaled_test_df, fitted_scaler)``; both frames keep
        the index and column labels of their inputs.
    """
    scaler = MinMaxScaler(feature_range=(-1,1)).fit(train_df)

    def _rescale(frame):
        # transform() returns a bare array; restore the original labels.
        return pd.DataFrame(
            scaler.transform(frame),
            index = frame.index,
            columns = frame.columns
        )

    scaled_train = _rescale(train_df)
    scaled_test = _rescale(test_df)
    return scaled_train, scaled_test, scaler


def create_sequences (input_data:pd.DataFrame, target_column, sequence_length):
    """Build sliding windows and their next-step labels.

    For every start position ``i`` a pair is produced consisting of

    * the ``sequence_length`` consecutive rows starting at ``i`` (all columns);
    * the value of ``target_column`` in the row right after that window.

    Returns
    -------
    list of (pandas.DataFrame, scalar)
        ``len(input_data) - sequence_length`` pairs; an empty list when the
        frame is not longer than ``sequence_length``.
    """
    n_windows = len(input_data) - sequence_length
    return [
        (
            input_data[start:start + sequence_length],
            input_data.iloc[start + sequence_length][target_column],
        )
        for start in range(n_windows)
    ]

def descale(descaler, values):
    """Map scaled values back to original units.

    ``values`` is turned into a single-column 2-D array, passed through
    ``descaler.inverse_transform`` and flattened back to one dimension.
    """
    as_column = np.expand_dims(np.array(values), axis=1)
    restored = descaler.inverse_transform(as_column)
    return restored.flatten()

class Dataset(Dataset):
    """Torch dataset over ``(window DataFrame, label)`` pairs.

    NOTE(review): this class reuses the name of the imported
    ``torch.utils.data.Dataset`` and therefore shadows it for the rest of the
    notebook; re-running the defining cell makes the class inherit from its
    own previous definition. The name is kept because callers rely on it.
    """

    def __init__(self,sequences):
        # Sequence of (window, label) pairs, indexed positionally.
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self,idx):
        """Return one sample as ``{'sequence': tensor, 'label': tensor}``."""
        window, target = self.sequences[idx]
        features = torch.Tensor(window.to_numpy())
        target_tensor = torch.tensor(target).float()
        return {'sequence': features, 'label': target_tensor}

class SalesDataModule(pl.LightningDataModule):
    """Lightning data module wrapping the train and test window lists.

    Parameters
    ----------
    train_seqeunces : list
        ``(window, label)`` pairs used for training. The misspelled parameter
        name is kept for backward compatibility with keyword callers.
    test_sequences : list
        ``(window, label)`` pairs served by ``val_dataloader``.
    batch_size : int, default 8
        Batch size of the training loader (validation always uses 1).
    """

    def __init__(self, train_seqeunces,test_sequences, batch_size=8):
        super().__init__()
        # Use the argument that was actually passed in. The previous code read
        # a global named `train_sequences` here and ignored the parameter.
        self.train_sequences = train_seqeunces
        self.test_sequences = test_sequences
        self.batch_size = batch_size

    def setup(self,stage=None):
        """Wrap both window lists in ``Dataset`` objects."""
        self.train_dataset = Dataset(self.train_sequences)
        self.test_dataset = Dataset(self.test_sequences)

    def train_dataloader(self):
        """Training loader; windows are served in their original order."""
        return DataLoader(
            self.train_dataset,
            batch_size = self.batch_size,
            shuffle = False,
            num_workers = 0
        )

    def val_dataloader(self):
        """Validation loader, one window per batch.

        NOTE(review): validation is served from the *test* windows, so early
        stopping and checkpoint selection see the same data that is later used
        for evaluation -- confirm this is intended.
        """
        return DataLoader(
            self.test_dataset,
            batch_size = 1,
            shuffle = False,
            num_workers = 0
        )


In [None]:
# Hidden-layer sizes explored by the grid-search loops below.
hidden_values = [32]

# Load the pre-processed data, keep store 266 only, and drop the 'Customers'
# column so it is not used as a model input.
Rossmann_df = (
    pd.read_csv('Rossmann_treated.csv')
    .loc[lambda frame: frame['Store'] == 266]
    .drop(columns = 'Customers')
)

## Univariate treatment

In [None]:
# The `univariate` switch comes from the configuration cell; it is no longer
# overridden here, so setting it to False there actually skips this experiment.
if univariate == True:
    folder_path = 'Predictions/grid_search_uni'
    # Creates missing parent folders too and is a no-op when the folder
    # already exists, so no exception needs to be swallowed.
    os.makedirs(folder_path, exist_ok=True)

    for N_HIDDEN in hidden_values:
        # Re-seed before every run so each hidden size starts from the same state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);
        iteration_start = time.monotonic()
        class SalesPredictionModel(nn.Module):
            """LSTM encoder followed by a linear head producing one value.

            The defaults for ``n_hidden`` and ``n_layers`` are bound when the
            class statement runs, which is why the class is re-declared inside
            the loop over hidden sizes.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM applies dropout only between stacked layers; with
                    # a single layer a non-zero value has no effect and merely
                    # raises a warning, so request it only when it can apply.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Map a batch-first input batch to one prediction per sample."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                # hidden[-1] is the final hidden state of the last LSTM layer.
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning wrapper: MSE loss around ``SalesPredictionModel``."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model=SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return ``(loss, output)``; loss is 0 when no labels are given."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                # Labels arrive as (batch,); the model emits (batch, 1).
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _shared_step(self, batch, metric_name):
                """Compute the loss for ``batch`` and log it under ``metric_name``."""
                sequences = batch['sequence']
                labels = batch['label']

                loss, _ = self(sequences, labels)
                self.log(metric_name, loss, prog_bar = True, logger=False)
                return loss

            def training_step(self, batch, batch_index):
                return self._shared_step(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._shared_step(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._shared_step(batch, 'test_loss')

            def configure_optimizers(self):
                return optim.AdamW(self.parameters(), lr = LEARNING)


        #makes SHAP calculations for all stores inside the rossmann_treated dataset if removed the [:1]

        #to omit outputs
        #with io.capture_output() as captured:
        df = Rossmann_df.drop(columns  = 'Date')
        sales_dependencies = {}
        dic = {}

        #return all columns names ('features') except for customers, since it's not an available 
        #information for future points
        features_df = features_dataframe(df,[TGT]) 
        #returns dataframe with the features to be analised

        #split into test and train and minmaxscaler
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        #make sequences with the data
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        #trains the model and store the most recent checkpoints removing previous ones if existing
        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])



        try:
            os.remove(f"{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}.ckpt")
            #pass
        except:
            pass

        checkpoint_callback = ModelCheckpoint(
            dirpath = f'{folder_path}/Checkpoints',
            filename = f'Rossmann_LSTM_hidden{N_HIDDEN}',
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        #load the best model from checkpoint
        trained_model = SalesPredictor.load_from_checkpoint(
        f'{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}.ckpt',
        n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        for item in test_dataset:
            sequence = item['sequence']
            label = item['label']

            if len(predictions) > SEQUENCE_LENGTH:
                for j in range(SEQUENCE_LENGTH):
                    sequence[-SEQUENCE_LENGTH+j,0] = float(predictions[-SEQUENCE_LENGTH+j])
            else: 
                for j in range(len(predictions)):
                    sequence[-len(predictions)+j,0] = float(predictions[-len(predictions)+j])

            _,output = trained_model(sequence.unsqueeze(dim=0))
            predictions.append(output.item())
            labels.append(label.item())


        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.Date.iloc[-len(predictions_descaled):])
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        dic= {}
        dic[f'store_pred_{N_HIDDEN}'] = predictions_descaled
        pred_df = pd.DataFrame.from_dict(dic)
        pred_df = pred_df.shift(-1)


        dic = {}
        dic[f'store_pred_dates'] = dates
        dic[f'store_truth'] = Rossmann_df[TGT].iloc[-len(pred_df):]
        truth_df = pd.DataFrame.from_dict(dic)


        if N_HIDDEN != hidden_values[0]:
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            truth_df.reset_index(inplace = True)
            truth_df.drop('index', axis=1)
            predictions_df = truth_df
            predictions_df = predictions_df.join(pred_df, how = 'left')
            predictions_df = predictions_df.iloc[:-1]

        display(predictions_df)

        dic = {}
        dic[f'store_truth'] = Rossmann_df[TGT]
        dic[f'store_truth_dates'] = full_dates
        truth_df = pd.DataFrame.from_dict(dic)

        dic= {}
        dic[f'store_pred'] = predictions_descaled
        dic[f'store_pred_dates'] = dates
        prediction_df = pd.DataFrame.from_dict(dic)
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)
        
        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-2*len(prediction_df):,1],truth_df.iloc[-2*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        print('mean absolute scaled error: ')
        print(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}'],
                                         truth_df.iloc[:-len(prediction_df),0]))
        print('\n','mean squared error: ')
        print(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}']))
        print('\n','root mean squared error: ')
        print((mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}']))**(1/2)) 
        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)
    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden{N_HIDDEN}.csv')

## Multivariate treatment

#### All features

In [None]:
if all_features == True:

    folder_path = 'Predictions/grid_search_allfeatures'

    # Creates missing parent folders too and is a no-op when the folder
    # already exists, so no exception needs to be swallowed.
    os.makedirs(folder_path, exist_ok=True)

    # One entry per hidden size: MASE, MSE and wall-clock seconds.
    mean_abs_allf = []
    mean_sqr_allf = []
    time_allf = []

    for N_HIDDEN in hidden_values:
        # Re-seed before every run so each hidden size starts from the same state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);

        iteration_start = time.monotonic()
        class SalesPredictionModel(nn.Module):
            """LSTM encoder followed by a linear head producing one value.

            The defaults for ``n_hidden`` and ``n_layers`` are bound when the
            class statement runs, which is why the class is re-declared inside
            the loop over hidden sizes.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM applies dropout only between stacked layers; with
                    # a single layer a non-zero value has no effect and merely
                    # raises a warning, so request it only when it can apply.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Map a batch-first input batch to one prediction per sample."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                # hidden[-1] is the final hidden state of the last LSTM layer.
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning wrapper: MSE loss around ``SalesPredictionModel``."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model=SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return ``(loss, output)``; loss is 0 when no labels are given."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                # Labels arrive as (batch,); the model emits (batch, 1).
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _shared_step(self, batch, metric_name):
                """Compute the loss for ``batch`` and log it under ``metric_name``."""
                sequences = batch['sequence']
                labels = batch['label']

                loss, _ = self(sequences, labels)
                self.log(metric_name, loss, prog_bar = True, logger=False)
                return loss

            def training_step(self, batch, batch_index):
                return self._shared_step(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._shared_step(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._shared_step(batch, 'test_loss')

            def configure_optimizers(self):
                return optim.AdamW(self.parameters(), lr = LEARNING)


        #makes SHAP calculations for all stores inside the rossmann_treated dataset if removed the [:1]

        #to omit outputs
        #with io.capture_output() as captured:
        df = Rossmann_df.drop(columns  = 'Date')
        sales_dependencies = {}
        dic = {}

        #return all columns names ('features') except for customers, since it's not an available 
        #information for future points
        features_df = features_dataframe(df,list(df.columns)) 
        #returns dataframe with the features to be analised

        #split into test and train and minmaxscaler
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        #make sequences with the data
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        #trains the model and store the most recent checkpoints removing previous ones if existing
        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])



        try:
            os.remove(f"{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}.ckpt")
            #pass
        except:
            pass

        checkpoint_callback = ModelCheckpoint(
            dirpath = f'{folder_path}/Checkpoints',
            filename = f'Rossmann_LSTM_hidden{N_HIDDEN}',
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        #load the best model from checkpoint
        trained_model = SalesPredictor.load_from_checkpoint(
        f'{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}.ckpt',
        n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        for item in test_dataset:
            sequence = item['sequence']
            label = item['label']

            if len(predictions) > SEQUENCE_LENGTH:
                for j in range(SEQUENCE_LENGTH):
                    sequence[-SEQUENCE_LENGTH+j,0] = float(predictions[-SEQUENCE_LENGTH+j])
            else: 
                for j in range(len(predictions)):
                    sequence[-len(predictions)+j,0] = float(predictions[-len(predictions)+j])

            _,output = trained_model(sequence.unsqueeze(dim=0))
            predictions.append(output.item())
            labels.append(label.item())



        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        dic= {}
        dic[f'store_pred_{N_HIDDEN}'] = predictions_descaled
        pred_df = pd.DataFrame.from_dict(dic)
        pred_df = pred_df.shift(-1)


        dic = {}
        dic[f'store_pred_dates'] = dates
        dic[f'store_truth'] = Rossmann_df[TGT].iloc[-len(pred_df):]
        truth_df = pd.DataFrame.from_dict(dic)


        if N_HIDDEN != hidden_values[0]:
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            truth_df.reset_index(inplace = True)
            truth_df.drop('index', axis=1)
            predictions_df = truth_df
            predictions_df = predictions_df.join(pred_df, how = 'left')
            predictions_df = predictions_df.iloc[:-1]

        #display(predictions_df)
        
        dic = {}
        dic[f'store_truth'] = Rossmann_df[TGT]
        dic[f'store_truth_dates'] = full_dates
        truth_df = pd.DataFrame.from_dict(dic)

        dic= {}
        dic[f'store_pred'] = predictions_descaled
        dic[f'store_pred_dates'] = dates
        prediction_df = pd.DataFrame.from_dict(dic)
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)
        
        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        print('mean absolute scaled error: ')
        print(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}'],
                                         truth_df.iloc[:-len(prediction_df),0]))
        print('\n','mean squared error: ')
        print(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}']))
        print('\n','root mean squared error: ')
        print((mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}']))**(1/2)) 

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_allf.append(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}'],
                                         truth_df.iloc[:-len(prediction_df),0]))

        mean_sqr_allf.append(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                         predictions_df[f'store_pred_{N_HIDDEN}']))

        time_allf.append(iteration_end - iteration_start)

    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden{N_HIDDEN}.csv')

    %store mean_abs_allf
    %store mean_sqr_allf
    %store time_allf
    
    print(mean_abs_allf)
    print(mean_sqr_allf)
    print(time_allf)
    
else:
    %store -r mean_abs_allf
    %store -r mean_sqr_allf
    %store -r time_allf
    
    print(mean_abs_allf)
    print(mean_sqr_allf)
    print(time_allf)

In [None]:
folder_path = 'Predictions/grid_search_corr'

if corr_train == True:
    corr_feat = pd.read_csv('Predictions/Single_Run/correlation_df.csv')
    corr_feat.drop(columns = 'all_features', inplace = True)
    corr_dic = {}

    try:
        os.mkdir(f'{folder_path}')
    except:
        pass

    for corr_col in tqdm(corr_feat.columns):
        
        mean_abs_corrf = []
        mean_sqr_corrf = []
        time_corrf = []
        
        for N_HIDDEN in hidden_values:
            torch.manual_seed(8)
            np.random.seed(8)
            pl.seed_everything(8);
        
            iteration_start = time.monotonic()
            
            hidden_dic = {}

            class SalesPredictionModel(nn.Module):
                """LSTM encoder followed by a linear head producing one value.

                The defaults for ``n_hidden`` and ``n_layers`` are bound when
                the class statement runs, which is why the class is
                re-declared inside the loop over hidden sizes.
                """

                def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                    super().__init__()
                    self.n_hidden = n_hidden

                    self.lstm = nn.LSTM(
                        input_size = n_features,
                        hidden_size = n_hidden,
                        batch_first = True,
                        num_layers = n_layers,
                        # nn.LSTM applies dropout only between stacked layers;
                        # with a single layer a non-zero value has no effect
                        # and merely raises a warning, so request it only when
                        # it can apply.
                        dropout = 0.2 if n_layers > 1 else 0.0
                    )
                    self.regressor = nn.Linear(n_hidden,1)

                def forward(self,x):
                    """Map a batch-first input batch to one prediction per sample."""
                    self.lstm.flatten_parameters()

                    _, (hidden, _) = self.lstm(x)
                    # hidden[-1] is the final hidden state of the last LSTM layer.
                    out = hidden[-1]

                    return self.regressor(out)


            class SalesPredictor(pl.LightningModule):
                """Lightning wrapper: MSE loss around ``SalesPredictionModel``."""

                def __init__(self, n_features: int):
                    super().__init__()
                    self.model=SalesPredictionModel(n_features)
                    self.criterion = nn.MSELoss()

                def forward(self, x, labels= None):
                    """Return ``(loss, output)``; loss is 0 when no labels are given."""
                    output = self.model(x)
                    if labels is None:
                        return 0, output
                    # Labels arrive as (batch,); the model emits (batch, 1).
                    return self.criterion(output, labels.unsqueeze(dim=1)), output

                def _shared_step(self, batch, metric_name):
                    """Compute the loss for ``batch`` and log it under ``metric_name``."""
                    sequences = batch['sequence']
                    labels = batch['label']

                    loss, _ = self(sequences, labels)
                    self.log(metric_name, loss, prog_bar = True, logger=False)
                    return loss

                def training_step(self, batch, batch_index):
                    return self._shared_step(batch, 'train_loss')

                def validation_step(self, batch, batch_index):
                    return self._shared_step(batch, 'val_loss')

                def test_step(self, batch, batch_index):
                    return self._shared_step(batch, 'test_loss')

                def configure_optimizers(self):
                    return optim.AdamW(self.parameters(), lr = LEARNING)


            #makes SHAP calculations for all stores inside the rossmann_treated dataset if removed the [:1]

            #to omit outputs
            #with io.capture_output() as captured:
            df = Rossmann_df.drop(columns  = 'Date')
    
            features = list(corr_feat[corr_col].fillna(0))

            features = [x for x in features if x != 0]

            #return all columns names ('features') except for customers, since it's not an available 
            #information for future points
            features_df = features_dataframe(df,features) 
            #returns dataframe with the features to be analised

            #split into test and train and minmaxscaler
            train_df, test_df, train_size =  train_test_spliter(105,features_df)
            train_df, test_df, scaler = data_scaler(train_df,test_df)
            #make sequences with the data
            train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
            test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

            #trains the model and store the most recent checkpoints removing previous ones if existing
            data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            data_module.setup()
            train_dataset = Dataset(train_sequences)
            test_dataset = Dataset(test_sequences)
            model = SalesPredictor(n_features = train_df.shape[1])



            try:
                os.remove(f"{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}_corr{corr_col}.ckpt")
            except:
                pass

            checkpoint_callback = ModelCheckpoint(
                dirpath = f'{folder_path}/Checkpoints',
                filename = f'Rossmann_LSTM_hidden{N_HIDDEN}_corr{corr_col}',
                save_top_k = 1,
                verbose = False ,
                monitor = 'val_loss',
                mode = 'min'
            )
            logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
            early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
            trainer = pl.Trainer(
                logger = logger,
                callbacks=[early_stopping_callback, checkpoint_callback],
                max_epochs = N_EPOCHS,
                gpus = 0,
            )
            trainer.fit(model, data_module)

            #load the best model from checkpoint
            trained_model = SalesPredictor.load_from_checkpoint(
            f'{folder_path}/Checkpoints/Rossmann_LSTM_hidden{N_HIDDEN}_corr{corr_col}.ckpt',
            n_features = train_df.shape[1]
            )

            predictions = []
            labels = []

            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                if len(predictions) > SEQUENCE_LENGTH:
                    for j in range(SEQUENCE_LENGTH):
                        sequence[-SEQUENCE_LENGTH+j,0] = float(predictions[-SEQUENCE_LENGTH+j])
                else: 
                    for j in range(len(predictions)):
                        sequence[-len(predictions)+j,0] = float(predictions[-len(predictions)+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



            descaler = MinMaxScaler()
            descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

            predictions_descaled = descale(descaler,predictions)
            labels_descaled = descale(descaler,labels)

            test_data = df[train_size+1:]
            test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

            dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date)
            full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())
            
            dic= {}
            dic[f'store_pred_{N_HIDDEN}'] = predictions_descaled
            pred_df = pd.DataFrame.from_dict(dic)
            pred_df = pred_df.shift(-1)


            dic = {}
            dic[f'store_pred_dates'] = dates
            dic[f'store_truth'] = Rossmann_df[TGT].iloc[-len(pred_df):]
            truth_df = pd.DataFrame.from_dict(dic)
            
            if N_HIDDEN != hidden_values[0]:
                predictions_df = predictions_df.join(pred_df, how = 'left')
            else:
                truth_df.reset_index(inplace = True)
                truth_df.drop('index', axis=1)
                predictions_df = truth_df
                predictions_df = predictions_df.join(pred_df, how = 'left')
                predictions_df = predictions_df.iloc[:-1]

            display(predictions_df)
            
            dic = {}
            dic[f'store_truth'] = Rossmann_df[TGT]
            dic[f'store_truth_dates'] = full_dates
            truth_df = pd.DataFrame.from_dict(dic)
            
            dic= {}
            dic[f'store_pred'] = predictions_descaled
            dic[f'store_pred_dates'] = dates
            prediction_df = pd.DataFrame.from_dict(dic)
            prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)
            
            plt.figure(figsize=(21, 7))
            plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
            plt.legend()
            plt.show();
            
            iteration_end = time.monotonic()
            
            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            mean_abs_corrf.append(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'],
                                             truth_df.iloc[:-len(prediction_df),0]))

            mean_sqr_corrf.append(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'])**(1/2))

            time_corrf.append(iteration_end - iteration_start)
            
            hidden_dic['mase'] = mean_abs_corrf
            hidden_dic['mse']  = mean_sqr_corrf
            hidden_dic['time'] = time_corrf
            
            print(corr_col)
            print(N_HIDDEN)
            print(hidden_dic)
            
        predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_correlation.csv')
        corr_dic[corr_col] = hidden_dic
    %store corr_dic
else:
    %store -r corr_dic
    corr_feat = pd.read_csv('Predictions/Single_Run/correlation_df.csv')

    for i in corr_feat.columns[1:]:
        # Predictions previously saved for feature set `i` (one `store_pred_<N>` column per hidden size).
        predictions_corr = pd.read_csv(f'{folder_path}/Rossmann_LSTM_{i}.csv')
        # Sales that precede the evaluated window; used to scale the MASE.
        insample_sales = Rossmann_df[TGT].iloc[:-len(predictions_corr)]

        # Two figures per feature set: the first three hidden sizes, then the remaining ones.
        for hidden_group in (hidden_values[:3], hidden_values[3:]):
            plt.figure(figsize=(21, 7))
            plt.plot_date(predictions_corr['store_pred_dates'],
                          predictions_corr['store_truth'],'--', label='Truth')
            for N in hidden_group:
                plt.plot_date(predictions_corr['store_pred_dates'],
                          predictions_corr[f'store_pred_{N}'],'-', label=f'{i},_Hidden_size_=_{N}')

                # Errors are measured against the true sales. The previous code passed
                # 'store_pred_dates' as y_true, i.e. compared predictions with date numbers.
                mase = mean_absolute_scaled_error(predictions_corr['store_truth'],
                                                  predictions_corr[f'store_pred_{N}'],
                                                  insample_sales)

                mse = mean_squared_error(predictions_corr['store_truth'],
                                         predictions_corr[f'store_pred_{N}'])
                print(f'{i},_Hidden_size_=_{N} mase: ', mase, ' rmse: ', (mse)**(1/2))
            plt.legend()
            plt.show()



In [None]:
if Boruta_GB == True:
    # Grid search over LSTM hidden sizes using the feature list stored in borutaGB_df.csv.
    folder_path = 'Predictions/grid_search_BoGB'
    BoGB_feat = pd.read_csv('Predictions/Single_Run/borutaGB_df.csv')

    # Create the output folder if needed. exist_ok replaces the former bare
    # `except: pass`, which also silenced genuine failures (e.g. permissions).
    os.makedirs(folder_path, exist_ok=True)

    # One entry is appended per hidden size, in hidden_values order.
    mean_abs_BoGB = []
    mean_sqr_BoGB = []
    time_BoGB = []

    for N_HIDDEN in hidden_values:
        # Re-seed on every iteration so each hidden size starts from the same RNG state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);

        iteration_start = time.monotonic()

        class SalesPredictionModel(nn.Module):
            """LSTM followed by a linear layer that emits one value per input sequence.

            The `n_hidden` default is bound to the value N_HIDDEN has when this class
            statement executes, so the class is re-declared for every grid-search step.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM only applies dropout between stacked layers; requesting it
                    # for a single layer has no effect and just raises a warning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Run the LSTM over `x` and regress from the last layer's final hidden state."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning module wrapping SalesPredictionModel with an MSE loss and AdamW."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model=SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return `(loss, output)`; the loss is 0 when no labels are supplied."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _logged_loss(self, batch, metric_name):
                """Compute the loss for one batch and show it in the progress bar as `metric_name`."""
                loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, loss, prog_bar = True, logger=False)
                return loss

            def training_step(self, batch, batch_index):
                return self._logged_loss(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._logged_loss(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._logged_loss(batch, 'test_loss')

            def configure_optimizers(self):
                """AdamW over all parameters with the notebook-level learning rate."""
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Build the inputs for this hidden size from the Boruta-GB feature list.
        df = Rossmann_df.drop(columns  = 'Date')

        # The 'important' column holds the selected feature names; empty (NaN) rows are discarded.
        features = [x for x in BoGB_feat['important'].fillna(0) if x != 0]

        # features_dataframe is defined elsewhere in the notebook; presumably it keeps the
        # target plus the listed columns - TODO confirm.
        features_df = features_dataframe(df,features)

        # Train/test split followed by min-max scaling. The meaning of 105 is set by
        # train_test_spliter (presumably the size of the held-out tail - TODO confirm).
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        # Sliding windows of SEQUENCE_LENGTH rows, each paired with a TGT label.
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        checkpoint_dir = f'{folder_path}/Checkpoints'
        checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_Boruta_GB'
        checkpoint_path = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

        # Drop the checkpoint of an earlier run so the file loaded below is the one written
        # by this training run. Only "file does not exist" is expected and ignored;
        # any other OS error now surfaces instead of being swallowed.
        try:
            os.remove(checkpoint_path)
        except FileNotFoundError:
            pass

        # Keep only the checkpoint with the lowest validation loss.
        checkpoint_callback = ModelCheckpoint(
            dirpath = checkpoint_dir,
            filename = checkpoint_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        # NOTE(review): the run name 'btc-price' looks like a leftover from another project - confirm.
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Load the best model from the checkpoint.
        trained_model = SalesPredictor.load_from_checkpoint(
            checkpoint_path,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: evaluation mode and no autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Feed the model its own forecasts: overwrite column 0 of the most recent
                # rows of the window with the latest predictions (at most SEQUENCE_LENGTH).
                # Assumes column 0 is the target - TODO confirm.
                n_known = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_known):
                    sequence[-n_known+j,0] = float(predictions[-n_known+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Scaler carrying only the parameters of column 0 of the fitted scaler, used to map
        # predictions back to sales units (assumes column 0 is the target - TODO confirm).
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        # The date column of Rossmann_df is 'Date' (it is the column dropped when building
        # `df`); the sibling grid-search cells read it the same way.
        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Negative forecasts are clamped to zero.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions are moved one row up before being lined up with dates and truth.
        pred_df = pd.DataFrame({f'store_pred_{N_HIDDEN}': predictions_descaled}).shift(-1)

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes: add one more prediction column to the shared frame.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # First hidden size: start the shared frame from dates + truth.
            # reset_index(drop=True) gives a 0..n-1 index to join on without keeping the
            # old index as a column. The previous `truth_df.drop('index', axis=1)`
            # discarded its result, so that column leaked into the frame and the saved CSV.
            window_truth_df = pd.DataFrame({
                'store_pred_dates': dates,
                'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):],
            }).reset_index(drop = True)
            # The last row has no prediction after the shift, so it is dropped.
            predictions_df = window_truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)

        # Full-history truth and this run's predictions, for plotting.
        truth_df = pd.DataFrame({'store_truth': Rossmann_df[TGT],
                                 'store_truth_dates': full_dates})

        prediction_df = pd.DataFrame({'store_pred': predictions_descaled,
                                      'store_pred_dates': dates})
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        # Compute each metric once and reuse it for both the printout and the result lists.
        eval_truth = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        eval_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        insample_truth = truth_df.iloc[:-len(prediction_df),0]

        mase_value = mean_absolute_scaled_error(eval_truth, eval_pred, insample_truth)
        mse_value = mean_squared_error(eval_truth, eval_pred)
        rmse_value = mse_value**(1/2)

        print('mean absolute scaled error: ')
        print(mase_value)
        print('\n','mean squared error: ')
        print(mse_value)
        print('\n','root mean squared error: ')
        print(rmse_value)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_BoGB.append(mase_value)
        # Despite the name, this list stores the ROOT mean squared error.
        mean_sqr_BoGB.append(rmse_value)

        time_BoGB.append(iteration_end - iteration_start)

    # Save the truth plus one prediction column per hidden size for this feature set.
    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden_BoGB.csv')

    # Persist the per-hidden-size metrics with IPython's %store so they survive kernel restarts.
    %store mean_abs_BoGB
    %store mean_sqr_BoGB
    %store time_BoGB
 
else:
    # Grid search skipped: restore the metrics stored by an earlier run and show them.
    %store -r mean_abs_BoGB
    %store -r mean_sqr_BoGB
    %store -r time_BoGB
    
    print(mean_abs_BoGB)
    print(mean_sqr_BoGB)
    print(time_BoGB)

In [None]:
if Boruta_RF == True:
    # Grid search over LSTM hidden sizes using the feature list stored in borutaRF_df.csv.
    folder_path = 'Predictions/grid_search_BoRF'
    BoRF_feat = pd.read_csv('Predictions/Single_Run/borutaRF_df.csv')

    # Create the output folder if needed. exist_ok replaces the former bare
    # `except: pass`, which also silenced genuine failures (e.g. permissions).
    os.makedirs(folder_path, exist_ok=True)

    # One entry is appended per hidden size, in hidden_values order.
    mean_abs_BoRF = []
    mean_sqr_BoRF = []
    time_BoRF = []

    for N_HIDDEN in hidden_values:
        # Re-seed on every iteration so each hidden size starts from the same RNG state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);
        iteration_start = time.monotonic()
        
        class SalesPredictionModel(nn.Module):
            """LSTM followed by a linear layer that emits one value per input sequence.

            The `n_hidden` default is bound to the value N_HIDDEN has when this class
            statement executes, so the class is re-declared for every grid-search step.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM only applies dropout between stacked layers; requesting it
                    # for a single layer has no effect and just raises a warning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Run the LSTM over `x` and regress from the last layer's final hidden state."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning module wrapping SalesPredictionModel with an MSE loss and AdamW."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model=SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return `(loss, output)`; the loss is 0 when no labels are supplied."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _logged_loss(self, batch, metric_name):
                """Compute the loss for one batch and show it in the progress bar as `metric_name`."""
                loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, loss, prog_bar = True, logger=False)
                return loss

            def training_step(self, batch, batch_index):
                return self._logged_loss(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._logged_loss(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._logged_loss(batch, 'test_loss')

            def configure_optimizers(self):
                """AdamW over all parameters with the notebook-level learning rate."""
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Build the inputs for this hidden size from the Boruta-RF feature list.
        df = Rossmann_df.drop(columns  = 'Date')

        # The 'important' column holds the selected feature names; empty (NaN) rows are discarded.
        features = [x for x in BoRF_feat['important'].fillna(0) if x != 0]

        # features_dataframe is defined elsewhere in the notebook; presumably it keeps the
        # target plus the listed columns - TODO confirm.
        features_df = features_dataframe(df,features)

        # Train/test split followed by min-max scaling. The meaning of 105 is set by
        # train_test_spliter (presumably the size of the held-out tail - TODO confirm).
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        # Sliding windows of SEQUENCE_LENGTH rows, each paired with a TGT label.
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        checkpoint_dir = f'{folder_path}/Checkpoints'
        checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_Boruta_RF'
        checkpoint_path = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

        # Drop the checkpoint of an earlier run so the file loaded below is the one written
        # by this training run. Only "file does not exist" is expected and ignored;
        # any other OS error now surfaces instead of being swallowed.
        try:
            os.remove(checkpoint_path)
        except FileNotFoundError:
            pass

        # Keep only the checkpoint with the lowest validation loss.
        checkpoint_callback = ModelCheckpoint(
            dirpath = checkpoint_dir,
            filename = checkpoint_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        # NOTE(review): the run name 'btc-price' looks like a leftover from another project - confirm.
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Load the best model from the checkpoint.
        trained_model = SalesPredictor.load_from_checkpoint(
            checkpoint_path,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: evaluation mode and no autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Feed the model its own forecasts: overwrite column 0 of the most recent
                # rows of the window with the latest predictions (at most SEQUENCE_LENGTH).
                # Assumes column 0 is the target - TODO confirm.
                n_known = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_known):
                    sequence[-n_known+j,0] = float(predictions[-n_known+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Scaler carrying only the parameters of column 0 of the fitted scaler, used to map
        # predictions back to sales units (assumes column 0 is the target - TODO confirm).
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Negative forecasts are clamped to zero.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions are moved one row up before being lined up with dates and truth.
        pred_df = pd.DataFrame({f'store_pred_{N_HIDDEN}': predictions_descaled}).shift(-1)

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes: add one more prediction column to the shared frame.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # First hidden size: start the shared frame from dates + truth.
            # reset_index(drop=True) gives a 0..n-1 index to join on without keeping the
            # old index as a column. The previous `truth_df.drop('index', axis=1)`
            # discarded its result, so that column leaked into the frame and the saved CSV.
            window_truth_df = pd.DataFrame({
                'store_pred_dates': dates,
                'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):],
            }).reset_index(drop = True)
            # The last row has no prediction after the shift, so it is dropped.
            predictions_df = window_truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)

        # Full-history truth and this run's predictions, for plotting.
        truth_df = pd.DataFrame({'store_truth': Rossmann_df[TGT],
                                 'store_truth_dates': full_dates})

        prediction_df = pd.DataFrame({'store_pred': predictions_descaled,
                                      'store_pred_dates': dates})
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        # Compute each metric once and reuse it for both the printout and the result lists.
        eval_truth = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        eval_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        insample_truth = truth_df.iloc[:-len(prediction_df),0]

        mase_value = mean_absolute_scaled_error(eval_truth, eval_pred, insample_truth)
        mse_value = mean_squared_error(eval_truth, eval_pred)
        rmse_value = mse_value**(1/2)

        print('mean absolute scaled error: ')
        print(mase_value)
        print('\n','mean squared error: ')
        print(mse_value)
        print('\n','root mean squared error: ')
        print(rmse_value)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_BoRF.append(mase_value)
        # Despite the name, this list stores the ROOT mean squared error.
        mean_sqr_BoRF.append(rmse_value)

        time_BoRF.append(iteration_end - iteration_start)

    # Save the truth plus one prediction column per hidden size for this feature set.
    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden_BoRF.csv')

    # Persist the per-hidden-size metrics with IPython's %store so they survive kernel restarts.
    %store mean_abs_BoRF
    %store mean_sqr_BoRF
    %store time_BoRF

else:
    # Grid search skipped: restore the metrics stored by an earlier run and show them.
    %store -r mean_abs_BoRF
    %store -r mean_sqr_BoRF
    %store -r time_BoRF
    
    print(mean_abs_BoRF)
    print(mean_sqr_BoRF)
    print(time_BoRF)

In [None]:
if Boruta_SHAPGB == True:
    # Grid search over LSTM hidden sizes using the feature list stored in borutaSHAPGB_df.csv.
    folder_path = 'Predictions/grid_search_BoSHAPGB'
    BoSHAPGB_feat = pd.read_csv('Predictions/Single_Run/borutaSHAPGB_df.csv')

    # Create the output folder if needed. exist_ok replaces the former bare
    # `except: pass`, which also silenced genuine failures (e.g. permissions).
    os.makedirs(folder_path, exist_ok=True)

    # One entry is appended per hidden size, in hidden_values order.
    mean_abs_BoSHAPGB = []
    mean_sqr_BoSHAPGB = []
    time_BoSHAPGB = []

    for N_HIDDEN in hidden_values:
        # Re-seed on every iteration so each hidden size starts from the same RNG state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);
        iteration_start = time.monotonic()

        class SalesPredictionModel(nn.Module):
            """LSTM followed by a linear layer that emits one value per input sequence.

            The `n_hidden` default is bound to the value N_HIDDEN has when this class
            statement executes, so the class is re-declared for every grid-search step.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM only applies dropout between stacked layers; requesting it
                    # for a single layer has no effect and just raises a warning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Run the LSTM over `x` and regress from the last layer's final hidden state."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning module wrapping SalesPredictionModel with an MSE loss and AdamW."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model=SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return `(loss, output)`; the loss is 0 when no labels are supplied."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _logged_loss(self, batch, metric_name):
                """Compute the loss for one batch and show it in the progress bar as `metric_name`."""
                loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, loss, prog_bar = True, logger=False)
                return loss

            def training_step(self, batch, batch_index):
                return self._logged_loss(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._logged_loss(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._logged_loss(batch, 'test_loss')

            def configure_optimizers(self):
                """AdamW over all parameters with the notebook-level learning rate."""
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Build the inputs for this hidden size from the BorutaSHAP-GB feature list.
        df = Rossmann_df.drop(columns  = 'Date')

        # The 'important' column holds the selected feature names; empty (NaN) rows are discarded.
        features = [x for x in BoSHAPGB_feat['important'].fillna(0) if x != 0]

        # features_dataframe is defined elsewhere in the notebook; presumably it keeps the
        # target plus the listed columns - TODO confirm.
        features_df = features_dataframe(df,features)

        # Train/test split followed by min-max scaling. The meaning of 105 is set by
        # train_test_spliter (presumably the size of the held-out tail - TODO confirm).
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        # Sliding windows of SEQUENCE_LENGTH rows, each paired with a TGT label.
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        checkpoint_dir = f'{folder_path}/Checkpoints'
        checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_Boruta_SHAPGB'
        checkpoint_path = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

        # Drop the checkpoint of an earlier run so the file loaded below is the one written
        # by this training run. Only "file does not exist" is expected and ignored;
        # any other OS error now surfaces instead of being swallowed.
        try:
            os.remove(checkpoint_path)
        except FileNotFoundError:
            pass

        # Keep only the checkpoint with the lowest validation loss.
        checkpoint_callback = ModelCheckpoint(
            dirpath = checkpoint_dir,
            filename = checkpoint_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        # NOTE(review): the run name 'btc-price' looks like a leftover from another project - confirm.
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Load the best model from the checkpoint.
        trained_model = SalesPredictor.load_from_checkpoint(
            checkpoint_path,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: evaluation mode and no autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Feed the model its own forecasts: overwrite column 0 of the most recent
                # rows of the window with the latest predictions (at most SEQUENCE_LENGTH).
                # Assumes column 0 is the target - TODO confirm.
                n_known = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_known):
                    sequence[-n_known+j,0] = float(predictions[-n_known+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Scaler carrying only the parameters of column 0 of the fitted scaler, used to map
        # predictions back to sales units (assumes column 0 is the target - TODO confirm).
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Negative forecasts are clamped to zero.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions are moved one row up before being lined up with dates and truth.
        pred_df = pd.DataFrame({f'store_pred_{N_HIDDEN}': predictions_descaled}).shift(-1)

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes: add one more prediction column to the shared frame.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # First hidden size: start the shared frame from dates + truth.
            # reset_index(drop=True) gives a 0..n-1 index to join on without keeping the
            # old index as a column. The previous `truth_df.drop('index', axis=1)`
            # discarded its result, so that column leaked into the frame and the saved CSV.
            window_truth_df = pd.DataFrame({
                'store_pred_dates': dates,
                'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):],
            }).reset_index(drop = True)
            # The last row has no prediction after the shift, so it is dropped.
            predictions_df = window_truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)

        # Full-history truth and this run's predictions, for plotting.
        truth_df = pd.DataFrame({'store_truth': Rossmann_df[TGT],
                                 'store_truth_dates': full_dates})

        prediction_df = pd.DataFrame({'store_pred': predictions_descaled,
                                      'store_pred_dates': dates})
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        # Compute each metric once and reuse it for both the printout and the result lists.
        eval_truth = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        eval_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        insample_truth = truth_df.iloc[:-len(prediction_df),0]

        mase_value = mean_absolute_scaled_error(eval_truth, eval_pred, insample_truth)
        mse_value = mean_squared_error(eval_truth, eval_pred)
        rmse_value = mse_value**(1/2)

        print('mean absolute scaled error: ')
        print(mase_value)
        print('\n','mean squared error: ')
        print(mse_value)
        print('\n','root mean squared error: ')
        print(rmse_value)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_BoSHAPGB.append(mase_value)
        # Despite the name, this list stores the ROOT mean squared error.
        mean_sqr_BoSHAPGB.append(rmse_value)

        time_BoSHAPGB.append(iteration_end - iteration_start)

    # Write the joined truth/prediction frame (one prediction column per hidden size).
    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden_BoSHAPGB.csv')

    # Persist the metric lists with IPython's %store so they survive kernel restarts.
    %store mean_abs_BoSHAPGB
    %store mean_sqr_BoSHAPGB
    %store time_BoSHAPGB

else:
    # Grid search skipped: restore the lists saved by an earlier run and show them.
    %store -r mean_abs_BoSHAPGB
    %store -r mean_sqr_BoSHAPGB
    %store -r time_BoSHAPGB
    
    print(mean_abs_BoSHAPGB)
    print(mean_sqr_BoSHAPGB)
    print(time_BoSHAPGB)

In [None]:
if Boruta_SHAPRF == True:
    # Grid search over LSTM hidden sizes using the feature list read from borutaSHAPRF_df.csv.
    folder_path = 'Predictions/grid_search_BoSHAPRF'
    BoSHAPRF_feat = pd.read_csv('Predictions/Single_Run/borutaSHAPRF_df.csv')

    # An already-existing folder is fine; any other OS error (permissions, bad path)
    # should surface instead of being silently swallowed.
    os.makedirs(folder_path, exist_ok=True)

    # One entry per hidden size, in the order of hidden_values.
    mean_abs_BoSHAPRF = []
    mean_sqr_BoSHAPRF = []
    time_BoSHAPRF = []

    for N_HIDDEN in hidden_values:
        # Re-seed on every iteration so each hidden size starts from the same RNG state.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8);
        iteration_start = time.monotonic()
        
        class SalesPredictionModel(nn.Module):
            """LSTM encoder followed by a linear head that emits one value per sequence.

            The class is re-created on every grid-search iteration because the
            ``n_hidden`` default is bound to the current ``N_HIDDEN`` at definition time.

            Args:
                n_features: number of input features per time step.
                n_hidden: LSTM hidden size (defaults to the loop's ``N_HIDDEN``).
                n_layers: number of stacked LSTM layers (defaults to ``N_LAYERS``).
            """
            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM applies dropout only between stacked layers; with a single
                    # layer a non-zero value does nothing except raise a UserWarning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Map a batch-first input (batch, seq_len, n_features) to (batch, 1)."""
                self.lstm.flatten_parameters()

                # h_n is (num_layers, batch, n_hidden); keep the last layer's final state.
                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning wrapper: MSE-trained SalesPredictionModel optimised with AdamW."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model = SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels= None):
                """Return ``(loss, output)``; the loss is 0 when no labels are given."""
                prediction = self.model(x)
                if labels is None:
                    return 0, prediction
                # Labels arrive as (batch,); add a trailing axis to match the model output.
                return self.criterion(prediction, labels.unsqueeze(dim=1)), prediction

            def _shared_step(self, batch, metric_name):
                """Compute the batch loss and log it to the progress bar under ``metric_name``."""
                batch_loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                return batch_loss

            def training_step(self, batch, batch_index):
                return self._shared_step(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._shared_step(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._shared_step(batch, 'test_loss')

            def configure_optimizers(self):
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Model input: every column except the raw date, restricted to the names listed
        # in the 'important' column of the BorutaSHAP (RF) result file.
        df = Rossmann_df.drop(columns  = 'Date')

        # Missing entries in 'important' are not feature names; filter them out.
        features = [x for x in BoSHAPRF_feat['important'].fillna(0) if x != 0]

        # features_dataframe is defined elsewhere in the notebook; presumably it returns
        # df restricted to the selected features - confirm against its definition.
        features_df = features_dataframe(df,features)

        # Train/test split, min-max scaling, then windows of SEQUENCE_LENGTH steps.
        # NOTE(review): the meaning of the literal 105 is not visible here - confirm.
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        # NOTE(review): SalesDataModule is defined elsewhere; if it serves test_sequences
        # as the validation set, early stopping and checkpoint selection are tuned on the
        # test data - confirm.
        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        ckpt_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_Boruta_SHAPRF'
        ckpt_path = f'{folder_path}/Checkpoints/{ckpt_name}.ckpt'

        # Remove the checkpoint of a previous run so the file loaded below is the one
        # written by this training. Only "file does not exist" is expected; any other
        # failure would leave a stale checkpoint in place and must not be hidden.
        try:
            os.remove(ckpt_path)
        except FileNotFoundError:
            pass

        checkpoint_callback = ModelCheckpoint(
            dirpath = f'{folder_path}/Checkpoints',
            filename = ckpt_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Reload the best (lowest val_loss) weights saved by the checkpoint callback.
        trained_model = SalesPredictor.load_from_checkpoint(
            ckpt_path,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: switch to eval mode and skip autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Feed the model its own earlier outputs: overwrite column 0 of the last
                # n_feedback time steps with the most recent predictions (at most one
                # full window). Column 0 is assumed to be the target - confirm.
                n_feedback = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_feedback):
                    sequence[-n_feedback+j,0] = float(predictions[-n_feedback+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Target-only inverse scaler: reuse the fitted parameters of the first column
        # (assumed to be the target - confirm against data_scaler).
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        # Not used again inside this cell; kept in case a later cell reads them.
        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Clamp negative forecasts to zero before any frame is built from them.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions for this hidden size, moved one step earlier.
        dic = {f'store_pred_{N_HIDDEN}': predictions_descaled}
        pred_df = pd.DataFrame.from_dict(dic).shift(-1)

        # Truth for the last len(pred_df) dates.
        dic = {'store_pred_dates': dates, 'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):]}
        truth_df = pd.DataFrame.from_dict(dic)

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes only add their prediction column.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # reset_index(drop=True) discards the old row labels outright. The previous
            # reset_index(inplace=True) + drop('index', axis=1) pair never removed the
            # 'index' column because the result of drop() was thrown away.
            truth_df = truth_df.reset_index(drop = True)
            # The last row has no prediction after shift(-1), so it is dropped.
            predictions_df = truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)
        
        # Full-history truth frame: column 0 = sales, column 1 = matplotlib date numbers.
        dic = {'store_truth': Rossmann_df[TGT], 'store_truth_dates': full_dates}
        truth_df = pd.DataFrame(dic)

        # Prediction frame on the forecast dates; predictions are moved one step earlier.
        dic = {'store_pred': predictions_descaled, 'store_pred_dates': dates}
        prediction_df = pd.DataFrame(dic)
        prediction_df['store_pred'] = prediction_df['store_pred'].shift(-1)

        # Show three forecast-horizons' worth of recent truth next to the forecast.
        plt.figure(figsize=(21, 7))
        recent_truth = truth_df.iloc[-3*len(prediction_df):]
        plt.plot_date(recent_truth.iloc[:,1], recent_truth.iloc[:,0], '-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1], prediction_df.iloc[:,0], '-', label ='Prediction')
        plt.legend()
        plt.show();

        # Inputs shared by every metric: truth/prediction columns of the joined frame and
        # the part of the full truth series that precedes the forecast window.
        eval_truth = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        eval_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        naive_train = truth_df.iloc[:-len(prediction_df),0]

        mase_value = mean_absolute_scaled_error(eval_truth, eval_pred, naive_train)
        mse_value = mean_squared_error(eval_truth, eval_pred)
        rmse_value = mse_value**(1/2)

        print('mean absolute scaled error: ')
        print(mase_value)
        print('\n','mean squared error: ')
        print(mse_value)
        print('\n','root mean squared error: ')
        print(rmse_value)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        # One entry per hidden size; note that mean_sqr_* stores the RMSE, not the MSE.
        mean_abs_BoSHAPRF.append(mase_value)
        mean_sqr_BoSHAPRF.append(rmse_value)
        time_BoSHAPRF.append(iteration_end - iteration_start)

    # Write the joined truth/prediction frame (one prediction column per hidden size).
    predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden_BoSHAPRF.csv')

    # Persist the metric lists with IPython's %store so they survive kernel restarts.
    %store mean_abs_BoSHAPRF
    %store mean_sqr_BoSHAPRF
    %store time_BoSHAPRF

else:
    # Grid search skipped: restore the lists saved by an earlier run and show them.
    %store -r mean_abs_BoSHAPRF
    %store -r mean_sqr_BoSHAPRF
    %store -r time_BoSHAPRF
    
    print(mean_abs_BoSHAPRF)
    print(mean_sqr_BoSHAPRF)
    print(time_BoSHAPRF)

In [None]:
if LIME_train == True:
    # Grid search over LIME importance thresholds x LSTM hidden sizes.
    folder_path = 'Predictions/grid_search_LIME'

    LIME_inst_th = [0.007,0.008,0.009,0.01]
    inst_LSTMLIME= pd.read_csv('Predictions/Single_Run/Features_LSTMLIME.csv')
    # threshold -> {'mase': [...], 'mse': [...], 'time': [...]}
    LIME_inst_dic = {}

    # An already-existing folder is fine; any other OS error (permissions, bad path)
    # should surface instead of being silently swallowed.
    os.makedirs(folder_path, exist_ok=True)

    for LIME_th in LIME_inst_th:
        # Fresh result lists per threshold; one entry per hidden size.
        mean_abs_LIME_inst = []
        mean_sqr_LIME_inst = []
        time_LIME_inst = []

        for N_HIDDEN in hidden_values:
            # Re-seed on every iteration so each run starts from the same RNG state.
            torch.manual_seed(8)
            np.random.seed(8)
            pl.seed_everything(8);
            iteration_start = time.monotonic()

            # Re-created each iteration; it is later filled with references to the three
            # lists above, so the dict of the last iteration holds all entries.
            hidden_dic = {}

            class SalesPredictionModel(nn.Module):
                """LSTM encoder followed by a linear head that emits one value per sequence.

                The class is re-created on every grid-search iteration because the
                ``n_hidden`` default is bound to the current ``N_HIDDEN`` at definition time.

                Args:
                    n_features: number of input features per time step.
                    n_hidden: LSTM hidden size (defaults to the loop's ``N_HIDDEN``).
                    n_layers: number of stacked LSTM layers (defaults to ``N_LAYERS``).
                """
                def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                    super().__init__()
                    self.n_hidden = n_hidden

                    self.lstm = nn.LSTM(
                        input_size = n_features,
                        hidden_size = n_hidden,
                        batch_first = True,
                        num_layers = n_layers,
                        # nn.LSTM applies dropout only between stacked layers; with a single
                        # layer a non-zero value does nothing except raise a UserWarning.
                        dropout = 0.2 if n_layers > 1 else 0.0
                    )
                    self.regressor = nn.Linear(n_hidden,1)

                def forward(self,x):
                    """Map a batch-first input (batch, seq_len, n_features) to (batch, 1)."""
                    self.lstm.flatten_parameters()

                    # h_n is (num_layers, batch, n_hidden); keep the last layer's final state.
                    _, (hidden, _) = self.lstm(x)
                    out = hidden[-1]

                    return self.regressor(out)


            class SalesPredictor(pl.LightningModule):
                """Lightning wrapper: MSE-trained SalesPredictionModel optimised with AdamW."""

                def __init__(self, n_features: int):
                    super().__init__()
                    self.model = SalesPredictionModel(n_features)
                    self.criterion = nn.MSELoss()

                def forward(self, x, labels= None):
                    """Return ``(loss, output)``; the loss is 0 when no labels are given."""
                    prediction = self.model(x)
                    if labels is None:
                        return 0, prediction
                    # Labels arrive as (batch,); add a trailing axis to match the model output.
                    return self.criterion(prediction, labels.unsqueeze(dim=1)), prediction

                def _shared_step(self, batch, metric_name):
                    """Compute the batch loss and log it to the progress bar under ``metric_name``."""
                    batch_loss, _ = self(batch['sequence'], batch['label'])
                    self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                    return batch_loss

                def training_step(self, batch, batch_index):
                    return self._shared_step(batch, 'train_loss')

                def validation_step(self, batch, batch_index):
                    return self._shared_step(batch, 'val_loss')

                def test_step(self, batch, batch_index):
                    return self._shared_step(batch, 'test_loss')

                def configure_optimizers(self):
                    return optim.AdamW(self.parameters(), lr = LEARNING)


            # Model input: every column except the raw date, restricted to the features
            # whose LIME value exceeds the current threshold.
            df = Rossmann_df.drop(columns  = 'Date')
            features = list(inst_LSTMLIME['features'].loc[inst_LSTMLIME['LIME_value'] > LIME_th])

            # features_dataframe is defined elsewhere in the notebook; presumably it returns
            # df restricted to the selected features - confirm against its definition.
            features_df = features_dataframe(df,features)

            # Train/test split, min-max scaling, then windows of SEQUENCE_LENGTH steps.
            # NOTE(review): the meaning of the literal 105 is not visible here - confirm.
            train_df, test_df, train_size =  train_test_spliter(105,features_df)
            train_df, test_df, scaler = data_scaler(train_df,test_df)
            train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
            test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

            # NOTE(review): SalesDataModule is defined elsewhere; if it serves test_sequences
            # as the validation set, early stopping and checkpoint selection are tuned on the
            # test data - confirm.
            data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            data_module.setup()
            train_dataset = Dataset(train_sequences)
            test_dataset = Dataset(test_sequences)
            model = SalesPredictor(n_features = train_df.shape[1])

            ckpt_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_LIME{LIME_th}'
            ckpt_path = f'{folder_path}/Checkpoints/{ckpt_name}.ckpt'

            # Remove the checkpoint of a previous run so the file loaded below is the one
            # written by this training. Only "file does not exist" is expected; any other
            # failure would leave a stale checkpoint in place and must not be hidden.
            try:
                os.remove(ckpt_path)
            except FileNotFoundError:
                pass

            checkpoint_callback = ModelCheckpoint(
                dirpath = f'{folder_path}/Checkpoints',
                filename = ckpt_name,
                save_top_k = 1,
                verbose = False ,
                monitor = 'val_loss',
                mode = 'min'
            )
            logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
            early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
            trainer = pl.Trainer(
                logger = logger,
                callbacks=[early_stopping_callback, checkpoint_callback],
                max_epochs = N_EPOCHS,
                gpus = 0,
            )
            trainer.fit(model, data_module)

            # Reload the best (lowest val_loss) weights saved by the checkpoint callback.
            trained_model = SalesPredictor.load_from_checkpoint(
                ckpt_path,
                n_features = train_df.shape[1]
            )

            predictions = []
            labels = []

            # Inference only: switch to eval mode and skip autograd bookkeeping.
            trained_model.eval()
            with torch.no_grad():
                for item in test_dataset:
                    sequence = item['sequence']
                    label = item['label']

                    # Feed the model its own earlier outputs: overwrite column 0 of the last
                    # n_feedback time steps with the most recent predictions (at most one
                    # full window). Column 0 is assumed to be the target - confirm.
                    n_feedback = min(len(predictions), SEQUENCE_LENGTH)
                    for j in range(n_feedback):
                        sequence[-n_feedback+j,0] = float(predictions[-n_feedback+j])

                    _,output = trained_model(sequence.unsqueeze(dim=0))
                    predictions.append(output.item())
                    labels.append(label.item())



            # Target-only inverse scaler: reuse the fitted parameters of the first column
            # (assumed to be the target - confirm against data_scaler).
            descaler = MinMaxScaler()
            descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

            predictions_descaled = descale(descaler,predictions)
            labels_descaled = descale(descaler,labels)

            # Not used again inside this cell; kept in case a later cell reads them.
            test_data = df[train_size+1:]
            test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

            dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
            full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

            # Clamp negative forecasts to zero BEFORE building the frames, as the Boruta
            # grid-search cells do. Clamping only after the frames exist (as happened
            # here) never reaches the plotted or scored values.
            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            # Predictions for this hidden size, moved one step earlier.
            dic = {f'store_pred_{N_HIDDEN}': predictions_descaled}
            pred_df = pd.DataFrame.from_dict(dic).shift(-1)

            # Truth for the last len(pred_df) dates.
            dic = {'store_pred_dates': dates, 'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):]}
            truth_df = pd.DataFrame.from_dict(dic)

            if N_HIDDEN != hidden_values[0]:
                # Later hidden sizes only add their prediction column.
                predictions_df = predictions_df.join(pred_df, how = 'left')
            else:
                # reset_index(drop=True) discards the old row labels outright. The previous
                # reset_index(inplace=True) + drop('index', axis=1) pair never removed the
                # 'index' column because the result of drop() was thrown away.
                truth_df = truth_df.reset_index(drop = True)
                # The last row has no prediction after shift(-1), so it is dropped.
                predictions_df = truth_df.join(pred_df, how = 'left').iloc[:-1]

            display(predictions_df)

            # Full-history truth frame: column 0 = sales, column 1 = matplotlib date numbers.
            dic = {'store_truth': Rossmann_df[TGT], 'store_truth_dates': full_dates}
            truth_df = pd.DataFrame(dic)

            # Prediction frame on the forecast dates; predictions are moved one step earlier.
            dic = {'store_pred': predictions_descaled, 'store_pred_dates': dates}
            prediction_df = pd.DataFrame(dic)
            prediction_df['store_pred'] = prediction_df['store_pred'].shift(-1)

            # Show three forecast-horizons' worth of recent truth next to the forecast.
            plt.figure(figsize=(21, 7))
            recent_truth = truth_df.iloc[-3*len(prediction_df):]
            plt.plot_date(recent_truth.iloc[:,1], recent_truth.iloc[:,0], '-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1], prediction_df.iloc[:,0], '-', label ='Prediction')
            plt.legend()
            plt.show();

            iteration_end = time.monotonic()

            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            mean_abs_LIME_inst.append(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'],
                                             truth_df.iloc[:-len(prediction_df),0]))

            mean_sqr_LIME_inst.append(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'])**(1/2))

            time_LIME_inst.append(iteration_end - iteration_start)

            hidden_dic['mase'] = mean_abs_LIME_inst
            hidden_dic['mse']  = mean_sqr_LIME_inst
            hidden_dic['time'] = time_LIME_inst
            
            print(LIME_th)
            print(N_HIDDEN)
            print(hidden_dic)
        LIME_inst_dic[LIME_th] = hidden_dic
        
    #predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_LIME.csv')
    
    %store LIME_inst_dic

    """

            dic = {}
            dic[f'store_truth'] = Rossmann_df[TGT]
            dic[f'store_truth_dates'] = full_dates
            truth_df = pd.DataFrame.from_dict(dic)

            dic= {}
            dic[f'store_pred'] = predictions_descaled
            dic[f'store_pred_dates'] = dates
            prediction_df = pd.DataFrame.from_dict(dic)
            prediction_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden{N_HIDDEN}.csv')

            plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
            plt.legend()
            plt.show();
    """

In [None]:
if SHAP_insta == True:
    # Grid search over instance-level SHAP thresholds x LSTM hidden sizes.
    folder_path = 'Predictions/grid_search_SHAP_inst'

    SHAP_inst_th = [0.05,0.1,0.15,0.2]
    inst_LSTMSHAP= pd.read_csv('Predictions/Single_Run/Features_inst_LSTMSHAP.csv')
    # threshold -> {'mase': [...], 'mse': [...], 'time': [...]}
    SHAP_inst_dic = {}

    # An already-existing folder is fine; any other OS error (permissions, bad path)
    # should surface instead of being silently swallowed.
    os.makedirs(folder_path, exist_ok=True)

    for SHAP_th in SHAP_inst_th:
        # Fresh result lists per threshold; one entry per hidden size.
        mean_abs_SHAP_inst = []
        mean_sqr_SHAP_inst = []
        time_SHAP_inst = []

        for N_HIDDEN in hidden_values:
            # Re-seed on every iteration so each run starts from the same RNG state.
            torch.manual_seed(8)
            np.random.seed(8)
            pl.seed_everything(8);
            iteration_start = time.monotonic()

            # Re-created each iteration; it is later filled with references to the three
            # lists above, so the dict of the last iteration holds all entries.
            hidden_dic = {}

            class SalesPredictionModel(nn.Module):
                """LSTM encoder followed by a linear head that emits one value per sequence.

                The class is re-created on every grid-search iteration because the
                ``n_hidden`` default is bound to the current ``N_HIDDEN`` at definition time.

                Args:
                    n_features: number of input features per time step.
                    n_hidden: LSTM hidden size (defaults to the loop's ``N_HIDDEN``).
                    n_layers: number of stacked LSTM layers (defaults to ``N_LAYERS``).
                """
                def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                    super().__init__()
                    self.n_hidden = n_hidden

                    self.lstm = nn.LSTM(
                        input_size = n_features,
                        hidden_size = n_hidden,
                        batch_first = True,
                        num_layers = n_layers,
                        # nn.LSTM applies dropout only between stacked layers; with a single
                        # layer a non-zero value does nothing except raise a UserWarning.
                        dropout = 0.2 if n_layers > 1 else 0.0
                    )
                    self.regressor = nn.Linear(n_hidden,1)

                def forward(self,x):
                    """Map a batch-first input (batch, seq_len, n_features) to (batch, 1)."""
                    self.lstm.flatten_parameters()

                    # h_n is (num_layers, batch, n_hidden); keep the last layer's final state.
                    _, (hidden, _) = self.lstm(x)
                    out = hidden[-1]

                    return self.regressor(out)


            class SalesPredictor(pl.LightningModule):
                """Lightning wrapper: MSE-trained SalesPredictionModel optimised with AdamW."""

                def __init__(self, n_features: int):
                    super().__init__()
                    self.model = SalesPredictionModel(n_features)
                    self.criterion = nn.MSELoss()

                def forward(self, x, labels= None):
                    """Return ``(loss, output)``; the loss is 0 when no labels are given."""
                    prediction = self.model(x)
                    if labels is None:
                        return 0, prediction
                    # Labels arrive as (batch,); add a trailing axis to match the model output.
                    return self.criterion(prediction, labels.unsqueeze(dim=1)), prediction

                def _shared_step(self, batch, metric_name):
                    """Compute the batch loss and log it to the progress bar under ``metric_name``."""
                    batch_loss, _ = self(batch['sequence'], batch['label'])
                    self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                    return batch_loss

                def training_step(self, batch, batch_index):
                    return self._shared_step(batch, 'train_loss')

                def validation_step(self, batch, batch_index):
                    return self._shared_step(batch, 'val_loss')

                def test_step(self, batch, batch_index):
                    return self._shared_step(batch, 'test_loss')

                def configure_optimizers(self):
                    return optim.AdamW(self.parameters(), lr = LEARNING)


            # Model input: every column except the raw date, restricted to the features
            # whose SHAP value exceeds the current threshold.
            df = Rossmann_df.drop(columns  = 'Date')
            features = list(inst_LSTMSHAP['feature_name'].loc[inst_LSTMSHAP['shap_value'] > SHAP_th])

            # features_dataframe is defined elsewhere in the notebook; presumably it returns
            # df restricted to the selected features - confirm against its definition.
            features_df = features_dataframe(df,features)

            # Train/test split, min-max scaling, then windows of SEQUENCE_LENGTH steps.
            # NOTE(review): the meaning of the literal 105 is not visible here - confirm.
            train_df, test_df, train_size =  train_test_spliter(105,features_df)
            train_df, test_df, scaler = data_scaler(train_df,test_df)
            train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
            test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

            # NOTE(review): SalesDataModule is defined elsewhere; if it serves test_sequences
            # as the validation set, early stopping and checkpoint selection are tuned on the
            # test data - confirm.
            data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            data_module.setup()
            train_dataset = Dataset(train_sequences)
            test_dataset = Dataset(test_sequences)
            model = SalesPredictor(n_features = train_df.shape[1])

            ckpt_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_SHAPins{SHAP_th}'
            ckpt_path = f'{folder_path}/Checkpoints/{ckpt_name}.ckpt'

            # Remove the checkpoint of a previous run so the file loaded below is the one
            # written by this training. Only "file does not exist" is expected; any other
            # failure would leave a stale checkpoint in place and must not be hidden.
            try:
                os.remove(ckpt_path)
            except FileNotFoundError:
                pass

            checkpoint_callback = ModelCheckpoint(
                dirpath = f'{folder_path}/Checkpoints',
                filename = ckpt_name,
                save_top_k = 1,
                verbose = False ,
                monitor = 'val_loss',
                mode = 'min'
            )
            logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
            early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
            trainer = pl.Trainer(
                logger = logger,
                callbacks=[early_stopping_callback, checkpoint_callback],
                max_epochs = N_EPOCHS,
                gpus = 0,
            )
            trainer.fit(model, data_module)

            # Reload the best (lowest val_loss) weights saved by the checkpoint callback.
            trained_model = SalesPredictor.load_from_checkpoint(
                ckpt_path,
                n_features = train_df.shape[1]
            )

            predictions = []
            labels = []

            # Inference only: switch to eval mode and skip autograd bookkeeping.
            trained_model.eval()
            with torch.no_grad():
                for item in test_dataset:
                    sequence = item['sequence']
                    label = item['label']

                    # Feed the model its own earlier outputs: overwrite column 0 of the last
                    # n_feedback time steps with the most recent predictions (at most one
                    # full window). Column 0 is assumed to be the target - confirm.
                    n_feedback = min(len(predictions), SEQUENCE_LENGTH)
                    for j in range(n_feedback):
                        sequence[-n_feedback+j,0] = float(predictions[-n_feedback+j])

                    _,output = trained_model(sequence.unsqueeze(dim=0))
                    predictions.append(output.item())
                    labels.append(label.item())



            # Target-only inverse scaler: reuse the fitted parameters of the first column
            # (assumed to be the target - confirm against data_scaler).
            descaler = MinMaxScaler()
            descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

            predictions_descaled = descale(descaler,predictions)
            labels_descaled = descale(descaler,labels)

            # Not used again inside this cell; kept in case a later cell reads them.
            test_data = df[train_size+1:]
            test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

            dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
            full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

            # Clamp negative forecasts to zero BEFORE building the frames, as the Boruta
            # grid-search cells do. Clamping only after the frames exist (as happened
            # here) never reaches the plotted or scored values.
            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            # Predictions for this hidden size, moved one step earlier.
            dic = {f'store_pred_{N_HIDDEN}': predictions_descaled}
            pred_df = pd.DataFrame.from_dict(dic).shift(-1)

            # Truth for the last len(pred_df) dates.
            dic = {'store_pred_dates': dates, 'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):]}
            truth_df = pd.DataFrame.from_dict(dic)

            if N_HIDDEN != hidden_values[0]:
                # Later hidden sizes only add their prediction column.
                predictions_df = predictions_df.join(pred_df, how = 'left')
            else:
                # reset_index(drop=True) discards the old row labels outright. The previous
                # reset_index(inplace=True) + drop('index', axis=1) pair never removed the
                # 'index' column because the result of drop() was thrown away.
                truth_df = truth_df.reset_index(drop = True)
                # The last row has no prediction after shift(-1), so it is dropped.
                predictions_df = truth_df.join(pred_df, how = 'left').iloc[:-1]

            display(predictions_df)

            # Full-history truth frame: column 0 = sales, column 1 = matplotlib date numbers.
            dic = {'store_truth': Rossmann_df[TGT], 'store_truth_dates': full_dates}
            truth_df = pd.DataFrame(dic)

            # Prediction frame on the forecast dates; predictions are moved one step earlier.
            dic = {'store_pred': predictions_descaled, 'store_pred_dates': dates}
            prediction_df = pd.DataFrame(dic)
            prediction_df['store_pred'] = prediction_df['store_pred'].shift(-1)

            # Show three forecast-horizons' worth of recent truth next to the forecast.
            plt.figure(figsize=(21, 7))
            recent_truth = truth_df.iloc[-3*len(prediction_df):]
            plt.plot_date(recent_truth.iloc[:,1], recent_truth.iloc[:,0], '-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1], prediction_df.iloc[:,0], '-', label ='Prediction')
            plt.legend()
            plt.show();

            iteration_end = time.monotonic()

            # Rebinds predictions_descaled only; the frames scored below were built earlier.
            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            # Truth/prediction columns of the joined frame, plus the part of the full
            # truth series that precedes the forecast window (MASE scaling).
            eval_truth = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
            eval_pred = predictions_df[f'store_pred_{N_HIDDEN}']
            naive_train = truth_df.iloc[:-len(prediction_df),0]

            mean_abs_SHAP_inst.append(mean_absolute_scaled_error(eval_truth, eval_pred, naive_train))
            mean_sqr_SHAP_inst.append(mean_squared_error(eval_truth, eval_pred)**(1/2))
            time_SHAP_inst.append(iteration_end - iteration_start)

            # The dict stores references to the running lists; note that the 'mse' key
            # actually holds root-mean-squared errors.
            hidden_dic.update({
                'mase': mean_abs_SHAP_inst,
                'mse': mean_sqr_SHAP_inst,
                'time': time_SHAP_inst,
            })

            print(SHAP_th)
            print(N_HIDDEN)
            print(hidden_dic)

        #predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM__SHAPins{SHAP_th}.csv')
        SHAP_inst_dic[SHAP_th] = hidden_dic
    %store SHAP_inst_dic

    """

            dic = {}
            dic[f'store_truth'] = Rossmann_df[TGT]
            dic[f'store_truth_dates'] = full_dates
            truth_df = pd.DataFrame.from_dict(dic)

            dic= {}
            dic[f'store_pred'] = predictions_descaled
            dic[f'store_pred_dates'] = dates
            prediction_df = pd.DataFrame.from_dict(dic)
            prediction_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden{N_HIDDEN}.csv')

            plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
            plt.legend()
            plt.show();
    """

In [None]:
if SHAP_avrag == True:
    # Grid search over averaged-SHAP thresholds x LSTM hidden sizes.
    folder_path = 'Predictions/grid_search_SHAP'

    SHAP_avg_th = [0.01,0.02,0.03,0.04]
    avg_LSTMSHAP= pd.read_csv('Predictions/Single_Run/Features_avg_LSTMSHAP.csv')
    SHAP_avg_dic = {}

    # An already-existing folder is fine; any other OS error (permissions, bad path)
    # should surface instead of being silently swallowed.
    os.makedirs(folder_path, exist_ok=True)

    for SHAP_th in SHAP_avg_th:
        # Fresh result lists per threshold.
        mean_abs_SHAP_avg = []
        mean_sqr_SHAP_avg = []
        time_SHAP_avg = []

        for N_HIDDEN in hidden_values:
            # Re-seed on every iteration so each run starts from the same RNG state.
            torch.manual_seed(8)
            np.random.seed(8)
            pl.seed_everything(8);
            iteration_start = time.monotonic()

            hidden_dic = {}

            class SalesPredictionModel(nn.Module):
                """LSTM encoder followed by a linear head that emits one value per sequence.

                The class is re-created on every grid-search iteration because the
                ``n_hidden`` default is bound to the current ``N_HIDDEN`` at definition time.

                Args:
                    n_features: number of input features per time step.
                    n_hidden: LSTM hidden size (defaults to the loop's ``N_HIDDEN``).
                    n_layers: number of stacked LSTM layers (defaults to ``N_LAYERS``).
                """
                def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                    super().__init__()
                    self.n_hidden = n_hidden

                    self.lstm = nn.LSTM(
                        input_size = n_features,
                        hidden_size = n_hidden,
                        batch_first = True,
                        num_layers = n_layers,
                        # nn.LSTM applies dropout only between stacked layers; with a single
                        # layer a non-zero value does nothing except raise a UserWarning.
                        dropout = 0.2 if n_layers > 1 else 0.0
                    )
                    self.regressor = nn.Linear(n_hidden,1)

                def forward(self,x):
                    """Map a batch-first input (batch, seq_len, n_features) to (batch, 1)."""
                    self.lstm.flatten_parameters()

                    # h_n is (num_layers, batch, n_hidden); keep the last layer's final state.
                    _, (hidden, _) = self.lstm(x)
                    out = hidden[-1]

                    return self.regressor(out)


            class SalesPredictor(pl.LightningModule):
                """Lightning wrapper that trains `SalesPredictionModel` with an MSE loss."""

                def __init__(self, n_features: int):
                    super().__init__()
                    self.model = SalesPredictionModel(n_features)
                    self.criterion = nn.MSELoss()

                def forward(self, x, labels = None):
                    """Return `(loss, output)`; the loss is 0 when no labels are given."""
                    output = self.model(x)
                    if labels is None:
                        return 0, output
                    return self.criterion(output, labels.unsqueeze(dim=1)), output

                def _run_step(self, batch, metric_name):
                    """Compute the batch loss and show it on the progress bar as `metric_name`."""
                    batch_loss, _ = self(batch['sequence'], batch['label'])
                    self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                    return batch_loss

                def training_step(self, batch, batch_index):
                    return self._run_step(batch, 'train_loss')

                def validation_step(self, batch, batch_index):
                    return self._run_step(batch, 'val_loss')

                def test_step(self, batch, batch_index):
                    return self._run_step(batch, 'test_loss')

                def configure_optimizers(self):
                    """AdamW over all parameters with the notebook-wide learning rate."""
                    return optim.AdamW(self.parameters(), lr = LEARNING)


            # Model inputs: every Rossmann column except the raw date, restricted to the
            # features whose averaged SHAP value exceeds the current threshold.
            df = Rossmann_df.drop(columns  = 'Date')
            features = list(avg_LSTMSHAP['feature_name'].loc[avg_LSTMSHAP['shap_value'] > SHAP_th])

            # NOTE(review): the original notes say features_dataframe leaves out 'Customers'
            # because it is not known for future dates -- confirm in the helper.
            features_df = features_dataframe(df,features)

            # Train/test split followed by scaling (helpers defined elsewhere in the notebook).
            train_df, test_df, train_size =  train_test_spliter(105,features_df)
            train_df, test_df, scaler = data_scaler(train_df,test_df)
            # Turn both frames into sequence samples for the target column.
            train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
            test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

            # NOTE(review): the test sequences are handed to the data module; if it serves
            # them as validation data, early stopping and checkpoint selection see test data.
            data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
            data_module.setup()
            train_dataset = Dataset(train_sequences)
            test_dataset = Dataset(test_sequences)
            model = SalesPredictor(n_features = train_df.shape[1])

            # Single source of truth for where this run's best checkpoint lives.
            checkpoint_dir = f'{folder_path}/Checkpoints'
            checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}_SHAPavg{SHAP_th}'
            checkpoint_file = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

            # Remove a checkpoint left by an earlier run so the file loaded below is the one
            # written by this fit. Only a missing file is ignored; other OS errors surface.
            try:
                os.remove(checkpoint_file)
            except FileNotFoundError:
                pass

            checkpoint_callback = ModelCheckpoint(
                dirpath = checkpoint_dir,
                filename = checkpoint_name,
                save_top_k = 1,
                verbose = False ,
                monitor = 'val_loss',
                mode = 'min'
            )
            logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
            early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
            trainer = pl.Trainer(
                logger = logger,
                callbacks=[early_stopping_callback, checkpoint_callback],
                max_epochs = N_EPOCHS,
                gpus = 0,
            )
            trainer.fit(model, data_module)

            # Reload the weights with the lowest monitored val_loss.
            trained_model = SalesPredictor.load_from_checkpoint(
                checkpoint_file,
                n_features = train_df.shape[1]
            )

            predictions = []
            labels = []

            # Inference only: put the network in eval mode and skip autograd bookkeeping.
            trained_model.eval()
            with torch.no_grad():
                for item in test_dataset:
                    sequence = item['sequence']
                    label = item['label']

                    # Recursive forecast: overwrite column 0 of the most recent rows of the
                    # window with the model's own earlier predictions, at most one full
                    # window of them. (Column 0 is presumably the scaled target, matching
                    # the scaler column used for descaling -- confirm.)
                    n_fed_back = min(len(predictions), SEQUENCE_LENGTH)
                    for j in range(n_fed_back):
                        sequence[-n_fed_back+j,0] = float(predictions[-n_fed_back+j])

                    _,output = trained_model(sequence.unsqueeze(dim=0))
                    predictions.append(output.item())
                    labels.append(label.item())



            descaler = MinMaxScaler()
            descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

            predictions_descaled = descale(descaler,predictions)
            labels_descaled = descale(descaler,labels)

            test_data = df[train_size+1:]
            test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

            dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
            full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

            dic= {}
            dic[f'store_pred_{N_HIDDEN}'] = predictions_descaled
            pred_df = pd.DataFrame.from_dict(dic)
            pred_df = pred_df.shift(-1)


            dic = {}
            dic[f'store_pred_dates'] = dates
            dic[f'store_truth'] = Rossmann_df[TGT].iloc[-len(pred_df):]
            truth_df = pd.DataFrame.from_dict(dic)

            if N_HIDDEN != hidden_values[0]:
                predictions_df = predictions_df.join(pred_df, how = 'left')
            else:
                truth_df.reset_index(inplace = True)
                truth_df.drop('index', axis=1)
                predictions_df = truth_df
                predictions_df = predictions_df.join(pred_df, how = 'left')
                predictions_df = predictions_df.iloc[:-1]

            display(predictions_df)

            dic = {}
            dic[f'store_truth'] = Rossmann_df[TGT]
            dic[f'store_truth_dates'] = full_dates
            truth_df = pd.DataFrame.from_dict(dic)

            dic= {}
            dic[f'store_pred'] = predictions_descaled
            dic[f'store_pred_dates'] = dates
            prediction_df = pd.DataFrame.from_dict(dic)
            prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

            plt.figure(figsize=(21, 7))
            plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
            plt.legend()
            plt.show();

            iteration_end = time.monotonic()

            predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

            mean_abs_SHAP_avg.append(mean_absolute_scaled_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'],
                                             truth_df.iloc[:-len(prediction_df),0]))

            mean_sqr_SHAP_avg.append(mean_squared_error(predictions_df['store_truth'].iloc[-(len(prediction_df)+1):],
                                             predictions_df[f'store_pred_{N_HIDDEN}'])**(1/2))

            time_SHAP_avg.append(iteration_end - iteration_start)

            hidden_dic['mase'] = mean_abs_SHAP_avg
            hidden_dic['mse']  = mean_sqr_SHAP_avg
            hidden_dic['time'] = time_SHAP_avg

            print(SHAP_th)
            print(N_HIDDEN)
            print(hidden_dic)

        #predictions_df.to_csv(f'{folder_path}/Rossmann_LSTM_SHAPavg{SHAP_th}.csv')
        SHAP_avg_dic[SHAP_th] = hidden_dic
    %store SHAP_avg_dic

    """

            dic = {}
            dic[f'store_truth'] = Rossmann_df[TGT]
            dic[f'store_truth_dates'] = full_dates
            truth_df = pd.DataFrame.from_dict(dic)

            dic= {}
            dic[f'store_pred'] = predictions_descaled
            dic[f'store_pred_dates'] = dates
            prediction_df = pd.DataFrame.from_dict(dic)
            prediction_df.to_csv(f'{folder_path}/Rossmann_LSTM_hidden{N_HIDDEN}.csv')

            plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
            plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
            plt.legend()
            plt.show();
    """

In [None]:
IMV_Full = True
if IMV_Full == True:
    # Grid search over LSTM hidden sizes using the features whose IMV-Full importance
    # exceeds 0.06.
    IMV_Full_feats=pd.read_csv("Predictions/Single_Run/Features_IMV_Full.csv")

    folder_path = 'Predictions/grid_search_IMVF'

    features = list(IMV_Full_feats['features'].loc[IMV_Full_feats['Importance'] > 0.06])

    # exist_ok makes this safe to re-run while still surfacing real failures
    # (permissions, a file sitting at that path, ...).
    os.makedirs(folder_path, exist_ok=True)

    # One entry per hidden size, appended in the order of `hidden_values`.
    mean_abs_IMV_Full = []
    mean_sqr_IMV_Full = []
    time_IMV_Full = []

    for N_HIDDEN in hidden_values:
        # Re-seed before every fit so runs differ only by hidden size.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8)
        iteration_start = time.monotonic()

        class SalesPredictionModel(nn.Module):
            """LSTM encoder followed by a linear head producing one value per sequence.

            The class is re-defined on every loop iteration because the `n_hidden`
            default is bound to the current `N_HIDDEN` when the class statement runs.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM applies dropout only between stacked layers; with a single
                    # layer a non-zero value has no effect and only raises a UserWarning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Map a batch of sequences to the regression output of the linear head."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]   # final hidden state of the last LSTM layer

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning wrapper that trains `SalesPredictionModel` with an MSE loss."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model = SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels = None):
                """Return `(loss, output)`; the loss is 0 when no labels are given."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _run_step(self, batch, metric_name):
                """Compute the batch loss and show it on the progress bar as `metric_name`."""
                batch_loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                return batch_loss

            def training_step(self, batch, batch_index):
                return self._run_step(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._run_step(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._run_step(batch, 'test_loss')

            def configure_optimizers(self):
                """AdamW over all parameters with the notebook-wide learning rate."""
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Model inputs: every Rossmann column except the raw date; `features` (selected
        # before the loop) is passed to the helper that builds the training frame.
        df = Rossmann_df.drop(columns  = 'Date')

        # NOTE(review): the original notes say features_dataframe leaves out 'Customers'
        # because it is not known for future dates -- confirm in the helper.
        features_df = features_dataframe(df,features)

        # Train/test split followed by scaling (helpers defined elsewhere in the notebook).
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        # Turn both frames into sequence samples for the target column.
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        # NOTE(review): the test sequences are handed to the data module; if it serves
        # them as validation data, early stopping and checkpoint selection see test data.
        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        # Single source of truth for where this run's best checkpoint lives.
        checkpoint_dir = f'{folder_path}/Checkpoints'
        checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}'
        checkpoint_file = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

        # Remove a checkpoint left by an earlier run so the file loaded below is the one
        # written by this fit. Only a missing file is ignored; other OS errors surface.
        try:
            os.remove(checkpoint_file)
        except FileNotFoundError:
            pass

        checkpoint_callback = ModelCheckpoint(
            dirpath = checkpoint_dir,
            filename = checkpoint_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Reload the weights with the lowest monitored val_loss.
        trained_model = SalesPredictor.load_from_checkpoint(
            checkpoint_file,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: put the network in eval mode and skip autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Recursive forecast: overwrite column 0 of the most recent rows of the
                # window with the model's own earlier predictions, at most one full
                # window of them. (Column 0 is presumably the scaled target, matching
                # the scaler column used for descaling -- confirm.)
                n_fed_back = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_fed_back):
                    sequence[-n_fed_back+j,0] = float(predictions[-n_fed_back+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Single-column scaler built from the first column's parameters, used to map the
        # 1-D predictions and labels back to the original scale.
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Negative forecasts are clipped to 0 before plotting and scoring.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions are moved one row up; the last row becomes NaN.
        pred_df = pd.DataFrame({f'store_pred_{N_HIDDEN}': predictions_descaled}).shift(-1)

        truth_df = pd.DataFrame({
            'store_pred_dates': dates,
            'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):],
        })

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes add their prediction column to the existing frame.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # First hidden size: start a fresh frame on a 0..n-1 index (the old index is
            # discarded) and drop the last row, whose shifted prediction is NaN.
            truth_df = truth_df.reset_index(drop = True)
            predictions_df = truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)

        # Frames used only for plotting: the full truth history and the shifted forecast.
        truth_df = pd.DataFrame({
            'store_truth': Rossmann_df[TGT],
            'store_truth_dates': full_dates,
        })
        prediction_df = pd.DataFrame({
            'store_pred': predictions_descaled,
            'store_pred_dates': dates,
        })
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        # predictions_df is one row shorter than prediction_df, so this slice selects
        # every row of the truth column.
        y_true = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        y_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        # Truth values preceding the forecast window; they scale the MASE.
        y_train = truth_df.iloc[:-len(prediction_df),0]

        # Each metric is computed once and reused for both the printout and the result lists.
        mase = mean_absolute_scaled_error(y_true, y_pred, y_train)
        mse = mean_squared_error(y_true, y_pred)
        rmse = mse**(1/2)

        print('mean absolute scaled error: ')
        print(mase)
        print('\n','mean squared error: ')
        print(mse)
        print('\n','root mean squared error: ')
        print(rmse)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_IMV_Full.append(mase)
        mean_sqr_IMV_Full.append(rmse)   # NOTE: the "sqr" list stores RMSE, not MSE
        time_IMV_Full.append(iteration_end - iteration_start)
        

In [None]:
IMV_Tensor = True
if IMV_Tensor == True:
    # Grid search over LSTM hidden sizes using the features whose IMV-Tensor importance
    # exceeds 0.045.
    IMV_Tensor_feats=pd.read_csv("Predictions/Single_Run/Features_IMV_Tensor.csv")

    # Own output folder for this experiment. Sharing 'grid_search_IMVF' with the IMV-Full
    # cell made this cell delete and overwrite that cell's identically named checkpoints.
    # NOTE(review): folder name chosen by analogy with 'grid_search_IMVF' -- adjust if
    # another location is expected downstream.
    folder_path = 'Predictions/grid_search_IMVT'

    features = list(IMV_Tensor_feats['features'].loc[IMV_Tensor_feats['Importance'] > 0.045])

    # exist_ok makes this safe to re-run while still surfacing real failures
    # (permissions, a file sitting at that path, ...).
    os.makedirs(folder_path, exist_ok=True)

    # One entry per hidden size, appended in the order of `hidden_values`.
    mean_abs_IMV_Tensor = []
    mean_sqr_IMV_Tensor = []
    time_IMV_Tensor = []

    for N_HIDDEN in hidden_values:
        # Re-seed before every fit so runs differ only by hidden size.
        torch.manual_seed(8)
        np.random.seed(8)
        pl.seed_everything(8)
        iteration_start = time.monotonic()

        class SalesPredictionModel(nn.Module):
            """LSTM encoder followed by a linear head producing one value per sequence.

            The class is re-defined on every loop iteration because the `n_hidden`
            default is bound to the current `N_HIDDEN` when the class statement runs.
            """

            def __init__(self, n_features, n_hidden = N_HIDDEN, n_layers = N_LAYERS):
                super().__init__()
                self.n_hidden = n_hidden

                self.lstm = nn.LSTM(
                    input_size = n_features,
                    hidden_size = n_hidden,
                    batch_first = True,
                    num_layers = n_layers,
                    # nn.LSTM applies dropout only between stacked layers; with a single
                    # layer a non-zero value has no effect and only raises a UserWarning.
                    dropout = 0.2 if n_layers > 1 else 0.0
                )
                self.regressor = nn.Linear(n_hidden,1)

            def forward(self,x):
                """Map a batch of sequences to the regression output of the linear head."""
                self.lstm.flatten_parameters()

                _, (hidden, _) = self.lstm(x)
                out = hidden[-1]   # final hidden state of the last LSTM layer

                return self.regressor(out)


        class SalesPredictor(pl.LightningModule):
            """Lightning wrapper that trains `SalesPredictionModel` with an MSE loss."""

            def __init__(self, n_features: int):
                super().__init__()
                self.model = SalesPredictionModel(n_features)
                self.criterion = nn.MSELoss()

            def forward(self, x, labels = None):
                """Return `(loss, output)`; the loss is 0 when no labels are given."""
                output = self.model(x)
                if labels is None:
                    return 0, output
                return self.criterion(output, labels.unsqueeze(dim=1)), output

            def _run_step(self, batch, metric_name):
                """Compute the batch loss and show it on the progress bar as `metric_name`."""
                batch_loss, _ = self(batch['sequence'], batch['label'])
                self.log(metric_name, batch_loss, prog_bar = True, logger=False)
                return batch_loss

            def training_step(self, batch, batch_index):
                return self._run_step(batch, 'train_loss')

            def validation_step(self, batch, batch_index):
                return self._run_step(batch, 'val_loss')

            def test_step(self, batch, batch_index):
                return self._run_step(batch, 'test_loss')

            def configure_optimizers(self):
                """AdamW over all parameters with the notebook-wide learning rate."""
                return optim.AdamW(self.parameters(), lr = LEARNING)


        # Model inputs: every Rossmann column except the raw date; `features` (selected
        # before the loop) is passed to the helper that builds the training frame.
        df = Rossmann_df.drop(columns  = 'Date')

        # NOTE(review): the original notes say features_dataframe leaves out 'Customers'
        # because it is not known for future dates -- confirm in the helper.
        features_df = features_dataframe(df,features)

        # Train/test split followed by scaling (helpers defined elsewhere in the notebook).
        train_df, test_df, train_size =  train_test_spliter(105,features_df)
        train_df, test_df, scaler = data_scaler(train_df,test_df)
        # Turn both frames into sequence samples for the target column.
        train_sequences = create_sequences(train_df,TGT,SEQUENCE_LENGTH)
        test_sequences = create_sequences (test_df,TGT,SEQUENCE_LENGTH)

        # NOTE(review): the test sequences are handed to the data module; if it serves
        # them as validation data, early stopping and checkpoint selection see test data.
        data_module = SalesDataModule(train_sequences, test_sequences, batch_size = BATCH_SIZE)
        data_module.setup()
        train_dataset = Dataset(train_sequences)
        test_dataset = Dataset(test_sequences)
        model = SalesPredictor(n_features = train_df.shape[1])

        # Single source of truth for where this run's best checkpoint lives.
        checkpoint_dir = f'{folder_path}/Checkpoints'
        checkpoint_name = f'Rossmann_LSTM_hidden{N_HIDDEN}'
        checkpoint_file = f'{checkpoint_dir}/{checkpoint_name}.ckpt'

        # Remove a checkpoint left by an earlier run so the file loaded below is the one
        # written by this fit. Only a missing file is ignored; other OS errors surface.
        try:
            os.remove(checkpoint_file)
        except FileNotFoundError:
            pass

        checkpoint_callback = ModelCheckpoint(
            dirpath = checkpoint_dir,
            filename = checkpoint_name,
            save_top_k = 1,
            verbose = False ,
            monitor = 'val_loss',
            mode = 'min'
        )
        logger = TensorBoardLogger('lightning_logs', name = 'btc-price')
        early_stopping_callback = EarlyStopping(monitor= 'val_loss', patience = PATIENCE)
        trainer = pl.Trainer(
            logger = logger,
            callbacks=[early_stopping_callback, checkpoint_callback],
            max_epochs = N_EPOCHS,
            gpus = 0,
        )
        trainer.fit(model, data_module)

        # Reload the weights with the lowest monitored val_loss.
        trained_model = SalesPredictor.load_from_checkpoint(
            checkpoint_file,
            n_features = train_df.shape[1]
        )

        predictions = []
        labels = []

        # Inference only: put the network in eval mode and skip autograd bookkeeping.
        trained_model.eval()
        with torch.no_grad():
            for item in test_dataset:
                sequence = item['sequence']
                label = item['label']

                # Recursive forecast: overwrite column 0 of the most recent rows of the
                # window with the model's own earlier predictions, at most one full
                # window of them. (Column 0 is presumably the scaled target, matching
                # the scaler column used for descaling -- confirm.)
                n_fed_back = min(len(predictions), SEQUENCE_LENGTH)
                for j in range(n_fed_back):
                    sequence[-n_fed_back+j,0] = float(predictions[-n_fed_back+j])

                _,output = trained_model(sequence.unsqueeze(dim=0))
                predictions.append(output.item())
                labels.append(label.item())



        # Single-column scaler built from the first column's parameters, used to map the
        # 1-D predictions and labels back to the original scale.
        descaler = MinMaxScaler()
        descaler.min_, descaler.scale_ = scaler.min_[0], scaler.scale_[0]

        predictions_descaled = descale(descaler,predictions)
        labels_descaled = descale(descaler,labels)

        test_data = df[train_size+1:]
        test_sequences_data = test_data.iloc[SEQUENCE_LENGTH:]

        dates = matplotlib.dates.date2num(Rossmann_df.iloc[-len(predictions_descaled):].Date.tolist())
        full_dates = matplotlib.dates.date2num(Rossmann_df.Date.tolist())

        # Negative forecasts are clipped to 0 before plotting and scoring.
        predictions_descaled = np.where(predictions_descaled<0, 0, predictions_descaled)

        # Predictions are moved one row up; the last row becomes NaN.
        pred_df = pd.DataFrame({f'store_pred_{N_HIDDEN}': predictions_descaled}).shift(-1)

        truth_df = pd.DataFrame({
            'store_pred_dates': dates,
            'store_truth': Rossmann_df[TGT].iloc[-len(pred_df):],
        })

        if N_HIDDEN != hidden_values[0]:
            # Later hidden sizes add their prediction column to the existing frame.
            predictions_df = predictions_df.join(pred_df, how = 'left')
        else:
            # First hidden size: start a fresh frame on a 0..n-1 index (the old index is
            # discarded) and drop the last row, whose shifted prediction is NaN.
            truth_df = truth_df.reset_index(drop = True)
            predictions_df = truth_df.join(pred_df, how = 'left').iloc[:-1]

        #display(predictions_df)

        # Frames used only for plotting: the full truth history and the shifted forecast.
        truth_df = pd.DataFrame({
            'store_truth': Rossmann_df[TGT],
            'store_truth_dates': full_dates,
        })
        prediction_df = pd.DataFrame({
            'store_pred': predictions_descaled,
            'store_pred_dates': dates,
        })
        prediction_df['store_pred'] =prediction_df['store_pred'].shift(-1)

        plt.figure(figsize=(21, 7))
        plt.plot_date(truth_df.iloc[-3*len(prediction_df):,1],truth_df.iloc[-3*len(prediction_df):,0],'-', label='Truth')
        plt.plot_date(prediction_df.iloc[:,1],prediction_df.iloc[:,0],'-',label ='Prediction')
        plt.legend()
        plt.show();

        # predictions_df is one row shorter than prediction_df, so this slice selects
        # every row of the truth column.
        y_true = predictions_df['store_truth'].iloc[-(len(prediction_df)+1):]
        y_pred = predictions_df[f'store_pred_{N_HIDDEN}']
        # Truth values preceding the forecast window; they scale the MASE.
        y_train = truth_df.iloc[:-len(prediction_df),0]

        # Each metric is computed once and reused for both the printout and the result lists.
        mase = mean_absolute_scaled_error(y_true, y_pred, y_train)
        mse = mean_squared_error(y_true, y_pred)
        rmse = mse**(1/2)

        print('mean absolute scaled error: ')
        print(mase)
        print('\n','mean squared error: ')
        print(mse)
        print('\n','root mean squared error: ')
        print(rmse)

        iteration_end = time.monotonic()
        print('\n',"Iteration time: ", iteration_end - iteration_start)

        mean_abs_IMV_Tensor.append(mase)
        mean_sqr_IMV_Tensor.append(rmse)   # NOTE: the "sqr" list stores RMSE, not MSE
        time_IMV_Tensor.append(iteration_end - iteration_start)

        