In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from itertools import product

from tqdm import tqdm_notebook

import time as time

import random
import math
import os

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from pylab import rcParams

import seaborn as sns

from sklearn import metrics
from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error
from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, Subset, TensorDataset

from utils import (
    load_and_partition_data,
    split_sequence
)

import utils

import warnings
warnings.filterwarnings("ignore")

# Data Preprocessing

In [2]:
# Auxiliary tables: the calendar (maps each `d_*` day label to a date) and item prices.
calendar = pd.read_csv("calendar.csv")
sell_prices = pd.read_csv("sell_prices.csv")

# Wide sales tables with one `d_*` column per day.
# The evaluation file ("validation" below) contains 28 more dates than the training file.
validation = pd.read_csv("sales_train_evaluation.csv")
train = pd.read_csv("sales_train_validation.csv")

In [3]:
def _to_daily_frame(wide_sales, calendar_df):
    """Pivot a wide sales table into a date-indexed frame with one column per item.

    Parameters
    ----------
    wide_sales : pd.DataFrame
        One row per item, with the daily values stored in columns whose
        names start with ``d_``.
    calendar_df : pd.DataFrame
        Must provide a ``d`` column (day label) and a ``date`` column
        (``YYYY-MM-DD`` strings).

    Returns
    -------
    pd.DataFrame
        Rows are days (``DatetimeIndex``), columns are the items relabelled
        ``0..n_items-1`` in their original row order.

    Raises
    ------
    KeyError
        If a ``d_*`` column has no matching entry in ``calendar_df``.
    """
    day_cols = [c for c in wide_sales.columns if c.startswith('d_')]
    # Look every day label up by key so the result does not depend on the
    # calendar rows happening to be in the same order as the sales columns.
    day_to_date = calendar_df.set_index('d')['date']
    daily = wide_sales[day_cols].T
    daily.index = pd.to_datetime(day_to_date.loc[day_cols].to_numpy(), format='%Y-%m-%d')
    # Items are addressed positionally everywhere downstream.
    daily.columns = list(range(daily.shape[1]))
    return daily


# Training-period sales and the (28 days longer) evaluation-period sales.
df_sales = _to_daily_frame(train, calendar)
df_validation = _to_daily_frame(validation, calendar)

In [4]:
SEED = 1345


def seed_everything(seed):
    """Seed the random number generators used in this notebook.

    Sets the seed for Python's ``random``, NumPy's global generator and
    PyTorch (via ``torch.manual_seed`` and ``torch.cuda.manual_seed``),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode.
    """
    # Python-level randomness.
    os.environ['PYTHONHASHSEED'] = str(seed)
    random.seed(seed)
    # Numerical libraries.
    np.random.seed(seed)
    for torch_seeder in (torch.manual_seed, torch.cuda.manual_seed):
        torch_seeder(seed)
    torch.backends.cudnn.deterministic = True


seed_everything(SEED)

# Running Transformer Model

In [5]:
# Run on the GPU when PyTorch can see one, otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [6]:
class PositionalEncoding(torch.nn.Module):
    """Add fixed sinusoidal position information to batch-first embeddings.

    A table ``pe`` of shape ``(1, max_len, d_model)`` is precomputed once and
    registered as a buffer: even feature columns hold sines and odd feature
    columns hold cosines of ``position * div_term``.

    Parameters
    ----------
    d_model : int
        Size of the feature (last) dimension; may be even or odd.
    dropout : float
        Dropout probability applied after the encoding is added.
    max_len : int
        Number of positions in the precomputed table.
    """

    def __init__(self, d_model: int, dropout: float = 0.1, max_len: int = 5000) -> None:
        super().__init__()
        self.dropout = torch.nn.Dropout(p=dropout)
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(
            torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model)
        )
        angles = position * div_term  # [max_len, ceil(d_model / 2)]
        pe = torch.zeros(1, max_len, d_model)
        pe[0, :, 0::2] = torch.sin(angles)
        # With an odd d_model there is one odd column fewer than even columns,
        # so trim the angle table before filling the cosine slots.
        pe[0, :, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer("pe", pe)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return ``dropout(x + pe[:, :seq_len])`` for ``x`` of shape ``[bs, seq_len, d_model]``."""
        x = x + self.pe[:, : x.size(1)]
        return self.dropout(x)


class TransformerWithPE(torch.nn.Module):
    """Encoder-decoder transformer with linear input/output projections.

    Encoder and decoder inputs are projected to ``embed_dim`` by separate
    linear layers, share one ``PositionalEncoding`` module, pass through a
    batch-first ``torch.nn.Transformer`` and are projected back to
    ``out_dim``.

    Parameters
    ----------
    in_dim : int
        Feature size of the encoder input.
    out_dim : int
        Feature size of the decoder input and of the prediction.
    embed_dim : int
        Model width (``d_model`` of the transformer).
    num_heads : int
        Number of attention heads.
    num_layers : int
        Number of encoder layers and of decoder layers.
    """

    def __init__(
        self, in_dim: int, out_dim: int, embed_dim: int, num_heads: int, num_layers: int
    ) -> None:
        super().__init__()
        self.positional_encoding = PositionalEncoding(embed_dim)
        self.encoder_embedding = torch.nn.Linear(
            in_features=in_dim, out_features=embed_dim
        )
        self.decoder_embedding = torch.nn.Linear(
            in_features=out_dim, out_features=embed_dim
        )
        self.output_layer = torch.nn.Linear(in_features=embed_dim, out_features=out_dim)
        self.transformer = torch.nn.Transformer(
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            d_model=embed_dim,
            batch_first=True,
        )

    def forward(self, src: torch.Tensor, tgt: torch.Tensor) -> torch.Tensor:
        """Predict one output step per decoder input step.

        ``src`` is ``[bs, src_seq_len, in_dim]`` and ``tgt`` is
        ``[bs, tgt_seq_len, out_dim]``; the result is
        ``[bs, tgt_seq_len, out_dim]``.
        """
        # Embed encoder input and add positional encoding.
        # [bs, src_seq_len, embed_dim]
        src = self.positional_encoding(self.encoder_embedding(src))

        # Causal mask so a decoder position cannot attend to later positions.
        # It is placed on the same device as the decoder input, so the module
        # does not depend on any notebook-level `device` variable.
        # [tgt_seq_len, tgt_seq_len]
        tgt_mask = torch.nn.Transformer.generate_square_subsequent_mask(
            tgt.shape[1]
        ).to(tgt.device)

        # Embed decoder input and add positional encoding.
        # [bs, tgt_seq_len, embed_dim]
        tgt = self.positional_encoding(self.decoder_embedding(tgt))

        # Get prediction from transformer and map to output dimension.
        # [bs, tgt_seq_len, embed_dim] -> [bs, tgt_seq_len, out_dim]
        pred = self.transformer(src, tgt, tgt_mask=tgt_mask)
        return self.output_layer(pred)

    def infer(self, src: torch.Tensor, tgt_len: int) -> torch.Tensor:
        """Autoregressively generate ``tgt_len`` steps following ``src``.

        The decoder is seeded with the last encoder step and each new
        prediction is written back into the decoder input, so this requires
        ``in_dim == out_dim``. Returns ``[bs, tgt_len, src.shape[2]]``.
        Gradients are not disabled here; callers wrap the call in
        ``torch.no_grad()`` when needed.
        """
        output = torch.zeros(
            (src.shape[0], tgt_len + 1, src.shape[2]), device=src.device
        )
        output[:, 0] = src[:, -1]
        for i in range(tgt_len):
            # Only position i of the prediction is trusted at this step; the
            # causal mask keeps it independent of the still-zero later slots.
            output[:, i + 1] = self.forward(src, output)[:, i]

        return output[:, 1:]

In [7]:
# Hyperparameters shared by every batch below.
optimal_sequence_length = 28  # number of past steps fed to the encoder
optimal_d_model = 64          # transformer embedding width
optimal_num_head = 4          # attention heads
optimal_num_layer = 2         # encoder layers and decoder layers
optimal_batch_size = 32       # mini-batch size of the data loaders

## Batch 1

In [8]:
# LSTM baseline output for batch 1. The first CSV column is dropped
# (presumably the saved index); the remaining column labels are the item ids.
results1 = pd.read_csv("Baseline_LSTM_Results_1.csv").iloc[:, 1:]

In [9]:
# Train one transformer per item of this batch, forecast the held-out days and
# score the result. Item ids come from the column labels of the LSTM baseline
# results and are used as positional column indices into df_sales / df_validation.
HORIZON = 28  # trailing windows kept for validation, and days forecast ahead
CHECKPOINT_PATH = 'Models/transformer_final.pth'
# Fraction handed to split_sequence to separate encoder input from target.
SPLIT_RATIO = optimal_sequence_length / (optimal_sequence_length + 1)

#####  Parameters (identical for every item) ######################
num_features = 1  # each item is modelled as a univariate series
D_MODEL = optimal_d_model
NUM_HEADS = optimal_num_head
NUM_LAYERS = optimal_num_layer
NUM_EPOCHS = 200
LR = 0.001

# The early stopper is handed CHECKPOINT_PATH, so make sure its folder exists.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

summary_df = pd.DataFrame(columns=["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame(index=df_validation.index[optimal_sequence_length:])
results = results1

for string_i in results.columns:

    i = int(string_i)
    start_time = time.time()

    # Scale the item's training series to [-1, 1] and partition it into
    # sequences (presumably sliding windows of optimal_sequence_length + 1 steps).
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    sequences = load_and_partition_data(train_data_normalized, optimal_sequence_length + 1)

    # The last HORIZON sequences drive the scheduler and early stopping.
    train_data = torch.tensor(np.array(sequences[:-HORIZON]), dtype=torch.float)
    val_data = torch.tensor(np.array(sequences[-HORIZON:]), dtype=torch.float)
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    ##############################################################  Training  ##########################################################
    ##### Init the model #######################
    model = TransformerWithPE(num_features, num_features, D_MODEL, NUM_HEADS, NUM_LAYERS)
    model.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set criterion, optimizer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=20, factor=0.1, min_lr=1e-7, eps=1e-08)

    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(NUM_EPOCHS):
        train_loss, val_loss = 0, 0
        model.train()
        for batch in train_loader:
            src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
            pred = model(src.to(device), tgt.to(device))
            loss = criterion(pred, tgt_y.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # MSELoss already averages inside a batch, so the epoch loss is the
        # mean over batches (not over samples). Only the scale of the recorded
        # value changes; comparisons between epochs are unaffected.
        train_loss /= max(len(train_loader), 1)
        train_loss_.append(train_loss)

        # Evaluate on the validation sequences
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
                val_pred = model(src.to(device), tgt.to(device))
                val_loss += criterion(val_pred, tgt_y.to(device)).item()
        val_loss /= max(len(val_loader), 1)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, model, CHECKPOINT_PATH):
            early_stopped = epoch + 1
            break

    ##############################################################  Testing  ##########################################################
    # Reload the checkpoint written through the early stopper.
    # NOTE(review): every item reuses the same checkpoint path; if the early
    # stopper ever fails to write one for an item, the previous item's model
    # would be loaded silently - confirm EarlyStopper always saves at least once.
    # NOTE(review): presumably the whole module is pickled; recent PyTorch
    # releases may need weights_only=False here - confirm against utils.
    model = torch.load(CHECKPOINT_PATH, map_location=device)
    model.eval()

    # One-step-ahead predictions over every sequence of the training period.
    sequences_tensor = torch.tensor(np.array(sequences), dtype=torch.float)
    with torch.no_grad():
        src, tgt, tgt_y = split_sequence(sequences_tensor, SPLIT_RATIO)
        predicts_ = model(src.to(device), tgt.to(device))
    predicts_ = predicts_.cpu().numpy().reshape(-1)

    # Recursive forecast: predict one step, slide the window forward by
    # appending that prediction, and repeat HORIZON times.
    forecasts_ = np.zeros(HORIZON)
    tgt_len = 1
    src = torch.tensor(train_data_normalized[-optimal_sequence_length:], dtype=torch.float)
    src = src.unsqueeze(0).to(device)  # [1, optimal_sequence_length, 1]
    with torch.no_grad():
        for j in range(HORIZON):
            forecasted = model.infer(src, tgt_len)
            forecasts_[j] = forecasted[0, 0, 0].item()
            src = torch.cat((src[:, 1:], forecasted), dim=1)
    all_prediction = np.append(predicts_, forecasts_)

    # Back to sales units, then score the in-sample part and the final HORIZON days.
    sales_validation = pd.DataFrame(df_validation.iloc[:, i])
    prediction_df[i] = scaler.inverse_transform(all_prediction.reshape(-1, 1)).reshape(-1)
    actual_train = sales_validation.iloc[optimal_sequence_length:-HORIZON]
    actual_test = sales_validation.iloc[-HORIZON:]
    predicted_train = prediction_df[i].iloc[:-HORIZON]
    predicted_test = prediction_df[i].iloc[-HORIZON:]
    rmse_train_lstm = np.sqrt(mean_squared_error(actual_train, predicted_train))
    r2_train_lstm = r2_score(actual_train, predicted_train)
    rmse_test_lstm = np.sqrt(mean_squared_error(actual_test, predicted_test))
    r2_test_lstm = r2_score(actual_test, predicted_test)
    time_taken = time.time() - start_time

    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")

summary_df.to_csv("Baseline_transformer_Summary_1.csv")
prediction_df.to_csv("Baseline_transformer_Results_1.csv")

Item 2 test rmse, test r2, time taken: 1.0126758141651615, -0.1372017635113678, 51.0062575340271
Item 268 test rmse, test r2, time taken: 0.6210680250798335, -0.011400620578015497, 53.606062173843384
Item 345 test rmse, test r2, time taken: 4.863219436457825, -0.8767518397893395, 50.29061532020569
Item 380 test rmse, test r2, time taken: 0.4786438375494548, -0.22186625719503383, 29.99994730949402
Item 465 test rmse, test r2, time taken: 0.5221294560828535, -0.1368799384305912, 44.63188815116882
Item 478 test rmse, test r2, time taken: 0.8366258770818243, -0.06347907137809816, 27.712098360061646
Item 520 test rmse, test r2, time taken: 0.5853080087781909, -0.10076641258063246, 28.49231743812561
Item 529 test rmse, test r2, time taken: 0.8218823979722413, -0.2639252746055738, 63.9230740070343
Item 569 test rmse, test r2, time taken: 1.1349450317936016, -0.12708769704385725, 21.71237063407898
Item 576 test rmse, test r2, time taken: 1.0803582313798283, -0.22334805342026454, 29.61724615097

## Batch 2

In [11]:
# LSTM baseline output for batch 2. The first CSV column is dropped
# (presumably the saved index); the remaining column labels are the item ids.
results2 = pd.read_csv("Baseline_LSTM_Results_2.csv").iloc[:, 1:]

In [12]:
# Train one transformer per item of this batch, forecast the held-out days and
# score the result. Item ids come from the column labels of the LSTM baseline
# results and are used as positional column indices into df_sales / df_validation.
HORIZON = 28  # trailing windows kept for validation, and days forecast ahead
CHECKPOINT_PATH = 'Models/transformer_final.pth'
# Fraction handed to split_sequence to separate encoder input from target.
SPLIT_RATIO = optimal_sequence_length / (optimal_sequence_length + 1)

#####  Parameters (identical for every item) ######################
num_features = 1  # each item is modelled as a univariate series
D_MODEL = optimal_d_model
NUM_HEADS = optimal_num_head
NUM_LAYERS = optimal_num_layer
NUM_EPOCHS = 200
LR = 0.001

# The early stopper is handed CHECKPOINT_PATH, so make sure its folder exists.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

summary_df = pd.DataFrame(columns=["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame(index=df_validation.index[optimal_sequence_length:])
results = results2

for string_i in results.columns:

    i = int(string_i)
    start_time = time.time()

    # Scale the item's training series to [-1, 1] and partition it into
    # sequences (presumably sliding windows of optimal_sequence_length + 1 steps).
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    sequences = load_and_partition_data(train_data_normalized, optimal_sequence_length + 1)

    # The last HORIZON sequences drive the scheduler and early stopping.
    train_data = torch.tensor(np.array(sequences[:-HORIZON]), dtype=torch.float)
    val_data = torch.tensor(np.array(sequences[-HORIZON:]), dtype=torch.float)
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    ##############################################################  Training  ##########################################################
    ##### Init the model #######################
    model = TransformerWithPE(num_features, num_features, D_MODEL, NUM_HEADS, NUM_LAYERS)
    model.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set criterion, optimizer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=20, factor=0.1, min_lr=1e-7, eps=1e-08)

    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(NUM_EPOCHS):
        train_loss, val_loss = 0, 0
        model.train()
        for batch in train_loader:
            src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
            pred = model(src.to(device), tgt.to(device))
            loss = criterion(pred, tgt_y.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # MSELoss already averages inside a batch, so the epoch loss is the
        # mean over batches (not over samples). Only the scale of the recorded
        # value changes; comparisons between epochs are unaffected.
        train_loss /= max(len(train_loader), 1)
        train_loss_.append(train_loss)

        # Evaluate on the validation sequences
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
                val_pred = model(src.to(device), tgt.to(device))
                val_loss += criterion(val_pred, tgt_y.to(device)).item()
        val_loss /= max(len(val_loader), 1)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, model, CHECKPOINT_PATH):
            early_stopped = epoch + 1
            break

    ##############################################################  Testing  ##########################################################
    # Reload the checkpoint written through the early stopper.
    # NOTE(review): every item reuses the same checkpoint path; if the early
    # stopper ever fails to write one for an item, the previous item's model
    # would be loaded silently - confirm EarlyStopper always saves at least once.
    # NOTE(review): presumably the whole module is pickled; recent PyTorch
    # releases may need weights_only=False here - confirm against utils.
    model = torch.load(CHECKPOINT_PATH, map_location=device)
    model.eval()

    # One-step-ahead predictions over every sequence of the training period.
    sequences_tensor = torch.tensor(np.array(sequences), dtype=torch.float)
    with torch.no_grad():
        src, tgt, tgt_y = split_sequence(sequences_tensor, SPLIT_RATIO)
        predicts_ = model(src.to(device), tgt.to(device))
    predicts_ = predicts_.cpu().numpy().reshape(-1)

    # Recursive forecast: predict one step, slide the window forward by
    # appending that prediction, and repeat HORIZON times.
    forecasts_ = np.zeros(HORIZON)
    tgt_len = 1
    src = torch.tensor(train_data_normalized[-optimal_sequence_length:], dtype=torch.float)
    src = src.unsqueeze(0).to(device)  # [1, optimal_sequence_length, 1]
    with torch.no_grad():
        for j in range(HORIZON):
            forecasted = model.infer(src, tgt_len)
            forecasts_[j] = forecasted[0, 0, 0].item()
            src = torch.cat((src[:, 1:], forecasted), dim=1)
    all_prediction = np.append(predicts_, forecasts_)

    # Back to sales units, then score the in-sample part and the final HORIZON days.
    sales_validation = pd.DataFrame(df_validation.iloc[:, i])
    prediction_df[i] = scaler.inverse_transform(all_prediction.reshape(-1, 1)).reshape(-1)
    actual_train = sales_validation.iloc[optimal_sequence_length:-HORIZON]
    actual_test = sales_validation.iloc[-HORIZON:]
    predicted_train = prediction_df[i].iloc[:-HORIZON]
    predicted_test = prediction_df[i].iloc[-HORIZON:]
    rmse_train_lstm = np.sqrt(mean_squared_error(actual_train, predicted_train))
    r2_train_lstm = r2_score(actual_train, predicted_train)
    rmse_test_lstm = np.sqrt(mean_squared_error(actual_test, predicted_test))
    r2_test_lstm = r2_score(actual_test, predicted_test)
    time_taken = time.time() - start_time

    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")

summary_df.to_csv("Baseline_transformer_Summary_2.csv")
prediction_df.to_csv("Baseline_transformer_Results_2.csv")

Item 6100 test rmse, test r2, time taken: 0.39112594947198903, -0.04292116997118356, 21.37938404083252
Item 6366 test rmse, test r2, time taken: 1.0015036679257245, -0.08016418124319391, 21.076661586761475
Item 6443 test rmse, test r2, time taken: 3.883641709902396, -0.006196015811826605, 59.56764554977417
Item 6478 test rmse, test r2, time taken: 0.26509694144981794, -0.05955170151888045, 25.595710277557373
Item 6563 test rmse, test r2, time taken: 0.2823227187931304, -1.3144294872832614, 29.11493945121765
Item 6576 test rmse, test r2, time taken: 1.848810750523011, -0.5200177730797366, 36.38629388809204
Item 6618 test rmse, test r2, time taken: 0.2059998816422971, -0.23221428776022468, 22.513224601745605
Item 6627 test rmse, test r2, time taken: 0.7881595595701999, -0.05873318524357196, 20.1295063495636
Item 6667 test rmse, test r2, time taken: 1.4904248196390708, -0.3489938467149396, 19.76951003074646
Item 6674 test rmse, test r2, time taken: 1.58774881429458, -0.006837441201413164,

## Batch 3

In [13]:
# LSTM baseline output for batch 3. The first CSV column is dropped
# (presumably the saved index); the remaining column labels are the item ids.
results3 = pd.read_csv("Baseline_LSTM_Results_3.csv").iloc[:, 1:]

In [None]:
# Train one transformer per item of this batch, forecast the held-out days and
# score the result. Item ids come from the column labels of the LSTM baseline
# results and are used as positional column indices into df_sales / df_validation.
HORIZON = 28  # trailing windows kept for validation, and days forecast ahead
CHECKPOINT_PATH = 'Models/transformer_final.pth'
# Fraction handed to split_sequence to separate encoder input from target.
SPLIT_RATIO = optimal_sequence_length / (optimal_sequence_length + 1)

#####  Parameters (identical for every item) ######################
num_features = 1  # each item is modelled as a univariate series
D_MODEL = optimal_d_model
NUM_HEADS = optimal_num_head
NUM_LAYERS = optimal_num_layer
NUM_EPOCHS = 200
LR = 0.001

# The early stopper is handed CHECKPOINT_PATH, so make sure its folder exists.
os.makedirs(os.path.dirname(CHECKPOINT_PATH), exist_ok=True)

summary_df = pd.DataFrame(columns=["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame(index=df_validation.index[optimal_sequence_length:])
results = results3

for string_i in results.columns:

    i = int(string_i)
    start_time = time.time()

    # Scale the item's training series to [-1, 1] and partition it into
    # sequences (presumably sliding windows of optimal_sequence_length + 1 steps).
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    sequences = load_and_partition_data(train_data_normalized, optimal_sequence_length + 1)

    # The last HORIZON sequences drive the scheduler and early stopping.
    train_data = torch.tensor(np.array(sequences[:-HORIZON]), dtype=torch.float)
    val_data = torch.tensor(np.array(sequences[-HORIZON:]), dtype=torch.float)
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size, shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    ##############################################################  Training  ##########################################################
    ##### Init the model #######################
    model = TransformerWithPE(num_features, num_features, D_MODEL, NUM_HEADS, NUM_LAYERS)
    model.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set criterion, optimizer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=20, factor=0.1, min_lr=1e-7, eps=1e-08)

    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(NUM_EPOCHS):
        train_loss, val_loss = 0, 0
        model.train()
        for batch in train_loader:
            src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
            pred = model(src.to(device), tgt.to(device))
            loss = criterion(pred, tgt_y.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # MSELoss already averages inside a batch, so the epoch loss is the
        # mean over batches (not over samples). Only the scale of the recorded
        # value changes; comparisons between epochs are unaffected.
        train_loss /= max(len(train_loader), 1)
        train_loss_.append(train_loss)

        # Evaluate on the validation sequences
        model.eval()
        with torch.no_grad():
            for batch in val_loader:
                src, tgt, tgt_y = split_sequence(batch, SPLIT_RATIO)
                val_pred = model(src.to(device), tgt.to(device))
                val_loss += criterion(val_pred, tgt_y.to(device)).item()
        val_loss /= max(len(val_loader), 1)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, model, CHECKPOINT_PATH):
            early_stopped = epoch + 1
            break

    ##############################################################  Testing  ##########################################################
    # Reload the checkpoint written through the early stopper.
    # NOTE(review): every item reuses the same checkpoint path; if the early
    # stopper ever fails to write one for an item, the previous item's model
    # would be loaded silently - confirm EarlyStopper always saves at least once.
    # NOTE(review): presumably the whole module is pickled; recent PyTorch
    # releases may need weights_only=False here - confirm against utils.
    model = torch.load(CHECKPOINT_PATH, map_location=device)
    model.eval()

    # One-step-ahead predictions over every sequence of the training period.
    sequences_tensor = torch.tensor(np.array(sequences), dtype=torch.float)
    with torch.no_grad():
        src, tgt, tgt_y = split_sequence(sequences_tensor, SPLIT_RATIO)
        predicts_ = model(src.to(device), tgt.to(device))
    predicts_ = predicts_.cpu().numpy().reshape(-1)

    # Recursive forecast: predict one step, slide the window forward by
    # appending that prediction, and repeat HORIZON times.
    forecasts_ = np.zeros(HORIZON)
    tgt_len = 1
    src = torch.tensor(train_data_normalized[-optimal_sequence_length:], dtype=torch.float)
    src = src.unsqueeze(0).to(device)  # [1, optimal_sequence_length, 1]
    with torch.no_grad():
        for j in range(HORIZON):
            forecasted = model.infer(src, tgt_len)
            forecasts_[j] = forecasted[0, 0, 0].item()
            src = torch.cat((src[:, 1:], forecasted), dim=1)
    all_prediction = np.append(predicts_, forecasts_)

    # Back to sales units, then score the in-sample part and the final HORIZON days.
    sales_validation = pd.DataFrame(df_validation.iloc[:, i])
    prediction_df[i] = scaler.inverse_transform(all_prediction.reshape(-1, 1)).reshape(-1)
    actual_train = sales_validation.iloc[optimal_sequence_length:-HORIZON]
    actual_test = sales_validation.iloc[-HORIZON:]
    predicted_train = prediction_df[i].iloc[:-HORIZON]
    predicted_test = prediction_df[i].iloc[-HORIZON:]
    rmse_train_lstm = np.sqrt(mean_squared_error(actual_train, predicted_train))
    r2_train_lstm = r2_score(actual_train, predicted_train)
    rmse_test_lstm = np.sqrt(mean_squared_error(actual_test, predicted_test))
    r2_test_lstm = r2_score(actual_test, predicted_test)
    time_taken = time.time() - start_time

    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")

summary_df.to_csv("Baseline_transformer_Summary_3.csv")
prediction_df.to_csv("Baseline_transformer_Results_3.csv")

Item 12269 test rmse, test r2, time taken: 1.6386697505892927, -0.12458708566942156, 60.79895782470703
Item 12274 test rmse, test r2, time taken: 0.6243483210266192, -0.018705625198354836, 20.08068537712097
Item 12308 test rmse, test r2, time taken: 0.07561212066809937, 0.0, 46.395798206329346
Item 12314 test rmse, test r2, time taken: 0.4746681618744622, -0.032999609915017825, 23.04255223274231
Item 12342 test rmse, test r2, time taken: 0.8238117294540982, -0.007715833749135204, 80.37517642974854
Item 12363 test rmse, test r2, time taken: 0.4560931139296918, -0.07295005264526955, 32.14507484436035
Item 12466 test rmse, test r2, time taken: 3.080471745941351, -0.04842390687622267, 24.556649684906006
Item 12490 test rmse, test r2, time taken: 0.1855780630413639, -1.2833556762625875e-05, 41.29540014266968
Item 12493 test rmse, test r2, time taken: 0.6229631484940233, -0.11859241968810297, 25.661415576934814
Item 12552 test rmse, test r2, time taken: 0.001954040486990559, 0.0, 21.22359085