In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from itertools import product
import time as time

from tqdm import tqdm_notebook

import random

import os

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from pylab import rcParams

import seaborn as sns

from sklearn import metrics
from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error
from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error

from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, Subset

from sklearn.preprocessing import MinMaxScaler

import utils

# import statsmodels.api as sm
# from statsmodels.tsa.stattools import adfuller
# from statsmodels.tsa.stattools import acf, pacf
# from statsmodels.tsa.arima.model import ARIMA
# from statsmodels.tsa.statespace.sarimax import SARIMAX
# from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
# from pmdarima import auto_arima

import warnings
warnings.filterwarnings("ignore")

from scipy import stats

## Data Preprocessing

In [2]:
train = pd.read_csv("sales_train_validation.csv")
sell_prices = pd.read_csv("sell_prices.csv")
calendar = pd.read_csv("calendar.csv")
# validation contains 28 more dates
validation = pd.read_csv("sales_train_evaluation.csv")

In [3]:
d_cols = [c for c in train.columns if 'd_' in c]
dates = calendar[calendar.d.isin(d_cols)]['date']
dates_list = [datetime.strptime(date, '%Y-%m-%d').date() for date in dates]
df_sales = train[d_cols].T
df_sales.columns = train['id'].values
df_sales = pd.DataFrame(df_sales).set_index([dates_list])
df_sales.index = pd.to_datetime(df_sales.index)
df_sales.columns = [i for i in range(len(df_sales.columns))]

d_cols = [c for c in validation.columns if 'd_' in c]
dates = calendar[calendar.d.isin(d_cols)]['date']
dates_list = [datetime.strptime(date, '%Y-%m-%d').date() for date in dates]
df_validation = validation[d_cols].T
df_validation.columns = validation['id'].values
df_validation = pd.DataFrame(df_validation).set_index([dates_list])
df_validation.index = pd.to_datetime(df_validation.index)
df_validation.columns = [i for i in range(len(df_validation.columns))]

In [4]:
SEED = 1345
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(SEED)

## Running LSTM Model

In [5]:
optimal_hidden_neuron = 512
optimal_batch_size = 16
optimal_hidden_layer = 2
optimal_sequence = 91

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:
class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        #self.seq_length = seq_length
        self.dropout = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True,dropout = 0.25)
        self.fc = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        # IMPORTANT: only use the hidden state of the last layer
        h_out = h_out[-1,:,:].view(-1, self.hidden_size)
        out = self.fc(h_out)
        out = self.dropout(out)
        return out

### Batch 1

In [8]:
batch1 = pd.read_csv("Baseline_LSTM_Results_1.csv")
batch1 = batch1.iloc[:, 1:]
batch1.index = df_validation.index[optimal_sequence:]

In [9]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm_pr", "r2_train_lstm_pr", "rmse_test_lstm_pr", "r2_test_lstm_pr", "rmse_train_lstm_mu", "r2_train_lstm_mu", "rmse_test_lstm_mu", "r2_test_lstm_mu", "Time Taken"])
prediction_df_1 = pd.DataFrame()
prediction_df_1.index = df_validation.index[optimal_sequence:]
prediction_df_2 = pd.DataFrame()
prediction_df_2.index = df_validation.index[optimal_sequence:]
batch = batch1

for string_i in batch.columns:
    
    i = int(string_i)
    start_time = time.time()
    
    original_prediction_residuals_df = df_sales.iloc[optimal_sequence:, i] - batch[string_i][:-28]
    mu = original_prediction_residuals_df.mean()
    total_residuals = [mu]*(optimal_sequence-1)+list(original_prediction_residuals_df)
    total_residuals = np.array(total_residuals).reshape(len(total_residuals), 1)
    total_residuals_df = pd.DataFrame(total_residuals)
    total_residuals_df.index = df_sales.index[:-1]

    y_scaler = MinMaxScaler((-1, 1))
    r_scaler = MinMaxScaler((-1, 1))
    y_scaler.fit(df_sales.iloc[:, i:i+1].values.reshape(-1, 1))
    r_scaler.fit(total_residuals_df.values.reshape(-1, 1))
    normalized_y = y_scaler.transform(df_sales.iloc[:-1, i:i+1])
    normalized_r = r_scaler.transform(total_residuals_df)
    normalized_data = np.concatenate([normalized_y, normalized_r], axis=1)

    X, y = utils.sliding_windows_mutli_features_2(normalized_data, optimal_sequence)
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)
    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 2
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 2
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final_w_residuals.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing 1 ##########################################################
    lstm = torch.load('Models\lstm_final_w_residuals.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)
    
    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    
    prediction_df_1[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_1 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i]))
    r2_train_lstm_1 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i])
    rmse_test_lstm_1 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_1.iloc[-28:][i]))
    r2_test_lstm_1 = r2_score(sales_validation[-28:], prediction_df_1.iloc[-28:][i])
    ##############################################################  Testing 2 ##########################################################
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-28:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        forecasted[0][1] = r_scaler.transform([[mu]])[0][0]
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    prediction_df_2[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_2 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i]))
    r2_train_lstm_2 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i])
    rmse_test_lstm_2 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_2.iloc[-28:][i]))
    r2_test_lstm_2 = r2_score(sales_validation[-28:], prediction_df_2.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm_1, r2_train_lstm_1, rmse_test_lstm_1, r2_test_lstm_1, rmse_train_lstm_2, r2_train_lstm_2, rmse_test_lstm_2, r2_test_lstm_2, time_taken]    
    print(f"Item {i} test r2(pr), test r2(mu), time taken: {r2_test_lstm_1}, {r2_test_lstm_2}, {time_taken}")

summary_df.to_csv("LSTM_error_modelling_Summary_1.csv")
prediction_df_1.to_csv("LSTM_error_modelling_Results_1_1.csv")
prediction_df_2.to_csv("LSTM_error_modelling_Results_1_2.csv")

Item 2 test r2(pr), test r2(mu), time taken: -0.3082016954809499, -0.017599669864690082, 109.35196661949158
Item 268 test r2(pr), test r2(mu), time taken: -1.0398586544023467, -0.6145817227170398, 89.21569561958313
Item 345 test r2(pr), test r2(mu), time taken: -0.3145839283921228, -0.04233314202449767, 210.5991725921631
Item 380 test r2(pr), test r2(mu), time taken: -1.3265035374130076, -0.26470223977670626, 94.06848955154419
Item 465 test r2(pr), test r2(mu), time taken: -5.038564916504766, -1.2794903418802361, 97.57755017280579
Item 478 test r2(pr), test r2(mu), time taken: -0.202660437586478, -0.004565636109817195, 69.94629311561584
Item 520 test r2(pr), test r2(mu), time taken: -1.2631846086130927, -0.31845442831568316, 51.01071333885193
Item 529 test r2(pr), test r2(mu), time taken: -0.2663450268778307, -0.0004956519542369531, 65.88622832298279
Item 569 test r2(pr), test r2(mu), time taken: -5.1751284304389795, -2.7376686875447134, 76.19749450683594
Item 576 test r2(pr), test r2(

### Batch 2

In [8]:
batch2 = pd.read_csv("Baseline_LSTM_Results_2.csv")
batch2 = batch2.iloc[:, 1:]
batch2.index = df_validation.index[optimal_sequence:]

In [9]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm_pr", "r2_train_lstm_pr", "rmse_test_lstm_pr", "r2_test_lstm_pr", "rmse_train_lstm_mu", "r2_train_lstm_mu", "rmse_test_lstm_mu", "r2_test_lstm_mu", "Time Taken"])
prediction_df_1 = pd.DataFrame()
prediction_df_1.index = df_validation.index[optimal_sequence:]
prediction_df_2 = pd.DataFrame()
prediction_df_2.index = df_validation.index[optimal_sequence:]
batch = batch2

for string_i in batch.columns:
    
    i = int(string_i)
    start_time = time.time()
    
    original_prediction_residuals_df = df_sales.iloc[optimal_sequence:, i] - batch[string_i][:-28]
    mu = original_prediction_residuals_df.mean()
    total_residuals = [mu]*(optimal_sequence-1)+list(original_prediction_residuals_df)
    total_residuals = np.array(total_residuals).reshape(len(total_residuals), 1)
    total_residuals_df = pd.DataFrame(total_residuals)
    total_residuals_df.index = df_sales.index[:-1]

    y_scaler = MinMaxScaler((-1, 1))
    r_scaler = MinMaxScaler((-1, 1))
    y_scaler.fit(df_sales.iloc[:, i:i+1].values.reshape(-1, 1))
    r_scaler.fit(total_residuals_df.values.reshape(-1, 1))
    normalized_y = y_scaler.transform(df_sales.iloc[:-1, i:i+1])
    normalized_r = r_scaler.transform(total_residuals_df)
    normalized_data = np.concatenate([normalized_y, normalized_r], axis=1)

    X, y = utils.sliding_windows_mutli_features_2(normalized_data, optimal_sequence)
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)
    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 2
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 2
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final_w_residuals.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing 1 ##########################################################
    lstm = torch.load('Models\lstm_final_w_residuals.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)
    
    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    
    prediction_df_1[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_1 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i]))
    r2_train_lstm_1 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i])
    rmse_test_lstm_1 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_1.iloc[-28:][i]))
    r2_test_lstm_1 = r2_score(sales_validation[-28:], prediction_df_1.iloc[-28:][i])
    ##############################################################  Testing 2 ##########################################################
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-28:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        forecasted[0][1] = r_scaler.transform([[mu]])[0][0]
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    prediction_df_2[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_2 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i]))
    r2_train_lstm_2 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i])
    rmse_test_lstm_2 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_2.iloc[-28:][i]))
    r2_test_lstm_2 = r2_score(sales_validation[-28:], prediction_df_2.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm_1, r2_train_lstm_1, rmse_test_lstm_1, r2_test_lstm_1, rmse_train_lstm_2, r2_train_lstm_2, rmse_test_lstm_2, r2_test_lstm_2, time_taken]    
    print(f"Item {i} test r2(pr), test r2(mu), time taken: {r2_test_lstm_1}, {r2_test_lstm_2}, {time_taken}")

summary_df.to_csv("LSTM_error_modelling_Summary_2.csv")
prediction_df_1.to_csv("LSTM_error_modelling_Results_2_1.csv")
prediction_df_2.to_csv("LSTM_error_modelling_Results_2_2.csv")

Item 6100 test r2(pr), test r2(mu), time taken: -1.5359560147118998, -0.31274163754633433, 75.6128466129303
Item 6366 test r2(pr), test r2(mu), time taken: -0.07386269877985652, -0.0009885391953214917, 47.600775957107544
Item 6443 test r2(pr), test r2(mu), time taken: -0.06215738818638972, -0.35952993365908315, 51.57441186904907
Item 6478 test r2(pr), test r2(mu), time taken: -4.522701643418523, -0.6922159138679553, 65.87255144119263
Item 6563 test r2(pr), test r2(mu), time taken: -33.890183016280574, -16.6047042974913, 79.76642322540283
Item 6576 test r2(pr), test r2(mu), time taken: -0.20876089294213562, -0.44832707377797143, 60.200074672698975
Item 6618 test r2(pr), test r2(mu), time taken: -29.145501602280643, -11.645074416066928, 55.242870569229126
Item 6627 test r2(pr), test r2(mu), time taken: -0.1625857348135773, -0.005355098268343239, 53.14019298553467
Item 6667 test r2(pr), test r2(mu), time taken: -2.686474858245763, -0.34339924595490956, 44.044798612594604
Item 6674 test r2

### Batch 3

In [8]:
batch3 = pd.read_csv("Baseline_LSTM_Results_3.csv")
batch3 = batch3.iloc[:, 1:]
batch3.index = df_validation.index[optimal_sequence:]

In [9]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm_pr", "r2_train_lstm_pr", "rmse_test_lstm_pr", "r2_test_lstm_pr", "rmse_train_lstm_mu", "r2_train_lstm_mu", "rmse_test_lstm_mu", "r2_test_lstm_mu", "Time Taken"])
prediction_df_1 = pd.DataFrame()
prediction_df_1.index = df_validation.index[optimal_sequence:]
prediction_df_2 = pd.DataFrame()
prediction_df_2.index = df_validation.index[optimal_sequence:]
batch = batch3

for string_i in batch.columns:
    
    i = int(string_i)
    start_time = time.time()
    
    original_prediction_residuals_df = df_sales.iloc[optimal_sequence:, i] - batch[string_i][:-28]
    mu = original_prediction_residuals_df.mean()
    total_residuals = [mu]*(optimal_sequence-1)+list(original_prediction_residuals_df)
    total_residuals = np.array(total_residuals).reshape(len(total_residuals), 1)
    total_residuals_df = pd.DataFrame(total_residuals)
    total_residuals_df.index = df_sales.index[:-1]

    y_scaler = MinMaxScaler((-1, 1))
    r_scaler = MinMaxScaler((-1, 1))
    y_scaler.fit(df_sales.iloc[:, i:i+1].values.reshape(-1, 1))
    r_scaler.fit(total_residuals_df.values.reshape(-1, 1))
    normalized_y = y_scaler.transform(df_sales.iloc[:-1, i:i+1])
    normalized_r = r_scaler.transform(total_residuals_df)
    normalized_data = np.concatenate([normalized_y, normalized_r], axis=1)

    X, y = utils.sliding_windows_mutli_features_2(normalized_data, optimal_sequence)
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])
    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)
    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 2
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 2
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final_w_residuals.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing 1 ##########################################################
    lstm = torch.load('Models\lstm_final_w_residuals.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)
    
    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    
    prediction_df_1[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_1 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i]))
    r2_train_lstm_1 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_1.iloc[:-28][i])
    rmse_test_lstm_1 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_1.iloc[-28:][i]))
    r2_test_lstm_1 = r2_score(sales_validation[-28:], prediction_df_1.iloc[-28:][i])
    ##############################################################  Testing 2 ##########################################################
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()[:, 0]
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecast_input = normalized_data[-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    forecasts_ = np.zeros(28)
    forecast_input = normalized_data[-28:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    predicted_ = np.concatenate([predicted_, [forecasted.cpu().data.numpy()[0][0]]])
    forecasted[0][0] = y_scaler.transform([[df_sales.iloc[-1, i]]])[0][0]
    for j in range(28):
        forecasted[0][1] = r_scaler.transform([[mu]])[0][0]
        to_concat = forecasted.unsqueeze(0)
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), to_concat), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    prediction_df_2[i] = y_scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm_2 = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i]))
    r2_train_lstm_2 = r2_score(sales_validation[optimal_sequence:-28], prediction_df_2.iloc[:-28][i])
    rmse_test_lstm_2 = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df_2.iloc[-28:][i]))
    r2_test_lstm_2 = r2_score(sales_validation[-28:], prediction_df_2.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm_1, r2_train_lstm_1, rmse_test_lstm_1, r2_test_lstm_1, rmse_train_lstm_2, r2_train_lstm_2, rmse_test_lstm_2, r2_test_lstm_2, time_taken]    
    print(f"Item {i} test r2(pr), test r2(mu), time taken: {r2_test_lstm_1}, {r2_test_lstm_2}, {time_taken}")

summary_df.to_csv("LSTM_error_modelling_Summary_3.csv")
prediction_df_1.to_csv("LSTM_error_modelling_Results_3_1.csv")
prediction_df_2.to_csv("LSTM_error_modelling_Results_3_2.csv")

Item 12269 test r2(pr), test r2(mu), time taken: -0.01419017130506628, -0.08912614505010863, 61.016584634780884
Item 12274 test r2(pr), test r2(mu), time taken: -0.04410246791413219, -0.01459421940438177, 75.13233590126038
Item 12308 test r2(pr), test r2(mu), time taken: 0.0, 0.0, 81.87527704238892
Item 12314 test r2(pr), test r2(mu), time taken: -3.215996521409716, -0.7082254624008006, 65.22032952308655
Item 12342 test r2(pr), test r2(mu), time taken: -0.6322966443060845, -0.3310708270382434, 87.00270318984985
Item 12363 test r2(pr), test r2(mu), time taken: -2.9331484295854784, -1.1448247259413864, 67.48564624786377
Item 12466 test r2(pr), test r2(mu), time taken: -6.0341487377491925, -1.9638603856478416, 60.145052433013916
Item 12490 test r2(pr), test r2(mu), time taken: -5.4077940826807405, -1.9926647802026638, 69.50724840164185
Item 12493 test r2(pr), test r2(mu), time taken: -0.17894499119890206, -0.02954573839352781, 106.14495468139648
Item 12552 test r2(pr), test r2(mu), time t