In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
from itertools import product

from tqdm import tqdm_notebook

import random

import os

import matplotlib.pyplot as plt
from matplotlib.pyplot import figure
from pylab import rcParams

import seaborn as sns

from sklearn import metrics
from sklearn.metrics import r2_score, median_absolute_error, mean_absolute_error
from sklearn.metrics import median_absolute_error, mean_squared_error, mean_squared_log_error

from sklearn.preprocessing import MinMaxScaler

from sklearn.model_selection import KFold

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset, Subset

import utils

import warnings
warnings.filterwarnings("ignore")

from scipy import stats

import time

## Data Preprocessing

In [2]:
train = pd.read_csv("sales_train_validation.csv")
sell_prices = pd.read_csv("sell_prices.csv")
calendar = pd.read_csv("calendar.csv")
# validation contains 28 more dates
validation = pd.read_csv("sales_train_evaluation.csv")

In [3]:
d_cols = [c for c in train.columns if 'd_' in c]
dates = calendar[calendar.d.isin(d_cols)]['date']
dates_list = [datetime.strptime(date, '%Y-%m-%d').date() for date in dates]

df_sales = train[d_cols].T
df_sales.columns = train['id'].values
df_sales = pd.DataFrame(df_sales).set_index([dates_list])
df_sales.index = pd.to_datetime(df_sales.index)

df_sales.columns = [i for i in range(len(df_sales.columns))]

In [4]:
d_cols = [c for c in validation.columns if 'd_' in c]
dates = calendar[calendar.d.isin(d_cols)]['date']
dates_list = [datetime.strptime(date, '%Y-%m-%d').date() for date in dates]

df_validation = validation[d_cols].T
df_validation.columns = validation['id'].values
df_validation = pd.DataFrame(df_validation).set_index([dates_list])
df_validation.index = pd.to_datetime(df_validation.index)

df_validation.columns = [i for i in range(len(df_validation.columns))]

In [5]:
SEED = 1345
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    
seed_everything(SEED)

## Running LSTM Model

In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [7]:
class LSTM(nn.Module):
    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super(LSTM, self).__init__()
        
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        #self.seq_length = seq_length
        self.dropout = nn.Dropout(p=0.2)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True,dropout = 0.25)
        self.fc = nn.Linear(hidden_size, num_classes)
    def forward(self, x):
        h_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))
        c_0 = Variable(torch.zeros(
            self.num_layers, x.size(0), self.hidden_size).to(device))
        # Propagate input through LSTM
        ula, (h_out, _) = self.lstm(x, (h_0, c_0))
        # IMPORTANT: only use the hidden state of the last layer
        h_out = h_out[-1,:,:].view(-1, self.hidden_size)
        out = self.fc(h_out)
        out = self.dropout(out)
        return out

In [8]:
optimal_hidden_neuron = 512
optimal_batch_size = 16
optimal_hidden_layer = 2
optimal_sequence = 91

### Selecting 5 batches of 150 random time series from M5 dataset

In [9]:
batch_length = int(len(df_sales.columns)/5)

#### Batch 1

In [10]:
batch_no = 1
col_list = random.sample(range((batch_no-1)*batch_length, (batch_no-1)*batch_length+batch_length), 150)
col_list.sort()

In [11]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame()
prediction_df.index = df_validation.index[optimal_sequence:]

for i in col_list:
    
    start_time = time.time()
    
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    X, y = utils.sliding_windows(train_data_normalized, optimal_sequence)
    
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])

    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))

    forecast_input = train_data_normalized[train_data_normalized.shape[0]-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 1
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 1
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing  ##########################################################
    lstm = torch.load('Models\lstm_final.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    forecasts_[0] = forecasted.cpu().data.numpy()[0][0]
    for j in range(27):
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), forecasted.unsqueeze(0)), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j+1] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    prediction_df[i] = scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i]))
    r2_train_lstm = r2_score(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i])
    rmse_test_lstm = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df.iloc[-28:][i]))
    r2_test_lstm = r2_score(sales_validation[-28:], prediction_df.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]    
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")
    

Item 2 test rmse, test r2, time taken: 0.952136492852118, -0.005298583310117788, 46.820117712020874
Item 268 test rmse, test r2, time taken: 0.6455102748074641, -0.09257483500834174, 37.84932851791382
Item 345 test rmse, test r2, time taken: 3.8685181866835574, -0.18754042927023562, 45.255528688430786
Item 380 test rmse, test r2, time taken: 0.4842386474938755, -0.25059769454238845, 70.25096201896667
Item 465 test rmse, test r2, time taken: 0.7828476643636182, -1.555716835266301, 52.33050179481506
Item 478 test rmse, test r2, time taken: 0.8165786669178552, -0.013123573457418436, 114.20653986930847
Item 520 test rmse, test r2, time taken: 0.5998989740958586, -0.15633181488143721, 77.141672372818
Item 529 test rmse, test r2, time taken: 0.7379663662112446, -0.019002330321428396, 61.271798610687256
Item 569 test rmse, test r2, time taken: 1.5535586502832517, -1.1118514198861789, 41.716917991638184
Item 576 test rmse, test r2, time taken: 1.0355063635346153, -0.12388017148905761, 37.81899

In [12]:
summary_df.to_csv("Baseline_LSTM_Summary_1.csv")
prediction_df.to_csv("Baseline_LSTM_Results_1.csv")

#### Batch 2

In [10]:
batch_no = 2
col_list = random.sample(range((batch_no-1)*batch_length, (batch_no-1)*batch_length+batch_length), 150)
col_list.sort()

In [11]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame()
prediction_df.index = df_validation.index[optimal_sequence:]

for i in col_list:
    
    start_time = time.time()
    
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    X, y = utils.sliding_windows(train_data_normalized, optimal_sequence)
    
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])

    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))

    forecast_input = train_data_normalized[train_data_normalized.shape[0]-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 1
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 1
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing  ##########################################################
    lstm = torch.load('Models\lstm_final.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    forecasts_[0] = forecasted.cpu().data.numpy()[0][0]
    for j in range(27):
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), forecasted.unsqueeze(0)), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j+1] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    prediction_df[i] = scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i]))
    r2_train_lstm = r2_score(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i])
    rmse_test_lstm = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df.iloc[-28:][i]))
    r2_test_lstm = r2_score(sales_validation[-28:], prediction_df.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]    
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")
    

Item 6100 test rmse, test r2, time taken: 0.44337121213713643, -0.3401493642915796, 50.445486068725586
Item 6366 test rmse, test r2, time taken: 0.9796957545415168, -0.033634830810262084, 46.927910566329956
Item 6443 test rmse, test r2, time taken: 3.9213911153197083, -0.025851743616213563, 37.97551989555359
Item 6478 test rmse, test r2, time taken: 0.35800228502471604, -0.9323434363270724, 52.141260385513306
Item 6563 test rmse, test r2, time taken: 0.6846049791583795, -12.609194012997811, 37.83680820465088
Item 6576 test rmse, test r2, time taken: 1.7528256504087254, -0.3662846536658675, 37.28048133850098
Item 6618 test rmse, test r2, time taken: 0.5070407286549548, -6.465140577915509, 41.22067475318909
Item 6627 test rmse, test r2, time taken: 0.7666950348287437, -0.0018519146129176711, 50.52197861671448
Item 6667 test rmse, test r2, time taken: 1.2835003497426773, -0.0004187047769379504, 37.47891569137573
Item 6674 test rmse, test r2, time taken: 1.6311676678925486, -0.062656669001

In [12]:
summary_df.to_csv("Baseline_LSTM_Summary_2.csv")
prediction_df.to_csv("Baseline_LSTM_Results_2.csv")

### Batch 3

In [13]:
batch_no = 3
col_list = random.sample(range((batch_no-1)*batch_length, (batch_no-1)*batch_length+batch_length), 150)
col_list.sort()

In [14]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame()
prediction_df.index = df_validation.index[optimal_sequence:]

for i in col_list:
    
    start_time = time.time()
    
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    X, y = utils.sliding_windows(train_data_normalized, optimal_sequence)
    
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])

    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))

    forecast_input = train_data_normalized[train_data_normalized.shape[0]-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 1
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 1
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing  ##########################################################
    lstm = torch.load('Models\lstm_final.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    forecasts_[0] = forecasted.cpu().data.numpy()[0][0]
    for j in range(27):
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), forecasted.unsqueeze(0)), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j+1] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    prediction_df[i] = scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i]))
    r2_train_lstm = r2_score(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i])
    rmse_test_lstm = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df.iloc[-28:][i]))
    r2_test_lstm = r2_score(sales_validation[-28:], prediction_df.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]    
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")
    

Item 12269 test rmse, test r2, time taken: 1.5674732762424177, -0.028988471068143395, 55.98680329322815
Item 12274 test rmse, test r2, time taken: 0.6180412686759813, 0.0017719744243090974, 58.04476809501648
Item 12308 test rmse, test r2, time taken: 0.27037574408155773, 0.0, 45.1892147064209
Item 12314 test rmse, test r2, time taken: 0.6139122600313229, -0.7279555450593285, 56.13570857048035
Item 12342 test rmse, test r2, time taken: 0.8975307359387419, -0.19613665684188608, 70.190012216568
Item 12363 test rmse, test r2, time taken: 0.5757498313037838, -0.7097795309547832, 68.3409206867218
Item 12466 test rmse, test r2, time taken: 4.4307790524153425, -1.1690154398079082, 45.07303547859192
Item 12490 test rmse, test r2, time taken: 0.3110740772782417, -1.8098293310642757, 72.21473026275635
Item 12493 test rmse, test r2, time taken: 0.58760722669449, 0.004774682922594087, 75.56545782089233
Item 12552 test rmse, test r2, time taken: 0.3209111083316238, 0.0, 54.06796145439148
Item 12599 

In [15]:
summary_df.to_csv("Baseline_LSTM_Summary_3.csv")
prediction_df.to_csv("Baseline_LSTM_Results_3.csv")

### Batch 4

In [10]:
batch_no = 4
col_list = random.sample(range((batch_no-1)*batch_length, (batch_no-1)*batch_length+batch_length), 150)
col_list.sort()

In [11]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame()
prediction_df.index = df_validation.index[optimal_sequence:]

for i in col_list:
    
    start_time = time.time()
    
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    X, y = utils.sliding_windows(train_data_normalized, optimal_sequence)
    
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])

    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))

    forecast_input = train_data_normalized[train_data_normalized.shape[0]-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 1
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 1
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing  ##########################################################
    lstm = torch.load('Models\lstm_final.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    forecasts_[0] = forecasted.cpu().data.numpy()[0][0]
    for j in range(27):
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), forecasted.unsqueeze(0)), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j+1] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    prediction_df[i] = scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i]))
    r2_train_lstm = r2_score(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i])
    rmse_test_lstm = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df.iloc[-28:][i]))
    r2_test_lstm = r2_score(sales_validation[-28:], prediction_df.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]    
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")
    

Item 18296 test rmse, test r2, time taken: 0.5253874908079712, -0.00189398217609682, 97.98495483398438
Item 18562 test rmse, test r2, time taken: 0.6891882969296939, -0.025853219729707444, 65.05158829689026
Item 18639 test rmse, test r2, time taken: 9.805360985944285, -0.07516633746088575, 83.21321749687195
Item 18674 test rmse, test r2, time taken: 0.3937424857011731, -0.05692161492323988, 43.44737362861633
Item 18759 test rmse, test r2, time taken: 0.4082095493451188, -0.1360162466296635, 70.87622213363647
Item 18772 test rmse, test r2, time taken: 1.4508437438268214, -0.05046396823495791, 39.859015464782715
Item 18814 test rmse, test r2, time taken: 0.4502556331229606, -0.3820906605619401, 104.85474419593811
Item 18823 test rmse, test r2, time taken: 1.5120493590880433, -0.04212437164287919, 48.70484662055969
Item 18863 test rmse, test r2, time taken: 0.8643822827291276, -0.06117912475666243, 43.41110587120056
Item 18870 test rmse, test r2, time taken: 0.8360769296825231, -0.1735231

In [12]:
summary_df.to_csv("Baseline_LSTM_Summary_4.csv")
prediction_df.to_csv("Baseline_LSTM_Results_4.csv")

### Batch 5

In [13]:
batch_no = 5
col_list = random.sample(range((batch_no-1)*batch_length, (batch_no-1)*batch_length+batch_length), 150)
col_list.sort()

In [14]:
summary_df = pd.DataFrame(columns = ["rmse_train_lstm", "r2_train_lstm", "rmse_test_lstm", "r2_test_lstm", "Time Taken"])
prediction_df = pd.DataFrame()
prediction_df.index = df_validation.index[optimal_sequence:]

for i in col_list:
    
    start_time = time.time()
    
    data = np.array(df_sales.iloc[:, i]).reshape(-1, 1)
    scaler = MinMaxScaler(feature_range=(-1, 1))
    train_data_normalized = scaler.fit_transform(data)
    X, y = utils.sliding_windows(train_data_normalized, optimal_sequence)
    
    train_data = utils.CustomDataset(X[:-28], y[:-28])
    val_data = utils.CustomDataset(X[-28:], y[-28:])

    train_loader = DataLoader(train_data, batch_size=optimal_batch_size , shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=optimal_batch_size, shuffle=False, drop_last=False)

    dataX = Variable(torch.Tensor(np.array(X)))
    dataY = Variable(torch.Tensor(np.array(y)))

    forecast_input = train_data_normalized[train_data_normalized.shape[0]-optimal_sequence:]
    forecast_input_tensor = Variable(torch.Tensor(np.array(forecast_input))).unsqueeze(0)
    ##############################################################  Training  ##########################################################
    #####  Parameters  ######################
    num_epochs = 200
    learning_rate = 1e-3
    input_size = 1
    hidden_size = optimal_hidden_neuron
    num_layers = optimal_hidden_layer
    num_classes = 1
    
    #####Init the Model #######################
    lstm = LSTM(num_classes, input_size, hidden_size, num_layers)
    lstm.to(device)
    early_stopper = utils.EarlyStopper(patience=20)
    ##### Set Criterion Optimzer and scheduler ####################
    criterion = torch.nn.MSELoss().to(device)    # mean-squared error for regression
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,weight_decay=1e-5)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,  patience=20,factor =0.1,min_lr=1e-7, eps=1e-08)
    # Train the model
    train_loss_, val_loss_ = [], []
    for epoch in range(num_epochs): 
        train_loss, val_loss = 0, 0
        lstm.train()
        for _, (X_train, Y_train) in enumerate(train_loader):
            outputs = lstm(X_train.to(device))
            # obtain the loss function
            loss = criterion(outputs, Y_train.to(device))
            train_loss += loss.item()
            # Backpropagation
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        train_loss/=len(train_loader.dataset)
        train_loss_.append(train_loss)
        #Evaluate on test     
        lstm.eval()
        with torch.no_grad():
            for _, (X_val, Y_val) in enumerate(val_loader):
                valid = lstm(X_val.to(device))
                loss = criterion(valid, Y_val.to(device))
                val_loss += loss.item()
        val_loss/=len(val_loader.dataset)
        val_loss_.append(val_loss)
        scheduler.step(val_loss)
        if early_stopper.early_stop(val_loss, lstm, 'Models\lstm_final.pth'):
            early_stopped = epoch+1
            break
    ##############################################################  Testing  ##########################################################
    lstm = torch.load('Models\lstm_final.pth')
    lstm.eval()
    with torch.no_grad():
        predict = lstm(dataX.to(device))
    predicted = predict.cpu().data.numpy()
    predicted_ = np.array(predicted.reshape(predicted.shape[0]))
    forecasts_ = np.zeros(28)
    lstm.eval()
    with torch.no_grad():
        forecasted = lstm(forecast_input_tensor.to(device))
    forecasts_[0] = forecasted.cpu().data.numpy()[0][0]
    for j in range(27):
        forecast_input_tensor = torch.cat((forecast_input_tensor[:, 1:].to(device), forecasted.unsqueeze(0)), dim = 1)
        lstm.eval()
        with torch.no_grad():
            forecasted = lstm(forecast_input_tensor.to(device))
        forecasts_[j+1] = forecasted.cpu().data.numpy()[0][0]
    all_prediction = np.append(predicted_, forecasts_)

    sales_validation = pd.DataFrame(df_validation.iloc[:,i])
    prediction_df[i] = scaler.inverse_transform(np.array(all_prediction).reshape(-1, 1)).reshape(-1)
    rmse_train_lstm = np.sqrt(mean_squared_error(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i]))
    r2_train_lstm = r2_score(sales_validation[optimal_sequence:-28], prediction_df.iloc[:-28][i])
    rmse_test_lstm = np.sqrt(mean_squared_error(sales_validation[-28:], prediction_df.iloc[-28:][i]))
    r2_test_lstm = r2_score(sales_validation[-28:], prediction_df.iloc[-28:][i])
    time_taken = time.time() - start_time
    
    summary_df.loc[i] = [rmse_train_lstm, r2_train_lstm, rmse_test_lstm, r2_test_lstm, time_taken]    
    print(f"Item {i} test rmse, test r2, time taken: {rmse_test_lstm}, {r2_test_lstm}, {time_taken}")
    

Item 24465 test rmse, test r2, time taken: 0.605340234509283, 0.0, 64.87036061286926
Item 24470 test rmse, test r2, time taken: 0.6489916758286146, -0.13086682572311537, 126.44151091575623
Item 24504 test rmse, test r2, time taken: 0.412271591120207, -0.15873744381141863, 41.63777136802673
Item 24510 test rmse, test r2, time taken: 1.46765633295882, -0.003415239187473329, 66.73393940925598
Item 24538 test rmse, test r2, time taken: 0.5048561391638864, -1.6643426861486512, 75.54103136062622
Item 24559 test rmse, test r2, time taken: 0.6120541925436245, -0.0797591997588929, 41.587507009506226
Item 24662 test rmse, test r2, time taken: 6.173869202521957, 0.014397685718941178, 46.97289061546326
Item 24686 test rmse, test r2, time taken: 0.4205551670021281, 0.0, 72.00676965713501
Item 24689 test rmse, test r2, time taken: 0.489691571765957, -7.994678472789118e-06, 104.64279294013977
Item 24748 test rmse, test r2, time taken: 0.4542948072342061, -0.01128048221182576, 54.02683472633362
Item 2

In [15]:
summary_df.to_csv("Baseline_LSTM_Summary_5.csv")
prediction_df.to_csv("Baseline_LSTM_Results_5.csv")