In [None]:
# Importing essential Libraries

import numpy as np
import pandas as pd

from urllib.request import urlopen
from PIL import Image

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.autograd import Variable
from sklearn.preprocessing import MinMaxScaler


import os
# Print the full path of every file found under the Kaggle input directory.
input_file_paths = (
    os.path.join(root, fname)
    for root, _, fnames in os.walk('/kaggle/input')
    for fname in fnames
)
for file_path in input_file_paths:
    print(file_path)

In [None]:
# Load the competition's train and test CSV files.
train_csv_path = '/kaggle/input/tabular-playground-series-jan-2022/train.csv'
test_csv_path = '/kaggle/input/tabular-playground-series-jan-2022/test.csv'

df = pd.read_csv(train_csv_path)
test_data = pd.read_csv(test_csv_path)

# Shapes are reported as loaded, i.e. before any column is removed.
print('Shape of the training data : ', df.shape)
print('Shape of the testing data : ', test_data.shape, '\n')

# Drop the 'row_id' column from the training frame and parse 'date' as datetime.
df = df.drop(columns=['row_id'])
df['date'] = pd.to_datetime(df['date'])
df.head()

* Link for Part 1 : https://www.kaggle.com/toomuchsauce/happy-new-year-tps-understanding-the-data-pt1

In [None]:
def EDA(df):
    """Print a text-based exploratory summary of ``df`` to standard output.

    The report contains, in order: the shape, the column index, the
    ``DataFrame.info()`` listing (dtypes and non-null counts), the unique
    values of every ``object``-dtype column, ``describe(include='all')``,
    per-column memory usage and the number of fully duplicated rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to summarise. It is only read, never modified.

    Returns
    -------
    None
    """
    bold, reset = '\033[1m', '\033[0m'
    rule = '\n------------------------------------------------------------------------------------\n'

    print(bold + 'EXPLORATORY DATA ANALYSIS :' + reset + '\n')
    print(bold + 'Shape of the data (rows, columns):' + reset)
    print(df.shape, rule)

    print(bold + 'All columns from the dataframe :' + reset)
    print(df.columns, rule)

    print(bold + 'Datatypes and Missing values:' + reset)
    # DataFrame.info() writes its report itself and returns None, so it must
    # not be wrapped in print() (that would emit a stray "None").
    df.info()
    print(rule)

    for col in df.columns:
        if df[col].dtype == 'object':
            # Compute the unique values once and reuse them for both lines.
            categories = df[col].unique()
            print(bold + 'Total Unique values in {} :'.format(col) + reset, len(categories))
            print('\t' + bold + 'Categories in {} :'.format(col) + reset, categories)
    print(rule)

    print(bold + 'Summary statistics for the data :' + reset)
    print(df.describe(include='all'), rule)

    print(bold + 'Memory used by the data :' + reset)
    print(df.memory_usage(), rule)

    print(bold + 'Number of duplicate values :' + reset)
    print(df.duplicated().sum())
          
# Print the exploratory report for the training frame.
EDA(df)

In [None]:
# Download the illustration and display it as the cell output.
image_url = "https://i.postimg.cc/4drz84FH/fooled-by-the-accuracy.png"
mmm = Image.open(urlopen(image_url))
mmm

In [None]:
# Pick out a single series (Finland / KaggleMart / Kaggle Mug) and plot it.
is_finland = df['country'] == 'Finland'
is_kagglemart = df['store'] == 'KaggleMart'
is_mug = df['product'] == 'Kaggle Mug'

# Selecting with a one-element column list keeps the values two-dimensional.
training_set = df.loc[is_finland & is_kagglemart & is_mug, ['num_sold']].values

plt.figure(figsize=(8,4), dpi=120)
plt.plot(training_set)
plt.title('Finland KaggleMart Mug sales')
plt.xlabel('days')
plt.ylabel('num_sold')
plt.grid(True)
plt.show()

In [None]:
# Hyperparameters shared by the model definition and the training loop

num_epochs = 2000  # optimisation steps run per series (each step uses the whole training split)
learning_rate = 0.01  # learning rate handed to the Adam optimizer

input_size = 1  # features per time step fed to the LSTM (only num_sold)
hidden_size = 2  # size of the LSTM hidden state
num_layers = 1  # number of stacked LSTM layers

num_classes = 1  # output size of the final linear layer (one predicted value)

# train_size is computed per series inside the training loop as len(y) - 365
seq_length = 365  # number of consecutive observations in each input window

In [None]:
def sliding_windows(data, seq_length):
    '''
    Split a series into overlapping input windows and their next-step targets.

    Parameters
    ----------
    data : array-like
        Series to split. It is sliced and indexed along its first axis.
    seq_length : int
        Number of consecutive items in each input window.

    Returns
    -------
    x : numpy.ndarray
        Stacked windows, where ``x[i]`` is ``data[i : i + seq_length]``.
    y : numpy.ndarray
        Stacked targets, where ``y[i]`` is ``data[i + seq_length]``, i.e. the
        item that immediately follows window ``i``.

    Both arrays hold ``len(data) - seq_length`` entries, so the final item of
    ``data`` is used as the last target. They are empty when ``data`` has no
    more than ``seq_length`` items.
    '''
    # The last valid window starts at len(data) - seq_length - 1, because its
    # target sits at index len(data) - 1; range() therefore stops (exclusive)
    # at len(data) - seq_length.
    n_windows = len(data) - seq_length

    x = [data[start:start + seq_length] for start in range(n_windows)]
    y = [data[start + seq_length] for start in range(n_windows)]

    return np.array(x), np.array(y)

In [None]:
# One MinMaxScaler instance is reused for every series: the training loop calls
# sc.fit_transform() on each series in turn, so at any moment the scaler only
# holds the min/max of the series it was fitted on most recently.
sc = MinMaxScaler()

In [None]:
# Download the illustration and display it as the cell output.
image_url = "https://i.postimg.cc/mkLbWCnv/fooled-by-the-accuracy-2.png"
mmm = Image.open(urlopen(image_url))
mmm

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer that maps the final hidden state to the output.

    Parameters
    ----------
    num_classes : int
        Number of output features produced by the linear layer.
    input_size : int
        Number of features per time step.
    hidden_size : int
        Size of the LSTM hidden state.
    num_layers : int
        Number of stacked LSTM layers.
    seq_length : int, optional
        Window length, stored on the instance for reference only; it is not
        used by the forward pass. When omitted, a module-level ``seq_length``
        is used if one exists, otherwise ``None`` is stored.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length=None):
        super().__init__()

        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        # Look the global up defensively so that constructing the model does
        # not raise NameError when no module-level seq_length is defined.
        if seq_length is None:
            seq_length = globals().get('seq_length')
        self.seq_length = seq_length

        self.lstm = nn.LSTM(input_size=input_size,
                            hidden_size=hidden_size,
                            num_layers=num_layers,
                            batch_first=True)

        self.fc = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return one prediction per input sequence.

        ``x`` is passed to an ``nn.LSTM`` built with ``batch_first=True``, so a
        batched input is laid out as (batch, sequence, input_size). The result
        has shape (batch, num_classes).
        """
        # No (h_0, c_0) is passed: nn.LSTM then starts from all-zero states
        # that it allocates to match the input, which keeps the model usable
        # after it has been moved to another device.
        _, (h_n, _) = self.lstm(x)

        # h_n stacks the final hidden state of every layer along its first
        # axis; only the last layer feeds the output head. Flattening all
        # layers together would yield num_layers * batch rows instead of batch.
        last_layer_hidden = h_n[-1]

        return self.fc(last_layer_hidden)

In [None]:
# Build the network from the hyperparameters, then the loss and the optimizer.
lstm = LSTM(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=lstm.parameters(), lr=learning_rate)

In [None]:
# Download the illustration and display it as the cell output.
image_url = "https://i.postimg.cc/Sx2m4yZx/fooled-by-the-accuracy-3.png"
mmm = Image.open(urlopen(image_url))
mmm

In [None]:
# Train on every (country, store, product) series in turn, store a snapshot of
# the weights after each one, and plot predictions against the actual values.
#
# NOTE: `lstm` and `optimizer` are created once, before this loop, so every
# series continues training the same network instead of starting from fresh
# weights. Each snapshot therefore reflects all series trained so far.
# NOTE(review): the scaler is fitted on the whole series, including the tail
# that is held out as the test split, so the test values influence the scaling.
models_dict = {}
for c in df.country.unique():
    for s in df.store.unique():
        for p in df["product"].unique():

            data = df[(df.country == c) & (df.store == s) & (df["product"] == p)][["date", "num_sold"]]

            # Transformation
            training_set = data[['num_sold']].values
            training_data = sc.fit_transform(training_set)
            x, y = sliding_windows(training_data, seq_length)
            train_size = len(y) - 365  # Change if you want validation set

            # Float32 tensors for the whole series; the train/test tensors are
            # slices of these (nothing below modifies them in place).
            dataX = torch.Tensor(x)
            dataY = torch.Tensor(y)

            trainX, trainY = dataX[:train_size], dataY[:train_size]
            testX, testY = dataX[train_size:], dataY[train_size:]

            model_name = f"model_{c}_{s}_{p}"
            print("Training model: ", model_name)

            # Training
            # The evaluation step below switches the model to eval mode, so
            # switch back explicitly before training on the next series.
            lstm.train()
            for epoch in range(num_epochs):

                outputs = lstm(trainX)
                loss = criterion(outputs, trainY)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

            # state_dict() hands back references to the live parameter
            # tensors, which keep changing as later series are trained. Clone
            # them so that each entry keeps the weights as they are right now.
            models_dict[model_name] = {
                key: value.detach().clone()
                for key, value in lstm.state_dict().items()
            }

            # Evaluating
            lstm.eval()
            with torch.no_grad():
                train_predict = lstm(dataX)

            # Undo the min-max scaling so the plot is in original sales units.
            data_predict = sc.inverse_transform(train_predict.numpy())
            dataY_plot = sc.inverse_transform(dataY.numpy())

            plt.figure(figsize=(12,4), dpi=100)
            plt.axvline(x=train_size, c='g', linestyle='--', label='test split')
            plt.plot(dataY_plot, label = 'Actual')
            plt.plot(data_predict, label = 'Predicted')
            plt.ylabel('num_sold')
            plt.legend(loc="upper left")
            plt.suptitle(f'Time-Series Prediction : {c}-> {s}-> {p}')
            plt.box(False)
            plt.show()


In [None]:
# Look up the entry stored for one series. Keys follow the
# f"model_{c}_{s}_{p}" pattern (country, store, product) built in the training loop.

models_dict['model_Finland_KaggleMart_Kaggle Mug']

In [None]:
# Persist the whole dictionary to 'models.npy'. Because models_dict is a Python
# dict rather than an array, np.save stores it as a pickled 0-d object array;
# read it back with np.load('models.npy', allow_pickle=True).item().
np.save('models.npy', models_dict)

## Acknowledgements
* https://machinelearningmastery.com/time-series-forecasting-long-short-term-memory-network-python/
* https://stackabuse.com/time-series-prediction-using-lstm-with-pytorch-in-python/
* https://www.kaggle.com/rnepal2/tps-how-does-the-very-new-neuralprophet-do

### Thank you for reading :)