# LSTM using raw data

# Import

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch #pytorch
import torch.nn as nn
from torch.autograd import Variable
import fpmodules.tools as tools
from torch.nn.functional import softmax, sigmoid
from labelled_data.tools.constants import *
from labelled_data.tools.data_loader import data_loader

# Define data loader

In [None]:
# Create the data generator from the project's `data_loader` helper.
# NOTE(review): `chunks=False` presumably selects raw (un-chunked) data, going by
# the "raw or event" note next to the `chunks` hyper-parameter — confirm against
# labelled_data.tools.data_loader.
data_generator = data_loader(chunks=False)

In [None]:
# Pull the first item from the generator; the builtin next() is the idiomatic
# way to advance an iterator (instead of calling the __next__ dunder directly).
data = next(data_generator)

In [None]:
# Show the shape of every entry delivered by the loader.
for name, array in data.items():
    print(name, array.shape)

# Define model

In [None]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid, applied per time step.

    Args:
        num_classes: Number of output units of the final linear layer.
        input_size: Number of features per time step of the input.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes  # number of output units
        self.num_layers = num_layers  # number of stacked LSTM layers
        self.input_size = input_size  # features per time step
        self.hidden_size = hidden_size  # size of the hidden state

        # batch_first=True: the input is laid out as (batch, seq, feature).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)

        # Maps every hidden state to `num_classes` outputs.
        self.linear_out = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return sigmoid activations for every time step of every sequence.

        Args:
            x: Input tensor accepted by ``nn.LSTM`` with ``batch_first=True``.

        Returns:
            Tensor of shape ``(N, num_classes)`` with values in [0, 1], where
            ``N`` is the number of hidden-state rows after flattening every
            leading dimension of the LSTM output.
        """
        # Only the per-step outputs are needed; the final (h, c) state is unused.
        x, _ = self.lstm(x)
        # reshape (not view): the batch_first output is not guaranteed to be
        # contiguous, and view() raises on incompatible strides.
        x = x.reshape(-1, self.hidden_size)
        x = self.linear_out(x)
        # torch.sigmoid keeps the class independent of a bare `sigmoid` name
        # having been imported into the module namespace.
        return torch.sigmoid(x)

## Hyper parameters

In [None]:
# --- Model architecture ---
input_size = 1  # features per time step fed to the LSTM
hidden_size = 10  # width of the LSTM hidden state
num_layers = 2  # stacked LSTM layers
num_classes = 1  # output units of the final linear layer

# --- Optimisation ---
num_epochs = 100  # passes over the training files
learning_rate = 0.1  # step size handed to the optimizer

# --- Data ---
# NOTE(review): `chunks` is forwarded to data_loader; False presumably means
# "raw" rather than "event" data — confirm against the loader.
chunks = False
no_files = 10  # items drawn from the data generator in each epoch

 ## Initialize

In [None]:
# Instantiate the network from the hyper-parameters defined above.
lstm = LSTM(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

## Loss function and optimizer

In [None]:
# Adam updates every parameter of the network with the configured step size.
optimizer = torch.optim.Adam(params=lstm.parameters(), lr=learning_rate)
# Mean-squared error between the sigmoid outputs and the targets.
# NOTE(review): if the targets are binary labels, a binary cross-entropy loss
# may be a better fit — left unchanged here.
criterion = nn.MSELoss()

# Train the model

In [None]:
def evaluate_result(output):
    """Binarise model outputs at a 0.5 threshold.

    Values strictly greater than 0.5 become 1 and everything else (including
    exactly 0.5) becomes 0, so the result only ever contains 0s and 1s.

    Args:
        output: Torch tensor or NumPy array of scores.

    Returns:
        A new tensor/array with the same shape and dtype as ``output``; the
        argument itself is left untouched.
    """
    above = output > 0.5
    if torch.is_tensor(output):
        return above.to(output.dtype)
    return np.asarray(above).astype(np.asarray(output).dtype)

In [None]:
# Per-epoch history (plain Python numbers), used for plotting after training.
epoch_training_loss = []
epoch_validation_loss = []
epoch_training_acc = []
epoch_validation_acc = []

for epoch in range(num_epochs):
    # Running totals over all files processed in this epoch.
    training_loss = 0.0
    validation_loss = 0.0
    training_correct = 0
    training_all = 0
    validation_correct = 0
    validation_all = 0

    # A new generator is created for every epoch; `no_files` items are drawn
    # from it below.
    data_generator = data_loader(chunks=chunks)

    for _ in range(no_files):
        data = next(data_generator)

        # ---- Validation: metrics only ----
        # No backward pass here: gradients of the validation loss would be
        # accumulated into the parameters and applied by the next
        # optimizer.step(), i.e. the model would be fitted to validation data.
        lstm.eval()
        with torch.no_grad():
            outputs = lstm(data['X_valid'])
            loss = criterion(outputs, data['y_valid'])
            matches = evaluate_result(outputs) == data['y_valid']
        validation_loss += loss.item()
        validation_correct += matches.sum().item()
        # Count every compared element so numerator and denominator agree.
        validation_all += matches.numel()

        # ---- Training step ----
        lstm.train()
        optimizer.zero_grad()  # clear gradients left over from the previous step
        outputs = lstm(data['X_train'])
        loss = criterion(outputs, data['y_train'])
        loss.backward()  # compute gradients of the training loss
        optimizer.step()  # update the parameters from those gradients

        training_loss += loss.item()
        # Accuracy bookkeeping does not need autograd tracking.
        with torch.no_grad():
            matches = evaluate_result(outputs.detach()) == data['y_train']
        training_correct += matches.sum().item()
        training_all += matches.numel()

    validation_acc = 100 * validation_correct / validation_all
    training_acc = 100 * training_correct / training_all
    print("Epoch: %d, training loss: %1.5f, validation loss: %1.5f, , training acc: %1.5f, , validation acc: %1.5f" % (epoch, training_loss, validation_loss, training_acc, validation_acc))

    epoch_validation_loss.append(validation_loss)
    epoch_validation_acc.append(validation_acc)
    epoch_training_loss.append(training_loss)
    epoch_training_acc.append(training_acc)

In [None]:
# Plot the summed validation and training loss of every epoch on one figure.
epoch_axis = np.linspace(0, num_epochs - 1, num_epochs)
for curve, caption in ((epoch_validation_loss, 'validation loss'),
                       (epoch_training_loss, 'training loss')):
    plt.plot(epoch_axis, curve, label=caption)
plt.legend()
plt.show()

# Run the model

In [None]:
# Predict on the test split of `data` (whichever item was loaded last).
# Inference runs in eval mode and without autograd tracking.
lstm.eval()
with torch.no_grad():
    train_predict = lstm(data['X_test'])  # forward pass on the test inputs

# Convert to NumPy and binarise at 0.5 (values of exactly 0.5 map to 0) on a
# copy, so the raw scores in `train_predict` stay intact.
data_predict = train_predict.detach().cpu().numpy()
data_predict = (data_predict > 0.5).astype(data_predict.dtype)

dataY_plot = data['y_test']

plt.figure(figsize=(10, 6))
#plt.axvline(x=40000, c='r', linestyle='--') #size of the training set

plt.plot(dataY_plot, label='Actual Data')  # ground-truth targets
plt.plot(data_predict, label='Predicted Data')  # thresholded predictions
plt.title('Time-Series Prediction')
plt.legend()
plt.show()