# LSTM using raw data

# Import

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torch.nn import Sigmoid, Softmax
from labelled_data.tools.load_data import data_loader
from labelled_data.tools.load_data import data_generator
# Directory prefix for the figures written by the plt.savefig calls further down.
# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
figure_path = '/home/thoresen/Code/deep_learning/02456-2021-project/figures/'

*** FP-modules version 2.10.13 ***


# Raw data or chunked data

In [2]:
# Forwarded to data_loader(chunks=...). When True, the training and validation
# loops additionally keep only the middle half (25%-75%) of each file's samples.
chunks = False

# Define data loader

In [3]:
# Load the train / test / validation splits (inputs X_*, targets y_*).
# NOTE(review): no_files is hard-coded to 10 here, while the training cell
# reloads the data with the `no_files` hyperparameter — confirm which is intended.
X_train, X_test, X_valid, y_train, y_test, y_valid = data_loader(chunks=chunks, no_files=10)
# Iterator over the training split; each step yields a (data, labels) pair.
train_gen = data_generator(X_train, y_train)

In [4]:
# Pull the first (data, labels) pair from the training generator.
data, labels = next(train_gen)

# Define model

In [5]:
class LSTM(nn.Module):
    """Stacked LSTM followed by a linear layer and a sigmoid.

    Every time step of every sequence is scored independently, so the model
    returns one row of per-class probabilities per (sequence, time step) pair.

    Args:
        num_classes: Number of output units of the final linear layer.
        input_size: Number of features per time step.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes
        self.num_layers = num_layers
        self.input_size = input_size
        self.hidden_size = hidden_size

        # batch_first=True: inputs are laid out as (batch, seq_len, input_size).
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.linear_out = nn.Linear(hidden_size, num_classes)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid scores of shape ``(batch * seq_len, num_classes)``.

        Args:
            x: Input tensor of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor with values in (0, 1); rows are ordered batch-major, i.e.
            row ``b * seq_len + t`` belongs to time step ``t`` of sequence ``b``.
        """
        # Only the per-step outputs are needed; the final (h, c) states are dropped.
        x, _ = self.lstm(x)
        # With batch_first=True the LSTM output is a transposed (non-contiguous)
        # tensor, so .view() raises as soon as batch > 1 and seq_len > 1.
        # reshape() copies only when it has to and handles every layout.
        x = x.reshape(-1, self.hidden_size)
        x = self.linear_out(x)
        return self.sigmoid(x)

## Hyperparameters

In [6]:
num_epochs = 50 # number of passes over the training files
learning_rate = 0.001 # step size handed to the optimizer

input_size = 1 # number of features
hidden_size = 40 # number of features in hidden state
num_layers = 2 # number of stacked lstm layers

num_classes = 1 # number of output units (a single sigmoid score)

no_files = 1 # number of data files requested from data_loader in the training cell

## Initialize the model

In [7]:
# Instantiate the network from the hyperparameters defined above.
lstm = LSTM(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
)

## Loss function and optimizer

In [8]:
criterion = torch.nn.BCELoss() # binary cross-entropy; expects probabilities, which the model's sigmoid provides
#criterion = torch.nn.MSELoss() # alternative: mean squared error

# Adam over all model parameters with the learning rate set above.
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)
#optimizer = torch.optim.SGD(lstm.parameters(), lr=0.01, momentum=0.5)

# Train the model

In [9]:
def evaluate_result(output, threshold=0.7):
    """Binarise predicted scores into hard 0/1 labels.

    Args:
        output: Tensor of predicted scores.
        threshold: Scores strictly greater than this value become 1, every
            other score becomes 0. Defaults to 0.7.

    Returns:
        A new tensor with the shape and dtype of ``output`` containing only
        0s and 1s. ``output`` itself is left unmodified.
    """
    # Build a fresh tensor instead of assigning through boolean masks: masked
    # assignment overwrote the caller's tensor in place and left scores that
    # were exactly equal to the threshold unbinarised.
    return (output > threshold).to(output.dtype)

In [None]:
# Per-epoch history; the plotting cells further down read these lists.
epoch_training_loss = []
epoch_validation_loss = []
epoch_training_acc = []
epoch_validation_acc = []

X_train, X_test, X_valid, y_train, y_test, y_valid = data_loader(chunks=chunks, no_files=no_files)


def _middle_half(sequence):
    """Return the central part of ``sequence``, from 25% to 75% of its length."""
    length = len(sequence)
    return sequence[int(length * 0.25):int(length * 0.75)]


for epoch in range(num_epochs):
    training_loss = 0.0
    validation_loss = 0.0
    training_correct = 0.0
    training_all = 0
    validation_correct = 0.0
    validation_all = 0

    # Fresh generators every epoch so each pass starts from the first file.
    train_gen = data_generator(X_train, y_train)
    # Not consumed in this loop; the "Run the model" cell draws from it afterwards.
    test_gen = data_generator(X_test, y_test)
    valid_gen = data_generator(X_valid, y_valid)

    # ---- Validation: metrics only ----
    # no_grad() and no backward(): otherwise validation gradients accumulate in
    # the parameters' .grad and get applied by the first optimizer step below.
    lstm.eval()
    with torch.no_grad():
        for file_idx in range(len(X_valid)):
            print(f'validating file number {file_idx}')

            data, labels = next(valid_gen)
            if chunks:
                data = _middle_half(data)
                labels = _middle_half(labels)

            outputs = lstm(data)
            validation_loss += criterion(outputs, labels).item()

            # .item() keeps the running counts as plain floats for printing/plotting.
            validation_correct += (evaluate_result(outputs) == labels).float().sum().item()
            validation_all += len(labels)

    # ---- Training: one optimizer step per file ----
    lstm.train()
    for file_idx in range(len(X_train)):
        print(f'training file number {file_idx}')

        data, labels = next(train_gen)
        if chunks:
            data = _middle_half(data)
            labels = _middle_half(labels)

        optimizer.zero_grad()  # start every step from clean gradients
        outputs = lstm(data)
        loss = criterion(outputs, labels)
        loss.backward()  # back-propagate to fill the parameter gradients
        optimizer.step()  # apply the parameter update
        training_loss += loss.item()

        # Score a detached copy: accuracy needs no autograd history.
        training_correct += (evaluate_result(outputs.detach()) == labels).float().sum().item()
        training_all += len(labels)

    validation_acc = validation_correct / validation_all
    training_acc = training_correct / training_all
    if epoch % 10 == 0:
        print("Epoch: %d, training loss: %1.5f, validation loss: %1.5f, training acc: %1.5f, , validation acc: %1.5f" % (epoch, training_loss/len(X_train), validation_loss/len(X_valid), training_acc, validation_acc))

    # Losses are averaged per file; accuracies per sample.
    epoch_validation_loss.append(validation_loss/len(X_valid))
    epoch_validation_acc.append(validation_acc)
    epoch_training_loss.append(training_loss/len(X_train))
    epoch_training_acc.append(training_acc)

validating file number 0


In [None]:
# Rebuild the training generator so the plotting cell below starts again from the first file.
train_gen = data_generator(X_train, y_train)

In [None]:
# Plot the next training file: the raw signal plus, when present, its labels.
plt.figure(figsize=(40, 5))
data, labels = next(train_gen)

# Index 0 along the last two axes leaves one value per sample.
data = data.detach().numpy()[:, 0, 0]
plt.plot(data)

# Overlay the labels only when the file contains at least one positive sample.
# NOTE(review): 1330 presumably lifts the 0/1 labels to the signal's value range — confirm.
if labels.sum() > 0:
    plt.plot(labels * 1330)

plt.show()

In [None]:
# Training vs. validation loss, one point per epoch (x axis = epoch index 0..N-1).
plt.plot(epoch_validation_loss, label='validation loss')
plt.plot(epoch_training_loss, label='training loss')
plt.ylabel('Loss')
plt.xlabel('Epochs')
plt.legend()
plt.savefig(figure_path + 'validation.png', bbox_inches='tight')
plt.show()

In [None]:
# Training vs. validation accuracy, one point per epoch (x axis = epoch index 0..N-1).
plt.plot(epoch_validation_acc, label='validation acc')
plt.plot(epoch_training_acc, label='training acc')
plt.ylabel('Accuracy')
plt.xlabel('Epochs')
plt.legend()
plt.savefig(figure_path + 'accuracy.png', bbox_inches='tight')
plt.show()

In [None]:
# Persist only the learned parameters (the state_dict), not the module object itself.
# NOTE(review): relative path — presumably ../models must already exist relative to the working directory; confirm.
torch.save(lstm.state_dict(), '../models/model')

# Run the model

In [None]:
# Run the trained model on the next test file and compare with the true labels.
data, labels = next(test_gen)

lstm.eval()
with torch.no_grad():  # inference only: no autograd graph needed
    train_predict = lstm(data)

# Threshold into a fresh array: scores strictly above 0.5 become 1, the rest 0.
# A single comparison leaves no value unbinarised and does not overwrite
# train_predict through shared memory.
# NOTE(review): evaluate_result() uses 0.7 during training — confirm 0.5 is intended here.
data_predict = train_predict.detach().numpy()
data_predict = (data_predict > 0.5).astype(data_predict.dtype)

dataY_plot = labels

plt.figure(figsize=(10, 6))

plt.plot(dataY_plot, label='Actual Data')
plt.plot(data_predict, label='Predicted Data')
plt.title('Time-Series Prediction')
plt.legend()
plt.show()