# Fauna data LSTM

# Import

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
import torch #pytorch
import torch.nn as nn
from torch.autograd import Variable
import fpmodules.tools as tools
from torch.nn.functional import softmax

# Load data

In [None]:
# Directory holding the cached event recordings, resolved under the user's home directory.
EVENTS_CACHE_PATH = os.path.expanduser("~/EventCache")
# Paths of one recording's data file and of its matching labels file.
filename_data = os.path.join(EVENTS_CACHE_PATH, 'RawLabelledData/dca6327d8fa8_20210330_raw_20210330T152242Z_ds_1_data.npy')
filename_labels = os.path.join(EVENTS_CACHE_PATH, 'RawLabelledData/dca6327d8fa8_20210330_raw_20210330T152242Z_ds_1_labels.npy')

# Only column 1 of each stored 2-D array is kept.
# NOTE(review): presumably column 0 is a timestamp/index -- confirm against the file format.
data = np.load(filename_data)[:,1]
labels = np.load(filename_labels)[:,1]

In [None]:
# Overlay one window of the raw signal (left y-axis) and its labels (right y-axis).
window = slice(4000000, 4150000)

fig, ax1 = plt.subplots()
fig.set_figwidth(25)

# Left axis: raw signal.
data_color = 'tab:blue'
ax1.plot(data[window], color=data_color)
ax1.set_ylabel('data', color=data_color)
ax1.tick_params(axis='y', color=data_color)

# Right axis: labels, drawn on a twin axis that shares the x-axis.
ax2 = ax1.twinx()
label_color = 'tab:red'
ax2.plot(labels[window], color=label_color)
ax2.set_ylabel('labelled', color=label_color)
ax2.tick_params(axis='y', color=label_color)

plt.show()

Data preprocessing: extract the data and the labels separately from a single dataframe.

Transform and scale output

In [None]:
#from sklearn.preprocessing import StandardScaler, MinMaxScaler
#mm = MinMaxScaler()
#ss = StandardScaler()

#X_ss = ss.fit_transform(X)
#y_mm = mm.fit_transform(y)

In [None]:
def split_datasets(data, labels):
    """Cut ``data`` and ``labels`` into three consecutive, unshuffled pieces.

    The first 80 % of the samples form the training split, the following
    10 % the test split, and whatever remains the validation split.  Both
    fractions are computed from ``len(data)`` and truncated to integers.

    Args:
        data: sliceable sequence of samples.
        labels: sliceable sequence aligned with ``data``.

    Returns:
        Tuple ``(X_train, X_test, X_valid, y_train, y_test, y_valid)``.
    """
    n_samples = len(data)
    train_end = int(n_samples*0.8)
    test_end = train_end + int(n_samples*0.1)

    # Same three windows are applied to the samples and to the labels.
    windows = (
        slice(0, train_end),         # training
        slice(train_end, test_end),  # test
        slice(test_end, None),       # validation
    )
    X_parts = tuple(data[w] for w in windows)
    y_parts = tuple(labels[w] for w in windows)

    return (*X_parts, *y_parts)

def _as_column_tensor(values, dtype):
    """Cast ``values`` to ``dtype`` and return them as a float32 tensor of shape (N, 1)."""
    column = np.reshape(values, (values.size, 1)).astype(dtype)
    return torch.tensor(column, dtype=torch.float32)


def _as_lstm_input(values):
    """Return ``values`` (cast to int32) as a float32 tensor of shape (N, 1, 1)."""
    column = _as_column_tensor(values, np.int32)
    # shape is (batch size, sequence length, input size)
    return torch.reshape(column, (column.shape[0], 1, column.shape[1]))


def format_datasets(X_train, X_test, X_valid, y_train, y_test, y_valid):
    """Convert the numpy splits into float32 tensors ready for the LSTM.

    Inputs are cast to int32 and labels to int16 before conversion (as the
    original code did), so any fractional part is truncated.  Plain tensors
    are returned; the deprecated ``torch.autograd.Variable`` wrapper is no
    longer used because tensors carry autograd support themselves.

    Args:
        X_train, X_test, X_valid: numpy arrays of samples.
        y_train, y_test, y_valid: numpy arrays of labels.

    Returns:
        Tuple ``(X_train, X_test, X_valid, y_train, y_test, y_valid)`` where
        each X tensor has shape (N, 1, 1) and each y tensor has shape (N, 1).
    """
    return (
        _as_lstm_input(X_train),
        _as_lstm_input(X_test),
        _as_lstm_input(X_valid),
        _as_column_tensor(y_train, np.int16),
        _as_column_tensor(y_test, np.int16),
        _as_column_tensor(y_valid, np.int16),
    )

def data_loader():
    """Yield one dict of train/test/validation tensors per labelled recording.

    Every file in ``<EVENTS_CACHE_PATH>/RawLabelledData`` whose name ends in
    ``_data.npy`` is paired with the file of the same stem ending in
    ``_labels.npy`` (the naming used by ``filename_data`` / ``filename_labels``).
    Previously the discovered path was ignored and the module-level
    ``filename_data`` / ``filename_labels`` were reloaded on every iteration,
    so each item was the same recording.

    Yields:
        dict with keys ``'X_train'``, ``'X_test'``, ``'X_valid'``,
        ``'y_train'``, ``'y_test'``, ``'y_valid'``.

    Raises:
        FileNotFoundError: (from ``np.load``) if a data file has no matching
            labels file.

    Note:
        This is a plain generator: it is exhausted after one pass over the
        directory.
    """
    raw_dir = os.path.join(EVENTS_CACHE_PATH, 'RawLabelledData')
    data_suffix = '_data.npy'
    labels_suffix = '_labels.npy'

    # Sorted so the recordings are visited in a reproducible order.
    for file in sorted(os.listdir(raw_dir)):
        if not file.endswith(data_suffix):
            continue  # labels files are reached through their data file
        data_path = os.path.join(raw_dir, file)
        labels_path = os.path.join(raw_dir, file[:-len(data_suffix)] + labels_suffix)

        # Only column 1 of each stored array is used.
        data = np.load(data_path)[:,1]
        labels = np.load(labels_path)[:,1]

        X_train, X_test, X_valid, y_train, y_test, y_valid = split_datasets(data, labels)
        X_train_tensors_final, X_test_tensors_final, X_valid_tensors_final, y_train_tensors, y_test_tensors, y_valid_tensors = format_datasets(X_train, X_test, X_valid, y_train, y_test, y_valid)

        # Named `dataset` rather than `dict` so the builtin is not shadowed.
        dataset = {
            'X_train': X_train_tensors_final,
            'X_test': X_test_tensors_final,
            'X_valid': X_valid_tensors_final,
            'y_train': y_train_tensors,
            'y_test': y_test_tensors,
            'y_valid': y_valid_tensors
        }
        yield dataset

Define training and test data

In [None]:
# Create the generator; nothing is read from disk until the first item is requested.
data_generator = data_loader()

In [None]:
# Pull the first dataset dict from the generator.
# NOTE: this rebinds `data`, which held the raw numpy signal until now.
data = next(data_generator)

In [None]:
# Show which splits the loaded dataset dict contains.
data.keys()

In [None]:
# Report the input/target tensor shapes of every split.
for title, split in (("Training", "train"), ("Validation", "valid"), ("Testing", "test")):
    print(title + " Shape", data['X_' + split].shape, data['y_' + split].shape)

Prepare input for LSTM

# Define model

In [None]:
class LSTM(nn.Module):
    """LSTM followed by a linear layer that emits one probability row per time step.

    Args:
        num_classes: number of output units of the final linear layer.
        input_size: number of features per time step.
        hidden_size: number of features in the LSTM hidden state.
        num_layers: number of stacked LSTM layers.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers):
        super().__init__()
        self.num_classes = num_classes #number of classes
        self.num_layers = num_layers #number of layers
        self.input_size = input_size #input size
        self.hidden_size = hidden_size #hidden state

        # batch_first=True: inputs and outputs are (batch, seq_len, features)
        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                          num_layers=num_layers, batch_first=True) #lstm

        self.linear_out = nn.Linear(hidden_size, num_classes) #fully connected last layer

    def forward(self,x):
        """Run the network on ``x``.

        Args:
            x: tensor of shape (batch, seq_len, input_size).

        Returns:
            Tensor of shape (batch * seq_len, num_classes) with values in
            (0, 1).  For ``num_classes > 1`` each row is a softmax
            distribution; for ``num_classes == 1`` it is a sigmoid
            probability.
        """
        # Propagate input through LSTM; the final hidden/cell states are not needed
        x, _ = self.lstm(x)
        # reshape (not view) also handles a non-contiguous LSTM output
        x = x.reshape(-1, self.hidden_size) #flatten batch and time for the Dense layer
        x = self.linear_out(x) #Dense layer -> logits

        if self.num_classes == 1:
            # A softmax over a single unit is identically 1.0 (zero gradient),
            # so the single-output case uses its one-logit analogue instead.
            return torch.sigmoid(x)
        # Explicit class axis; relying on the implicit dim is deprecated.
        return softmax(x, dim=1)

Hyperparameters

In [None]:
# Training schedule
num_epochs = 100 # number of training epochs
learning_rate = 0.001 # learning rate handed to the optimizer

# Model dimensions (passed to the LSTM constructor)
input_size = 1 # number of features
hidden_size = 50 # number of features in hidden state
num_layers = 1 # number of stacked lstm layers

num_classes = 1 # number of output classes (size of the model's final linear layer)

Instantiate the LSTM model

In [None]:
# Build the model from the hyperparameters defined above.
lstm = LSTM(num_classes, input_size, hidden_size, num_layers) #our lstm class

Loss function and optimizer

In [None]:
criterion = torch.nn.MSELoss()    # mean-squared error for regression
# Adam optimises every parameter of `lstm` with the configured learning rate.
optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate)

In [None]:
# List every entry of the current dataset dict together with its tensor shape.
for name, tensor in data.items():
    print(name, tensor.shape)

# Train the model

In [None]:
# Per-epoch loss histories. These must be lists: one total is appended per epoch.
epoch_training_loss = []
epoch_validation_loss = []
no_files = 1

# Load the first `no_files` datasets once, before training. Pulling a fresh item
# from a generator inside the epoch loop would exhaust it after a few epochs
# (StopIteration) and would re-read the files from disk every time.
datasets = [dataset for _, dataset in zip(range(no_files), data_loader())]
if not datasets:
    raise RuntimeError("data_loader() produced no datasets to train on")

for epoch in range(num_epochs):
    training_loss = 0
    validation_loss = 0
    for dataset in datasets:
        # Validation pass: evaluation mode, no gradient tracking, no backward().
        lstm.eval()
        with torch.no_grad():
            outputs = lstm(dataset['X_valid']) #forward pass
            loss = criterion(outputs, dataset['y_valid'])
        validation_loss += loss.item()

        # Training pass.
        lstm.train()
        optimizer.zero_grad() #clear gradients left over from the previous step
        outputs = lstm(dataset['X_train']) #forward pass

        # obtain the loss function
        loss = criterion(outputs, dataset['y_train'])
        loss.backward() #backprop: compute gradients of the loss
        training_loss += loss.item()

        optimizer.step() #update the parameters from the gradients

    # `loss` is the training loss of the last dataset processed in this epoch
    print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

    epoch_validation_loss.append(validation_loss)
    epoch_training_loss.append(training_loss)

# Run the model

Convert data

In [None]:
# `data` is a dict of tensors by the time this cell runs (it was rebound when the
# data loader was first used), so the raw signal is read again from its file.
raw_signal = np.load(filename_data)[:,1]

# int32 cast kept from the original pipeline, then converted to a float32 tensor
# (plain tensor; the deprecated Variable wrapper is not needed).
df_X_ss = torch.tensor(raw_signal.astype(np.int32), dtype=torch.float32)

#reshaping the dataset to (batch size, sequence length, input size)
df_X_ss = torch.reshape(df_X_ss, (df_X_ss.shape[0], 1, 1))

Show predictions

In [None]:
# Inference only: evaluation mode and no autograd graph (the input is the whole
# series, so recording gradients would waste a lot of memory).
lstm.eval()
with torch.no_grad():
    train_predict = lstm(df_X_ss)#forward pass
data_predict = train_predict.numpy() #numpy conversion
dataY_plot = labels

plt.figure(figsize=(10,6)) #plotting
# End of the training split: the first 80 % of the series, as in split_datasets
plt.axvline(x=int(len(dataY_plot)*0.8), c='r', linestyle='--') #size of the training set

plt.plot(dataY_plot, label='Actuall Data') #actual plot
plt.plot(data_predict, label='Predicted Data') #predicted plot
plt.title('Time-Series Prediction')
plt.legend()
plt.show()