# Fauna data LSTM

Import

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt

Load data

In [None]:
# Root of the local event cache (resolved relative to the current user's home).
EVENTS_CACHE_PATH = os.path.expanduser("~/EventCache")

# One recording, stored as two .npy files: the samples and their labels.
filename_data = os.path.join(
    EVENTS_CACHE_PATH,
    'RawLabelledData/dca6327d8fa8_20210330_raw_20210330T152242Z_ds_5_data.npy',
)
filename_labels = os.path.join(
    EVENTS_CACHE_PATH,
    'RawLabelledData/dca6327d8fa8_20210330_raw_20210330T152242Z_ds_5_labels.npy',
)

# `data` is indexed below as data[row_range, channel].
# NOTE(review): `labels` is loaded but not read by any cell visible in this notebook.
data, labels = (np.load(npy_path) for npy_path in (filename_data, filename_labels))

In [None]:
# Build a synthetic trace from a single channel: hand-picked event snippets
# separated by repeats of the same noise segment.
channel = 4

event1 = data[582400:583200, channel]
event2 = data[909000:910500, channel]
event3 = data[1801000:1802000, channel]
noise = data[979000:990000, channel]

# Layout: noise | event1 | noise | event2 | noise | event3 | noise | event2 | noise
data_chunk = np.concatenate(
    [noise, event1, noise, event2, noise, event3, noise, event2, noise]
)

# Per-sample target: 0 everywhere except inside four hard-coded index windows
# of data_chunk, which are set to 1.
labels_chunk = np.zeros_like(data_chunk, dtype=np.float64)
labels_chunk[np.r_[11300:11600, 23500:23800, 35600:36000, 47800:48400]] = 1

In [None]:
# Overlay the synthetic trace and its labels on one figure with two y-axes.
fig, ax1 = plt.subplots()

# Left axis (blue): the signal itself.
color = 'tab:blue'
ax1.plot(data_chunk, color=color)
ax1.set_ylabel('data', color=color)
ax1.tick_params(axis='y', color=color)

# Right axis (red): the 0/1 label trace, sharing the x-axis with the signal.
ax2 = ax1.twinx()
color = 'tab:red'
ax2.plot(labels_chunk, color=color)
ax2.set_ylabel('labelled', color=color)
ax2.tick_params(axis='y', color=color)

plt.show()

Data preprocessing. The samples and the labels are kept in two separate arrays (`data_chunk` and `labels_chunk`).

Transform and scale the data (currently disabled: the scaling code in the next cell is commented out)

In [None]:
# NOTE: scaling is currently disabled. Every line in this cell is commented
# out, so the cells that follow operate on the unscaled values.
# NOTE(review): the names X and y below are not defined in any cell visible
# in this notebook; they would have to be defined before re-enabling this.
#from sklearn.preprocessing import StandardScaler, MinMaxScaler
#mm = MinMaxScaler()
#ss = StandardScaler()

#X_ss = ss.fit_transform(X)
#y_mm = mm.fit_transform(y)

Define training and test data

In [None]:
# Chronological split: the first 40000 samples are used for training and
# everything after that is held out for testing. Labels are split at the
# same index so they stay aligned with the samples.
X_train, X_test = data_chunk[:40000], data_chunk[40000:]
y_train, y_test = labels_chunk[:40000], labels_chunk[40000:]

In [None]:
# Training inputs. Title and axis labels let the figure stand on its own;
# plt.show() keeps the Line2D repr out of the cell output.
plt.plot(X_train)
plt.title('Training input (X_train)')
plt.xlabel('sample index')
plt.ylabel('data')
plt.show()

In [None]:
# Held-out inputs. Title and axis labels let the figure stand on its own;
# plt.show() keeps the Line2D repr out of the cell output.
plt.plot(X_test)
plt.title('Test input (X_test)')
plt.xlabel('sample index')
plt.ylabel('data')
plt.show()

In [None]:
# Training targets. Title and axis labels let the figure stand on its own;
# plt.show() keeps the Line2D repr out of the cell output.
plt.plot(y_train)
plt.title('Training labels (y_train)')
plt.xlabel('sample index')
plt.ylabel('label')
plt.show()

In [None]:
# Held-out targets. Title and axis labels let the figure stand on its own;
# plt.show() keeps the Line2D repr out of the cell output.
plt.plot(y_test)
plt.title('Test labels (y_test)')
plt.xlabel('sample index')
plt.ylabel('label')
plt.show()

In [None]:
# Turn each array into a single-column matrix of shape (n_samples, 1):
# one feature per sample for the inputs, one target per sample for the labels.
X_train = X_train.reshape(-1, 1)
X_test = X_test.reshape(-1, 1)
y_train = y_train.reshape(-1, 1)
y_test = y_test.reshape(-1, 1)

In [None]:
# Sanity check: inputs and targets of each split must have matching shapes.
print("Training Shape", *(arr.shape for arr in (X_train, y_train)))
print("Testing Shape", *(arr.shape for arr in (X_test, y_test)))

Import PyTorch

In [None]:
import torch #pytorch
import torch.nn as nn
from torch.autograd import Variable

Convert numpy arrays to tensors and variables

In [None]:
# Cast the inputs to int32 before handing them to PyTorch.
# NOTE(review): if the recording holds floating-point samples this cast drops
# the fractional part -- confirm the dtype of `data`.
X_train = X_train.astype(np.int32)
X_test = X_test.astype(np.int32)

# Build float32 tensors directly. The torch.autograd.Variable wrapper is
# deprecated (tensors carry autograd state themselves), and torch.tensor with
# an explicit dtype replaces the legacy torch.Tensor(ndarray) constructor.
X_train_tensors = torch.tensor(X_train, dtype=torch.float32)
X_test_tensors = torch.tensor(X_test, dtype=torch.float32)

y_train_tensors = torch.tensor(y_train, dtype=torch.float32)
y_test_tensors = torch.tensor(y_test, dtype=torch.float32)

Prepare input for LSTM

In [None]:
# Insert a length-1 time axis so the layout is (rows, time steps, features),
# which is what a batch_first LSTM takes as input.
# NOTE(review): with a single time step per row the LSTM sees every sample as
# its own sequence and carries no state from one sample to the next.
X_train_tensors_final = X_train_tensors.unsqueeze(1)
X_test_tensors_final = X_test_tensors.unsqueeze(1)

In [None]:
# Sanity check of the tensors that are fed to the model and the loss.
print("Training Shape", *(t.shape for t in (X_train_tensors_final, y_train_tensors)))
print("Testing Shape", *(t.shape for t in (X_test_tensors_final, y_test_tensors)))

# Define model

In [None]:
class LSTM1(nn.Module):
    """LSTM followed by a small fully connected head.

    The final hidden state of the last LSTM layer is passed through
    ReLU -> Linear(hidden_size, 128) -> ReLU -> Linear(128, num_classes),
    giving one output row per input sequence.

    Args:
        num_classes: Width of the final linear layer.
        input_size: Number of features per time step.
        hidden_size: Number of features in the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        seq_length: Sequence length. Stored as an attribute only; the forward
            pass does not read it.
    """

    def __init__(self, num_classes, input_size, hidden_size, num_layers, seq_length):
        super().__init__()
        self.num_classes = num_classes  # number of outputs per sequence
        self.num_layers = num_layers  # number of stacked LSTM layers
        self.input_size = input_size  # features per time step
        self.hidden_size = hidden_size  # size of the hidden state
        self.seq_length = seq_length  # kept for reference, unused in forward()

        self.lstm = nn.LSTM(input_size=input_size, hidden_size=hidden_size,
                            num_layers=num_layers, batch_first=True)
        self.fc_1 = nn.Linear(hidden_size, 128)  # first fully connected layer
        self.fc = nn.Linear(128, num_classes)  # output layer

        self.relu = nn.ReLU()

    def forward(self, x):
        """Run the network on a batch of sequences.

        Args:
            x: Input tensor laid out as (batch, time steps, input_size),
                matching the ``batch_first=True`` LSTM.

        Returns:
            Tensor of shape (batch, num_classes).
        """
        # No explicit initial state is passed: nn.LSTM then starts from zero
        # hidden and cell states created to match the input, instead of
        # hand-built CPU/float32 tensors that break once the model is moved
        # to another device or dtype.
        _, (hn, _) = self.lstm(x)

        # hn is (num_layers, batch, hidden_size). Take the last layer only so
        # the output keeps one row per sequence. Flattening with
        # view(-1, hidden_size) would stack all layers along the batch axis
        # and return num_layers * batch rows whenever num_layers > 1.
        out = self.relu(hn[-1])
        out = self.fc_1(out)
        out = self.relu(out)
        out = self.fc(out)
        return out

Hyperparameters

In [None]:
# --- optimisation ---
num_epochs = 1_000  # number of full-batch training iterations
learning_rate = 1e-3  # step size passed to the optimizer

# --- model ---
input_size, hidden_size, num_layers = 1, 2, 1  # features per step, hidden-state width, stacked LSTM layers
num_classes = 1  # width of the model's output layer

Instantiate the LSTM1 model

In [None]:
# Seed PyTorch's RNG so the randomly initialised weights (and therefore the
# training run) are reproducible after Restart & Run All.
torch.manual_seed(0)

# Keyword arguments make the mapping to LSTM1's parameters explicit.
# seq_length is the size of the time axis of the prepared training tensor.
lstm1 = LSTM1(
    num_classes=num_classes,
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    seq_length=X_train_tensors_final.shape[1],
)

In [None]:
# Print the module's text summary to check the layers that were built.
print(lstm1)

Loss function and optimizer

In [None]:
# Mean-squared error between the model output and the target values.
# NOTE(review): the targets built in this notebook are 0/1 labels; a binary
# cross-entropy loss may be a better fit -- would also require changing how
# the predictions are interpreted.
criterion = nn.MSELoss()

# Adam over all trainable parameters of the model.
optimizer = torch.optim.Adam(params=lstm1.parameters(), lr=learning_rate)

# Train the model

In [None]:
# Full-batch training: every iteration runs the whole training tensor through
# the model and takes one optimizer step.
for epoch in range(num_epochs):
    # Clear the gradients left over from the previous iteration.
    optimizer.zero_grad()

    # Forward pass. Call the module itself rather than .forward() so that
    # any registered hooks are run as well.
    outputs = lstm1(X_train_tensors_final)

    # Loss between predictions and targets.
    loss = criterion(outputs, y_train_tensors)

    # Backpropagation: compute the gradient of the loss w.r.t. the parameters.
    loss.backward()

    # Update the parameters from the computed gradients.
    optimizer.step()

    # Report progress every 100 epochs.
    if epoch % 100 == 0:
        print("Epoch: %d, loss: %1.5f" % (epoch, loss.item()))

# Run the model

Convert data

In [None]:
# Convert the complete synthetic trace (training and test part) for inference.
# The int32 cast mirrors the conversion applied to the training inputs, so the
# model sees the same value representation it was trained on.
# torch.tensor with an explicit dtype replaces the deprecated Variable wrapper
# and the legacy torch.Tensor(ndarray) constructor.
df_X_ss = torch.tensor(data_chunk.astype(np.int32), dtype=torch.float32)

# Lay out as (rows, time steps, features) = (n_samples, 1, 1).
df_X_ss = df_X_ss.reshape(-1, 1, 1)

Show predictions

In [None]:
# Inference only: disable gradient tracking so no autograd graph is built for
# the whole trace (saves memory, and the result converts to NumPy directly).
with torch.no_grad():
    train_predict = lstm1(df_X_ss)  # forward pass over the full trace
data_predict = train_predict.numpy()  # NumPy array for plotting
dataY_plot = labels_chunk

plt.figure(figsize=(10,6))
# Dashed red line marks the end of the training set: everything to the right
# was not seen during training.
plt.axvline(x=40000, c='r', linestyle='--')

plt.plot(dataY_plot, label='Actuall Data')  # ground-truth labels
plt.plot(data_predict, label='Predicted Data')  # model output
plt.title('Time-Series Prediction')
plt.legend()
plt.show()