In [1]:
%matplotlib inline

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

plt.style.use('ggplot')

In [2]:
# Load the data
ais_data = pd.read_csv('set1.csv')

ais_data.head()

Unnamed: 0,OBJECT_ID,VID,SEQUENCE_DTTM,LAT,LON,SPEED_OVER_GROUND,COURSE_OVER_GROUND
0,1,100008,14:00:00,36.90685,-76.089022,1,1641
1,2,100015,14:00:00,36.95,-76.026834,11,2815
2,3,100016,14:00:00,36.906783,-76.089084,0,2632
3,4,100019,14:00:00,37.003,-76.283167,148,2460
4,5,100016,14:00:01,36.906783,-76.089084,0,2632


There are 20 unique ships accounted for in the dataset. Now, we will first split the dataset into test, validate and train sets. Since the total number of datapoints in this dataset is 13714, we will split the data on the basis of the time value - `SEQUENCE_DTTM` and then use the train-validate-test sets to train the prediction models, validate it and then test it. We will use the first 80% of the data for training, the next 15% for validation and the last 5% for testing.

Here, we will use the following three prediction models on this dataset:
    
      1. LSTM
      2. LSTNet
      3. Transformer


In [3]:
# Dropping the `OBJECT_ID` column
ais_data = ais_data.drop('OBJECT_ID', axis=1)

In [4]:
# Returning the last time value present in the `SEQUENCE_DTTM` column
ais_data["SEQUENCE_DTTM"].iloc[-1]

'17:59:58'

Here, we split the 'SEQUENCE_DTTM' column into `HOUR` and `TIME` columns. This will help us in the future when we convert the dataset into a tensor and feed it into the LSTM model. We will also drop the `SEQUENCE_DTTM` column as it is no longer required. Further, this conversion would allow us to create a tensor that is of the shape (NxCxL) where N is the number of samples, C is the number of channels and L is the length of the sequence. This is the format that the LSTM model requires.    

In [5]:
# Splitting the `SEQUENCE DTTM` column into separate columns for 'hour' and a separate column to store the remaining time values in seconds 
ais_data["HOUR"] = ais_data["SEQUENCE_DTTM"].apply(lambda x: int(x.split(":")[0]))
ais_data["TIME"] = ais_data["SEQUENCE_DTTM"].apply(lambda x: int(x.split(":")[0])*3600 + int(x.split(":")[1])*60 + int(x.split(":")[2]))
# In the time column, we subtract the hour value from the time column to get the remaining time values in seconds
ais_data["TIME"] = ais_data["TIME"] - ais_data["HOUR"]*3600
# Dropping the `SEQUENCE_DTTM` column
ais_data.drop("SEQUENCE_DTTM", axis=1, inplace=True)
# Converting the hour column to give the hours starting from 0 instead of 14
ais_data["HOUR"] = ais_data["HOUR"] - 14
# converting the hour column to a categorical column
ais_data["HOUR"] = ais_data["HOUR"].astype("category")
# Reordering the columns
ais_data = ais_data[["HOUR", "TIME", "LAT", "LON",  "SPEED_OVER_GROUND", "COURSE_OVER_GROUND", "VID"]]
ais_data.head()

Unnamed: 0,HOUR,TIME,LAT,LON,SPEED_OVER_GROUND,COURSE_OVER_GROUND,VID
0,0,0,36.90685,-76.089022,1,1641,100008
1,0,0,36.95,-76.026834,11,2815,100015
2,0,0,36.906783,-76.089084,0,2632,100016
3,0,0,37.003,-76.283167,148,2460,100019
4,0,1,36.906783,-76.089084,0,2632,100016


In [6]:
ais_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13714 entries, 0 to 13713
Data columns (total 7 columns):
 #   Column              Non-Null Count  Dtype   
---  ------              --------------  -----   
 0   HOUR                13714 non-null  category
 1   TIME                13714 non-null  int64   
 2   LAT                 13714 non-null  float64 
 3   LON                 13714 non-null  float64 
 4   SPEED_OVER_GROUND   13714 non-null  int64   
 5   COURSE_OVER_GROUND  13714 non-null  int64   
 6   VID                 13714 non-null  int64   
dtypes: category(1), float64(2), int64(4)
memory usage: 656.6 KB


In [7]:
# Here, VID is the label for each vessel. We will now split the data into features and labels for training.
# In that regard, from the dataset, since the predictive value is in the columns (2-5), we will drop the following columns from the dataset as they have no predictive value: 
#   - HOUR
#   - TIME
# Then we will use the remaining columns as features. 
# The label will be the `VID` column
def split_dataset_into_features_and_labels(dataset):
    features = dataset.drop(["HOUR", "TIME", "VID"], axis=1)
    labels = dataset["VID"]
    return features, labels

In [8]:
features, labels = split_dataset_into_features_and_labels(ais_data)
features

Unnamed: 0,LAT,LON,SPEED_OVER_GROUND,COURSE_OVER_GROUND
0,36.906850,-76.089022,1,1641
1,36.950000,-76.026834,11,2815
2,36.906783,-76.089084,0,2632
3,37.003000,-76.283167,148,2460
4,36.906783,-76.089084,0,2632
...,...,...,...,...
13709,36.916043,-76.168737,0,363
13710,36.943300,-76.017700,248,2538
13711,36.906833,-76.089500,1,2495
13712,36.988868,-76.318949,82,2401


In [9]:
features = np.array(features)
labels = np.array(labels)

## Data Splitting:

Since the dataset is a timeseries data, we will split it using the `SEQUENCE_DTTM` column. We will use the first 80% of the data for training, the next 15% for validation and the last 5% for testing.

In [10]:
# Splitting the dataset into the Training set, Validation set and Test set 
X_train = features[:int(0.8*len(features))]
y_train = labels[:int(0.8*len(labels))]
X_val = features[int(0.8*len(features)):int(0.9*len(features))]
y_val = labels[int(0.8*len(labels)):int(0.9*len(labels))]
X_test = features[int(0.9*len(features)):]
y_test = labels[int(0.9*len(labels)):]

In [11]:
X_train.shape

(10971, 4)

In [12]:
print(X_train[0], y_train[0])

[ 3.6906850e+01 -7.6089022e+01  1.0000000e+00  1.6410000e+03] 100008


## Scaling the data: 

Here, we use the `MinMaxScaler` from `sklearn` to scale the data. We will use the `fit_transform` method to fit the scaler to the training data and then transform the training, validation and test data.

In [13]:
# Normalizing the features
scaler = MinMaxScaler(feature_range=(-1, 1))
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# Converting the features into a 3D tensor
X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0], 1, X_train_scaled.shape[1])
X_val_scaled = X_val_scaled.reshape(X_val_scaled.shape[0], 1, X_val_scaled.shape[1])
X_test_scaled = X_test_scaled.reshape(X_test_scaled.shape[0], 1, X_test_scaled.shape[1])

# Converting the labels into a 1D tensor
y_train = torch.from_numpy(y_train).type(torch.LongTensor)
y_val = torch.from_numpy(y_val).type(torch.LongTensor)
y_test = torch.from_numpy(y_test).type(torch.LongTensor)

# Printing the shapes of the features and labels
X_train_scaled

array([[[-0.9951913 ,  0.37725386, -0.99239544, -0.08808002]],

       [[-0.39375566,  0.73277232, -0.91634981,  0.56432342]],

       [[-0.99612517,  0.37689942, -1.        ,  0.46262851]],

       ...,

       [[-0.24585685, -0.14046261, -0.77186312, -0.16976938]],

       [[-0.99310056,  0.37452122, -1.        ,  0.73270353]],

       [[ 0.52432922, -0.16812065, -0.26996198,  0.14754098]]])

In [14]:
print(X_train_scaled[:5])

[[[-0.9951913   0.37725386 -0.99239544 -0.08808002]]

 [[-0.39375566  0.73277232 -0.91634981  0.56432342]]

 [[-0.99612517  0.37689942 -1.          0.46262851]]

 [[ 0.34497178 -0.73264083  0.12547529  0.3670464 ]]

 [[-0.99612517  0.37689942 -1.          0.46262851]]]


## Defining the LSTM Model: 
An LSTM (Long Short_term Memory) is a special type of Recurrent Neural Network (RNN) that is capable of learning long-term dependencies through feedback connections. It can not only process single data points, but also entire sequences of data. It has feedback connections. It is through these “loops” in the network that it is able to store past information while still maintaining the ability to make predictions based on the current input. LSTMs have an additional memory cell and three gates. The memory cell is responsible for storing past information, and the three gates are responsible for controlling the flow of information into and out of the memory cell. The three gates are the input gate, the forget gate, and the output gate. The input gate controls the flow of information into the memory cell. The forget gate controls the flow of information out of the memory cell. The output gate controls the flow of information out of the memory cell.

In [None]:
# In this case, we will use a Long Short-Term Memory (LSTM) model to predict the vessel ID.
# Here, the input size is 4, the hidden layer size is 100, and the output size is 2.
# The number of layers is 2.
# The input size is 4 because we have 4 features in the dataset.
# These are the `LAT`, `LON`, `SPEED_OVER_GROUND`, and `COURSE_OVER_GROUND` columns.


In [18]:
class LSTM(nn.Module):
    def __init__(self, input_size = 4, hidden_layer_size = 100, output_size = 20):
        super().__init__()
        self.hidden_layer_size = hidden_layer_size
        self.lstm = nn.LSTM(input_size, hidden_layer_size)
        self.linear = nn.Linear(hidden_layer_size, output_size)
        self.hidden_cell = (torch.zeros(1,1,self.hidden_layer_size),
                            torch.zeros(1,1,self.hidden_layer_size))

    def forward(self, input_seq):
        lstm_out, self.hidden_cell = self.lstm(input_seq.view(len(input_seq)), self.hidden_cell)
        predictions = self.linear(lstm_out.view(len(input_seq), -1))
        return predictions[-1]

In [19]:
model = LSTM()
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [20]:
epochs = 5
for i in range(epochs):
    for seq, labels in zip(X_train_scaled, y_train):
        optimizer.zero_grad()
        model.hidden_cell = (torch.zeros(1, 1, model.hidden_layer_size),
                            torch.zeros(1, 1, model.hidden_layer_size))
        y_pred = model(seq)
        labels = labels.view(1)
        single_loss = loss_function(y_pred, labels)
        single_loss.backward()
        optimizer.step()
    # Printing the loss after every epoch
    print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')

TypeError: Cannot interpret '1' as a data type

Predicting values using the trained LSTM model

In [21]:
model.eval()
actual = []
predictions = []

if True:
    for seq, labels in zip(X_test_scaled, y_test):
        actual.append(labels.item())
        with torch.no_grad():
            model.hidden = (torch.zeros(1, 1, model.hidden_layer_size),
                            torch.zeros(1, 1, model.hidden_layer_size))
            y_pred = model(seq)
            predictions.append(y_pred.item())
            actual.append(labels.item())

TypeError: Cannot interpret '1' as a data type

Converting the predicted and actual data into tensors

In [22]:
predictions = torch.from_numpy(np.array(predictions)).type(torch.Tensor)
actual = torch.from_numpy(np.array(actual)).type(torch.Tensor)

Converting the normalized values back into their scalar forms

In [23]:
final_predictions = scaler.inverse_transform(predictions)
final_actual = scaler.inverse_transform(actual)   

ValueError: Expected 2D array, got 1D array instead:
array=[].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.