In [1]:
#Importing Libraries

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import ConvLSTM2D
from keras.layers import Flatten
from numpy import array

In [2]:
#Import Raw Data and convert it to CSV 

url = "https://raw.githubusercontent.com/rohitash-chandra/CMTL_dynamictimeseries/master/IndianOcean/rawtrain1985-2001.txt"
df = pd.read_csv(url, sep = "\t", header = None)
df.columns = ['ID','Date','Longitude','Latitude','Speed']
df['Category'] = df['Speed'].apply(lambda x: 1 if x<=27 else 2  if x<=33 and x> 27 else 3 if x<=47 and x> 33 else 4 if x<=63 and x> 47 else 5 if x<=89 and x> 63 else 6 if x<=119 and x>89 else 7 )
df = df.drop(['Date'], axis = 1)
df.to_csv('adjusted.csv')
df

Unnamed: 0,ID,Longitude,Latitude,Speed,Category
0,1,7.7,80.9,20,1
1,1,8.1,79.9,20,1
2,1,8.5,78.8,25,1
3,1,8.8,77.7,25,1
4,1,9.0,76.6,30,2
...,...,...,...,...,...
9360,21,25.9,38.9,35,3
9361,21,25.9,40.2,35,3
9362,21,25.5,41.0,35,3
9363,21,25.1,41.7,30,2


In [3]:
# Using a Univariate LSTM, therefore only working with Speed 

# Extracting speed data from Dataset 

speed = array(df['Speed'])
speed=speed.reshape(len(speed),1)
category=array(df['Category'])
speed

array([[20],
       [20],
       [25],
       ...,
       [35],
       [30],
       [25]], dtype=int64)

In [4]:
# Splitting the Sequence To Prepare the Training and Test Data. Data Preprocessing. 

def split_seq(timeseries_data, n_steps_in, n_steps_out):
    X, y =[],[]
    for i in range(len(timeseries_data)):
        # find the end of this pattern
        end_ix = i + n_steps_in
        out_end_ix = end_ix+n_steps_out
        # check if we are beyond the sequence
        if out_end_ix > len(timeseries_data)-1:
            break
        # gather input and output parts of the pattern
        seq_x, seq_y = timeseries_data[i:end_ix], timeseries_data[end_ix:out_end_ix]
        X.append(seq_x)
        y.append(seq_y)
    return np.array(X), np.array(y)

In [5]:
# Initialising Features of LSTM 

n_steps_in=4
n_features = 1
n_steps_out=2

In [6]:
# Defining RMSE Function to test Performance

def rmse(pred, actual):
    temp = np.sqrt(((pred-actual)**2).mean())
    return temp

In [7]:
# From Speed DataFrame, Separate and Prepare (Split and make 3D) Test and Train Data 

train = speed[0:9000]
test = speed[9000:9364]

x_train, y_train = split_seq(train, n_steps_in,n_steps_out)
x_test, y_test = split_seq(test, n_steps_in,n_steps_out)


x_train = x_train.reshape((x_train.shape[0], x_train.shape[1], n_features))
print(x_train.shape)

x_test = x_test.reshape((x_test.shape[0], x_test.shape[1], n_features))
print(x_test.shape)

y_train = y_train.reshape((y_train.shape[0], y_train.shape[1]))
print(y_train.shape)

y_test = y_test.reshape((y_test.shape[0], y_test.shape[1]))
print(y_test.shape)

(8994, 4, 1)
(358, 4, 1)
(8994, 2)
(358, 2)


In [8]:
# Defining and Fitting the Conv LSTM Network 

#Define the Model
model=Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True, input_shape=(n_steps_in, n_features)))
model.add(LSTM(100, activation='relu'))
model.add(Dense(n_steps_out))
model.compile(optimizer='adam', loss='mse')
model.summary()

#Fit the Model 
model.fit(x_train,y_train,epochs=100,verbose=0)

#Predict with the Fitted Model 
y_train_predicted=model.predict(x_train)
y_test_predicted=model.predict(x_test)



#Check RMSE
train_acc=rmse(y_train_predicted,y_train)
test_acc=rmse(y_test_predicted,y_test)

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 4, 50)             10400     
_________________________________________________________________
lstm_1 (LSTM)                (None, 100)               60400     
_________________________________________________________________
dense (Dense)                (None, 2)                 202       
Total params: 71,002
Trainable params: 71,002
Non-trainable params: 0
_________________________________________________________________


In [9]:
#Displaying Train and Test Accuracies 

print(train_acc, 'is the RMSE for the Train Data') 
print(test_acc, 'is the RMSE for the Test Data')

5.853795038296876 is the RMSE for the Train Data
5.82241955685324 is the RMSE for the Test Data
