In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as mlp
import seaborn as sns
from math import sqrt
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential, layers, callbacks
from tensorflow.keras.layers import Dense, LSTM, Dropout, GRU, Bidirectional
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping

In [4]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [5]:
ls'/content/gdrive/My Drive/Thesis'

dataframe_dma_flow_1D.csv     dataframe_flow_variable_1D.csv
dataframe_dma_flow_60min.csv  dataframe_flow_variable_60min.csv
dataframe_flow_1D.csv


In [6]:
data = pd.read_csv("/content/gdrive/My Drive/Thesis/dataframe_flow_variable_60min.csv")

In [5]:
data.shape

(2208, 34)

In [7]:
data['Unnamed: 0'] = pd.to_datetime(data['Unnamed: 0'])
data.rename( columns={'Unnamed: 0':'Date_time','225':'DMA_225'}, inplace=True )
data_DMA225 = data[['Date_time','DMA_225']]

In [7]:
data_DMA225.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2208 entries, 0 to 2207
Data columns (total 2 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   Date_time  2208 non-null   datetime64[ns]
 1   DMA_225    2208 non-null   float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 34.6 KB


In [8]:
data_DMA225

Unnamed: 0,Date_time,DMA_225
0,2016-04-22 00:00:00,1.13875
1,2016-04-22 01:00:00,1.16650
2,2016-04-22 02:00:00,1.16650
3,2016-04-22 03:00:00,1.33300
4,2016-04-22 04:00:00,1.33300
...,...,...
2203,2016-07-22 19:00:00,3.97225
2204,2016-07-22 20:00:00,3.41650
2205,2016-07-22 21:00:00,2.88900
2206,2016-07-22 22:00:00,2.22200


In [8]:
# Train Test Split Index
size_of_trainset = 0.7
split_value = round(len(data_DMA225)* size_of_trainset)
print (split_value)

# Split
train = data_DMA225.iloc[:split_value]
test = data_DMA225.iloc[split_value:]

print(data_DMA225.shape)
print(train.shape,test.shape)

1546
(2208, 2)
(1546, 2) (662, 2)


In [9]:
# Normalisation training:
scaler = MinMaxScaler(feature_range=(0, 1))
scaler = scaler.fit(train.iloc[:,1:])
print('Min: %f, Max: %f' % (scaler.data_min_, scaler.data_max_))
# Normalised dataset:
df_train = scaler.transform(train.iloc[:,1:])
df_test = scaler.transform(test.iloc[:,1:])

Min: 0.916750, Max: 8.916750


In [10]:
# Converting numpy array into matrix:
def matrix_dataset(array_dataset, timesteps=1):
 mat_X, mat_Y = [], []
 for i in range(len(array_dataset)-timesteps-1):
  Z = i+timesteps
  m = array_dataset[i:Z, 0]
  mat_X.append(m)
  n = array_dataset[Z, 0]
  mat_Y.append(n)
 return np.array(mat_X), np.array( mat_Y)

In [11]:
# Preparing the data and reshaping it as my  bidirectional LSTM input layer must be 3D (2 dimensions - samples, time steps, and features)
timesteps =  10
train_X,train_y = matrix_dataset(df_train,timesteps)
test_X, test_y = matrix_dataset(df_test,timesteps)
print("The train_X and train_y values are:",train_X.shape,train_y.shape)
print("The test_X and test_y values are:",test_X.shape,test_y.shape)

The train_X and train_y values are: (1535, 10) (1535,)
The test_X and test_y values are: (651, 10) (651,)


In [12]:
#Reshaping for bidirectional lstm input
#Feature should be 1 to get lstm input
timesteps = 10
feature = 1
train_X = train_X.reshape((train_X.shape[0], timesteps, feature))
test_X = test_X.reshape((test_X.shape[0], timesteps, feature))
print("The train_X values after reshaping:",train_X.shape)
print("The test_X values after reshaping:",test_X.shape)

The train_X values after reshaping: (1535, 10, 1)
The test_X values after reshaping: (651, 10, 1)


In [17]:
bilstm_model = Sequential()
bilstm_model.add(Bidirectional(LSTM(50, return_sequences=True, activation= 'relu'), input_shape=(timesteps, feature)))
#bilstm_model.add(Dropout(0.3))
bilstm_model.add(Bidirectional(LSTM(25)))
#bilstm_model.add(Dropout(0.3))
#bilstm_model.add(Bidirectional(LSTM(50)))
bilstm_model.add(Dropout(0.2))
bilstm_model.add(Dense(1 ,activation = 'linear'))
bilstm_model.compile(optimizer='adam', loss='mse')
bilstm_model.fit(train_X,train_y,validation_data=(test_X,test_y),epochs=200,batch_size=200, 
                    callbacks=[EarlyStopping(monitor='val_loss', patience=10)], verbose=1, shuffle=False)
bilstm_model.compile(loss='mean_squared_error',optimizer='adam')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200


In [18]:
bilstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_2 (Bidirection (None, 10, 100)           20800     
_________________________________________________________________
bidirectional_3 (Bidirection (None, 50)                25200     
_________________________________________________________________
dropout_1 (Dropout)          (None, 50)                0         
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
Total params: 46,051
Trainable params: 46,051
Non-trainable params: 0
_________________________________________________________________


In [19]:
#Prediction and getting the accuracy metrics
predict_train=bilstm_model.predict(train_X)
predict_test=bilstm_model.predict(test_X)

##Transformback to original form
predict_train_bilstm=scaler.inverse_transform(predict_train)
predict_test_bilstm=scaler.inverse_transform(predict_test)

In [20]:
### Calculate RMSE performance metrics
import math
from sklearn.metrics import mean_squared_error
trainset = math.sqrt(mean_squared_error(train_y,predict_train_bilstm))
testset = math.sqrt(mean_squared_error(test_y,predict_test_bilstm))

print("The RMSE prediction value on trainset: ",trainset)
print("The RMSE prediction value on testset: ",testset)

The RMSE prediction value on trainset:  3.2354451171024947
The RMSE prediction value on testset:  3.2336957802772504
