In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from keras.preprocessing.sequence import TimeseriesGenerator

import plotly.express as px # to plot the time series plot
from sklearn import metrics # for the evaluation
from sklearn.preprocessing import LabelEncoder,MinMaxScaler
import tensorflow as tf 

In [11]:
# Read the metro traffic records; the path is relative, so it is resolved
# against the kernel's current working directory.
data = pd.read_csv(filepath_or_buffer='metro data.csv')
# Bare last expression: lets the notebook render the frame as a rich table.
data

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,288.28,0.0,0.0,40,Clouds,scattered clouds,02-10-2012 09:00,5545
1,,289.36,0.0,0.0,75,Clouds,broken clouds,02-10-2012 10:00,4516
2,,289.58,0.0,0.0,90,Clouds,overcast clouds,02-10-2012 11:00,4767
3,,290.13,0.0,0.0,90,Clouds,overcast clouds,02-10-2012 12:00,5026
4,,291.14,0.0,0.0,75,Clouds,broken clouds,02-10-2012 13:00,4918
...,...,...,...,...,...,...,...,...,...
48199,,283.45,0.0,0.0,75,Clouds,broken clouds,30-09-2018 19:00,3543
48200,,282.76,0.0,0.0,90,Clouds,overcast clouds,30-09-2018 20:00,2781
48201,,282.73,0.0,0.0,90,Thunderstorm,proximity thunderstorm,30-09-2018 21:00,2159
48202,,282.09,0.0,0.0,90,Clouds,overcast clouds,30-09-2018 22:00,1450


In [15]:
# Replace every object-dtype column with integer class codes, in place.
# A fresh LabelEncoder is fitted per column, so codes are independent across columns.
# Re-running this cell is a no-op: after the first pass no object columns remain.
object_columns = data.select_dtypes('object').columns
for column_name in object_columns:
    data[column_name] = LabelEncoder().fit_transform(data[column_name])

In [16]:
# Preview the first five rows to inspect the frame after label encoding.
data.head(n=5)

Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,7,288.28,0.0,0.0,40,1,24,2291,5545
1,7,289.36,0.0,0.0,75,1,2,2292,4516
2,7,289.58,0.0,0.0,90,1,19,2293,4767
3,7,290.13,0.0,0.0,90,1,19,2294,5026
4,7,291.14,0.0,0.0,75,1,2,2295,4918


In [17]:
# Two independent min-max scalers: one for the model inputs, one for the target
# alone, so predictions can later be inverted with Y_scaler.
# NOTE(review): both scalers are fitted on every row of `data`; if part of the
# frame is held out for validation afterwards, its range has already been seen here.
X_scaler, Y_scaler = MinMaxScaler(), MinMaxScaler()

# Inputs: the weather/holiday columns plus the traffic volume itself.
X_data = X_scaler.fit_transform(
    data.loc[:, [
        'holiday', 'temp', 'rain_1h', 'snow_1h', 'clouds_all',
        'weather_main', 'weather_description', 'traffic_volume',
    ]]
)
# Target: traffic volume only, kept 2-D (one column) as the scaler requires.
Y_data = Y_scaler.fit_transform(data.loc[:, ['traffic_volume']])

In [20]:
 def custom_ts_multi_data_prep(dataset, target, start, end, window, horizon):
     """Cut a multivariate series into (input window, future target) training pairs.

     For every anchor row ``i`` in ``range(start + window, end)`` one sample is
     produced:

     * input  = ``dataset[i - window : i]``      (the ``window`` rows before ``i``)
     * target = ``target[i + 1 : i + 1 + horizon]`` (``horizon`` rows after ``i``)

     NOTE(review): row ``i`` itself is in neither the input nor the target, so
     there is a one-row gap between the last observed row and the first
     predicted row. This is kept as-is; confirm it is intended.

     Parameters
     ----------
     dataset : array-like, sliceable along its first axis
         Feature rows, one per time step.
     target : array-like, sliceable along its first axis
         Target rows aligned with ``dataset``.
     start : int
         First row that may appear in an input window (must be >= 0).
     end : int or None
         Exclusive upper bound for the anchor row. ``None`` means "as far as
         the data allows". Values larger than the data allows are clamped so
         that every target slice is complete.
     window : int
         Number of past rows per input sample.
     horizon : int
         Number of future rows per target sample.

     Returns
     -------
     tuple of numpy.ndarray
         ``(X, y)`` stacked along a new leading sample axis. Both are empty
         arrays when no anchor row fits.

     Raises
     ------
     ValueError
         If ``start`` is negative (it would otherwise wrap around to the end
         of the series and silently build wrong windows).
     """
     if start < 0:
         raise ValueError("start must be non-negative, got %r" % (start,))

     # Last anchor bound for which target[i + 1 : i + 1 + horizon] is still complete.
     last_valid_end = min(len(dataset), len(target)) - horizon
     if end is None or end > last_valid_end:
         end = last_valid_end

     X = []
     y = []
     for i in range(start + window, end):
         # Plain slices instead of fancy indexing with range objects: same rows,
         # no per-sample index array.
         X.append(dataset[i - window:i])
         y.append(target[i + 1:i + 1 + horizon])
     return np.array(X), np.array(y)

In [22]:
# Windowing configuration.
hist_window = 48      # past rows fed to the model per sample
horizon = 10          # future target rows predicted per sample
TRAIN_SPLIT = 30000   # anchor rows below this index go to training, the rest to validation

# Training windows: anchors from the start of the series up to TRAIN_SPLIT.
x_train, y_train = custom_ts_multi_data_prep(
    dataset=X_data,
    target=Y_data,
    start=0,
    end=TRAIN_SPLIT,
    window=hist_window,
    horizon=horizon,
)
# Validation windows: from TRAIN_SPLIT to wherever the helper's default end lands.
x_vali, y_vali = custom_ts_multi_data_prep(
    dataset=X_data,
    target=Y_data,
    start=TRAIN_SPLIT,
    end=None,
    window=hist_window,
    horizon=horizon,
)

In [28]:
batch_size = 256
buffer_size = 150  # shuffle buffer: samples are only mixed within this many elements

# Training pipeline: cache -> shuffle -> batch -> repeat forever.
train_data = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .cache()
    .shuffle(buffer_size)
    .batch(batch_size)
    .repeat()
)

# Validation pipeline: same batching, no shuffling, also repeated forever.
val_data = (
    tf.data.Dataset.from_tensor_slices((x_vali, y_vali))
    .batch(batch_size)
    .repeat()
)

In [29]:
# Two stacked bidirectional LSTMs with small tanh Dense layers in between;
# the last Dense layer emits `horizon` values, one per forecast step.
lstm_model = tf.keras.models.Sequential()
lstm_model.add(
    tf.keras.layers.Bidirectional(
        # return_sequences=True keeps the per-step outputs for the second LSTM.
        tf.keras.layers.LSTM(200, return_sequences=True),
        # Last two axes of x_train: (window length, feature count).
        input_shape=x_train.shape[-2:],
    )
)
lstm_model.add(tf.keras.layers.Dense(20, activation='tanh'))
lstm_model.add(tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(150)))
lstm_model.add(tf.keras.layers.Dense(20, activation='tanh'))
lstm_model.add(tf.keras.layers.Dense(20, activation='tanh'))
lstm_model.add(tf.keras.layers.Dropout(0.25))
lstm_model.add(tf.keras.layers.Dense(units=horizon))

lstm_model.compile(optimizer='adam', loss='mse')
lstm_model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
bidirectional_2 (Bidirection (None, 48, 400)           334400    
_________________________________________________________________
dense_4 (Dense)              (None, 48, 20)            8020      
_________________________________________________________________
bidirectional_3 (Bidirection (None, 300)               205200    
_________________________________________________________________
dense_5 (Dense)              (None, 20)                6020      
_________________________________________________________________
dense_6 (Dense)              (None, 20)                420       
_________________________________________________________________
dropout_1 (Dropout)          (None, 20)                0         
_________________________________________________________________
dense_7 (Dense)              (None, 10)               

In [30]:
# File that receives the best weights seen so far.
model_path = 'Bidirectional_LSTM_Multivariate.h5'

# Stop once validation loss has not improved for 10 consecutive epochs.
early_stopings = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    min_delta=0,
    patience=10,
    verbose=1,
)
# Overwrite the checkpoint only when validation loss reaches a new minimum.
checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=model_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=0,
)
callbacks = [early_stopings, checkpoint]


In [31]:
# Both datasets repeat indefinitely, so the step counts below are what
# delimit one training epoch and one validation pass.
history = lstm_model.fit(
    train_data,
    validation_data=val_data,
    epochs=150,
    steps_per_epoch=100,
    validation_steps=50,
    callbacks=callbacks,
    verbose=1,
)

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150

KeyboardInterrupt: 

In [None]:
# Training vs. validation loss per epoch, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(16, 9))
for history_key in ('loss', 'val_loss'):
    ax.plot(history.history[history_key])
ax.set_title('Model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epoch')
# Legend entries follow the plotting order above.
ax.legend(['train loss', 'validation loss'])
plt.show()

In [None]:
# Build one model input from the most recent `hist_window` rows and forecast from it.
# Use `transform`, not `fit_transform`: re-fitting here would rescale the inputs
# to the min/max of only these few rows (a different scale from the one the model
# was trained on) and would overwrite the fitted X_scaler as a side effect.
data_val = X_scaler.transform(
    data[['holiday', 'temp', 'rain_1h', 'snow_1h', 'clouds_all', 'weather_main',
          'weather_description', 'traffic_volume']].tail(hist_window)
)
# Add the leading batch axis the model expects: (1, window, features).
val_rescaled = data_val.reshape(1, data_val.shape[0], data_val.shape[1])
pred = lstm_model.predict(val_rescaled)
# Y_scaler was fitted on a single column, so invert the predictions as one
# column and then restore the original (batch, steps) layout.
pred_Inverse = Y_scaler.inverse_transform(pred.reshape(-1, 1)).reshape(pred.shape)
pred_Inverse