In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib.ticker import MaxNLocator
#import seaborn as sns
from sklearn.metrics import mean_squared_error

from keras.layers.advanced_activations import LeakyReLU
import os
from pandas import DataFrame
from numpy import concatenate

from sklearn.model_selection import GridSearchCV
import datetime
from math import sqrt
from keras import regularizers
import math
from tensorflow.keras import backend
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard, ReduceLROnPlateau
import tensorflow as tf
from tensorflow.keras.optimizers import Adam

In [None]:
# Fix the random seeds for reproducibility.
# Seeding NumPy alone is not enough: the Keras model in this notebook runs on
# TensorFlow, which keeps its own random state, so that is seeded as well.
np.random.seed(7)
tf.random.set_seed(7)

In [None]:
def remove_outliers(data, threshold=5):
    """Drop values whose z-score magnitude exceeds ``threshold``.

    The mean and standard deviation are computed over the whole array.

    Parameters
    ----------
    data : numpy.ndarray
        Values to filter.
    threshold : float, default 5
        Largest absolute z-score that is still kept.

    Returns
    -------
    numpy.ndarray
        The retained values as a single column, shape ``(n_kept, 1)``.
    """
    mean = data.mean()
    std = data.std()
    if std == 0:
        # Constant input: every z-score would be 0/0 = NaN, every comparison
        # below would be False, and all values would be discarded. Nothing can
        # be an outlier here, so keep everything.
        return data.reshape(-1, 1)
    idx = np.abs((data - mean) / std) <= threshold
    # Boolean indexing flattens the result; restore the column shape.
    return data[idx].reshape(-1, 1)

In [None]:
#********Load data**************************

# Read only the third CSV column (index 2).
traffic_raw = pd.read_csv('murdoch_user_incoming_traffic.csv', usecols=[2])

# Drop rows with missing values. `axis` is passed by keyword because it is
# keyword-only in pandas 2.x, and a new frame is produced instead of mutating
# in place so the raw frame stays available.
traffic_clean = traffic_raw.dropna(axis=0)

dataset = np.array(traffic_clean)
print('dataset with outlier', dataset.shape)
dataset = dataset.astype('float32')
dataset = remove_outliers(dataset)
print('dataset after removing outlier', dataset.shape)

# *****************split into train and test sets********************

# The first 80% of the rows form the training set; the remainder is the test
# set. No shuffling takes place, so row order is preserved.
train_size = int(len(dataset) * 0.80)
test_size = len(dataset) - train_size
data_train, data_test = dataset[:train_size], dataset[train_size:]
print('Data_train', data_train.shape, 'Data_test',data_test.shape)

# ********Normalize the data******************

# The scaler is fitted on the training split only and then applied to both
# splits, so the test data does not influence the scaling parameters.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(data_train)
train_scaled = scaler.transform(data_train)
test_scaled = scaler.transform(data_test)
print('Train_scaled',train_scaled.shape,' Test scaled', test_scaled.shape)

In [None]:
# ******************frame a sequence as a supervised learning problem*************

def prepare_seq2seq_data(dataset, look_back=1):
    """Build sliding-window samples from column 0 of a 2-D array.

    Sample ``i`` consists of the ``look_back`` consecutive values starting at
    row ``i``; its target is the value that immediately follows the window.

    Parameters
    ----------
    dataset : numpy.ndarray
        2-D array; only column 0 is read.
    look_back : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        Windows of shape ``(n, look_back)`` and targets of shape ``(n,)`` with
        ``n = len(dataset) - look_back - 1``. Note that this count leaves out
        the last window that would still have a target inside ``dataset``.
    """
    n_windows = len(dataset) - look_back - 1
    windows = [dataset[start:start + look_back, 0] for start in range(n_windows)]
    targets = [dataset[start + look_back, 0] for start in range(n_windows)]
    return np.array(windows), np.array(targets)


# Each sample uses the previous `look_back` scaled observations to predict the next one.
look_back = 40

train_X, train_y = prepare_seq2seq_data(train_scaled, look_back=look_back)
test_X, test_y = prepare_seq2seq_data(test_scaled, look_back=look_back)

print('train_X', train_X.shape, 'train_y', train_y.shape)
print('test_X', test_X.shape, 'test_y', test_y.shape)

In [None]:
#*******Reshape from[samples, timesteps] into [samples, timesteps, features]*************

# The LSTM input below is declared as (1, look_back): each window is fed as a
# single timestep carrying `look_back` features. Letting NumPy infer the last
# axis (-1) keeps this cell idempotent: re-running it on arrays that are
# already 3-D leaves them unchanged instead of raising a ValueError.
train_X = train_X.reshape(len(train_X), 1, -1)
test_X = test_X.reshape(len(test_X), 1, -1)
print('train_X',train_X.shape, 'train_y',train_y.shape)
print('test_X', test_X.shape, 'test_y',test_y.shape)

In [None]:
#**********RPE******************************
def rpe(y_true, y_pred):
    """Element-wise absolute relative error of ``y_pred`` w.r.t. ``y_true``, in percent.

    Written with Keras backend ops so it can be passed in ``metrics=[...]``.

    Parameters
    ----------
    y_true : tensor
        Ground-truth values.
    y_pred : tensor
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    tensor
        ``|y_pred - y_true| / |y_true| * 100`` for each element.
    """
    # Clamp the denominator away from zero so that a target of exactly 0 does
    # not produce inf/NaN. For any |y_true| >= epsilon the value is the same as
    # abs((y_pred - y_true) / y_true) * 100.
    denominator = backend.maximum(backend.abs(y_true), backend.epsilon())
    return backend.abs(y_pred - y_true) / denominator * 100

In [None]:
#********************Model****************************

from keras.models import Sequential
from keras.layers import LSTM, Dropout, Dense

# Three stacked LSTM layers (128 -> 64 -> 32 units), each followed by dropout
# with rate 0.5, and a single-unit dense output. The first two LSTMs return
# full sequences so that the next LSTM receives 3-D input.
model = Sequential([
    LSTM(128, input_shape=(1, look_back), kernel_initializer='glorot_uniform',
         return_sequences=True),
    Dropout(0.5),
    LSTM(64, kernel_initializer='glorot_uniform', return_sequences=True),
    Dropout(0.5),
    LSTM(32, kernel_initializer='glorot_uniform'),
    Dropout(0.5),
    Dense(1),
])
model.summary()



In [None]:
# Optimise mean squared error with the 'adam' optimizer and track the custom
# `rpe` metric alongside the loss.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=[rpe],
)

In [None]:
# Early stopping watches the validation loss with a patience of 100 epochs.
early_stopping_options = dict(monitor='val_loss', patience=100, verbose=1)
callback_early_stopping = EarlyStopping(**early_stopping_options)

callbacks = [callback_early_stopping]

In [None]:
# Train for at most 5000 epochs (the early-stopping callback may end training
# sooner). 20% of the training samples are held out for validation and
# shuffling is disabled.
fit_options = dict(
    batch_size=256,
    validation_split=0.2,
    epochs=5000,
    verbose=1,
    callbacks=callbacks,
    shuffle=False,
)
history = model.fit(train_X, train_y, **fit_options)

In [None]:
# **************************Model Performance Error  metrics*************************************

# One row per epoch: the recorded loss/metric values plus the epoch number.
hist = pd.DataFrame(history.history).assign(epoch=history.epoch)
hist.tail()

In [None]:
# Turn the 1-D target vectors into single-column 2-D arrays.
train_y = train_y.reshape(-1, 1)
test_y = test_y.reshape(-1, 1)

In [None]:
# *********************make predictions*************************************************************

trainPredict = model.predict(train_X)
testPredict = model.predict(test_X)

# *************************invert predictions**************************
# Undo the min-max scaling on predictions and targets alike so that the errors
# below are computed on unscaled values.

trainPredict_inv = scaler.inverse_transform(trainPredict)
testPredict_inv = scaler.inverse_transform(testPredict)
train_y_inv = scaler.inverse_transform(train_y)
test_y_inv = scaler.inverse_transform(test_y)

# ********************RMSE*************************

train_mse = mean_squared_error(train_y_inv, trainPredict_inv)
trainScore = math.sqrt(train_mse)
print('Train Score: %.2f RMSE' % trainScore)

test_mse = mean_squared_error(test_y_inv, testPredict_inv)
testScore = math.sqrt(test_mse)
print('Test Score: %.2f RMSE' % testScore)


# *****************Relative Percentage Error******************************

def relative_percentage_error(y_true, y_pred):
    """Return the mean signed relative error of ``y_pred`` w.r.t. ``y_true``, in percent.

    Over- and under-predictions can cancel out because no absolute value is
    taken; this differs from the Keras ``rpe`` training metric, which uses the
    absolute error.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values, broadcastable against ``y_true``.

    Returns
    -------
    float
        ``mean((y_pred - y_true) / y_true) * 100``.
    """
    y_true, y_pred = np.asarray(y_true), np.asarray(y_pred)
    return np.mean((y_pred - y_true) / y_true) * 100

# Deliberately not named `rpe`: that name belongs to the Keras metric passed to
# `model.compile`, and rebinding it to a function/float here would break any
# later re-run of the compile cell.
test_rpe = relative_percentage_error(test_y_inv, testPredict_inv)
print('RPE: %.3f' % test_rpe)

In [None]:
# Largest train prediction next to the largest actual test value (both unscaled).
(np.max(trainPredict_inv), np.max(test_y_inv))

In [None]:
# ***********************shift train predictions for plotting*******************************

# Start from an all-NaN array shaped like `dataset`; NaN entries are not drawn.
# The first train prediction belongs to row `look_back`, because each window's
# target is the value right after its `look_back` inputs.
trainPredictPlot = np.full_like(dataset, np.nan)
trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict_inv

# **********************shift test predictions for plotting*************************************

# The test predictions are placed after the train block, offset by another
# `look_back` rows, and end one row before the end of `dataset`.
testPredictPlot = np.full_like(dataset, np.nan)
test_start = len(trainPredict) + (look_back * 2) + 1
testPredictPlot[test_start:len(dataset) - 1, :] = testPredict_inv

# ********************Create the plot*************************************************************

plt.figure(figsize=(15, 5))
plt.plot(dataset, label='Actual values', color='y')
plt.plot(trainPredictPlot, label='Train set')
plt.plot(testPredictPlot, label='Test set')
# Without this call the labels set above are never shown.
plt.legend()
plt.show()




In [None]:
    
#***************************************Model validation loss*****************************************
# Training and validation loss per epoch, taken from the fit history.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('Model Loss')
ax.set_ylabel('Validation loss')
ax.set_xlabel('Epoch')
ax.legend(['Training data loss', 'Validation data loss'], loc='upper right')
plt.show()
    

In [None]:
 #***************************************RPE*****************************************
# Per-epoch values of the `rpe` metric on the training and validation data,
# taken from the fit history.
fig, ax = plt.subplots()
ax.plot(history.history['rpe'])
ax.plot(history.history['val_rpe'])
ax.set_title('RPE')
ax.set_ylabel('Relative percentage error')
ax.set_xlabel('Epoch')
ax.legend(['Training data error', 'Validation data error'], loc='upper right')
plt.show()

In [None]:
#********************* Actual value, Predicted value and Prediction Error ***************

# Absolute difference between actual and predicted test values (unscaled).
error = np.abs(test_y_inv[:, 0] - testPredict_inv[:, 0])

fig, ax = plt.subplots(figsize=(15, 5))
ax.set_xlabel("Epoch")
ax.set_ylabel("Email traffic")
ax.plot(test_y_inv[:, 0], label="True value", linewidth=1, linestyle="--", color='green')
ax.plot(testPredict_inv[:, 0], label="Predicted value", linewidth=1, color='blue')
ax.plot(error, label='Error', color='orange', linewidth=1, linestyle="--")
ax.legend(bbox_to_anchor=(1, .99))
plt.show()

#******************************* True value vs Prediction **********************/

# Actual vs. predicted test values (unscaled).
# NOTE(review): the x-axis is the position within the test arrays; confirm
# that "Epoch" is the intended label.
plt.figure(figsize=(15, 5))
plt.xlabel("Epoch")
plt.ylabel("Email traffic")
plt.plot(test_y_inv[:, 0], label="True value", linewidth=1, color='green', linestyle="--")
plt.plot(testPredict_inv[:, 0], label="Predicted value", linewidth=1, color='blue')
# Without this call the labels set above are never shown.
plt.legend()
plt.show()