In [None]:
import math
import matplotlib.pyplot as plt
import keras
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout
from keras.layers import *
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split
from keras.callbacks import EarlyStopping

import tensorflow as tf
from tensorflow import keras
import sklearn.metrics as sm

In [None]:
# Single place to change the input location.
DATA_PATH = "../input/vdisk1-for-dl/disk1DL.csv"
df = pd.read_csv(DATA_PATH)

# Keep only the columns whose name does not start with "Unnamed".
unnamed_mask = df.columns.str.contains('^Unnamed')
df = df.loc[:, ~unnamed_mask]

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Subset used by the sequence models: the start timestamp and the target label.
# This has to run before the later cell that drops 'startTid' from df.
# .copy() makes df1 an independent frame, so the column assignment below does not
# write into a slice of df (which triggers pandas' SettingWithCopyWarning).
df1 = df[['startTid', 'lebel data']].copy()

# Re-format the timestamp as a 'day-month-year hour:minute' string.
df1['startTid'] = pd.to_datetime(df1['startTid']).dt.strftime('%d-%m-%Y %H:%M')

In [None]:
# Column dtypes and non-null counts of the loaded frame.
df.info()

In [None]:
# Number of missing values in each column.
df.isnull().sum()

In [None]:
# Descriptive statistics of the frame (pandas' default column selection).
df.describe()

## Convert categorical variables into numerical values

In [None]:
# Distinct values of 'diskObjekt': print how many there are, then display them.
disk_objects = df['diskObjekt'].unique()
print(len(disk_objects))
disk_objects

In [None]:
# The model needs numeric inputs, so each distinct string in the two categorical
# columns ('diskObjekt' was noted as having 23 unique values) is replaced by an
# integer category code stored in a new '<column>_code' column.
for source_col in ('diskObjekt', 'diskProgram'):
    df[source_col + '_code'] = pd.Categorical(df[source_col]).codes

# Only the numeric codes are kept; the original string columns are removed.
df.drop(columns=['diskObjekt', 'diskProgram'], inplace=True)

In [None]:
# Preview the frame after encoding: the new '*_code' columns are appended at the
# end and the original 'diskObjekt' / 'diskProgram' columns have been dropped.
df.head()

### Break timestamps into separate columns (so that feature engineering and model learning can use each time component)

In [None]:
# Inspect the raw 'startTid' values; they can be split into year, month, day,
# hour, minute and second as individual columns.
df['startTid']

In [None]:
# Parse 'startTid' into datetimes once; the raw column itself is removed below.
start_ts = pd.to_datetime(df['startTid'])

# (new column suffix, datetime accessor attribute) pairs, in output column order.
start_parts = (
    ('year', 'year'),
    ('month', 'month'),
    ('day', 'day'),
    ('hour', 'hour'),
    ('min', 'minute'),
    ('sec', 'second'),
)
for suffix, attr in start_parts:
    df['startTid_' + suffix] = getattr(start_ts.dt, attr)

# The combined timestamp column is no longer needed once it has been split up.
df.drop(columns=['startTid'], inplace=True)

We will follow the same steps for the 'stoppTid' column.

In [None]:
# Parse 'stoppTid' into datetimes once; the raw column itself is removed below.
stop_ts = pd.to_datetime(df['stoppTid'])

# New column name -> component values, listed in the order they are appended.
stop_parts = {
    'stoppTid_year': stop_ts.dt.year,
    'stoppTid_month': stop_ts.dt.month,
    'stoppTid_day': stop_ts.dt.day,
    'stoppTid_hour': stop_ts.dt.hour,
    'stoppTid_min': stop_ts.dt.minute,
    'stoppTid_sec': stop_ts.dt.second,
}
for column_name, values in stop_parts.items():
    df[column_name] = values

# The combined timestamp column is no longer needed once it has been split up.
df.drop(columns=['stoppTid'], inplace=True)

In [None]:
# Preview the frame after both timestamp columns were split into components.
df.head()

In [None]:
# Re-check column dtypes and non-null counts after the feature engineering above.
df.info()

In [None]:
# Frequency of each value in the target column 'lebel data'.
df['lebel data'].value_counts()

Our target column takes the values 1 and 0, so we will use a classification approach.

## 1- LSTM

In [None]:
# Dimensions of df1, the ('startTid', 'lebel data') subset used for the sequence models.
df1.shape

In [None]:
# Positional split on row order: the first 2000 rows train the models and the
# remaining rows are held out for testing. Only column 1 ('lebel data') is kept,
# as a 2-D (rows, 1) array.
split_at = 2000
label_column = df1.iloc[:, 1:2]
training_set = label_column.iloc[:split_at].values
testing_set = label_column.iloc[split_at:].values

In [None]:
# Dimensions of the training split.
training_set.shape

In [None]:
# Dimensions of the held-out testing split.
testing_set.shape

In [None]:
# Scale the values into the [0, 1] range. The scaler learns its min/max from the
# training split only.
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
training_set_scaled = sc.fit_transform(training_set)

# Apply the training-split scaling to the test split. Re-fitting here (fit_transform)
# would leak test statistics into preprocessing and leave `sc` fitted on the test
# data, so later inverse_transform calls would no longer match the training scale.
testing__set_scaled = sc.transform(testing_set)

In [None]:
# Build supervised training samples from the scaled series: each sample is the
# previous WINDOW values and its target is the value that immediately follows.
WINDOW = 60
train_series = training_set_scaled[:, 0]

# The upper bound comes from the data itself rather than a hard-coded 2000, so the
# cell never indexes past the end of the series if the split size changes.
X_train = np.array([train_series[end - WINDOW:end]
                    for end in range(WINDOW, len(train_series))])
y_train = np.array(train_series[WINDOW:])

# Recurrent layers take 3-D input (samples, timesteps, features); one feature here.
X_train = X_train.reshape(-1, WINDOW, 1)

print(X_train.shape)
print (y_train.shape)

In [None]:
# Build supervised test samples the same way as the training samples: each sample
# is the previous `window` scaled values and its target is the value that follows.
# The window length is read from X_train so test inputs always match what the
# models were trained on.
window = X_train.shape[1]
test_series = testing__set_scaled[:, 0]

# The upper bound comes from the data itself rather than a hard-coded 887: a
# shorter test split no longer raises IndexError and a longer one is not truncated.
X_test = np.array([test_series[end - window:end]
                   for end in range(window, len(test_series))])
y_test = np.array(test_series[window:])

# Recurrent layers take 3-D input (samples, timesteps, features); one feature here.
X_test = X_test.reshape(-1, window, 1)

print(X_test.shape)
print (y_test.shape)

In [None]:
# Stacked LSTM classifier: three 50-unit LSTM layers that pass full sequences on,
# a final single-unit LSTM that collapses the sequence, and a sigmoid output node.
# A Dropout(0.2) layer follows every LSTM layer to reduce overfitting.
# NOTE(review): the last Dropout acts on a single-unit output, i.e. it zeroes the
# only feature fed to the Dense layer on ~20% of training steps — confirm intended.
timesteps = X_train.shape[1]
regressor = Sequential([
    LSTM(units=50, return_sequences=True, activation='relu',
         input_shape=(timesteps, 1)),
    Dropout(0.2),
    LSTM(units=50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(units=50, activation='relu', return_sequences=True),
    Dropout(0.2),
    LSTM(units=1, activation='sigmoid'),
    Dropout(0.2),
    # Final output layer: one sigmoid unit.
    Dense(units=1, activation='sigmoid'),
])

# Binary cross-entropy loss with accuracy reported during training.
regressor.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Fit on the training windows for 20 epochs in mini-batches of 32 samples.
regressor.fit(X_train, y_train, epochs=20, batch_size=32)

In [None]:
# Predict on the test windows, map the outputs back to the original label scale
# and round them to the nearest integer label.
lstm_pred = regressor.predict(X_test)
lstm_pred = np.round(sc.inverse_transform(lstm_pred))

# y_test is stored in scaled space while lstm_pred has been inverse-transformed,
# so the targets are inverse-transformed as well before being shown side by side.
# Both columns are flattened to 1-D so each holds one value per test sample.
actual_labels = sc.inverse_transform(np.reshape(y_test, (-1, 1))).ravel()
df_res = pd.DataFrame({
    'Actual': actual_labels,
    'Predicted': np.ravel(lstm_pred),
})

df_res

In [None]:
# Error metrics for the LSTM predictions on the test windows.
# lstm_pred was inverse-transformed to the original label scale, so the scaled
# targets are inverse-transformed too; otherwise the two sides of every metric
# would be expressed in different spaces.
# NOTE(review): the notebook frames this as a classification task; consider also
# reporting accuracy / a confusion matrix alongside these regression metrics.
lstm_actual = sc.inverse_transform(np.reshape(y_test, (-1, 1))).ravel()
lstm_labels = np.ravel(lstm_pred)

for metric_label, metric_fn in (
    ("Mean absolute error", sm.mean_absolute_error),
    ("Mean squared error", sm.mean_squared_error),
    ("Median absolute error", sm.median_absolute_error),
    ("Explain variance score", sm.explained_variance_score),
    ("R2 score", sm.r2_score),
):
    print(metric_label + " =", round(metric_fn(lstm_actual, lstm_labels), 2))

# RNN

In [None]:
# Stacked SimpleRNN model. The first layer accepts sequences of any length with
# one feature per step; the hidden layers shrink from 45 to 20 units in steps of 5
# and all pass full sequences on. The last layer has a single unit and returns
# only its final state, which is the model output.
hidden_units = range(45, 15, -5)  # 45, 40, 35, 30, 25, 20
rnn_layers = [keras.layers.SimpleRNN(50, return_sequences=True, input_shape=[None, 1])]
rnn_layers += [keras.layers.SimpleRNN(units, return_sequences=True)
               for units in hidden_units]
rnn_layers.append(keras.layers.SimpleRNN(1))

model = keras.models.Sequential(rnn_layers)

In [None]:
# Print the layer-by-layer summary of the RNN model.
model.summary()

In [None]:
# Train the RNN with mean-squared-error loss, using the test windows as validation
# data, then report the loss on those same windows.
# `learning_rate` is the supported keyword; `lr` is a deprecated alias that newer
# Keras releases reject.
optimizer = keras.optimizers.Adam(learning_rate=0.005)
model.compile(loss="mse", optimizer=optimizer)
history = model.fit(X_train, y_train, epochs=20,
                    validation_data=(X_test, y_test))

# No accuracy metric is compiled into this model, so evaluate() returns the MSE
# loss; the label says so instead of calling it accuracy.
print ("Model test loss (MSE)")
model.evaluate(X_test, y_test)

In [None]:
# Predict on the test windows, then map the raw outputs back through the scaler
# and round them to the nearest integer.
rnn_scores = model.predict(X_test)
y_pred = np.round(sc.inverse_transform(rnn_scores))

In [None]:
# Error metrics for the RNN predictions on the test windows.
# y_pred was inverse-transformed to the original label scale, so the scaled
# targets are inverse-transformed too; otherwise the two sides of every metric
# would be expressed in different spaces.
# NOTE(review): the notebook frames this as a classification task; consider also
# reporting accuracy / a confusion matrix alongside these regression metrics.
rnn_actual = sc.inverse_transform(np.reshape(y_test, (-1, 1))).ravel()
rnn_labels = np.ravel(y_pred)

for metric_label, metric_fn in (
    ("Mean absolute error", sm.mean_absolute_error),
    ("Mean squared error", sm.mean_squared_error),
    ("Median absolute error", sm.median_absolute_error),
    ("Explain variance score", sm.explained_variance_score),
    ("R2 score", sm.r2_score),
):
    print(metric_label + " =", round(metric_fn(rnn_actual, rnn_labels), 2))