In [1]:
# importing libraries
import os
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.externals import joblib
import seaborn as sns
sns.set(color_codes = True)
import matplotlib.pyplot as plt
%matplotlib inline

from numpy.random import seed

from tensorflow import set_random_seed
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)



In [None]:
from keras.layers import Input, Dropout, Dense, LSTM, TimeDistributed, RepeatVector
from keras.models import Model
from keras import regularizers

In [None]:
# Seed both random number generators. The synthetic frames below are drawn
# from NumPy's global generator, which tf.random.set_seed does not affect,
# so without seed(10) the data would differ on every run.
seed(10)
tf.random.set_seed(10)
# Synthetic data: 800 training rows with integers in [0, 100) and 100 test
# rows with integers in [0, 1000), four columns each.
train = pd.DataFrame(np.random.randint(0,100,size=(800, 4)), columns=list('ABCD'))
test = pd.DataFrame(np.random.randint(0,1000,size=(100, 4)), columns=list('ABCD'))

In [None]:
# Transforming into frequency data using the Fourier transform.
# axis=0 runs the FFT down each column (over the samples). The default,
# axis=-1, would instead transform every row across its feature columns.
# NOTE(review): train_fft / test_fft are not consumed by the cells visible
# below (the scaler is fitted on the raw frames) - confirm whether that is intended.
train_fft = np.fft.fft(train, axis=0)
test_fft = np.fft.fft(test, axis=0)

In [None]:
# Normalize data: learn per-column min/max from the training frame only,
# then apply that same scaling to both frames.
scaler = MinMaxScaler().fit(train)
X_train = scaler.transform(train)
X_test = scaler.transform(test)

In [None]:
# reshape to [samples, time_steps, n_features] with a single time step.
# The feature count is read from shape[-1] rather than shape[1] so the cell
# can be re-run safely: once the arrays are already 3-D, shape[1] is the
# time-step axis (1) and the original reshape would raise.
X_train = X_train.reshape(X_train.shape[0], 1, X_train.shape[-1])
X_test = X_test.reshape(X_test.shape[0], 1, X_test.shape[-1])

In [None]:
# Defining NN structure
def autoencoder_model(X):
    """Build an uncompiled LSTM autoencoder sized from ``X``.

    Only ``X.shape[1]`` (used as the number of time steps) and ``X.shape[2]``
    (used as the number of features) are read; the data itself is not touched.

    Layer stack, in order: LSTM(16) -> LSTM(4) -> RepeatVector ->
    LSTM(4) -> LSTM(16) -> TimeDistributed(Dense(n_features)).

    Parameters
    ----------
    X : array-like with a ``shape`` of at least three entries.

    Returns
    -------
    Model
        Keras functional model mapping an input of shape
        ``(X.shape[1], X.shape[2])`` to the TimeDistributed Dense output.
    """
    n_steps, n_features = X.shape[1], X.shape[2]

    inputs = Input(shape=(n_steps, n_features))

    # Encoder: the second LSTM returns only its final state (a 4-unit vector).
    # The l2 factor on the first layer is 0.00, i.e. no penalty is applied.
    encoded = LSTM(
        16,
        activation='relu',
        return_sequences=True,
        kernel_regularizer=regularizers.l2(0.00),
    )(inputs)
    encoded = LSTM(4, activation='relu', return_sequences=False)(encoded)

    # Repeat the encoded vector once per time step so the decoder LSTMs
    # receive a sequence again.
    repeated = RepeatVector(n_steps)(encoded)

    # Decoder: mirrors the encoder widths, then projects to n_features
    # at every time step.
    decoded = LSTM(4, activation='relu', return_sequences=True)(repeated)
    decoded = LSTM(16, activation='relu', return_sequences=True)(decoded)
    output = TimeDistributed(Dense(n_features))(decoded)

    return Model(inputs=inputs, outputs=output)

In [None]:
# Create the autoencoder from the training tensor's shape, compile it with
# the Adam optimizer and mean-absolute-error loss, then print its layers.
model = autoencoder_model(X_train)
model.compile(loss='mae', optimizer='adam')
model.summary()

In [None]:
# Fitting the model: the network is trained to reproduce its own input
# (targets == inputs), with 5% of the samples held out for validation.
# Only the per-epoch metrics dict (.history) is kept.
nb_epochs = 100
batch_size = 10
history = model.fit(
    X_train,
    X_train,
    batch_size=batch_size,
    epochs=nb_epochs,
    validation_split=0.05,
).history

In [None]:
# Checking loss: reconstruct the training set and wrap the result in a
# DataFrame that shares the training frame's columns and index.
X_pred = model.predict(X_train)
X_pred = pd.DataFrame(
    # drop the time-step axis: (samples, steps, features) -> (samples, features)
    X_pred.reshape(X_pred.shape[0], X_pred.shape[2]),
    columns=train.columns,
    index=train.index,
)

In [None]:
# Per-sample reconstruction error on the training set: mean absolute
# difference across the feature columns of each row.
Xtrain = X_train.reshape(X_train.shape[0], X_train.shape[2])
scored = pd.DataFrame(
    {'Loss_MAE': (X_pred - Xtrain).abs().mean(axis=1)},
    index=train.index,
)

In [None]:
# Distribution of the per-sample reconstruction loss currently held in `scored`.
plt.figure(figsize = (16,9), dpi = 80)
plt.title('Loss_Distribution', fontsize=16)
if hasattr(sns, 'histplot'):
    # distplot is deprecated in newer seaborn releases; histplot with a
    # density scale and KDE overlay is its replacement.
    sns.histplot(scored['Loss_MAE'], bins=20, kde=True, stat='density', color='blue')
else:
    # Older seaborn without histplot: keep the original call.
    sns.distplot(scored['Loss_MAE'], bins=20, kde = True, color = 'blue')
# Trailing semicolon on the final statement suppresses the (0.0, 0.5) repr.
plt.xlim([0.0,0.5]);

In [None]:
# Calculate loss on the test dataset.
# Note: this rebinds X_pred and scored, replacing the training-set values
# computed in the earlier cells.
X_pred = model.predict(X_test)
X_pred = pd.DataFrame(
    # drop the time-step axis: (samples, steps, features) -> (samples, features)
    X_pred.reshape(X_pred.shape[0], X_pred.shape[2]),
    columns=test.columns,
    index=test.index,
)

# Mean absolute reconstruction error across the feature columns of each row.
Xtest = X_test.reshape(X_test.shape[0], X_test.shape[2])
scored = pd.DataFrame(
    {'Loss_MAE': (X_pred - Xtest).abs().mean(axis=1)},
    index=test.index,
)

In [None]:
# Selecting scores above the threshold: store the fixed cut-off alongside
# each loss and flag the rows whose loss is strictly greater than it.
scored = scored.assign(Threshold=0.7)
scored = scored.assign(Anomaly=scored['Loss_MAE'] > scored['Threshold'])

# https://towardsdatascience.com/lstm-autoencoder-for-anomaly-detection-e1f4f2ee7ccf