# In [None]:
# importing packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.layers import Dense
from keras.models import Sequential

# importing datasets
# Rows from NB.csv are tagged Fault = 0; rows from "IR - 7.csv" are tagged Fault = 1.
X = pd.read_csv('../Datasets/NB.csv')
X['Fault'] = 0

y = pd.read_csv('../Datasets/IR - 7.csv')
y['Fault'] = 1

# splitting data into train and test sets
# Each dataset is split 80/20 on its own (fixed seed) so rows from both files
# end up in the train set and in the test set.
X_train, X_test = train_test_split(X, test_size=0.2, random_state=0)
y_train, y_test = train_test_split(y, test_size=0.2, random_state=0)

# concatenating train and test sets
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the rows the
# same way (original row labels kept, columns aligned by name).
train = pd.concat([X_train, y_train])
test = pd.concat([X_test, y_test])

# scaling the data
# The scaler is fitted on the train rows only and then reused for the test rows.
# Wrapping the result in a fresh DataFrame resets the row/column labels to 0..n-1.
# NOTE(review): the 'Fault' label column is still part of these frames, so it is
# scaled and passed on as an input feature -- confirm this is intended.
scaler = MinMaxScaler()
train = pd.DataFrame(scaler.fit_transform(train))
test = pd.DataFrame(scaler.transform(test))

# defining the model
# Dense stack with widths: input -> 32 -> 2 -> 32 -> input. The first hidden
# layer carries an L2 kernel regulariser; the output layer is given no
# activation argument.
act_func = 'relu'
autoencoder = Sequential()
autoencoder.add(
    Dense(
        32,
        input_shape=(X_test.shape[1],),
        activation=act_func,
        kernel_initializer='glorot_uniform',
        kernel_regularizer='l2',
    )
)
autoencoder.add(Dense(2, activation=act_func, kernel_initializer='glorot_uniform'))
autoencoder.add(Dense(32, activation=act_func, kernel_initializer='glorot_uniform'))
autoencoder.add(Dense(X_test.shape[1], kernel_initializer='glorot_uniform'))
autoencoder.compile(optimizer='adam', loss='mse', metrics=['accuracy'])

# fitting the model
# The network is trained to reproduce its own input (train -> train); 10% of
# the training rows are held back by Keras for validation.
NUM_EPOCHS = 100
BATCH_SIZE = 1000
history = autoencoder.fit(
    train,
    train,
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_split=0.1,
    verbose=1,
)

# plotting the loss curve
# Training loss in blue, validation loss in red, one point per epoch.
training_curve = history.history['loss']
validation_curve = history.history['val_loss']
plt.plot(training_curve, 'b', label='Training loss')
plt.plot(validation_curve, 'r', label='Validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss, [mse]')
# Clip the y-axis to the 0-0.05 range.
plt.ylim([0, 0.05])
plt.legend(loc='upper right')
plt.show()

# evaluating the model on the training set
# Per-row reconstruction error = mean absolute difference between the model
# output and the (scaled) input; rows above the fixed threshold TH are flagged.
TH = 0.1
pred_train = autoencoder.predict(train)
train_abs_error = np.abs(pred_train - train)
scored_train = pd.DataFrame({'Loss_mae': np.mean(train_abs_error, axis=1)})
scored_train['Threshold'] = TH
scored_train['Anomaly'] = scored_train['Loss_mae'].gt(scored_train['Threshold'])
scored_train.plot(logy=True, figsize=(16, 4), xlim=[0, len(scored_train)])

# identifying the anomalies in the training set
# 'Anomaly' is already boolean, so it can be used directly as the row mask.
anomalies = scored_train.loc[scored_train['Anomaly']]
print(anomalies)
print(anomalies.shape)

# plotting the anomalies in the training set
# Loss and threshold as lines, flagged rows overlaid as red markers.
f, ax1 = plt.subplots(figsize=(16, 4))
ax1.plot(scored_train.index, scored_train['Loss_mae'], label='Loss(MAE)')
ax1.plot(scored_train.index, scored_train['Threshold'], label='Threshold')
g = sns.scatterplot(
    x=anomalies.index,
    y=anomalies['Loss_mae'],
    label='anomaly',
    color='red',
)
g.set(xlim=(0, len(scored_train.index)))
plt.title('Anomalies')
plt.legend()

# evaluating the model on the test set
# Same scoring as for the training set: per-row mean absolute reconstruction
# error compared against the threshold TH.
pred_test = autoencoder.predict(test)
test_abs_error = np.abs(pred_test - test)
scored_test = pd.DataFrame({"Loss_mae": np.mean(test_abs_error, axis=1)})
scored_test["Threshold"] = TH
scored_test["Anomaly"] = scored_test["Loss_mae"].gt(scored_test["Threshold"])

# Bare expressions kept from the notebook (they render as cell output there).
scored_test

scored_test["Loss_mae"].mean()

scored_test.plot(logy=True, figsize=(16, 4), xlim=[0, len(scored_test)])

# Rows of the test set whose error exceeds the threshold.
IR_anomalies = scored_test.loc[scored_test["Anomaly"]]
print(IR_anomalies)
print(IR_anomalies.shape)

# Loss and threshold as lines, flagged rows overlaid as red markers.
f, ax2 = plt.subplots(figsize=(16, 4))
ax2.plot(scored_test.index, scored_test["Loss_mae"], label="Loss(MAE)")
ax2.plot(scored_test.index, scored_test["Threshold"], label="Threshold")
g = sns.scatterplot(
    x=IR_anomalies.index,
    y=IR_anomalies["Loss_mae"],
    label="anomaly",
    color="red",
)
g.set(xlim=(0, len(scored_test.index)))
plt.title("Anomalies")
plt.legend()

# reporting the final metrics
# `score` was never assigned in the original script (NameError). Evaluate the
# autoencoder on the test set against itself; because the model was compiled
# with metrics=['accuracy'], the result is [loss, accuracy] and score[1] is
# the accuracy value.
# NOTE(review): Keras 'accuracy' on a reconstruction (regression) target is of
# limited meaning -- confirm this is the metric that should be reported.
score = autoencoder.evaluate(test, test, verbose=0)

print(score[1])

print("Accuracy: {:.2f}%".format(score[1] * 100))

# Number of test rows whose reconstruction error exceeded the threshold.
print("Anomalies: {}".format(IR_anomalies["Anomaly"].count()))
