In [None]:
##Libraries 

import numpy as np
import pandas as pd
import os, time, re
import pickle, gzip


import matplotlib.pyplot as plt
import seaborn as sns
color = sns.color_palette()
import matplotlib as mpl

%matplotlib inline


from sklearn import preprocessing as pp
from sklearn.model_selection import train_test_split 
from sklearn.model_selection import StratifiedKFold 
from sklearn.metrics import log_loss
from sklearn.metrics import precision_recall_curve, average_precision_score
from sklearn.metrics import roc_curve, auc, roc_auc_score


import lightgbm as lgb


import tensorflow as tf
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Activation, Dense, Dropout
from keras.layers import BatchNormalization, Input, Lambda
from keras import regularizers
from keras.losses import mse, binary_crossentropy

#### Applies methods from "Hands-On Unsupervised Learning Using Python" by Ankur A. Patel

In [None]:
# Load the preprocessed loan DataFrame (presumably written by the upstream
# preprocessing notebook named in the path — verify).
# NOTE(review): unpickling can execute arbitrary code; only load trusted files.
df = pd.read_pickle('../input/searching-for-bad-loan-data-preprocessing/df_pp.pkl')

In [None]:
# Number of rows per Loan_status class (the column used as the label below).
df['Loan_status'].value_counts()

In [None]:
# Show the class balance of Loan_status twice: as shares (pie) and as counts (bars).
f, ax = plt.subplots(1, 2, figsize=(18, 8))

# `explode` needs one entry per wedge, so this assumes exactly two Loan_status classes.
df['Loan_status'].value_counts().plot.pie(
    explode=[0, 0.1], autopct='%1.1f%%', ax=ax[0], shadow=True)
ax[0].set_title('Loan_status')
ax[0].set_ylabel('')

# Pass the column by keyword: current seaborn releases make every argument after
# `data` keyword-only, so the old positional form no longer works.
sns.countplot(x='Loan_status', data=df, ax=ax[1])
ax[1].set_title('Loan_status')
plt.show()

In [None]:
# Separate the feature matrix from the label column.
X = df.drop('Loan_status', axis=1)
y = df['Loan_status']

# Split BEFORE scaling so the scaler's mean/variance are learned from the training
# rows only; fitting it on the full data set leaks test-set statistics into training.
# The split itself is unchanged (same seed, same stratification).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=2020, stratify=y)

# Standardize every feature. New float DataFrames are built instead of writing the
# scaled values back with .loc, which would try to keep the original column dtypes.
# Note: `X` itself is left unscaled; only X_train / X_test are used downstream.
featuresToScale = X.columns
sX = pp.StandardScaler(copy=True)
X_train = pd.DataFrame(sX.fit_transform(X_train[featuresToScale]),
                       index=X_train.index, columns=featuresToScale)
X_test = pd.DataFrame(sX.transform(X_test[featuresToScale]),
                      index=X_test.index, columns=featuresToScale)

In [None]:
# Independent copies of the train/test features for the autoencoder experiments.
X_train_AE = X_train.copy()
X_test_AE = X_test.copy()

In [None]:
# Number of input features; the models below hard-code this as input_dim=31 and as
# the width of the output layer, so the two must agree.
len(X_train_AE.columns)

In [None]:
def anomalyScores(originalDF, reducedDF):
    """Return a min-max normalised reconstruction error for every row.

    Parameters
    ----------
    originalDF : pandas.DataFrame
        The original rows; its index becomes the index of the result.
    reducedDF : array-like
        The reconstruction of ``originalDF``, same shape.

    Returns
    -------
    pandas.Series
        Sum of squared differences per row, rescaled so the smallest error maps
        to 0 and the largest to 1. If every row has the same error (so there is
        no range to rescale by) all scores are 0.0 instead of NaN.
    """
    squared_diff = (np.asarray(originalDF) - np.asarray(reducedDF)) ** 2
    loss = pd.Series(data=squared_diff.sum(axis=1), index=originalDF.index)

    lowest = loss.min()
    spread = loss.max() - lowest
    # A zero spread would turn the division below into 0/0 for every row, and
    # NaN scores cannot be ranked by the metric functions downstream.
    if len(loss) == 0 or spread == 0:
        return pd.Series(0.0, index=loss.index)
    return (loss - lowest) / spread

In [None]:
def plotResults(trueLabels, anomalyScores, returnPreds = False):
    """Plot the precision-recall curve and the ROC curve for a set of scores.

    Parameters
    ----------
    trueLabels : pandas.Series
        Ground-truth labels.
    anomalyScores : pandas.Series
        Score per row; it is joined to ``trueLabels`` with ``pd.concat(axis=1)``,
        so the two are aligned on their index.
    returnPreds : bool, default False
        When truthy, return the joined frame and the average precision.

    Returns
    -------
    tuple or None
        ``(preds, average_precision)`` when ``returnPreds`` is truthy, where
        ``preds`` has the columns ``trueLabel`` and ``anomalyScore``; otherwise
        ``None``.
    """
    preds = pd.concat([trueLabels, anomalyScores], axis=1)
    preds.columns = ['trueLabel', 'anomalyScore']
    labels = preds['trueLabel']
    scores = preds['anomalyScore']

    # --- Precision-recall curve (drawn on the current figure) ---
    precision, recall, _ = precision_recall_curve(labels, scores)
    average_precision = average_precision_score(labels, scores)

    plt.step(recall, precision, color='k', alpha=0.7, where='post')
    plt.fill_between(recall, precision, step='post', alpha=0.3, color='k')

    plt.xlabel('Recall')
    plt.ylabel('Precision')
    plt.ylim([0.0, 1.05])
    plt.xlim([0.0, 1.0])

    # Keep each title on one source line: a backslash continuation inside the
    # string literal would embed the next line's indentation in the title.
    plt.title('Precision-Recall curve: Average Precision = {0:0.2f}'
              .format(average_precision))

    # --- ROC curve (on a new figure) ---
    fpr, tpr, _ = roc_curve(labels, scores)
    areaUnderROC = auc(fpr, tpr)

    plt.figure()
    plt.plot(fpr, tpr, color='r', lw=2, label='ROC curve')
    plt.plot([0, 1], [0, 1], color='k', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver operating characteristic: Area under the curve = {0:0.2f}'
              .format(areaUnderROC))
    plt.legend(loc="lower right")
    plt.show()

    if returnPreds:
        return preds, average_precision

## 1. First AutoEncoder

In [None]:
# Complete linear autoencoder: 31 inputs -> 31 hidden units -> 31 outputs.
model = Sequential([
    Dense(units=31, activation='linear', input_dim=31),
    Dense(units=31, activation='linear'),
])

In [None]:
# Train with Adam, using mean squared error between input and output as the loss.
model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

In [None]:

num_epochs = 10
batch_size = 32

# The autoencoder's target is its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(
    X_train_AE, X_train_AE,
    validation_data=(X_train_AE, X_train_AE),
    epochs=num_epochs,
    batch_size=batch_size,
    shuffle=True,
    verbose=1,
)

In [None]:

# Reconstruct the test rows and score each one by its normalised reconstruction error.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=31, activation='linear',input_dim=31))
#     model.add(Dense(units=31, activation='linear'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()


In [None]:

# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 2. Undercomplete Single Layer Auto-encoder 

In [None]:
# Undercomplete linear autoencoder: 31 inputs -> 20 hidden units -> 31 outputs.
model = Sequential([
    Dense(units=20, activation='linear', input_dim=31),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# Repeat the 31 -> 20 -> 31 linear autoencoder experiment 10 times, each with a
# freshly built model, and collect the average precision of every run.
num_epochs = 10
batch_size = 32
test_scores = []

for i in range(10):
    model = Sequential([
        Dense(units=20, activation='linear', input_dim=31),
        Dense(units=31, activation='linear'),
    ])
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

    history = model.fit(
        X_train_AE, X_train_AE,
        validation_data=(X_train_AE, X_train_AE),
        epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
    )

    # Score the test rows by reconstruction error and record this run's result.
    predictions = model.predict(X_test, verbose=1)
    anomalyScoresAE = anomalyScores(X_test, predictions)
    preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
    test_scores.append(avgPrecision)
    model.reset_states()



In [None]:
# Summarise the repeated runs: mean average precision and its relative spread
# (standard deviation divided by the mean), then show the raw per-run values.
mean_avg_precision = np.mean(test_scores)
coef_of_variation = np.std(test_scores) / mean_avg_precision
print("Mean average precision over 10 runs: ", mean_avg_precision)
print("Coefficient of variation over 10 runs: ", coef_of_variation)
test_scores

## 2-1. Undercomplete Single-Layer Auto-encoder with a Different Number of Nodes

In [None]:
# Undercomplete linear autoencoder: 31 inputs -> 28 hidden units -> 31 outputs.
model = Sequential([
    Dense(units=28, activation='linear', input_dim=31),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# Repeat the 31 -> 28 -> 31 linear autoencoder experiment 10 times, each with a
# freshly built model, and collect the average precision of every run.
num_epochs = 10
batch_size = 32
test_scores = []

for i in range(10):
    model = Sequential([
        Dense(units=28, activation='linear', input_dim=31),
        Dense(units=31, activation='linear'),
    ])
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

    history = model.fit(
        X_train_AE, X_train_AE,
        validation_data=(X_train_AE, X_train_AE),
        epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
    )

    # Score the test rows by reconstruction error and record this run's result.
    predictions = model.predict(X_test, verbose=1)
    anomalyScoresAE = anomalyScores(X_test, predictions)
    preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
    test_scores.append(avgPrecision)
    model.reset_states()


In [None]:
# Summarise the repeated runs: mean average precision and its relative spread
# (standard deviation divided by the mean), then show the raw per-run values.
mean_avg_precision = np.mean(test_scores)
coef_of_variation = np.std(test_scores) / mean_avg_precision
print("Mean average precision over 10 runs: ", mean_avg_precision)
print("Coefficient of variation over 10 runs: ", coef_of_variation)
test_scores

## 2-2. Add a Second Hidden Layer to the Undercomplete Auto-encoder

In [None]:
# Undercomplete linear autoencoder with two hidden layers: 31 -> 28 -> 27 -> 31.
model = Sequential([
    Dense(units=28, activation='linear', input_dim=31),
    Dense(units=27, activation='linear'),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# Repeat the 31 -> 28 -> 27 -> 31 linear autoencoder experiment 10 times, each with
# a freshly built model, and collect the average precision of every run.
num_epochs = 10
batch_size = 32
test_scores = []

for i in range(10):
    model = Sequential([
        Dense(units=28, activation='linear', input_dim=31),
        Dense(units=27, activation='linear'),
        Dense(units=31, activation='linear'),
    ])
    model.compile(loss='mean_squared_error', optimizer='adam', metrics=['accuracy'])

    history = model.fit(
        X_train_AE, X_train_AE,
        validation_data=(X_train_AE, X_train_AE),
        epochs=num_epochs,
        batch_size=batch_size,
        shuffle=True,
        verbose=1,
    )

    # Score the test rows by reconstruction error and record this run's result.
    predictions = model.predict(X_test, verbose=1)
    anomalyScoresAE = anomalyScores(X_test, predictions)
    preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
    test_scores.append(avgPrecision)
    model.reset_states()



In [None]:
# Summarise the repeated runs: mean average precision and its relative spread
# (standard deviation divided by the mean), then show the raw per-run values.
mean_avg_precision = np.mean(test_scores)
coef_of_variation = np.std(test_scores) / mean_avg_precision
print("Mean average precision over 10 runs: ", mean_avg_precision)
print("Coefficient of variation over 10 runs: ", coef_of_variation)
test_scores

## 2-3. Non-linear Activation(Relu) Undercomplete Auto-encoder

In [None]:
# Undercomplete ReLU autoencoder: 31 -> 28 -> 23 -> 28 -> 31.
# NOTE(review): the output layer is ReLU as well, so reconstructions are never
# negative, while the inputs were standardised earlier and can be — confirm that
# this is intended.
model = Sequential([
    Dense(units=28, activation='relu', input_dim=31),
    Dense(units=23, activation='relu'),
    Dense(units=28, activation='relu'),
    Dense(units=31, activation='relu'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=28, activation='relu',input_dim=31))
#     model.add(Dense(units=23, activation='relu'))


#     model.add(Dense(units=28, activation='relu'))
#     model.add(Dense(units=31, activation='relu'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 2-4. Non-linear Activation (ReLU) Undercomplete Auto-encoder / Dropout


In [None]:
# Undercomplete ReLU autoencoder with 10% dropout after the hidden layer: 31 -> 28 -> 31.
# NOTE(review): the output layer is ReLU as well, so reconstructions are never
# negative, while the inputs were standardised earlier and can be — confirm that
# this is intended.
model = Sequential([
    Dense(units=28, activation='relu', input_dim=31),
    Dropout(0.10),
    Dense(units=31, activation='relu'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=28, activation='relu',input_dim=31))
#     model.add(Dropout(0.10))

#     model.add(Dense(units=31, activation='relu'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 3. OverComplete Auto-Encoder with linear Activation

In [None]:
# Overcomplete linear autoencoder: 31 inputs -> 40 hidden units -> 31 outputs.
model = Sequential([
    Dense(units=40, activation='linear', input_dim=31),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='linear',input_dim=31))
#     model.add(Dense(units=31, activation='linear'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 3-2. OverComplete Auto-Encoder with linear Activation / Dropout   

In [None]:
# Overcomplete linear autoencoder with 10% dropout after the hidden layer: 31 -> 40 -> 31.
model = Sequential([
    Dense(units=40, activation='linear', input_dim=31),
    Dropout(0.10),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='linear',input_dim=31))
#     model.add(Dropout(0.10))
#     model.add(Dense(units=31, activation='linear'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 3-3. Sparse OverComplete Auto-Encoder with linear Activation

In [None]:
# Sparse overcomplete linear autoencoder: 31 -> 40 -> 31, with an L1 penalty on the
# hidden layer's activations (10e-5 is the same value as 1e-4).
model = Sequential([
    Dense(units=40, activation='linear',
          activity_regularizer=regularizers.l1(10e-5), input_dim=31),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='linear',\
#                     activity_regularizer=regularizers.l1(10e-5),input_dim=31))
#     model.add(Dense(units=31, activation='linear'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 3-4. Sparse OverComplete Auto-Encoder with linear Activation / Dropout

In [None]:
# Sparse overcomplete linear autoencoder with 5% dropout: 31 -> 40 -> 31, with an L1
# penalty on the hidden layer's activations (10e-5 is the same value as 1e-4).
model = Sequential([
    Dense(units=40, activation='linear',
          activity_regularizer=regularizers.l1(10e-5), input_dim=31),
    Dropout(0.05),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# Train the network to reproduce its own input.
# NOTE(review): validation_data is the training set itself, so val_loss is not a
# held-out estimate.
history = model.fit(x=X_train_AE, y=X_train_AE,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Score each test row by its normalised reconstruction error and plot PR / ROC curves.
predictions = model.predict(X_test, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
# plotResults(..., True) returns a (DataFrame, average precision) pair; unpack it so
# `preds` is the DataFrame rather than the whole tuple.
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='linear',\
#                     activity_regularizer=regularizers.l1(10e-5),input_dim=31))
#     model.add(Dropout(0.05))
#     model.add(Dense(units=31, activation='linear'))
#     model.compile(optimizer='adam',
#                   loss='mean_squared_error',
#                   metrics=['accuracy'])


#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 4. Noisy Dataset

In [None]:
# Build noisy copies of the train/test features by adding Gaussian noise
# (mean 0, std 1) scaled by noise_factor to every value.
# A seeded generator makes the noise — and therefore every result in this section —
# reproducible; the seed matches the random_state used for the train/test split.
noise_factor = 0.50
noise_rng = np.random.default_rng(2020)

# `+` already returns a new DataFrame, so the inputs are not modified.
X_train_AE_noisy = X_train_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_train_AE.shape)
X_test_AE_noisy = X_test_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_test_AE.shape)

## 4-1. Noise Elimination UnderComplete AutoEncoder

In [None]:

# Undercomplete linear autoencoder trained on the noisy data: 31 -> 29 -> 31.
model = Sequential([
    Dense(units=29, activation='linear', input_dim=31),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# NOTE(review): the fit target is the noisy frame itself (y=X_train_AE_noisy); a
# denoising setup would usually target the clean X_train_AE — confirm which is
# intended. Validation uses the clean training data as both input and target.
history = model.fit(x=X_train_AE_noisy, y=X_train_AE_noisy,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Reconstruct from the noisy test rows, but measure the error against the clean ones.
predictions = model.predict(X_test_AE_noisy, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
# This is a single run, so its score is NOT appended to `test_scores`: that list
# still holds the results of an earlier, different model's repeated runs.
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []
# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=29, activation='linear', input_dim=31))
#     model.add(Dense(units=31, activation='linear'))


#     model.compile(optimizer='adam',
#                           loss='mean_squared_error',
#                           metrics=['accuracy'])



#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 4-2. Noise Elimination OverComplete AutoEncoder

In [None]:
# Rebuild the noisy train/test features: Gaussian noise (mean 0, std 1) scaled by
# noise_factor. The generator is seeded so the run is reproducible; the seed matches
# the random_state used for the train/test split.
noise_factor = 0.50
noise_rng = np.random.default_rng(2020)
X_train_AE_noisy = X_train_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_train_AE.shape)
X_test_AE_noisy = X_test_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_test_AE.shape)

# Sparse overcomplete linear autoencoder with 5% dropout: 31 -> 40 -> 31, with an L1
# penalty on the hidden layer's activations (10e-5 is the same value as 1e-4).
model = Sequential([
    Dense(units=40, activation='linear',
          activity_regularizer=regularizers.l1(10e-5), input_dim=31),
    Dropout(0.05),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# NOTE(review): the fit target is the noisy frame itself (y=X_train_AE_noisy); a
# denoising setup would usually target the clean X_train_AE — confirm which is
# intended. Validation uses the clean training data as both input and target.
history = model.fit(x=X_train_AE_noisy, y=X_train_AE_noisy,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Reconstruct from the noisy test rows, but measure the error against the clean ones.
predictions = model.predict(X_test_AE_noisy, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
# This is a single run, so its score is NOT appended to `test_scores`: that list
# still holds the results of an earlier, different model's repeated runs.
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []

# noise_factor = 0.50
# X_train_AE_noisy = X_train_AE.copy() + noise_factor * \
#  np.random.normal(loc=0.0, scale=1.0, size=X_train_AE.shape)
# X_test_AE_noisy = X_test_AE.copy() + noise_factor * \
#  np.random.normal(loc=0.0, scale=1.0, size=X_test_AE.shape) 

# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='linear', activity_regularizer=regularizers.l1(10e-5), input_dim=31))
#     model.add(Dropout(0.05))
#     model.add(Dense(units=31, activation='linear'))

#     model.compile(optimizer='adam',
#                           loss='mean_squared_error',
#                           metrics=['accuracy'])



#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores

## 4-3. Noise Elimination OverComplete AutoEncoder with non-linear Activation

In [None]:
# Rebuild the noisy train/test features: Gaussian noise (mean 0, std 1) scaled by
# noise_factor. The generator is seeded so the run is reproducible; the seed matches
# the random_state used for the train/test split.
noise_factor = 0.50
noise_rng = np.random.default_rng(2020)
X_train_AE_noisy = X_train_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_train_AE.shape)
X_test_AE_noisy = X_test_AE + noise_factor * \
    noise_rng.normal(loc=0.0, scale=1.0, size=X_test_AE.shape)

# Sparse overcomplete autoencoder with a ReLU hidden layer, 5% dropout and a linear
# output layer: 31 -> 40 -> 31. The hidden activations carry an L1 penalty
# (10e-5 is the same value as 1e-4).
model = Sequential([
    Dense(units=40, activation='relu',
          activity_regularizer=regularizers.l1(10e-5), input_dim=31),
    Dropout(0.05),
    Dense(units=31, activation='linear'),
])
model.compile(optimizer='adam',
              loss='mean_squared_error',
              metrics=['accuracy'])

num_epochs = 10
batch_size = 32

# NOTE(review): the fit target is the noisy frame itself (y=X_train_AE_noisy); a
# denoising setup would usually target the clean X_train_AE — confirm which is
# intended. Validation uses the clean training data as both input and target.
history = model.fit(x=X_train_AE_noisy, y=X_train_AE_noisy,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    shuffle=True,
                    validation_data=(X_train_AE, X_train_AE),
                    verbose=1)

# Reconstruct from the noisy test rows, but measure the error against the clean ones.
predictions = model.predict(X_test_AE_noisy, verbose=1)
anomalyScoresAE = anomalyScores(X_test, predictions)
preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
# This is a single run, so its score is NOT appended to `test_scores`: that list
# still holds the results of an earlier, different model's repeated runs.
model.reset_states()

In [None]:
# # mean of mean precisions (10 times)

# test_scores = []

# noise_factor = 0.50
# X_train_AE_noisy = X_train_AE.copy() + noise_factor * \
#  np.random.normal(loc=0.0, scale=1.0, size=X_train_AE.shape)
# X_test_AE_noisy = X_test_AE.copy() + noise_factor * \
#  np.random.normal(loc=0.0, scale=1.0, size=X_test_AE.shape) 

# for i in range(0,10):
#     model = Sequential()
#     model.add(Dense(units=40, activation='relu', activity_regularizer=regularizers.l1(10e-5), input_dim=31))
#     model.add(Dropout(0.05))
#     model.add(Dense(units=31, activation='linear'))

#     model.compile(optimizer='adam',
#                           loss='mean_squared_error',
#                           metrics=['accuracy'])



#     num_epochs = 10
#     batch_size = 32

#     history = model.fit(x=X_train_AE, y=X_train_AE,
#                         epochs=num_epochs,
#                         batch_size=batch_size,
#                         shuffle=True,
#                         validation_data=(X_train_AE, X_train_AE),
#                         verbose=1)


#     predictions = model.predict(X_test, verbose=1)
#     anomalyScoresAE = anomalyScores(X_test, predictions)
#     preds, avgPrecision = plotResults(y_test, anomalyScoresAE, True)
#     test_scores.append(avgPrecision)
#     model.reset_states()

In [None]:
# print("Mean average precision over 10 runs: ", np.mean(test_scores))
# print("Coefficient of variation over 10 runs: ", np.std(test_scores)/ \
#                                                 np.mean(test_scores))
# test_scores