In [1]:
# Author: Kaylani Bochie
# github.com/kaylani2
# kaylani AT gta DOT ufrj DOT br

### K: Model: Autoencoder

import sys
import time
import pandas as pd
import os
import math
sys.path.insert(1, '../')
import numpy as np
from numpy import mean, std
from unit import remove_columns_with_one_value, remove_nan_columns, load_dataset
from unit import display_general_information, display_feature_distribution
from collections import Counter
#from imblearn.over_sampling import RandomOverSampler, RandomUnderSampler
import sklearn
from sklearn import set_config
from sklearn.impute import SimpleImputer
from sklearn.svm import SVC, LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import LabelEncoder, OneHotEncoder, OrdinalEncoder
from sklearn.preprocessing import StandardScaler, RobustScaler, MinMaxScaler
from sklearn.metrics import confusion_matrix, precision_score, recall_score
from sklearn.metrics import f1_score, classification_report, accuracy_score
from sklearn.metrics import cohen_kappa_score, mean_squared_error
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split, PredefinedSplit, RandomizedSearchCV
from sklearn.model_selection import GridSearchCV, RepeatedStratifiedKFold, KFold
from sklearn.model_selection import cross_val_score
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import f_classif, chi2, mutual_info_classif
from sklearn.utils import class_weight
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor
import keras.utils
from keras import metrics
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.layers import Conv2D, MaxPooling2D, Flatten, LSTM
from keras.optimizers import RMSprop, Adam
from keras.constraints import maxnorm

In [2]:
###############################################################################
## Define constants
###############################################################################
pd.set_option ('display.max_rows', None)
pd.set_option ('display.max_columns', 5)
BOT_IOT_DIRECTORY = '../../../../../datasets/bot-iot/'
BOT_IOT_FEATURE_NAMES = 'UNSW_2018_IoT_Botnet_Dataset_Feature_Names.csv'
BOT_IOT_FILE_5_PERCENT_SCHEMA = 'UNSW_2018_IoT_Botnet_Full5pc_{}.csv' # 1 - 4
FIVE_PERCENT_FILES = 4
BOT_IOT_FILE_FULL_SCHEMA = 'UNSW_2018_IoT_Botnet_Dataset_{}.csv' # 1 - 74
FULL_FILES = 74
FILE_NAME = BOT_IOT_DIRECTORY + BOT_IOT_FILE_5_PERCENT_SCHEMA
FEATURES = BOT_IOT_DIRECTORY + BOT_IOT_FEATURE_NAMES
NAN_VALUES = ['?', '.']
TARGET = 'attack'
INDEX_COLUMN = 'pkSeqID'
LABELS = ['attack', 'category', 'subcategory']
### K: The random seed may be given as the first command line argument.
### Fall back to 0 when the argument is missing (IndexError) or is not an
### integer (ValueError), e.g. when sys.argv [1] is a non-numeric flag.
### Any other exception is no longer silently swallowed.
STATE = 0
try:
  STATE = int (sys.argv [1])
except (IndexError, ValueError):
  pass
#for STATE in [1, 2, 3, 4, 5]:
np.random.seed (STATE)
print ('STATE:', STATE)

STATE: 0


In [3]:
###############################################################################
## Load dataset
###############################################################################
### K: load_dataset comes from the project-local `unit` module. It receives the
### file name schema (with a '{}' placeholder), the number of files, the index
### column name and the strings to be treated as NaN.
### NOTE(review): presumably the files are read one by one and concatenated
### into a single dataframe indexed by INDEX_COLUMN -- confirm in unit.py.
df = load_dataset (FILE_NAME, FIVE_PERCENT_FILES, INDEX_COLUMN, NAN_VALUES)

Reading ../../../../../datasets/bot-iot/UNSW_2018_IoT_Botnet_Full5pc_1.csv
Reading ../../../../../datasets/bot-iot/UNSW_2018_IoT_Botnet_Full5pc_2.csv
Reading ../../../../../datasets/bot-iot/UNSW_2018_IoT_Botnet_Full5pc_3.csv
Reading ../../../../../datasets/bot-iot/UNSW_2018_IoT_Botnet_Full5pc_4.csv


In [4]:
###############################################################################
## Clean dataset
###############################################################################
###############################################################################
### Drop every column that only ever holds a single value
df, log = remove_columns_with_one_value (df, verbose = False)
print (log)


###############################################################################
### Drop redundant columns, network-specific columns and unused targets
### K: the *_number columns are numeric copies of columns that already exist.
### K: category and subcategory are alternative labels.
### K: saddr and daddr could tie the model to a single network.
redundant_columns = ['state_number', 'proto_number', 'flgs_number']
other_targets = ['category', 'subcategory']
misc_columns = ['saddr', 'daddr']
for message, dropped_group in (('Removing redundant columns:', redundant_columns),
                               ('Removing useless targets:', other_targets),
                               ('Removing misc columns:', misc_columns)):
  print (message, dropped_group)
columns_to_remove = redundant_columns + other_targets + misc_columns
df.drop (columns = columns_to_remove, inplace = True)

###############################################################################
### Drop columns where NaN values dominate (threshold argument: 1/2)
df, log = remove_nan_columns (df, 1/2, verbose = False)
print (log)

###############################################################################
### Ordinal-encode the categorical features
### K: One encoder instance is refitted for each column, in this exact order.
### The port columns are cast to str before being encoded.
print ('Encoding categorical features (ordinal encoding).')
my_encoder = OrdinalEncoder ()
for column_name, cast_to_str in (('flgs', False), ('proto', False),
                                 ('sport', True), ('dport', True),
                                 ('state', False)):
  column_values = df [column_name]
  if cast_to_str:
    column_values = column_values.astype (str)
  df [column_name] = my_encoder.fit_transform (
      column_values.values.reshape (-1, 1))
print ('Objects:', list (df.select_dtypes ( ['object']).columns))

While removing single value columns: No columns dropped.
Removing redundant columns: ['state_number', 'proto_number', 'flgs_number']
Removing useless targets: ['category', 'subcategory']
Removing misc columns: ['saddr', 'daddr']
While removing nan value columns: No columns dropped.
Encoding categorical features (ordinal encoding).
Objects: []


In [5]:
###############################################################################
## Quick sanity check
###############################################################################
### K: display_general_information comes from the project-local `unit` module.
### The output recorded for this cell shows the dataframe shape, the first
### entries and the column dtypes.
display_general_information (df)

Dataframe shape (lines, columns): (3668522, 38) 

First 5 entries:
                 stime  flgs  ...  Pkts_P_State_P_Protocol_P_SrcIP  attack
pkSeqID                      ...                                         
1        1.528089e+09   0.0  ...                              602       1
2        1.528089e+09   0.0  ...                                6       1
3        1.528089e+09   0.0  ...                              602       1
4        1.528089e+09   0.0  ...                              602       1
5        1.528089e+09   0.0  ...                              602       1

[5 rows x 38 columns] 

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3668522 entries, 1 to 3668522
Data columns (total 38 columns):
 #   Column                            Dtype  
---  ------                            -----  
 0   stime                             float64
 1   flgs                              float64
 2   proto                             float64
 3   sport                             fl

In [6]:
###############################################################################
## Split dataset into train and test sets
###############################################################################
### K: Dataset is too big? Drop.
### NOTE: df is rebound here, so running this cell again without reloading the
### dataset drops half of the remaining samples again.
drop_indices = np.random.choice (df.index, int (df.shape [0] * 0.5),
                                 replace = False)
df = df.drop (drop_indices)
###############################################################################
### Isolate attack and normal samples
# 0 == normal
# 1 == attack
mask = df [TARGET] == 0
df_normal = df [mask]
df_attack = df [~mask]

print ('Attack set:')
print (df_attack [TARGET].value_counts ())
print ('Normal set:')
print (df_normal [TARGET].value_counts ())

### Sample as many random attacks as there are normal samples and take them
### out of the attack set, so they never show up in the train set.
df_random_attacks = df_attack.sample (n = df_normal.shape [0], random_state = STATE)
df_attack = df_attack.drop (df_random_attacks.index)

### Assemble train set (only attacks)
X_train_df = df_attack.loc [:, df.columns != TARGET]
y_train_df = df_attack [TARGET]
print ('Train set:')
print (df_attack [TARGET].value_counts ())

### Assemble test set (50/50 attacks and non-attacks)
### K: A single concat is enough; no empty placeholder dataframe is needed.
df_test = pd.concat ( [df_normal, df_random_attacks])
print ('Test set:')
print (df_test [TARGET].value_counts ())
X_test_df = df_test.loc [:, df.columns != TARGET]
y_test_df = df_test [TARGET]
### K: y_test is required to plot the roc curve in the end
NUMBER_OF_FEATURES = X_train_df.shape [1]
print (NUMBER_OF_FEATURES)

Attack set:
1    1834017
Name: attack, dtype: int64
Normal set:
0    244
Name: attack, dtype: int64
Train set:
1    1833773
Name: attack, dtype: int64
Test set:
1    244
0    244
Name: attack, dtype: int64
37


In [7]:
from sklearn.compose import TransformedTargetRegressor
###############################################################################
## Create wrapper function for keras
## Usage: clf = KerasRegressor (build_fn = create_model, verbose = 2)
## Parameters epochs and batch_size are standard from KerasRegressor
###############################################################################
def create_model (learn_rate = 0.01, dropout_rate = 0.0, weight_constraint = 0,
                  input_shape = NUMBER_OF_FEATURES, metrics = ['mse']):
  """Build and compile the dense autoencoder used by the keras wrapper.

  The network is input -> 64 -> 32 -> 8 -> 32 -> input_shape, with relu on the
  hidden layers and a linear output layer, trained with mean squared error.

  Parameters:
    learn_rate: learning rate given to the Adam optimizer.
    dropout_rate: accepted so it can be listed in a parameter grid, but the
      network built here does not use it.
    weight_constraint: accepted so it can be listed in a parameter grid, but
      the network built here does not use it.
    input_shape: number of input features; also the size of the output layer.
    metrics: metrics handed to model.compile (). The default list is shared
      between calls and is never modified here.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential ()
  model.add (Dense (units = 64, activation = 'relu',
                   input_shape = (input_shape, )))
  model.add (Dense (32, activation = 'relu'))
  ### K: Bottleneck layer.
  model.add (Dense (8,  activation = 'relu'))
  model.add (Dense (32, activation = 'relu'))
  ### K: Linear output with one unit per input feature (reconstruction).
  model.add (Dense (input_shape, activation = None))
  ### K: `lr` is a deprecated alias of `learning_rate`.
  model.compile (loss = 'mean_squared_error',
                 optimizer = Adam (learning_rate = learn_rate),
                 metrics = metrics)
  return model

In [13]:
###############################################################################
## Build the processing pipeline used by the grid search
###############################################################################
###############################################################################
### standard_scaler ### K: Scales every non object feature except the target
object_features = list (df.select_dtypes ( ['object']).columns)
remaining_features = [column for column in df.columns
                      if column not in object_features]
remaining_features.remove (TARGET)

standard_scaler_features = remaining_features
my_scaler = StandardScaler ()
steps = [('scaler', my_scaler)]
standard_scaler_transformer = Pipeline (steps)

###############################################################################
### Column transformer: routes the selected columns through the scaler
preprocessor = ColumnTransformer (
    transformers = [('sca',
                     standard_scaler_transformer,
                     standard_scaler_features)])

###############################################################################
### K: Feature selection (SelectKBest wrapped in its own Pipeline) is disabled
### in this pipeline.

###############################################################################
### Full pipeline: preprocessing followed by the keras regressor. The regressor
### is wrapped in a TransformedTargetRegressor that uses my_scaler as the
### target transformer.
clf = KerasRegressor (build_fn = create_model, verbose = 2)
clf = Pipeline (
    steps = [('preprocessor', preprocessor),
             ('ttregressor',
              TransformedTargetRegressor (regressor = clf,
                                          transformer = my_scaler))],
    verbose = True)

In [10]:
### K: List every parameter name exposed by the pipeline; these are the keys
### that can be used in param_grid for the grid search.
print (sorted (clf.get_params ().keys ()))

['memory', 'preprocessor', 'preprocessor__n_jobs', 'preprocessor__remainder', 'preprocessor__sca', 'preprocessor__sca__memory', 'preprocessor__sca__scaler', 'preprocessor__sca__scaler__copy', 'preprocessor__sca__scaler__with_mean', 'preprocessor__sca__scaler__with_std', 'preprocessor__sca__steps', 'preprocessor__sca__verbose', 'preprocessor__sparse_threshold', 'preprocessor__transformer_weights', 'preprocessor__transformers', 'preprocessor__verbose', 'steps', 'ttregressor', 'ttregressor__check_inverse', 'ttregressor__func', 'ttregressor__inverse_func', 'ttregressor__regressor', 'ttregressor__regressor__build_fn', 'ttregressor__regressor__verbose', 'ttregressor__transformer', 'ttregressor__transformer__copy', 'ttregressor__transformer__with_mean', 'ttregressor__transformer__with_std', 'verbose']


In [16]:
###############################################################################
### Run grid search
#sorted (sklearn.metrics.SCORERS.keys ())
### K: How to set classifier__input_shape to match feature_selector__k?
param_grid = {#'feature_selector__feature_selector__score_func' : [f_classif],
              #'feature_selector__feature_selector__k' : [9],
              'ttregressor__regressor__input_shape' : [NUMBER_OF_FEATURES],
              'ttregressor__regressor__batch_size' : [50],#, 500, 50],
              'ttregressor__regressor__learn_rate' : [0.001],#, 0.01, 0.1],
              #'classifier__dropout_rate' : [0.0, 0.1],
              'ttregressor__regressor__epochs' : [1]}#, 5]}#, 7]}
print ('param_grid:', param_grid)
#cv = RepeatedStratifiedKFold (n_splits = 5, n_repeats = 1, random_state = STATE)
### K: random_state is only meaningful when shuffle = True. Recent versions of
### scikit-learn raise a ValueError when it is given with shuffle = False, so
### it is left out; the folds are the same.
cv = KFold (n_splits = 5, shuffle = False)
grid = GridSearchCV (estimator = clf, param_grid = param_grid,
                     scoring = 'neg_mean_squared_error', verbose = 1, n_jobs = 1, cv = cv)
### K: Autoencoder: the features are both the input and the target.
### NOTE(review): the scorer compares the predictions against the unscaled
### X_train_df, so the reported MSE is presumably dominated by the columns with
### the largest magnitudes -- confirm this is the intended score.
grid_result = grid.fit (X_train_df, X_train_df)

print ('Best: %f using %s' % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_ ['mean_test_score']
stds = grid_result.cv_results_ ['std_test_score']
params = grid_result.cv_results_ ['params']
### K: The loop variables must not be called `mean`, otherwise the `mean`
### imported from numpy is overwritten.
for mean_score, std_score, candidate in zip (means, stds, params):
  print ('%f (%f) with: %r' % (mean_score, std_score, candidate))

param_grid: {'ttregressor__regressor__input_shape': [37], 'ttregressor__regressor__batch_size': [50], 'ttregressor__regressor__learn_rate': [0.001], 'ttregressor__regressor__epochs': [1]}
Fitting 5 folds for each of 1 candidates, totalling 5 fits


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   2.4s
29341/29341 - 28s - loss: 0.2238 - mse: 0.2238
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  30.0s
7336/7336 - 4s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   1.3s
29341/29341 - 27s - loss: 0.2022 - mse: 0.2022
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  29.5s
7336/7336 - 4s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   1.3s
29341/29341 - 27s - loss: 0.2243 - mse: 0.2243
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  29.4s
7336/7336 - 4s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   1.3s
29341/29341 - 28s - loss: 0.2124 - mse: 0.2124
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  30.2s
7336/7336 - 4s
[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   1.3s
29341/29341 - 28s - loss: 0.3207 - mse: 0.3207
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  3

[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed:  3.0min finished


[Pipeline] ...... (step 1 of 2) Processing preprocessor, total=   1.9s
36676/36676 - 36s - loss: 0.2093 - mse: 0.2093
[Pipeline] ....... (step 2 of 2) Processing ttregressor, total=  38.3s
Best: -2495428383060.134277 using {'ttregressor__regressor__batch_size': 50, 'ttregressor__regressor__epochs': 1, 'ttregressor__regressor__input_shape': 37, 'ttregressor__regressor__learn_rate': 0.001}
-2495428383060.134277 (4990536887098.318359) with: {'ttregressor__regressor__batch_size': 50, 'ttregressor__regressor__epochs': 1, 'ttregressor__regressor__input_shape': 37, 'ttregressor__regressor__learn_rate': 0.001}


In [None]:
###############################################################################
## Define processing pipeline for training (hyperparameters are optimized)
###############################################################################
###############################################################################
### standard_scaler ### K: Non object features
object_features = (list (df.select_dtypes ( ['object']).columns))
remaining_features = list (df.columns)
for feature in object_features:
  remaining_features.remove (feature)
remaining_features.remove (TARGET)

standard_scaler_features = remaining_features
my_scaler = StandardScaler ()
steps = list ()
steps.append (('scaler', my_scaler))
standard_scaler_transformer = Pipeline (steps)

###############################################################################
### Assemble column transformer
preprocessor = ColumnTransformer (transformers = [
             ('sca', standard_scaler_transformer, standard_scaler_features)])

###############################################################################
### feature selector
# Best: 0.999986 using {'classifier__batch_size': 5000,
NUMBER_OF_FEATURES = 9
SCORE_FUNCTION = f_classif
### NOTE(review): f_classif is a supervised score, but y_train_df only holds
### attack samples (a single class) -- confirm this selector is meaningful.
my_feature_selector = SelectKBest (score_func = SCORE_FUNCTION, k = NUMBER_OF_FEATURES)
steps = list ()
steps.append (('feature_selector', my_feature_selector))
feature_selector_transformer = Pipeline (steps)

###############################################################################
### Assemble pipeline for training
METRICS = [keras.metrics.MeanSquaredError (name = 'MSE'),
           keras.metrics.RootMeanSquaredError (name = 'RMSE'),
           keras.metrics.MeanAbsoluteError (name = 'MAE'),]
BATCH_SIZE = 5000
NUMBER_OF_EPOCHS = 70
LEARN_RATE = 0.001
### K: DROPOUT_RATE was used below without ever being defined (NameError).
### 0.0 is the default value of create_model's dropout_rate parameter.
DROPOUT_RATE = 0.0
WEIGHT_CONSTRAINT = 0
clf = KerasRegressor (build_fn = create_model, learn_rate = LEARN_RATE,
                      dropout_rate = DROPOUT_RATE,
                      weight_constraint = WEIGHT_CONSTRAINT,
                      input_shape = NUMBER_OF_FEATURES,
                      epochs = NUMBER_OF_EPOCHS, batch_size = BATCH_SIZE,
                      verbose = 2, metrics = METRICS, workers = 0,
                      use_multiprocessing = True)
clf = Pipeline (steps = [ ('preprocessor', preprocessor),
                       ('feature_selector', feature_selector_transformer),
                       ('classifier', clf)],
                verbose = True)

###############################################################################
### Train
### NOTE(review): the network outputs NUMBER_OF_FEATURES values per sample, but
### it is fitted against y_train_df (one label per sample). The grid search
### fits against X_train_df instead -- confirm which target is intended.
startTime = time.time ()
clf = clf.fit (X_train_df, y_train_df)
print (str (time.time () - startTime), 's to train model.')


### K: NOTE: I STOPPED HERE. THE PERFORMANCE ANALYSIS STILL NEEDS TO BE ADJUSTED.
### NOTE(review): clf ends in a regressor, so clf.predict () presumably returns
### continuous values. They have to be converted into 0/1 labels before the
### classification metrics below are meaningful.
###############################################################################
## Evaluate performance
###############################################################################
### 0 == normal, 1 == attack. Attacks come first so that they are the positive
### class, regardless of the order in which the labels appear in the dataset.
CLASS_LABELS = [1, 0]

def report_performance (y_true, y_pred, labels = CLASS_LABELS):
  """Print the classification report used for both the train and test sets.

  Parameters:
    y_true: expected labels.
    y_pred: predicted labels.
    labels: label order for the confusion matrix, positive class first.

  Returns:
    (confusion matrix, tp, tn, fp, fn), with labels [0] as the positive class.
  """
  my_confusion_matrix = confusion_matrix (y_true, y_pred, labels = labels)
  ### K: Rows are the true labels and columns the predicted ones, so with the
  ### positive class first the flattened matrix reads tp, fn, fp, tn.
  tp, fn, fp, tn = my_confusion_matrix.ravel ()
  print ('Confusion matrix:')
  print (my_confusion_matrix)
  print ('Accuracy:', accuracy_score (y_true, y_pred))
  print ('Precision:', precision_score (y_true, y_pred, average = 'macro'))
  print ('Recall:', recall_score (y_true, y_pred, average = 'macro'))
  print ('F1:', f1_score (y_true, y_pred, average = 'macro'))
  print ('Cohen Kappa:', cohen_kappa_score (y_true, y_pred, labels = labels))
  print ('TP:', tp)
  print ('TN:', tn)
  print ('FP:', fp)
  print ('FN:', fn)
  return my_confusion_matrix, tp, tn, fp, fn

print ('\nPerformance on TRAIN set:')
y_pred = clf.predict (X_train_df)
my_confusion_matrix, tp, tn, fp, fn = report_performance (y_train_df, y_pred)

### K: Only before publishing... Don't peek.
print ('\nPerformance on TEST set:')
y_pred = clf.predict (X_test_df)
my_confusion_matrix, tp, tn, fp, fn = report_performance (y_test_df, y_pred)




###############################################################################
# Hyperparameter tuning
### NOTE(review): X_train and X_val are not defined anywhere in the visible
### notebook; this part looks like it was carried over from an older script.
### K: test_fold holds one entry per sample: -1 for each X_train row and 0 for
### each X_val row. PredefinedSplit keeps the -1 samples out of every test
### fold, so the search uses a single train/validation split.
test_fold = np.repeat ( [-1, 0], [X_train.shape [0], X_val.shape [0]])
myPreSplit = PredefinedSplit (test_fold)
def create_model (learn_rate = 0.01, dropout_rate = 0.0, weight_constraint = 0):
  """Build and compile the dense autoencoder sized after X_train.

  This definition replaces any create_model defined earlier in the session.
  The network is X_train.shape [1] -> 32 -> 8 -> 32 -> X_train.shape [1], with
  relu on the hidden layers and a linear output, trained with mean squared
  error.

  Parameters:
    learn_rate: learning rate given to the Adam optimizer. It used to be
      ignored (the optimizer was the string 'adam'), which made every
      learn_rate candidate of the grid search train the same model.
    dropout_rate: accepted so it can be listed in a parameter grid, but the
      network built here does not use it.
    weight_constraint: accepted so it can be listed in a parameter grid, but
      the network built here does not use it.

  Returns:
    The compiled Sequential model.
  """
  model = Sequential ()
  model.add (Dense (X_train.shape [1], activation = 'relu',
                    input_shape = (X_train.shape [1], )))
  model.add (Dense (32, activation = 'relu'))
  model.add (Dense (8,  activation = 'relu'))
  model.add (Dense (32, activation = 'relu'))
  model.add (Dense (X_train.shape [1], activation = None))
  model.compile (loss = 'mean_squared_error',
                 optimizer = Adam (learning_rate = learn_rate),
                 metrics = ['mse'])
  return model

model = KerasRegressor (build_fn = create_model, verbose = 2)
### K: Candidate values for each hyperparameter (the commented values are
### other candidates that are currently switched off).
batch_size = [30]#, 50]
epochs = [5]#, 5, 10]
learn_rate = [0.01, 0.1]#, 0.2, 0.3]
dropout_rate = [0.0, 0.2]#, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
weight_constraint = [0]#1, 2, 3, 4, 5]
param_grid = dict (batch_size = batch_size, epochs = epochs,
                   dropout_rate = dropout_rate, learn_rate = learn_rate,
                   weight_constraint = weight_constraint)
grid = GridSearchCV (estimator = model, param_grid = param_grid,
                     scoring = 'neg_mean_squared_error', cv = myPreSplit,
                     verbose = 2, n_jobs = 16)

### K: Autoencoder: the stacked train + validation samples are both the input
### and the target. myPreSplit decides which rows are used for validation.
grid_result = grid.fit (np.vstack ((X_train, X_val)),#, axis = 1),
                        np.vstack ((X_train, X_val)))#, axis = 1))
print (grid_result.best_params_)

print ("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_ ['mean_test_score']
stds = grid_result.cv_results_ ['std_test_score']
params = grid_result.cv_results_ ['params']
### K: The loop variables must not be called `mean`, otherwise the `mean`
### imported from numpy is overwritten.
for mean_score, std_score, candidate in zip (means, stds, params):
  print ("%f (%f) with: %r" % (mean_score, std_score, candidate))
#
## Best: -0.129429 using {'batch_size': 30, 'dropout_rate': 0.0, 'epochs': 5, 'learn_rate': 0.1, 'weight_constraint': 0}


###############################################################################
## Finished model
### K: Hyperparameters taken from the best grid search result.
NUMBER_OF_EPOCHS = 5
BATCH_SIZE = 30
LEARNING_RATE = 0.1

print ('\nCreating learning model.')
### K: Same architecture as create_model: a relu layer as wide as the input,
### 32 -> 8 -> 32 relu layers and a linear output as wide as the input.
bestModel = Sequential ()
bestModel.add (Dense (X_train.shape [1], activation = 'relu',
                      input_shape = (X_train.shape [1], )))
bestModel.add (Dense (32, activation = 'relu'))
bestModel.add (Dense (8,  activation = 'relu'))
bestModel.add (Dense (32, activation = 'relu'))
bestModel.add (Dense (X_train.shape [1], activation = None))


###############################################################################
## Compile the network
###############################################################################
print ('\nCompiling the network.')
### K: `lr` is a deprecated alias of `learning_rate`.
bestModel.compile (loss = 'mean_squared_error',
                   optimizer = Adam (learning_rate = LEARNING_RATE),
                   metrics = ['mse'])#,metrics.Precision ()])
print ('Model summary:')
bestModel.summary ()


###############################################################################
## Fit the network
###############################################################################
### K: Autoencoder training: X_train is both the input and the target, and
### X_val plays the same double role for validation. The elapsed wall clock
### time is printed once training is done.
print ('\nFitting the network.')
startTime = time.time ()
history = bestModel.fit (X_train, X_train,
                         batch_size = BATCH_SIZE,
                         epochs = NUMBER_OF_EPOCHS,
                         verbose = 2, #1 = progress bar, not useful for logging
                         workers = 0,
                         use_multiprocessing = True,
                         #class_weight = 'auto',
                         validation_data = (X_val, X_val))
print (str (time.time () - startTime), 's to train model.')


###############################################################################
## Analyze results
###############################################################################
X_val_pred   = bestModel.predict (X_val)
X_train_pred = bestModel.predict (X_train)
print ('Train error:'     , mean_squared_error (X_train_pred, X_train))
print ('Validation error:', mean_squared_error (X_val_pred, X_val))

#SAMPLES = 50
#print ('Error on first', SAMPLES, 'samples:')
#print ('MSE (pred, real)')
#for pred_sample, real_sample in zip (X_val_pred [:SAMPLES], X_val [:SAMPLES]):
#  print (mean_squared_error (pred_sample, real_sample))

### K: This looks like another hyperparameter to be adjusted by using a
### separate validation set that contains normal and anomaly samples.
### K: I've guessed 1%, this may be a future line of research.
THRESHOLD_SAMPLE_PERCENTAGE = 1/100

### K: Reconstruction error (MSE) of each sample.
train_mse_element_wise = np.mean (np.square (X_train_pred - X_train), axis = 1)
val_mse_element_wise = np.mean (np.square (X_val_pred - X_val), axis = 1)

max_threshold_val = np.max (val_mse_element_wise)
print ('max_Thresh val:', max_threshold_val)

### K: Number of largest validation errors used to derive the threshold. It is
### computed once and kept between 1 and the number of validation samples:
### zero samples would give a NaN median, and a count equal to the array size
### would be out of bounds for np.partition.
top_n_samples = int (round (val_mse_element_wise.shape [0] *
                            THRESHOLD_SAMPLE_PERCENTAGE))
top_n_samples = min (val_mse_element_wise.shape [0], max (1, top_n_samples))
print ('samples:')
print (top_n_samples)

### K: Partitioning the negated errors puts the top_n_samples largest errors
### (in no particular order) in the first top_n_samples positions.
top_n_values_val = np.partition (-val_mse_element_wise, top_n_samples - 1)
top_n_values_val = -top_n_values_val [:top_n_samples]


### K: The classification threshold is the median of the N largest costs
### obtained when validating the network on the validation set. N is a
### hyperparameter that can be tuned, but that requires a validation set with
### anomalous samples in addition to the current validation set, which only
### has non anomalous samples. @TODO: Develop and validate the set with this
### new technique.
threshold = np.median (top_n_values_val)
print ('Thresh val:', threshold)


### K: NOTE: Only look at test results when publishing...
### Execution stops here on purpose; everything below only runs once the
### sys.exit () call is removed.
sys.exit ()
X_test_pred = bestModel.predict (X_test)
print (X_test_pred.shape)
print ('Test error:', mean_squared_error (X_test_pred, X_test))


### K: Despite its name, y_pred holds the reconstruction error (MSE) of each
### test sample, not a predicted label.
y_pred = np.mean (np.square (X_test_pred - X_test), axis = 1)

### K: Order the (label, error) pairs by label, then by error.
y_test, y_pred = zip (*sorted (zip (y_test, y_pred)))

# 0 == normal
# 1 == attack
### K: The network only saw attacks, so an error at or above the threshold is
### read as "normal" (negative) and an error below it as "attack" (positive).
print ('\nMSE (pred, real) | Label (ordered)')
tp = tn = fp = fn = 0
for label, pred in zip (y_test, y_pred):
  if (label == 0):
    if (pred >= threshold):
      print ('True negative.')
      tn += 1
    elif (pred < threshold):
      print ('False positive!')
      fp += 1
  elif (label == 1):
    if (pred >= threshold):
      print ('False negative!')
      fn += 1
    elif (pred < threshold):
      print ('True positive.')
      tp += 1

print ('Confusion matrix:')
print ('tp | fp')
print ('fn | tn')
print (tp, '|', fp)
print (fn, '|', tn)