In [1]:
import pandas as pd
import numpy as np

import tensorflow as tf

import os
import sys

# os.path.abspath resolves the relative name "experiments" against the current
# working directory, so script_dir ends up being the working directory itself.
script_dir = os.path.dirname(os.path.abspath("experiments"))
# Append the PARENT of the working directory to the import search path.
# NOTE(review): presumably this makes the `experiments` package importable when
# the notebook is launched from inside the `experiments/` folder - confirm.
sys.path.append(os.path.dirname(script_dir))

from experiments.predictions import make_prediction, PredMetrics, get_prediction_metrics, labels

2021-12-24 12:00:37.777213: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2021-12-24 12:00:37.777249: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
# Pin TensorFlow to the first physical GPU (when one exists) and report how
# many physical / logical GPU devices are visible.
try:
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        # Only restrict visibility when a GPU is present; indexing an empty
        # list would raise IndexError, which the handler below does not catch.
        tf.config.set_visible_devices(gpus[0], 'GPU')
    logical_gpus = tf.config.list_logical_devices('GPU')
    print("Physical GPUs:", len(gpus))
    print("Logical GPUs:", len(logical_gpus))

except RuntimeError as e:
    # Visible devices must be set before GPUs have been initialized
    print(e)

Physical GPUs: 1
Logical GPUs: 1


In [3]:
# Early-stopping rule passed to every model.fit call in this notebook: training
# halts once the `accuracy` metric has failed to improve by at least 0.0001 for
# 5 consecutive epochs. `accuracy` is the training metric here; the fit calls
# pass no validation data.
earlystop_callback = tf.keras.callbacks.EarlyStopping(monitor="accuracy", mode="max",
                                                      min_delta=0.0001, patience=5)

### Classifier (10% data)

In [4]:
# Load the pre-split arrays of the "preserve10" dataset variant from disk.
x_train, y_train = (np.load("data/preserve10/x_train.npy"),
                    np.load("data/preserve10/y_train.npy"))
x_test, y_test = (np.load("data/preserve10/x_test.npy"),
                  np.load("data/preserve10/y_test.npy"))

In [5]:
# One-hot encode the labels for the categorical cross-entropy loss.
# The width is pinned to 15 so it always matches the 15-unit softmax output
# layer; without num_classes the width is inferred from the largest label
# present, so a split lacking the highest class id would come out too narrow.
# NOTE(review): assumes labels are integer class ids in 0..14 - confirm.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=15)
# The integer y_test is kept as-is; it is passed to make_prediction later on.
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=15)

In [6]:
# Feed-forward classifier: 31 input features -> Dense 128 -> 256 -> 128 (ReLU,
# each followed by 30% dropout) -> 15-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(31,)))
for hidden_units in (128, 256, 128):
    model.add(tf.keras.layers.Dense(hidden_units, activation="relu"))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(15, activation="softmax"))

In [7]:
# Configure training: Adamax optimiser, categorical cross-entropy loss on the
# one-hot labels, and accuracy as the tracked metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adamax",
    metrics=["accuracy"],
)

In [8]:
# Train for at most 100 epochs in mini-batches of 128; earlystop_callback may
# end the run sooner.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    callbacks=[earlystop_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100


<tensorflow.python.keras.callbacks.History at 0x2008dc0cc10>

**Evaluate**

In [9]:
# Score the trained model on the test split; the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test_onehot, verbose=2)

2239/2239 - 5s - loss: 0.0612 - accuracy: 0.9771


[0.06117844209074974, 0.9771350622177124]

In [10]:
# Wrap the test arrays in DataFrames before they are handed to make_prediction.
# NOTE(review): presumably make_prediction expects DataFrame inputs - confirm.
x_test, y_test = pd.DataFrame(x_test), pd.DataFrame(y_test)

In [11]:
# Run the trained model on the test split. make_prediction is a project helper
# imported from experiments.predictions; its internals are not visible here.
predictions = make_prediction(model, x_test, y_test)
# Derive the summary metrics object; the reporting cell reads weighted_fpr,
# weighted_fnr, weighted_detection_rate, false_alarm_rate and
# classification_report from it.
pred_metrics = get_prediction_metrics(predictions)

In [12]:
# Headline metrics, rounded to four decimals.
print("FP Rate (FPR):", round(pred_metrics.weighted_fpr, 4))
print("FN Rate (FNR):", round(pred_metrics.weighted_fnr, 4))
print("Detection Rate:", round(pred_metrics.weighted_detection_rate, 4))
print("False Alarm Rate (FAR):", round(pred_metrics.false_alarm_rate, 4))
print("Accuracy: ", round(pred_metrics.classification_report.loc["accuracy"].iloc[0], 4))

# Build the display table on a copy so pred_metrics is left untouched and this
# cell can be re-run: dropping the "accuracy" row in place would make the
# lookup above raise KeyError on a second execution.
report = pred_metrics.classification_report.copy()
report.columns = ["Precision", "Recall", "F1-score", "Quantity"]
report = report.astype({"Quantity": int}).drop("accuracy", axis=0)

print("\n", report)

FP Rate (FPR): 0.0004
FN Rate (FNR): 0.0745
Detection Rate: 0.9255
False Alarm Rate (FAR): 0.0374
Accuracy:  0.9771

                           Precision    Recall  F1-score  Quantity
BENIGN                     0.989657  0.984903  0.987274     56833
Bot                        0.934097  0.645545  0.763466       505
DDoS                       0.999349  0.983664  0.991445      3122
DoS_GoldenEye              0.963235  0.952727  0.957952       275
DoS_Hulk                   0.970447  0.980726  0.975559      5759
DoS_Slowhttptest           0.855072  0.880597  0.867647       134
DoS_slowloris              0.951515  0.945783  0.948640       166
FTPPatator                 0.949772  0.995215  0.971963       209
Heartbleed                 1.000000  1.000000  1.000000         2
Infiltration               0.000000  0.000000  0.000000         7
PortScan                   0.897822  0.954707  0.925392      3930
SSHPatator                 0.801370  0.914062  0.854015       128
Web_Attack_Brute_Force  

In [13]:
# Persist the classifier trained on the preserve10 data as an HDF5 file.
model.save(filepath="models_save/10_classifier_standalone.h5")

### Classifier (25% data)

In [14]:
# Load the pre-split arrays of the "preserve25" dataset variant from disk.
x_train, y_train = (np.load("data/preserve25/x_train.npy"),
                    np.load("data/preserve25/y_train.npy"))
x_test, y_test = (np.load("data/preserve25/x_test.npy"),
                  np.load("data/preserve25/y_test.npy"))

In [15]:
# One-hot encode the labels for the categorical cross-entropy loss.
# The width is pinned to 15 so it always matches the 15-unit softmax output
# layer; without num_classes the width is inferred from the largest label
# present, so a split lacking the highest class id would come out too narrow.
# NOTE(review): assumes labels are integer class ids in 0..14 - confirm.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=15)
# The integer y_test is kept as-is; it is passed to make_prediction later on.
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=15)

In [16]:
# Feed-forward classifier: 31 input features -> Dense 128 -> 256 -> 128 (ReLU,
# each followed by 30% dropout) -> 15-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(31,)))
for hidden_units in (128, 256, 128):
    model.add(tf.keras.layers.Dense(hidden_units, activation="relu"))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(15, activation="softmax"))

In [17]:
# Configure training: Adamax optimiser, categorical cross-entropy loss on the
# one-hot labels, and accuracy as the tracked metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adamax",
    metrics=["accuracy"],
)

In [18]:
# Train for at most 100 epochs in mini-batches of 128; earlystop_callback may
# end the run sooner.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    callbacks=[earlystop_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


<tensorflow.python.keras.callbacks.History at 0x20240e59130>

**Evaluate**

In [19]:
# Score the trained model on the test split; the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test_onehot, verbose=2)

5548/5548 - 10s - loss: 0.0459 - accuracy: 0.9806


[0.04589821398258209, 0.9805775880813599]

In [20]:
# Wrap the test arrays in DataFrames before they are handed to make_prediction.
# NOTE(review): presumably make_prediction expects DataFrame inputs - confirm.
x_test, y_test = pd.DataFrame(x_test), pd.DataFrame(y_test)

In [21]:
# Run the trained model on the test split. make_prediction is a project helper
# imported from experiments.predictions; its internals are not visible here.
predictions = make_prediction(model, x_test, y_test)
# Derive the summary metrics object; the reporting cell reads weighted_fpr,
# weighted_fnr, weighted_detection_rate, false_alarm_rate and
# classification_report from it.
pred_metrics = get_prediction_metrics(predictions)

In [22]:
# Headline metrics, rounded to four decimals.
print("FP Rate (FPR):", round(pred_metrics.weighted_fpr, 4))
print("FN Rate (FNR):", round(pred_metrics.weighted_fnr, 4))
print("Detection Rate:", round(pred_metrics.weighted_detection_rate, 4))
print("False Alarm Rate (FAR):", round(pred_metrics.false_alarm_rate, 4))
print("Accuracy: ", round(pred_metrics.classification_report.loc["accuracy"].iloc[0], 4))

# Build the display table on a copy so pred_metrics is left untouched and this
# cell can be re-run: dropping the "accuracy" row in place would make the
# lookup above raise KeyError on a second execution.
report = pred_metrics.classification_report.copy()
report.columns = ["Precision", "Recall", "F1-score", "Quantity"]
report = report.astype({"Quantity": int}).drop("accuracy", axis=0)

print("\n", report)

FP Rate (FPR): 0.0001
FN Rate (FNR): 0.0586
Detection Rate: 0.9414
False Alarm Rate (FAR): 0.0293
Accuracy:  0.9806

                           Precision    Recall  F1-score  Quantity
BENIGN                     0.994914  0.982176  0.988504    141997
Bot                        0.820375  0.620690  0.706697       493
DDoS                       0.998346  0.998982  0.998664      7855
DoS_GoldenEye              0.972561  0.965204  0.968869       661
DoS_Hulk                   0.939948  0.998273  0.968233     14472
DoS_Slowhttptest           0.859788  0.978916  0.915493       332
DoS_slowloris              0.950249  0.959799  0.955000       398
FTPPatator                 0.976048  0.989879  0.982915       494
Heartbleed                 1.000000  1.000000  1.000000         2
Infiltration               0.000000  0.000000  0.000000        12
PortScan                   0.901574  0.956924  0.928425      9936
SSHPatator                 0.869333  0.967359  0.915730       337
Web_Attack_Brute_Force  

In [23]:
# Persist the classifier trained on the preserve25 data as an HDF5 file.
model.save(filepath="models_save/25_classifier_standalone.h5")

### Classifier (50% data)

In [24]:
# Load the pre-split arrays of the "preserve50" dataset variant from disk.
x_train, y_train = (np.load("data/preserve50/x_train.npy"),
                    np.load("data/preserve50/y_train.npy"))
x_test, y_test = (np.load("data/preserve50/x_test.npy"),
                  np.load("data/preserve50/y_test.npy"))

In [25]:
# One-hot encode the labels for the categorical cross-entropy loss.
# The width is pinned to 15 so it always matches the 15-unit softmax output
# layer; without num_classes the width is inferred from the largest label
# present, so a split lacking the highest class id would come out too narrow.
# NOTE(review): assumes labels are integer class ids in 0..14 - confirm.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=15)
# The integer y_test is kept as-is; it is passed to make_prediction later on.
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=15)

In [26]:
# Feed-forward classifier: 31 input features -> Dense 128 -> 256 -> 128 (ReLU,
# each followed by 30% dropout) -> 15-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(31,)))
for hidden_units in (128, 256, 128):
    model.add(tf.keras.layers.Dense(hidden_units, activation="relu"))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(15, activation="softmax"))

In [27]:
# Configure training: Adamax optimiser, categorical cross-entropy loss on the
# one-hot labels, and accuracy as the tracked metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adamax",
    metrics=["accuracy"],
)

In [28]:
# Train for at most 100 epochs in mini-batches of 128; earlystop_callback may
# end the run sooner.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    callbacks=[earlystop_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100


<tensorflow.python.keras.callbacks.History at 0x20240b27be0>

**Evaluate**

In [29]:
# Score the trained model on the test split; the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test_onehot, verbose=2)

11063/11063 - 19s - loss: 0.0406 - accuracy: 0.9844


[0.040626417845487595, 0.9844297170639038]

In [30]:
# Wrap the test arrays in DataFrames before they are handed to make_prediction.
# NOTE(review): presumably make_prediction expects DataFrame inputs - confirm.
x_test, y_test = pd.DataFrame(x_test), pd.DataFrame(y_test)

In [31]:
# Run the trained model on the test split. make_prediction is a project helper
# imported from experiments.predictions; its internals are not visible here.
predictions = make_prediction(model, x_test, y_test)
# Derive the summary metrics object; the reporting cell reads weighted_fpr,
# weighted_fnr, weighted_detection_rate, false_alarm_rate and
# classification_report from it.
pred_metrics = get_prediction_metrics(predictions)

In [32]:
# Headline metrics, rounded to four decimals.
print("FP Rate (FPR):", round(pred_metrics.weighted_fpr, 4))
print("FN Rate (FNR):", round(pred_metrics.weighted_fnr, 4))
print("Detection Rate:", round(pred_metrics.weighted_detection_rate, 4))
print("False Alarm Rate (FAR):", round(pred_metrics.false_alarm_rate, 4))
print("Accuracy: ", round(pred_metrics.classification_report.loc["accuracy"].iloc[0], 4))

# Build the display table on a copy so pred_metrics is left untouched and this
# cell can be re-run: dropping the "accuracy" row in place would make the
# lookup above raise KeyError on a second execution.
report = pred_metrics.classification_report.copy()
report.columns = ["Precision", "Recall", "F1-score", "Quantity"]
report = report.astype({"Quantity": int}).drop("accuracy", axis=0)

print("\n", report)

FP Rate (FPR): 0.0006
FN Rate (FNR): 0.1015
Detection Rate: 0.8985
False Alarm Rate (FAR): 0.051
Accuracy:  0.9844

                           Precision    Recall  F1-score  Quantity
BENIGN                     0.991948  0.988849  0.990396    283911
Bot                        0.731235  0.604000  0.661555       500
DDoS                       0.998350  0.985718  0.991994     15964
DoS_GoldenEye              0.971384  0.982786  0.977052      1278
DoS_Hulk                   0.970707  0.990843  0.980672     28829
DoS_Slowhttptest           0.867198  0.976084  0.918425       669
DoS_slowloris              0.960053  0.976965  0.968435       738
FTPPatator                 0.992986  0.991000  0.991992      1000
Heartbleed                 0.000000  0.000000  0.000000         2
Infiltration               0.000000  0.000000  0.000000         6
PortScan                   0.903064  0.944598  0.923364     19873
SSHPatator                 0.945241  0.981055  0.962815       739
Web_Attack_Brute_Force   

In [33]:
# Persist the classifier trained on the preserve50 data as an HDF5 file.
model.save(filepath="models_save/50_classifier_standalone.h5")

### Classifier (100% data)

In [34]:
# Load the pre-split arrays of the "preserve100" dataset variant from disk.
x_train, y_train = (np.load("data/preserve100/x_train.npy"),
                    np.load("data/preserve100/y_train.npy"))
x_test, y_test = (np.load("data/preserve100/x_test.npy"),
                  np.load("data/preserve100/y_test.npy"))

In [35]:
# One-hot encode the labels for the categorical cross-entropy loss.
# The width is pinned to 15 so it always matches the 15-unit softmax output
# layer; without num_classes the width is inferred from the largest label
# present, so a split lacking the highest class id would come out too narrow.
# NOTE(review): assumes labels are integer class ids in 0..14 - confirm.
y_train = tf.keras.utils.to_categorical(y_train, num_classes=15)
# The integer y_test is kept as-is; it is passed to make_prediction later on.
y_test_onehot = tf.keras.utils.to_categorical(y_test, num_classes=15)

In [36]:
# Feed-forward classifier: 31 input features -> Dense 128 -> 256 -> 128 (ReLU,
# each followed by 30% dropout) -> 15-way softmax output.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.InputLayer(input_shape=(31,)))
for hidden_units in (128, 256, 128):
    model.add(tf.keras.layers.Dense(hidden_units, activation="relu"))
    model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Dense(15, activation="softmax"))

In [37]:
# Configure training: Adamax optimiser, categorical cross-entropy loss on the
# one-hot labels, and accuracy as the tracked metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adamax",
    metrics=["accuracy"],
)

In [38]:
# Train for at most 100 epochs in mini-batches of 128; earlystop_callback may
# end the run sooner.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=128,
    epochs=100,
    callbacks=[earlystop_callback],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100


<tensorflow.python.keras.callbacks.History at 0x202901903d0>

**Evaluate**

In [39]:
# Score the trained model on the test split; the result is [loss, accuracy].
model.evaluate(x=x_test, y=y_test_onehot, verbose=2)

22093/22093 - 38s - loss: 0.0359 - accuracy: 0.9852


[0.03587724268436432, 0.9851648211479187]

In [40]:
# Wrap the test arrays in DataFrames before they are handed to make_prediction.
# NOTE(review): presumably make_prediction expects DataFrame inputs - confirm.
x_test, y_test = pd.DataFrame(x_test), pd.DataFrame(y_test)

In [41]:
# Run the trained model on the test split. make_prediction is a project helper
# imported from experiments.predictions; its internals are not visible here.
predictions = make_prediction(model, x_test, y_test)
# Derive the summary metrics object; the reporting cell reads weighted_fpr,
# weighted_fnr, weighted_detection_rate, false_alarm_rate and
# classification_report from it.
pred_metrics = get_prediction_metrics(predictions)

In [42]:
# Headline metrics, rounded to four decimals.
print("FP Rate (FPR):", round(pred_metrics.weighted_fpr, 4))
print("FN Rate (FNR):", round(pred_metrics.weighted_fnr, 4))
print("Detection Rate:", round(pred_metrics.weighted_detection_rate, 4))
print("False Alarm Rate (FAR):", round(pred_metrics.false_alarm_rate, 4))
print("Accuracy: ", round(pred_metrics.classification_report.loc["accuracy"].iloc[0], 4))

# Build the display table on a copy so pred_metrics is left untouched and this
# cell can be re-run: dropping the "accuracy" row in place would make the
# lookup above raise KeyError on a second execution.
report = pred_metrics.classification_report.copy()
report.columns = ["Precision", "Recall", "F1-score", "Quantity"]
report = report.astype({"Quantity": int}).drop("accuracy", axis=0)

print("\n", report)

FP Rate (FPR): 0.0002
FN Rate (FNR): 0.0782
Detection Rate: 0.9218
False Alarm Rate (FAR): 0.0392
Accuracy:  0.9852

                           Precision    Recall  F1-score  Quantity
BENIGN                     0.992337  0.989356  0.990845    568229
Bot                        0.993865  0.334711  0.500773       484
DDoS                       0.998696  0.986768  0.992696     31818
DoS_GoldenEye              0.975994  0.980625  0.978304      2529
DoS_Hulk                   0.966539  0.990601  0.978422     57561
DoS_Slowhttptest           0.884326  0.980273  0.929831      1318
DoS_slowloris              0.972279  0.982912  0.977566      1463
FTPPatator                 0.995970  0.990977  0.993467      1995
Heartbleed                 0.000000  0.000000  0.000000         4
Infiltration               0.000000  0.000000  0.000000         7
PortScan                   0.907606  0.936991  0.922065     39566
SSHPatator                 0.950369  0.975895  0.962963      1452
Web_Attack_Brute_Force  

In [43]:
# Persist the classifier trained on the preserve100 data as an HDF5 file.
model.save(filepath="models_save/100_classifier_standalone.h5")