In [1]:
import os
import logging
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf

from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.utils import plot_model
import tensorflow.keras as keras

In [2]:
MODEL_DIR_PATH = "/Users/smalih/CICIDS_models/my_model"
PROCESSED_DIR_PATH = "/Users/smalih/CICIDS_models/processed_dataset"

In [3]:
def preprocessing(df: pd.DataFrame) -> (np.ndarray, np.ndarray):
    # Shuffle the dataset
    df = df.sample(frac=1)

    # Split features and labels
    x = df.iloc[:, df.columns != 'Label']
    y = df[['Label']].to_numpy()

    # Scale the features between 0 ~ 1
    scaler = np.load(os.path.join(PROCESSED_DIR_PATH, 'x_scaler.pkl'), allow_pickle=True)
    x = scaler.fit_transform(x)

    return x, y


def reshape_dataset_cnn(x: np.ndarray) -> np.ndarray:
    # Add padding columns
    result = np.zeros((x.shape[0], 49)) # changed 81 to 49 as 48 features
    result[:, :-1] = x # changed -3 to -1 as only one column is padding

    # Reshaping dataset
    result = np.reshape(result, (result.shape[0], 7, 7))
    result = result[..., tf.newaxis]
    return result


In [4]:
def load_model(model_dir):
    model = keras.models.load_model(os.path.join(model_dir, '06_cnn.h5'))
    return model

def load_data(data_path):
    df = pd.read_csv(data_path)
    # Split features and labels
    scaler = MinMaxScaler()
    x = scaler.fit_transform(df)
    return reshape_dataset_cnn(x)


In [5]:
def get_predictions(model, data, label_encoder):
    y_pred = model.predict(data, batch_size=1024, verbose=False)
    y_pred = label_encoder[np.argmax(y_pred, axis=1)]
    return y_pred

In [6]:
X = load_data("/Users/smalih/CICIDS_models/dataset/data_collect.csv")
model = load_model(MODEL_DIR_PATH)



In [7]:

preds = model.predict(X)

[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


In [8]:
preds

array([[1.0000000e+00, 1.7522338e-20, 0.0000000e+00, ..., 3.0216349e-35,
        0.0000000e+00, 0.0000000e+00],
       [1.0000000e+00, 6.3934347e-20, 1.0639994e-37, ..., 1.7028457e-34,
        7.7369655e-29, 1.1087574e-32],
       [1.0000000e+00, 1.0086480e-13, 1.1419795e-22, ..., 1.0478001e-28,
        1.0001425e-20, 4.3716058e-33],
       ...,
       [1.0000000e+00, 7.4887954e-24, 0.0000000e+00, ..., 0.0000000e+00,
        0.0000000e+00, 0.0000000e+00],
       [1.0000000e+00, 1.8535511e-23, 4.2009028e-29, ..., 3.9245816e-38,
        0.0000000e+00, 0.0000000e+00],
       [9.9999988e-01, 7.6940505e-22, 1.5032325e-33, ..., 5.8073453e-32,
        1.9414780e-20, 2.5685375e-37]], dtype=float32)

In [9]:
print(preds)

[[1.0000000e+00 1.7522338e-20 0.0000000e+00 ... 3.0216349e-35
  0.0000000e+00 0.0000000e+00]
 [1.0000000e+00 6.3934347e-20 1.0639994e-37 ... 1.7028457e-34
  7.7369655e-29 1.1087574e-32]
 [1.0000000e+00 1.0086480e-13 1.1419795e-22 ... 1.0478001e-28
  1.0001425e-20 4.3716058e-33]
 ...
 [1.0000000e+00 7.4887954e-24 0.0000000e+00 ... 0.0000000e+00
  0.0000000e+00 0.0000000e+00]
 [1.0000000e+00 1.8535511e-23 4.2009028e-29 ... 3.9245816e-38
  0.0000000e+00 0.0000000e+00]
 [9.9999988e-01 7.6940505e-22 1.5032325e-33 ... 5.8073453e-32
  1.9414780e-20 2.5685375e-37]]


In [10]:
import sys

np.set_printoptions(suppress=True)
np.set_printoptions(threshold=sys.maxsize)

In [11]:
label_encoder = np.load(os.path.join(PROCESSED_DIR_PATH, 'label_encoder.npy'), allow_pickle=True)

print(label_encoder)

['BENIGN' 'Bot' 'DDoS' 'DoS GoldenEye' 'DoS Hulk' 'DoS Slowhttptest'
 'DoS slowloris' 'FTP-Patator' 'Heartbleed' 'Infiltration' 'PortScan'
 'SSH-Patator' 'Web Attack-Brute Force' 'Web Attack-Sql Injection'
 'Web Attack-XSS']


In [12]:
preds = get_predictions(model, X, label_encoder)

In [13]:
attacks = preds[preds != 'BENIGN']

In [14]:
attacks

array(['Heartbleed', 'Heartbleed', 'Heartbleed', 'DDoS', 'DoS slowloris',
       'PortScan', 'DoS Hulk'], dtype=object)