# CSE-CIC-IDS 2017 Ensembles - Voting Classifier

In [1]:
model_id = "voting3-multiclass-rf-dnn-knn"

In [2]:
import numpy as np
np.set_printoptions(suppress=True)
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import tensorflow as tf

import glob, pickle, time, keras

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.


In [3]:
NOTEBOOK_PATH = "C:/Users/Xetrov/Desktop/SciFair20/Code/"

In [4]:
x_scaled = pd.read_csv(NOTEBOOK_PATH + "IDS2017/x_scaled_powertransform.csv")

In [5]:
y_df_enc = pd.read_csv(NOTEBOOK_PATH + "IDS2017/y_grouped_1henc.csv")

# Split data into train and test

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
x_train, x_valtest, y_train, y_valtest = train_test_split(x_scaled, y_df_enc, test_size = 0.4, random_state = 42)

In [8]:
x_val, x_test, y_val, y_test = train_test_split(x_valtest, y_valtest, test_size = 0.5, random_state = 42)

In [9]:
del x_valtest 
del y_valtest

# Ensemble Models

In [10]:
from sklearn.ensemble import VotingClassifier

In [11]:
rf = pickle.load(open(NOTEBOOK_PATH + "Models/randomforest4-adasyn [20191017 1939].pkl", "rb"))

In [12]:
knn = pickle.load(open(NOTEBOOK_PATH + "Models/knearestneighbors-distance [20191013 1528].pkl", "rb"))

In [13]:
dnn = keras.models.load_model(NOTEBOOK_PATH + "Models/dnn4-adasyn/dnn(97).h5")

In [14]:
dnnsk = tf.keras.wrappers.scikit_learn.KerasClassifier(lambda: dnn)

In [45]:
class CustomVotingClassifier(object):
    """ Implements a voting classifier for pre-trained classifiers"""
    def __init__(self, estimators):
        if type(estimators) is list:
            self.names = None
            self.estimators = estimators
        elif type(estimators) is dict:
            self.names = list(estimators.keys())
            self.estimators = list(estimators.values())
        else:
            raise TypeError("Must pass a List or Dictionary")

    def predict(self, X):
        # get values
        Y = np.zeros([X.shape[0], len(self.estimators)], dtype=int)
        for i, clf in enumerate(self.estimators):
            if self.names is not None: print(f"Predicting with {self.names[i]}...")
            Y[:, i] = np.squeeze(clf.predict(X).round().argmax(axis=1))
        # apply voting 
        y = np.zeros(X.shape[0])
        for i in range(X.shape[0]):
            y[i] = np.argmax(np.bincount(Y[i,:]))
        return y

In [46]:
ensemble = CustomVotingClassifier({"Random Forest": rf, "K-Nearest Neighbors": knn, "Dense Neural Network": dnn})

# Test Set

In [50]:
from sklearn.metrics import f1_score

In [51]:
pred = ensemble.predict(x_test)

Predicting with Random Forest...


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=1)]: Done 100 out of 100 | elapsed:   26.8s finished


Predicting with K-Nearest Neighbors...
Predicting with Dense Neural Network...


In [55]:
y_test_npy = y_test.to_numpy().argmax(axis=1)

f1_micro = f1_score(y_test_npy, pred, average='micro')
print("F1 Micro:", f1_micro)

f1_macro = f1_score(y_test_npy, pred, average='macro')
print("F1 Macro:", f1_macro)

F1 Micro: 0.9988395281100911
F1 Macro: 0.9316011215592404


In [25]:
pred_series = pd.Series(pred, name="Pred").replace({0: 'Benign', 1: 'Attack'})
y_series = pd.Series(y_test.to_numpy().ravel(), name="Actual").replace({0: 'Benign', 1: 'Attack'})

matrix = pd.crosstab(pred_series, y_series, margins=True)
matrix

Actual,Attack,Benign,All
Pred,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Attack,111226,442,111668
Benign,133,454348,454481
All,111359,454790,566149


# Log results

In [22]:
model_log = open(NOTEBOOK_PATH + "model_log.txt", "a")

model_log.write("\n" + model_filename)
model_log.write("\n\tF1 Micro: " + str(f1_micro))
model_log.write("\n\tF1 Macro: " + str(f1_macro))

model_log.close()