In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report, f1_score
from sklearn.preprocessing import LabelEncoder, StandardScaler, MinMaxScaler
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.outliers_influence import variance_inflation_factor  
import warnings
import pickle
import os
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
def save_predictions(predictions, targets, timeout, save_path= "results/ET"):
    """Pickle a model's predictions and the matching targets to disk.

    Two files are written inside ``save_path``:
    ``predictions_idle_<timeout[0]>_active_<timeout[1]>.p`` and
    ``targets_idle_<timeout[0]>_active_<timeout[1]>.p``.

    Parameters
    ----------
    predictions : object
        Picklable object holding the predicted labels.
    targets : object
        Picklable object holding the ground-truth labels.
    timeout : sequence
        ``timeout[0]`` is used as the idle value and ``timeout[1]`` as the
        active value in the file names.
    save_path : str
        Directory receiving both files. It is not created here, so it must
        already exist (``open`` raises otherwise).
    """
    name_file_pred = 'predictions_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"
    name_file_y = 'targets_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"

    # Context managers guarantee each file is flushed and closed, even when
    # pickling fails part-way; the bare open(...) previously leaked the handle.
    with open(os.path.join(save_path, name_file_pred), 'wb') as pred_file:
        pickle.dump(predictions, pred_file)
    with open(os.path.join(save_path, name_file_y), 'wb') as target_file:
        pickle.dump(targets, target_file)
    
def load_predictions(timeout, save_path= "results/ET"):
    """Load the pickled predictions and targets for one timeout setting.

    Reads ``predictions_idle_<timeout[0]>_active_<timeout[1]>.p`` and
    ``targets_idle_<timeout[0]>_active_<timeout[1]>.p`` from ``save_path``.

    Parameters
    ----------
    timeout : sequence
        ``timeout[0]`` is used as the idle value and ``timeout[1]`` as the
        active value in the file names.
    save_path : str
        Directory containing both pickle files.

    Returns
    -------
    tuple
        ``(predictions, targets)`` exactly as they were unpickled.
    """
    name_file_pred = 'predictions_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"
    name_file_y = 'targets_idle_' + str(timeout[0]) + "_active_" + str(timeout[1]) + ".p"

    # SECURITY: pickle.load can execute arbitrary code; only point save_path
    # at result files produced by a trusted run.
    # The with-blocks close each handle deterministically; the bare open(...)
    # previously left closing to garbage collection.
    with open(os.path.join(save_path, name_file_pred), 'rb') as pred_file:
        predictions = pickle.load(pred_file)
    with open(os.path.join(save_path, name_file_y), 'rb') as target_file:
        targets = pickle.load(target_file)
    return predictions, targets

In [3]:
# (idle, active) timeout pairs to evaluate. For every active value, each
# candidate idle value that does not exceed it is included, in ascending order.
timeouts = [
    (idle_value, active_value)
    for active_value in (2, 3, 4, 5, 30, 60)
    for idle_value in (0.5, 1, 2, 3, 4, 5, 10)
    if idle_value <= active_value
]

# Load predictions - ET-BaselineFeatures

In [5]:
# Score the stored ET predictions for every timeout pair and export the
# per-timeout metrics to an Excel sheet.
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    auc,
    roc_curve,
)

# Display names handed to classification_report as target_names.
classes = [
    'Benign',
    'Botnet',
    'DDoS',
    'DoS GoldenEye',
    'DoS Hulk',
    'DoS Slowhttptest',
    'DoS Slowloris',
    'FTP-Patator',
    'Infiltration',
    'Infiltration - Portscan',
    'Portscan',
    'SSH-Patator',
    'Web Attack - Brute Force',
    'Web Attack - SQL Injection',
    'Web Attack - XSS',
]

# str(timeout) -> [macro F1, accuracy, macro precision, macro recall]
results = {}

for timeout in timeouts:
    print("Loading timeout : ", timeout)
    idle, active = timeout
    pred, y_test = load_predictions(timeout, save_path="../results/ET")

    # NOTE(review): the report is built here but not used later in this cell.
    report = classification_report(y_test, pred, target_names=classes, digits=4)

    f1 = f1_score(y_test, pred, average='macro')
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred, average='macro')
    recall = recall_score(y_test, pred, average='macro')

    results[str(timeout)] = [f1, accuracy, precision, recall]

# Order the (timeout, metrics) pairs by their metric list, ascending; lists
# compare element by element, so macro F1 is the primary sort key.
dictResults = sorted(results.items(), key=lambda entry: entry[1])

# One row per timeout: the timeout string and its metric list.
df = pd.DataFrame(dictResults)
df.to_excel('cic17_ET_baselineFeatures.xlsx')

print("------------------- DONE -------------------")

Loading timeout :  (0.5, 2)
Loading timeout :  (1, 2)
Loading timeout :  (2, 2)
Loading timeout :  (0.5, 3)
Loading timeout :  (1, 3)
Loading timeout :  (2, 3)
Loading timeout :  (3, 3)
Loading timeout :  (0.5, 4)
Loading timeout :  (1, 4)
Loading timeout :  (2, 4)
Loading timeout :  (3, 4)
Loading timeout :  (4, 4)
Loading timeout :  (0.5, 5)
Loading timeout :  (1, 5)
Loading timeout :  (2, 5)
Loading timeout :  (3, 5)
Loading timeout :  (4, 5)
Loading timeout :  (5, 5)
Loading timeout :  (0.5, 30)
Loading timeout :  (1, 30)
Loading timeout :  (2, 30)
Loading timeout :  (3, 30)
Loading timeout :  (4, 30)
Loading timeout :  (5, 30)
Loading timeout :  (10, 30)
Loading timeout :  (0.5, 60)
Loading timeout :  (1, 60)
Loading timeout :  (2, 60)
Loading timeout :  (3, 60)
Loading timeout :  (4, 60)
Loading timeout :  (5, 60)
Loading timeout :  (10, 60)
------------------- DONE -------------------


# Load predictions - RF-BaselineFeatures

In [6]:
# Score the stored RF predictions for every timeout pair and export the
# per-timeout metrics to an Excel sheet.
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    auc,
    roc_curve,
)

# Display names handed to classification_report as target_names.
classes = [
    'Benign',
    'Botnet',
    'DDoS',
    'DoS GoldenEye',
    'DoS Hulk',
    'DoS Slowhttptest',
    'DoS Slowloris',
    'FTP-Patator',
    'Infiltration',
    'Infiltration - Portscan',
    'Portscan',
    'SSH-Patator',
    'Web Attack - Brute Force',
    'Web Attack - SQL Injection',
    'Web Attack - XSS',
]

# str(timeout) -> [macro F1, accuracy, macro precision, macro recall]
results = {}

for timeout in timeouts:
    print("Loading timeout : ", timeout)
    idle, active = timeout
    pred, y_test = load_predictions(timeout, save_path="../results/RF")

    # NOTE(review): the report is built here but not used later in this cell.
    report = classification_report(y_test, pred, target_names=classes, digits=4)

    f1 = f1_score(y_test, pred, average='macro')
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred, average='macro')
    recall = recall_score(y_test, pred, average='macro')

    results[str(timeout)] = [f1, accuracy, precision, recall]

# Order the (timeout, metrics) pairs by their metric list, ascending; lists
# compare element by element, so macro F1 is the primary sort key.
dictResults = sorted(results.items(), key=lambda entry: entry[1])

# One row per timeout: the timeout string and its metric list.
df = pd.DataFrame(dictResults)
df.to_excel('cic17_RF_baselineFeatures.xlsx')

print("------------------- DONE -------------------")

Loading timeout :  (0.5, 2)
Loading timeout :  (1, 2)
Loading timeout :  (2, 2)
Loading timeout :  (0.5, 3)
Loading timeout :  (1, 3)
Loading timeout :  (2, 3)
Loading timeout :  (3, 3)
Loading timeout :  (0.5, 4)
Loading timeout :  (1, 4)
Loading timeout :  (2, 4)
Loading timeout :  (3, 4)
Loading timeout :  (4, 4)
Loading timeout :  (0.5, 5)
Loading timeout :  (1, 5)
Loading timeout :  (2, 5)
Loading timeout :  (3, 5)
Loading timeout :  (4, 5)
Loading timeout :  (5, 5)
Loading timeout :  (0.5, 30)
Loading timeout :  (1, 30)
Loading timeout :  (2, 30)
Loading timeout :  (3, 30)
Loading timeout :  (4, 30)
Loading timeout :  (5, 30)
Loading timeout :  (10, 30)
Loading timeout :  (0.5, 60)
Loading timeout :  (1, 60)
Loading timeout :  (2, 60)
Loading timeout :  (3, 60)
Loading timeout :  (4, 60)
Loading timeout :  (5, 60)
Loading timeout :  (10, 60)
------------------- DONE -------------------


# Load predictions - MLP-BaselineFeatures

In [7]:
# Score the stored MLP predictions for every timeout pair and export the
# per-timeout metrics to an Excel sheet.
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    auc,
    roc_curve,
)

# Display names handed to classification_report as target_names.
classes = [
    'Benign',
    'Botnet',
    'DDoS',
    'DoS GoldenEye',
    'DoS Hulk',
    'DoS Slowhttptest',
    'DoS Slowloris',
    'FTP-Patator',
    'Infiltration',
    'Infiltration - Portscan',
    'Portscan',
    'SSH-Patator',
    'Web Attack - Brute Force',
    'Web Attack - SQL Injection',
    'Web Attack - XSS',
]

# str(timeout) -> [macro F1, accuracy, macro precision, macro recall]
results = {}

for timeout in timeouts:
    print("Loading timeout : ", timeout)
    idle, active = timeout
    pred, y_test = load_predictions(timeout, save_path="../results/MLP")

    # NOTE(review): the report is built here but not used later in this cell.
    report = classification_report(y_test, pred, target_names=classes, digits=4)

    f1 = f1_score(y_test, pred, average='macro')
    accuracy = accuracy_score(y_test, pred)
    precision = precision_score(y_test, pred, average='macro')
    recall = recall_score(y_test, pred, average='macro')

    results[str(timeout)] = [f1, accuracy, precision, recall]

# Order the (timeout, metrics) pairs by their metric list, ascending; lists
# compare element by element, so macro F1 is the primary sort key.
dictResults = sorted(results.items(), key=lambda entry: entry[1])

# One row per timeout: the timeout string and its metric list.
df = pd.DataFrame(dictResults)
df.to_excel('cic17_MLP_baselineFeatures.xlsx')

print("------------------- DONE -------------------")

Loading timeout :  (0.5, 2)
Loading timeout :  (1, 2)
Loading timeout :  (2, 2)
Loading timeout :  (0.5, 3)
Loading timeout :  (1, 3)
Loading timeout :  (2, 3)
Loading timeout :  (3, 3)
Loading timeout :  (0.5, 4)
Loading timeout :  (1, 4)
Loading timeout :  (2, 4)
Loading timeout :  (3, 4)
Loading timeout :  (4, 4)
Loading timeout :  (0.5, 5)
Loading timeout :  (1, 5)
Loading timeout :  (2, 5)
Loading timeout :  (3, 5)
Loading timeout :  (4, 5)
Loading timeout :  (5, 5)
Loading timeout :  (0.5, 30)
Loading timeout :  (1, 30)
Loading timeout :  (2, 30)
Loading timeout :  (3, 30)
Loading timeout :  (4, 30)
Loading timeout :  (5, 30)
Loading timeout :  (10, 30)
Loading timeout :  (0.5, 60)
Loading timeout :  (1, 60)
Loading timeout :  (2, 60)
Loading timeout :  (3, 60)
Loading timeout :  (4, 60)
Loading timeout :  (5, 60)
Loading timeout :  (10, 60)
------------------- DONE -------------------
