In [1]:
import pandas as pd
from benchmarkUtils import Benchmark
# Raise pandas' display limits for the result tables rendered below.
pd.options.display.width = 10000
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.max_colwidth = None

# Benchmark helper queried by every later cell; results come from the CSV below.
bu = Benchmark()
bu.import_df("./benchmark_results.csv")

In [2]:
# Keyword arguments that pretty_print forwards to DataFrame.to_latex.
# NOTE(review): pretty_print rewrites "correlation" to "corr" and moves the
# column into the index under the name "Pipeline" before exporting, so the
# "Info" formatter below may never match anything -- confirm.
common_latex_args = dict(
    index=False,
    escape=True,
    formatters={
        "Info": lambda text: text.replace("|correlation| > 0.1", "$|corr| > 0.1$"),
    },
    float_format="{:.4f}".format,
)

In [3]:
def pretty_print(model, to_latex=False):
    """Display the benchmark metrics of one model and print its parameters.

    The metrics table is indexed by (Pipeline, Dataset) and rounded to five
    decimals. After the table, one "Pipeline / Params" pair is printed for
    every distinct (Model, Info) combination in the results.

    Parameters
    ----------
    model : str
        Model name forwarded to ``bu.display(model=...)``.
    to_latex : bool, default False
        When True, additionally print the displayed table as LaTeX, using
        ``common_latex_args`` with ``index=True``.

    Returns
    -------
    None
    """
    # Fetched once: the table is built from a renamed copy, so `raw` keeps the
    # original "Model"/"Info" values needed for the parameter listing below.
    raw = bu.display(model=model)

    table = raw.rename(columns={"Time per data per iter": "Detection time (ms)"})
    table["Dataset"] = table["Dataset"].str.replace(" attacks", "", regex=False)
    table["Info"] = table["Info"].str.replace("correlation", "corr", regex=False)
    table = table.rename(columns={"Info": "Pipeline"})
    table = (
        table.drop(columns=["Data size", "Model"])
        .set_index(["Pipeline", "Dataset"])
        .round(5)
    )
    display(table)

    params = raw[["Model", "Info"]].drop_duplicates().reset_index(drop=True).values
    for param in params:
        print("Pipeline: ", param[1])
        print("Params  : ", param[0])
        print()

    if to_latex:
        # Build a per-call copy: assigning into common_latex_args directly
        # would flip the shared "index" setting for the rest of the notebook.
        latex_args = {**common_latex_args, "index": True}
        print(table.to_latex(multirow=True, multicolumn=True, **latex_args))

### Results of each model

In [4]:
# Show the SVM metrics table, then the parameters recorded for each pipeline.
pretty_print(model="SVM")

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Pipeline,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All features scaled,Known,0.99757,0.99712,0.9973,0.99721,518.72968
All features scaled,Similar,0.99046,0.99466,0.98113,0.98785,1037.26303
All features scaled,New,0.76714,0.65625,0.00946,0.01866,2984.30678
|corr| > 0.1 features scaled,Known,0.99577,0.99711,0.99315,0.99513,362.89114
|corr| > 0.1 features scaled,Similar,0.98902,0.99755,0.97462,0.98595,749.06375
|corr| > 0.1 features scaled,New,0.76562,0.38255,0.00321,0.00637,2147.51506
All features with 95% PCA,Known,0.99686,0.9955,0.9973,0.9964,689.6471
All features with 95% PCA,Similar,0.99106,0.99688,0.98046,0.9886,1514.05368
All features with 95% PCA,New,0.76682,0.61966,0.00817,0.01612,3798.19162
|corr| > 0.1 features with 95% PCA,Known,0.99647,0.99855,0.99333,0.99594,428.19019


Pipeline:  All features scaled
Params  :  SVM {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  SVM {'C': 1000, 'gamma': 'scale', 'kernel': 'rbf'}

Pipeline:  All features with 95% PCA
Params  :  SVM {'C': 100, 'gamma': 'auto', 'kernel': 'rbf'}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  SVM {'C': 1000, 'gamma': 'auto', 'kernel': 'rbf'}



In [5]:
# Show the GaussianNB metrics table, then the parameters recorded for each pipeline.
pretty_print(model="GaussianNB")

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Pipeline,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All features scaled,Known,0.81738,0.70689,0.99117,0.82523,9.91684
All features scaled,Similar,0.79484,0.66106,0.98716,0.79186,19.19476
All features scaled,New,0.69134,0.31335,0.2682,0.28902,50.29495
|corr| > 0.1 features scaled,Known,0.82601,0.7159,0.99477,0.8326,4.69666
|corr| > 0.1 features scaled,Similar,0.80567,0.67189,0.99368,0.8017,7.98878
|corr| > 0.1 features scaled,New,0.6997,0.31935,0.25084,0.28098,28.35442
All features with 95% PCA,Known,0.78556,0.67167,0.99189,0.80096,15.76805
All features with 95% PCA,Similar,0.76001,0.62474,0.98391,0.76423,30.44038
All features with 95% PCA,New,0.66436,0.38287,0.71079,0.49767,83.33984
|corr| > 0.1 features with 95% PCA,Known,0.93456,0.92272,0.92721,0.92496,12.79858


Pipeline:  All features scaled
Params  :  GaussianNB {'var_smoothing': 1.519911082952933e-07}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  GaussianNB {'var_smoothing': 4.328761281083053e-06}

Pipeline:  All features with 95% PCA
Params  :  GaussianNB {'var_smoothing': 4.328761281083053e-06}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  GaussianNB {'var_smoothing': 0.0005336699231206307}



In [6]:
# Show the Logistic Regression metrics table, then the parameters recorded for each pipeline.
pretty_print(model="Logistic Regression")

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Pipeline,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All features scaled,Known,0.99334,0.98996,0.99477,0.99236,6.33683
All features scaled,Similar,0.98004,0.99511,0.95421,0.97423,18.70627
All features scaled,New,0.77448,0.84507,0.04394,0.08353,36.03333
|corr| > 0.1 features scaled,Known,0.98965,0.98323,0.99315,0.98817,11.0295
|corr| > 0.1 features scaled,Similar,0.98413,0.98764,0.97203,0.97977,15.14763
|corr| > 0.1 features scaled,New,0.78468,0.822,0.10145,0.18061,24.30066
All features with 95% PCA,Known,0.9797,0.97419,0.97928,0.97673,16.59493
All features with 95% PCA,Similar,0.98251,0.98343,0.97213,0.97775,22.98899
All features with 95% PCA,New,0.76731,0.53333,0.04191,0.07771,72.39632
|corr| > 0.1 features with 95% PCA,Known,0.97845,0.97377,0.97676,0.97526,7.6283


Pipeline:  All features scaled
Params  :  Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  Logistic Regression {'C': 0.01, 'penalty': 'none', 'solver': 'newton-cg'}

Pipeline:  All features with 95% PCA
Params  :  Logistic Regression {'C': 1, 'penalty': 'l2', 'solver': 'liblinear'}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  Logistic Regression {'C': 1, 'penalty': 'l1', 'solver': 'liblinear'}



In [7]:
# Show the XGBClassifier metrics table, then the parameters recorded for each pipeline.
pretty_print(model="XGBClassifier")

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Pipeline,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All features scaled,Known,0.99984,0.99982,0.99982,0.99982,39.1346
All features scaled,Similar,0.98917,0.99931,0.97328,0.98612,51.63573
All features scaled,New,0.76233,0.27559,0.00986,0.01904,126.80479
|corr| > 0.1 features scaled,Known,0.99976,0.99964,0.99982,0.99973,22.74885
|corr| > 0.1 features scaled,Similar,0.99084,0.99922,0.97759,0.98828,46.50661
|corr| > 0.1 features scaled,New,0.77997,0.75095,0.08884,0.15888,122.58658
All features with 95% PCA,Known,0.99577,0.99442,0.99586,0.99514,89.67574
All features with 95% PCA,Similar,0.99148,0.99631,0.98209,0.98915,105.65433
All features with 95% PCA,New,0.76745,0.60196,0.01729,0.03362,169.95784
|corr| > 0.1 features with 95% PCA,Known,0.99671,0.99604,0.9964,0.99622,76.1154


Pipeline:  All features scaled
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 1}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 800, 'scale_pos_weight': 5}

Pipeline:  All features with 95% PCA
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 200, 'scale_pos_weight': 1}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  XGBClassifier {'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 10, 'n_estimators': 100, 'scale_pos_weight': 1}



In [8]:
# Show the Random Forest metrics table, then the parameters recorded for each pipeline.
pretty_print(model="Random Forest")

Unnamed: 0_level_0,Unnamed: 1_level_0,Accuracy,Precision,Recall,F1,Detection time (ms)
Pipeline,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
All features scaled,Known,0.99984,0.99982,0.99982,0.99982,140.3135
All features scaled,Similar,0.98747,1.0,0.9683,0.98389,242.98107
All features scaled,New,0.76682,1.0,0.00315,0.00629,622.69861
|corr| > 0.1 features scaled,Known,0.99984,0.99982,0.99982,0.99982,68.07935
|corr| > 0.1 features scaled,Similar,0.98864,0.9998,0.97146,0.98543,111.21453
|corr| > 0.1 features scaled,New,0.76724,0.92308,0.00541,0.01075,308.59882
All features with 95% PCA,Known,0.9942,0.99174,0.99495,0.99334,157.93166
All features with 95% PCA,Similar,0.98917,0.99532,0.9772,0.98618,232.80571
All features with 95% PCA,New,0.76682,0.57821,0.01166,0.02286,525.65986
|corr| > 0.1 features with 95% PCA,Known,0.99686,0.99622,0.99658,0.9964,84.76016


Pipeline:  All features scaled
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 200}

Pipeline:  |correlation| > 0.1 features scaled
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Pipeline:  All features with 95% PCA
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}

Pipeline:  |correlation| > 0.1 features with 95% PCA
Params  :  Random Forest {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}



In [9]:
def pretty_print_df(df):
    """Return a display-ready copy of a benchmark results frame.

    The input is not modified. In the returned frame:

    * the ``{...}`` parameter dict is removed from "Model", which becomes the index;
    * "correlation" is shortened to "corr" in "Info", which is renamed "Pipeline";
    * "Dataset" and "Data size" are dropped;
    * "Time per data per iter" is renamed "Detection time (ms)";
    * numeric values are rounded to five decimals.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing at least the columns "Model", "Info", "Dataset"
        and "Data size".

    Returns
    -------
    pandas.DataFrame
        The reformatted copy, indexed by model name.
    """
    tidy = df.copy()
    # Consume the whitespace around the "{...}" block as well, so the index
    # label is exactly the model name ("SVM", not "SVM ").
    tidy["Model"] = (
        tidy["Model"].str.replace(r"\s*\{.*\}", "", regex=True).str.strip()
    )
    tidy["Info"] = tidy["Info"].str.replace("correlation", "corr", regex=False)
    tidy = (
        tidy.set_index("Model")
        .drop(columns=["Dataset", "Data size"])
        .rename(
            columns={
                "Time per data per iter": "Detection time (ms)",
                "Info": "Pipeline",
            }
        )
        .round(5)
    )
    return tidy

### Results on the New dataset

In [10]:
# Top 10 results on the New dataset ordered by detection time, lowest first.
pretty_print_df(bu.display(dataset="New", sort_by="Time per data per iter", ascending=True, top=10))

Unnamed: 0_level_0,Pipeline,Accuracy,Precision,Recall,F1,Detection time (ms)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Logistic Regression,|corr| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066
GaussianNB,|corr| > 0.1 features scaled,0.6997,0.31935,0.25084,0.28098,28.35442
Logistic Regression,|corr| > 0.1 features with 95% PCA,0.76736,0.5458,0.03256,0.06145,30.20472
Logistic Regression,All features scaled,0.77448,0.84507,0.04394,0.08353,36.03333
GaussianNB,|corr| > 0.1 features with 95% PCA,0.79337,0.72914,0.18561,0.2959,41.31183
GaussianNB,All features scaled,0.69134,0.31335,0.2682,0.28902,50.29495
Logistic Regression,All features with 95% PCA,0.76731,0.53333,0.04191,0.07771,72.39632
GaussianNB,All features with 95% PCA,0.66436,0.38287,0.71079,0.49767,83.33984
XGBClassifier,|corr| > 0.1 features scaled,0.77997,0.75095,0.08884,0.15888,122.58658
XGBClassifier,All features scaled,0.76233,0.27559,0.00986,0.01904,126.80479


In [11]:
# Top 10 results on the New dataset ordered by F1, highest first.
pretty_print_df(bu.display(dataset="New", sort_by="F1", ascending=False, top=10))

Unnamed: 0_level_0,Pipeline,Accuracy,Precision,Recall,F1,Detection time (ms)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GaussianNB,All features with 95% PCA,0.66436,0.38287,0.71079,0.49767,83.33984
GaussianNB,|corr| > 0.1 features with 95% PCA,0.79337,0.72914,0.18561,0.2959,41.31183
GaussianNB,All features scaled,0.69134,0.31335,0.2682,0.28902,50.29495
GaussianNB,|corr| > 0.1 features scaled,0.6997,0.31935,0.25084,0.28098,28.35442
Logistic Regression,|corr| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066
XGBClassifier,|corr| > 0.1 features scaled,0.77997,0.75095,0.08884,0.15888,122.58658
Logistic Regression,All features scaled,0.77448,0.84507,0.04394,0.08353,36.03333
Logistic Regression,All features with 95% PCA,0.76731,0.53333,0.04191,0.07771,72.39632
Logistic Regression,|corr| > 0.1 features with 95% PCA,0.76736,0.5458,0.03256,0.06145,30.20472
XGBClassifier,All features with 95% PCA,0.76745,0.60196,0.01729,0.03362,169.95784


In [12]:
# Top 5 results on the New dataset ordered by recall, highest first.
pretty_print_df(bu.display(dataset="New", sort_by="Recall", ascending=False, top=5))

Unnamed: 0_level_0,Pipeline,Accuracy,Precision,Recall,F1,Detection time (ms)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GaussianNB,All features with 95% PCA,0.66436,0.38287,0.71079,0.49767,83.33984
GaussianNB,All features scaled,0.69134,0.31335,0.2682,0.28902,50.29495
GaussianNB,|corr| > 0.1 features scaled,0.6997,0.31935,0.25084,0.28098,28.35442
GaussianNB,|corr| > 0.1 features with 95% PCA,0.79337,0.72914,0.18561,0.2959,41.31183
Logistic Regression,|corr| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066


In [13]:
# Top 5 results on the New dataset ordered by precision, highest first.
pretty_print_df(bu.display(dataset="New", sort_by="Precision", ascending=False, top=5))

Unnamed: 0_level_0,Pipeline,Accuracy,Precision,Recall,F1,Detection time (ms)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Random Forest,All features scaled,0.76682,1.0,0.00315,0.00629,622.69861
Random Forest,|corr| > 0.1 features scaled,0.76724,0.92308,0.00541,0.01075,308.59882
Logistic Regression,All features scaled,0.77448,0.84507,0.04394,0.08353,36.03333
Logistic Regression,|corr| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066
XGBClassifier,|corr| > 0.1 features scaled,0.77997,0.75095,0.08884,0.15888,122.58658


In [14]:
# Top 5 results on the New dataset ordered by accuracy, highest first.
pretty_print_df(bu.display(dataset="New", sort_by="Accuracy", ascending=False, top=5))

Unnamed: 0_level_0,Pipeline,Accuracy,Precision,Recall,F1,Detection time (ms)
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
GaussianNB,|corr| > 0.1 features with 95% PCA,0.79337,0.72914,0.18561,0.2959,41.31183
Logistic Regression,|corr| > 0.1 features scaled,0.78468,0.822,0.10145,0.18061,24.30066
XGBClassifier,|corr| > 0.1 features scaled,0.77997,0.75095,0.08884,0.15888,122.58658
Logistic Regression,All features scaled,0.77448,0.84507,0.04394,0.08353,36.03333
XGBClassifier,|corr| > 0.1 features with 95% PCA,0.76748,0.69203,0.01076,0.02119,138.88963
