In [1]:
import pandas as pd
import numpy as np
import copy

import xgboost as xgb
from tqdm import tqdm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.filterwarnings("ignore")

In [2]:
d = pd.read_csv("results/attacks_diagnoses.csv")
d = d[(d["dataset"] != "mfeat-morphological") & (d["attack"] != "lpf")]
d["attack_binary"] = np.where(d["attack"]=="org", 0, 1)

In [3]:
def q0(x: float) -> float:
    return x.quantile(0)


def q25(x: float) -> float:
    return x.quantile(0.25)


def q50(x: float) -> float:
    return x.quantile(0.5)


def q75(x: float) -> float:
    return x.quantile(0.75)


def q1(x: float) -> float:
    return x.quantile(1)


def minmax(x: float) -> float:
    return x.max() - x.min()

def false_negative_rate(y_true, y_pred):
    if confusion_matrix(y_true, y_pred).shape == (2, 2):
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        return fn / (fn + tp)
    else:
        return np.nan

attrs_con = d
cols_to_drop = ["approx", "target", "pred", "error", "name",
                    "overall_mean_target", "scores",
                    "mean_target_in_neighborhood",
                    "mean_approx_in_neighborhood",
                    "neighborhood_size_div_model_avg",
                    "neighborhood_size_pct",
                    "r_centered_entropy",
                "entropy",
                    "logk_r_centered_entropy",
                    "bacc_test"]

attrs_cols = attrs_con.drop(columns=cols_to_drop)
attrs_agg = attrs_cols \
    .groupby(["dataset", "model", "attack",
              "n_test", "n_classes", "attack_binary"]) \
    .agg(['mean', q0, q25, q50, q75, q1, minmax])
attrs_agg_correct_cols = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols.columns = list(attrs_agg_correct_cols.columns.map('_'.join))
attrs_agg_correct_cols = attrs_agg_correct_cols.reset_index()

In [4]:
d_nn = pd.read_csv("results/attacks_diagnoses_nn.csv")
d_nn["attack_binary"] = np.where(d_nn["attack"]=="org", 0, 1)

attrs_cols = d_nn.drop(columns=cols_to_drop)
attrs_agg = attrs_cols \
    .groupby(["dataset", "model", "attack",
              "n_test", "n_classes", "attack_binary"]) \
    .agg(['mean', q0, q25, q50, q75, q1, minmax])
attrs_agg_correct_cols_nn = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols_nn.columns = list(attrs_agg_correct_cols_nn.columns.map('_'.join))
attrs_agg_correct_cols_nn = attrs_agg_correct_cols_nn.reset_index()

In [50]:
attrs_agg_correct_cols_nn.to_csv("results/attrs_agg_correct_cols_nn.csv", index=False)

In [5]:
attrs_agg_correct_cols = pd.concat([attrs_agg_correct_cols, attrs_agg_correct_cols_nn])

In [6]:
np.unique(attrs_agg_correct_cols["attack_binary"], return_counts=True)

(array([0, 1]), array([ 88, 284]))

In [7]:
data_counts = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "model"]).size().reset_index(name='counts')
data_counts

Unnamed: 0,dataset,model,counts
0,Bioresponse,lin,4
1,Bioresponse,nn,5
2,Bioresponse,svm,4
3,Bioresponse,xgb,4
4,churn,lin,4
...,...,...,...
83,wdbc,xgb,4
84,wilt,lin,4
85,wilt,nn,5
86,wilt,svm,4


In [8]:
data_counts_model_type = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "attack"]).size().reset_index(name='counts')
data_counts_model_type

Unnamed: 0,dataset,attack,counts
0,Bioresponse,bim,1
1,Bioresponse,fgm,1
2,Bioresponse,hsj,3
3,Bioresponse,noise,1
4,Bioresponse,org,4
...,...,...,...
171,wilt,noise,1
172,wilt,org,4
173,wilt,per,3
174,wilt,pgd,1


In [9]:
le = LabelEncoder()
x_train = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack", "attack_binary"])
y_train = attrs_agg_correct_cols["attack_binary"]
y_train_enc = le.fit_transform(y_train)
model = RandomForestClassifier(random_state=123)
model.fit(x_train, y_train_enc)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [10]:
fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
fi_df = pd.DataFrame(fi_dict)
fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)

In [11]:
attrs_agg_correct_cols.to_csv("results/attr_attacks_binary_agr_nn_bacc.csv", index=False)

In [12]:
def create_hyperparams_grid(model, param_grid, exp_function, crit="kappa"):

    quality_measure = []
    params_list = []
    results_table = []
    results_fi = []

    for param1 in param_grid[list(param_grid.keys())[0]]:
        for param2 in param_grid[list(param_grid.keys())[1]]:
            for param3 in param_grid[list(param_grid.keys())[2]]:
                params = {list(param_grid.keys())[0]: param1,
                          list(param_grid.keys())[1]: param2,
                          list(param_grid.keys())[2]: param3}

                model.set_params(**params)
                summary, fi = exp_function(model=model)
                results_table.append(summary)
                results_fi.append(fi)
                params_list.append(params)
                crit_value = np.mean(summary["kappa"])
                quality_measure.append(crit_value)
                print(f"{crit} = {crit_value}, params: {params}")

    best_params_idx = np.argmax(quality_measure)
    best_params = params_list[best_params_idx]

    print(f"Params: {best_params} give best {crit} equal {quality_measure[best_params_idx]}")

    return results_table[best_params_idx], results_fi[best_params_idx]

# Leave-one-data-set-out

In [13]:
def leave_dataset_out(model=xgb.XGBClassifier(random_state=123)):

    datasets = np.unique(attrs_agg_correct_cols["dataset"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for dataset in tqdm(datasets):

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] != dataset]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] == dataset]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["dataset"] = dataset
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'dataset': datasets, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

### Default parameters

In [14]:
one_dataset_out_xgb, one_dataset_out_fi_xgb = leave_dataset_out()
one_dataset_out_xgb

100%|██████████| 22/22 [00:08<00:00,  2.74it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.961538,0.849558,1.0,0.923077,0.96,0.076923
1,churn,1.0,1.0,1.0,1.0,1.0,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.625,0.337662,0.8125,1.0,0.896552,0.0
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,0.884615,0.610687,1.0,0.769231,0.869565,0.230769
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,1.0,1.0,1.0,1.0,1.0,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


In [15]:
print(one_dataset_out_xgb.to_latex(index=False,
                  formatters={"name": str.upper},
                  float_format="{:.1f}".format,
))

\begin{tabular}{lrrrrrr}
\toprule
dataset & bacc & kappa & precision & recall & f1 & fnr \\
\midrule
Bioresponse & 1.0 & 0.8 & 1.0 & 0.9 & 1.0 & 0.1 \\
churn & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
cmc & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
cnae-9 & 0.6 & 0.3 & 0.8 & 1.0 & 0.9 & 0.0 \\
dna & 1.0 & 0.8 & 1.0 & 0.9 & 1.0 & 0.1 \\
har & 0.9 & 0.6 & 1.0 & 0.8 & 0.9 & 0.2 \\
madelon & 0.5 & 0.0 & 0.8 & 1.0 & 0.9 & 0.0 \\
mfeat-factors & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-fourier & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-karhunen & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-zernike & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
nomao & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
optdigits & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
pendigits & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
phoneme & 0.6 & 0.3 & 0.8 & 1.0 & 0.9 & 0.0 \\
qsar-biodeg & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
satimage & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
semeion & 0.8 & 0.5 & 1.0 & 0.7 & 0.8 & 0.3 \\
spambase & 1.0 & 1.0 & 1.0 & 1.0

### Hyperparameters tuning

In [16]:
model=xgb.XGBClassifier(random_state=123)

param_grid = {
    'max_depth': [6, 9, 12],
    'learning_rate': [0.1, 0.3, 0.5],
    'n_estimators': [100, 200, 500]
}

one_dataset_out_xgb, one_dataset_out_fi_xgb = create_hyperparams_grid(model=model, param_grid=param_grid, exp_function=leave_dataset_out, crit="kappa")
one_dataset_out_xgb

100%|██████████| 22/22 [00:08<00:00,  2.65it/s]


kappa = 0.8049589809154892, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  1.84it/s]


kappa = 0.7981206864665754, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:18<00:00,  1.18it/s]


kappa = 0.7909508195832221, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  4.24it/s]


kappa = 0.8039378325702351, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.38it/s]


kappa = 0.809235393808974, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:22<00:00,  1.00s/it]


kappa = 0.7823864055897475, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:08<00:00,  2.52it/s]


kappa = 0.7708638238390051, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:20<00:00,  1.10it/s]


kappa = 0.7716060853234237, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:18<00:00,  1.16it/s]


kappa = 0.7475704126828411, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 500}


100%|██████████| 22/22 [00:07<00:00,  3.07it/s]


kappa = 0.8234856971716127, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.23it/s]


kappa = 0.7981206864665754, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:18<00:00,  1.20it/s]


kappa = 0.7901065311283143, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:06<00:00,  3.26it/s]


kappa = 0.801118993637095, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  1.98it/s]


kappa = 0.7884513334284513, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:26<00:00,  1.22s/it]


kappa = 0.8024337430530452, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:11<00:00,  1.90it/s]


kappa = 0.7687128906802125, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:12<00:00,  1.76it/s]


kappa = 0.7669602705260058, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:20<00:00,  1.08it/s]


kappa = 0.7555922580940669, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 500}


100%|██████████| 22/22 [00:09<00:00,  2.41it/s]


kappa = 0.8234856971716127, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  1.84it/s]


kappa = 0.7981206864665754, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:25<00:00,  1.15s/it]


kappa = 0.7901065311283143, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:09<00:00,  2.32it/s]


kappa = 0.801118993637095, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:18<00:00,  1.17it/s]


kappa = 0.7884513334284513, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:28<00:00,  1.28s/it]


kappa = 0.8024337430530452, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  4.25it/s]


kappa = 0.7687128906802125, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:06<00:00,  3.41it/s]


kappa = 0.7669602705260058, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:11<00:00,  1.92it/s]

kappa = 0.7555922580940669, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 500}
Params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 100} give best kappa equal 0.8234856971716127





Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.961538,0.849558,1.0,0.923077,0.96,0.076923
1,churn,1.0,1.0,1.0,1.0,1.0,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.711538,0.463158,0.857143,0.923077,0.888889,0.076923
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,0.961538,0.849558,1.0,0.923077,0.96,0.076923
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,1.0,1.0,1.0,1.0,1.0,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


In [17]:
one_dataset_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std']).round(2)

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.92,0.97,0.97,0.96,0.03
std,0.15,0.08,0.08,0.06,0.08


In [18]:
def format_mean_std(dataset):   
    metrics = ["bacc", "precision", "recall", "f1", "fnr"] 
    stats = dataset[metrics].agg(['mean', 'std']).round(2)
    formatted = {col: f"{stats.loc['mean',col]:.2f} ({stats.loc['std',col]:.2f})" for col in metrics}
    df = pd.DataFrame([formatted])
    print(df.to_latex(index=False, header=True, escape=False))

format_mean_std(one_dataset_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.92 (0.15) & 0.97 (0.08) & 0.97 (0.08) & 0.96 (0.06) & 0.03 (0.08) \\
\bottomrule
\end{tabular}



In [19]:
fi_agg = one_dataset_out_fi_xgb[["var", "fi_rank"]].groupby(["var"]).agg(['mean'])
fi_agg.columns = fi_agg.columns.to_flat_index()
fi_agg.columns = list(fi_agg.columns.map('_'.join))
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
target_approx_consistency_in_neighborhood_mean,1.545455
n_classes,2.318182
pred_targets_consistency_in_neighborhood_q25,3.0
target_targets_consistency_in_neighborhood_mean,5.090909
uncertainty_q25,5.909091
pred_targets_consistency_in_neighborhood_mean,6.727273
uncertainty_q1,11.454545
pred_targets_consistency_in_neighborhood_q0,11.772727
neighborhood_size_minmax,13.636364
target_approx_consistency_in_neighborhood_q50,14.318182


## Random forest

### Default parameters

In [20]:
one_dataset_out_rf, one_dataset_out_fi_rf = leave_dataset_out(model=RandomForestClassifier(random_state=123))
one_dataset_out_rf

100%|██████████| 22/22 [00:07<00:00,  2.80it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.5,0.0,0.764706,1.0,0.866667,0.0
1,churn,0.625,0.337662,0.8125,1.0,0.896552,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.961538,0.849558,1.0,0.923077,0.96,0.076923
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,0.923077,0.721311,1.0,0.846154,0.916667,0.153846
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,0.875,0.821053,0.928571,1.0,0.962963,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


### Hyperparameters tuning

In [21]:
model=RandomForestClassifier(random_state=123)

param_grid = {
    'max_depth': [50, 80, 110],
    'min_samples_split': [2, 5, 8],
    'n_estimators': [100, 200, 500]
}

one_dataset_out_rf, one_dataset_out_fi_rf = create_hyperparams_grid(model=model, param_grid=param_grid, exp_function=leave_dataset_out, crit="kappa")

100%|██████████| 22/22 [00:05<00:00,  4.21it/s]


kappa = 0.6984347635061045, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:10<00:00,  2.14it/s]


kappa = 0.6632669948306928, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:33<00:00,  1.54s/it]


kappa = 0.6786152829062536, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:11<00:00,  1.86it/s]


kappa = 0.6697134847848255, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:12<00:00,  1.73it/s]


kappa = 0.6794880653839305, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:25<00:00,  1.14s/it]


kappa = 0.6786152829062536, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  4.37it/s]


kappa = 0.6939635709818144, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.23it/s]


kappa = 0.6632669948306928, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:17<00:00,  1.25it/s]


kappa = 0.6632669948306928, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 500}


100%|██████████| 22/22 [00:04<00:00,  5.16it/s]


kappa = 0.6984347635061045, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.39it/s]


kappa = 0.6632669948306928, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:30<00:00,  1.41s/it]


kappa = 0.6786152829062536, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  3.74it/s]


kappa = 0.6697134847848255, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:08<00:00,  2.47it/s]


kappa = 0.6794880653839305, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:19<00:00,  1.11it/s]


kappa = 0.6786152829062536, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:04<00:00,  4.50it/s]


kappa = 0.6939635709818144, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:08<00:00,  2.67it/s]


kappa = 0.6632669948306928, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:21<00:00,  1.00it/s]


kappa = 0.6632669948306928, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  4.21it/s]


kappa = 0.6984347635061045, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:10<00:00,  2.15it/s]


kappa = 0.6632669948306928, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:33<00:00,  1.51s/it]


kappa = 0.6786152829062536, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  3.76it/s]


kappa = 0.6697134847848255, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:10<00:00,  2.10it/s]


kappa = 0.6794880653839305, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:23<00:00,  1.08s/it]


kappa = 0.6786152829062536, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  4.27it/s]


kappa = 0.6939635709818144, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.43it/s]


kappa = 0.6632669948306928, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:21<00:00,  1.03it/s]

kappa = 0.6632669948306928, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 500}
Params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 100} give best kappa equal 0.6984347635061045





In [22]:
one_dataset_out_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.854021,0.936464,0.958042,0.941561,0.041958
std,0.202883,0.100732,0.09709,0.076748,0.09709


In [23]:
format_mean_std(one_dataset_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.85 (0.20) & 0.94 (0.10) & 0.96 (0.10) & 0.94 (0.08) & 0.04 (0.10) \\
\bottomrule
\end{tabular}



In [24]:
fi_agg = one_dataset_out_fi_rf[["var", "fi_rank"]].groupby(["var"]).agg(['mean'])
fi_agg.columns = fi_agg.columns.to_flat_index()
fi_agg.columns = list(fi_agg.columns.map('_'.join))
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
target_approx_consistency_in_neighborhood_q50,1.045455
target_approx_consistency_in_neighborhood_q25,1.954545
target_approx_consistency_in_neighborhood_mean,3.318182
pred_targets_consistency_in_neighborhood_mean,5.454545
pred_targets_consistency_in_neighborhood_q50,7.181818
pred_targets_consistency_in_neighborhood_q25,7.545455
target_diversity_in_neighborhood_q75,8.0
pred_targets_consistency_in_neighborhood_minmax,8.545455
pred_targets_consistency_in_neighborhood_q0,10.363636
target_targets_consistency_in_neighborhood_q50,10.545455


# Leave-one-model-out

In [25]:
def leave_model_out(model=xgb.XGBClassifier(random_state=123)):

    models = np.unique(attrs_agg_correct_cols["model"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for selected_model in models:

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] != selected_model]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] == selected_model]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["model"] = selected_model
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'model': models, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [26]:
one_model_out_xgb, one_model_out_fi_xgb = leave_model_out()
one_model_out_xgb

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.931818,0.850746,0.969231,0.954545,0.961832,0.045455
1,nn,0.772727,0.545455,0.909091,0.909091,0.909091,0.090909
2,svm,0.939394,0.878788,0.969697,0.969697,0.969697,0.030303
3,xgb,0.945312,0.806431,1.0,0.890625,0.942149,0.109375


In [27]:
one_model_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.897313,0.962005,0.93099,0.945692,0.06901
std,0.083241,0.0381,0.037247,0.027012,0.037247


In [28]:
format_mean_std(one_model_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.90 (0.08) & 0.96 (0.04) & 0.93 (0.04) & 0.95 (0.03) & 0.07 (0.04) \\
\bottomrule
\end{tabular}



## Random forest

In [29]:
one_model_out_rf, one_model_out_fi_rf = leave_model_out(model=RandomForestClassifier(random_state=123))
one_model_out_rf

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.931818,0.850746,0.969231,0.954545,0.961832,0.045455
1,nn,0.926136,0.882353,0.966667,0.988636,0.977528,0.011364
2,svm,0.954545,0.9375,0.970588,1.0,0.985075,0.0
3,xgb,0.90696,0.727157,0.982143,0.859375,0.916667,0.140625


In [30]:
one_model_out_rf[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.929865,0.849439
std,0.019591,0.089055


In [31]:
format_mean_std(one_model_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.93 (0.02) & 0.97 (0.01) & 0.95 (0.06) & 0.96 (0.03) & 0.05 (0.06) \\
\bottomrule
\end{tabular}



# 10-fold cross validation

In [32]:
def cross_validation(model=xgb.XGBClassifier(random_state=123)):

    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    iter_cv = []
    fi_all = pd.DataFrame()

    le = LabelEncoder()
    x = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack", "attack_binary"])
    y = attrs_agg_correct_cols["attack_binary"]
    y_enc = le.fit_transform(y)

    skf = StratifiedKFold(n_splits=10, random_state=None)
    cv = 0
    for train, test in skf.split(x, y_enc):

        x_train = x.iloc[train]
        y_train = y_enc[train]

        x_test = x.iloc[test]
        y_test = y_enc[test]

        model.fit(x_train, y_train)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["cv"] = cv
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test, preds))
        kappa_list.append(cohen_kappa_score(y_test, preds))
        precision_list.append(precision_score(y_test, preds))
        recall_list.append(recall_score(y_test, preds))
        f1_list.append(f1_score(y_test, preds))
        fnr_list.append(false_negative_rate(y_test, preds))
        iter_cv.append(cv)
        cv = cv + 1

    results_dict = {'cv': iter_cv, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [33]:
cv_xgb, cv_fi_xgb = cross_validation()
cv_xgb

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.982759,0.929889,1.0,0.965517,0.982456,0.034483
1,1,0.931034,0.747508,1.0,0.862069,0.925926,0.137931
2,2,0.8125,0.723192,0.90625,1.0,0.95082,0.0
3,3,1.0,1.0,1.0,1.0,1.0,0.0
4,4,0.982143,0.929254,1.0,0.964286,0.981818,0.035714
5,5,0.763889,0.444073,0.913043,0.75,0.823529,0.25
6,6,0.744048,0.453875,0.884615,0.821429,0.851852,0.178571
7,7,0.944444,0.923711,0.965517,1.0,0.982456,0.0
8,8,0.888889,0.841202,0.933333,1.0,0.965517,0.0
9,9,0.888889,0.841202,0.933333,1.0,0.965517,0.0


In [34]:
cv_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.893859,0.953609,0.93633,0.942989,0.06367
std,0.092441,0.044995,0.091493,0.059458,0.091493


In [35]:
format_mean_std(cv_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.89 (0.09) & 0.95 (0.04) & 0.94 (0.09) & 0.94 (0.06) & 0.06 (0.09) \\
\bottomrule
\end{tabular}



## Random forest

In [36]:
cv_rf, cv_fi_rf = cross_validation(model=RandomForestClassifier(random_state=123))
cv_rf

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.927203,0.854406,0.965517,0.965517,0.965517,0.034483
1,1,0.862069,0.554252,1.0,0.724138,0.84,0.275862
2,2,0.8125,0.723192,0.90625,1.0,0.95082,0.0
3,3,0.982759,0.923711,1.0,0.965517,0.982456,0.034483
4,4,0.982143,0.929254,1.0,0.964286,0.981818,0.035714
5,5,0.652778,0.27451,0.84,0.75,0.792453,0.25
6,6,0.781746,0.489655,0.916667,0.785714,0.846154,0.214286
7,7,0.888889,0.841202,0.933333,1.0,0.965517,0.0
8,8,0.888889,0.841202,0.933333,1.0,0.965517,0.0
9,9,1.0,1.0,1.0,1.0,1.0,0.0


In [37]:
cv_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.877898,0.94951,0.915517,0.929025,0.084483
std,0.107443,0.053665,0.113919,0.073481,0.113919


In [38]:
format_mean_std(cv_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.88 (0.11) & 0.95 (0.05) & 0.92 (0.11) & 0.93 (0.07) & 0.08 (0.11) \\
\bottomrule
\end{tabular}



# Leave-one-attack-out

In [39]:
np.unique(attrs_agg_correct_cols["attack"])

array(['bim', 'fgm', 'hsj', 'noise', 'org', 'per', 'pgd', 'zoo'],
      dtype=object)

In [40]:
def leave_attack_out(model=xgb.XGBClassifier(random_state=123)):

    # attacks = np.unique(attrs_agg_correct_cols["attack"])
    attacks = ['bim', 'fgm', 'hsj', 'noise', 'per', 'pgd', 'zoo'] # ["hsj", "per", "zoo"]
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for selected_attack in attacks:

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["attack"] != selected_attack]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["attack"] == selected_attack]

        # y = attrs_agg_correct_cols["attack_binary"]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        # y_enc = le.fit(y)
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["attack"] = selected_attack
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'attack': attacks, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [41]:
one_attack_out_xgb, one_attack_out_fi_xgb = leave_attack_out()
one_attack_out_xgb

Unnamed: 0,attack,bacc,kappa,precision,recall,f1,fnr
0,bim,1.0,,1.0,1.0,1.0,
1,fgm,1.0,,1.0,1.0,1.0,
2,hsj,0.96875,0.0,1.0,0.96875,0.984127,0.03125
3,noise,1.0,,1.0,1.0,1.0,
4,per,0.924242,0.0,1.0,0.924242,0.96063,0.075758
5,pgd,1.0,,1.0,1.0,1.0,
6,zoo,0.878788,0.0,1.0,0.878788,0.935484,0.121212


In [42]:
one_attack_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.967397,1.0,0.967397,0.982892,0.076073
std,0.048249,0.0,0.048249,0.025545,0.044982


In [43]:
format_mean_std(one_attack_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.97 (0.05) & 1.00 (0.00) & 0.97 (0.05) & 0.98 (0.03) & 0.08 (0.04) \\
\bottomrule
\end{tabular}



## Random forest

In [44]:
one_attack_out_rf, one_attack_out_fi_rf = leave_attack_out(model=RandomForestClassifier(random_state=123))
one_attack_out_rf

Unnamed: 0,attack,bacc,kappa,precision,recall,f1,fnr
0,bim,1.0,,1.0,1.0,1.0,
1,fgm,1.0,,1.0,1.0,1.0,
2,hsj,0.984375,0.0,1.0,0.984375,0.992126,0.015625
3,noise,1.0,,1.0,1.0,1.0,
4,per,0.878788,0.0,1.0,0.878788,0.935484,0.121212
5,pgd,1.0,,1.0,1.0,1.0,
6,zoo,0.893939,0.0,1.0,0.893939,0.944,0.106061


In [45]:
one_attack_out_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.9653,1.0,0.9653,0.981659,0.080966
std,0.054401,0.0,0.054401,0.028883,0.057092


In [46]:
format_mean_std(one_attack_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.97 (0.05) & 1.00 (0.00) & 0.97 (0.05) & 0.98 (0.03) & 0.08 (0.06) \\
\bottomrule
\end{tabular}



## Save data

In [47]:
def preprocess_output(df, scenario, mod):
    df["scenario"] = scenario
    df["model_class"] = mod

    return df

one_attack_out_rf = preprocess_output(one_attack_out_rf, "one-attack-out", "RF")
one_attack_out_xgb = preprocess_output(one_attack_out_xgb, "one-attack-out", "XGB")
one_dataset_out_rf = preprocess_output(one_dataset_out_rf, "one-data-set-out", "RF")
one_dataset_out_xgb = preprocess_output(one_dataset_out_xgb, "one-data-set-out", "XGB")
one_model_out_rf = preprocess_output(one_model_out_rf, "one-model-out", "RF")
one_model_out_xgb = preprocess_output(one_model_out_xgb, "one-model-out", "XGB")
cv_rf = preprocess_output(cv_rf, "10-fold cross-validation", "RF")
cv_xgb = preprocess_output(cv_xgb, "10-fold cross-validation", "XGB")

all_bacc = pd.concat([one_attack_out_rf, one_attack_out_xgb,
                      one_dataset_out_rf, one_dataset_out_xgb,
                      one_model_out_rf, one_model_out_xgb,
                      cv_rf, cv_xgb])

In [48]:
all_bacc.to_csv("results/detection_bacc_without_bacc.csv", index=False)

In [49]:
one_attack_out_fi_rf = preprocess_output(one_attack_out_fi_rf, "one-attack-out", "RF")
one_attack_out_fi_xgb = preprocess_output(one_attack_out_fi_xgb, "one-attack-out", "XGB")
one_dataset_out_fi_rf = preprocess_output(one_dataset_out_fi_rf, "one-data-set-out", "RF")
one_dataset_out_fi_xgb = preprocess_output(one_dataset_out_fi_xgb, "one-data-set-out", "XGB")
one_model_out_fi_rf = preprocess_output(one_model_out_fi_rf, "one-model-out", "RF")
one_model_out_fi_xgb = preprocess_output(one_model_out_fi_xgb, "one-model-out", "XGB")
cv_fi_rf = preprocess_output(cv_fi_rf, "10-fold cross-validation", "RF")
cv_fi_xgb = preprocess_output(cv_fi_xgb, "10-fold cross-validation", "XGB")

all_fi = pd.concat([one_attack_out_fi_rf, one_attack_out_fi_xgb,
                      one_dataset_out_fi_rf, one_dataset_out_fi_xgb,
                      one_model_out_fi_rf, one_model_out_fi_xgb,
                      cv_fi_rf, cv_fi_xgb])

all_fi.to_csv("results/detection_fi_without_bacc.csv", index=False)