In [51]:
import pandas as pd
import numpy as np
import copy

import xgboost as xgb
from tqdm import tqdm
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold

import warnings
warnings.filterwarnings("ignore")

In [52]:
d = pd.read_csv("results/attacks_diagnoses.csv")
d = d[(d["dataset"] != "mfeat-morphological") & (d["attack"] != "lpf")]
d["attack_binary"] = np.where(d["attack"]=="org", 0, 1)

In [53]:
def q0(x: float) -> float:
    return x.quantile(0)


def q25(x: float) -> float:
    return x.quantile(0.25)


def q50(x: float) -> float:
    return x.quantile(0.5)


def q75(x: float) -> float:
    return x.quantile(0.75)


def q1(x: float) -> float:
    return x.quantile(1)


def minmax(x: float) -> float:
    return x.max() - x.min()

def false_negative_rate(y_true, y_pred):
    if confusion_matrix(y_true, y_pred).shape == (2, 2):
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        return fn / (fn + tp)
    else:
        return np.nan

attrs_con = d
cols_to_drop = ["approx", "target", "pred", "error", "name",
                    "overall_mean_target", "scores",
                    "mean_target_in_neighborhood",
                    "mean_approx_in_neighborhood",
                    "neighborhood_size_div_model_avg",
                    "neighborhood_size_pct",
                    "r_centered_entropy",
                "entropy",
                    "logk_r_centered_entropy"]

attrs_cols = attrs_con.drop(columns=cols_to_drop)
attrs_agg = attrs_cols \
    .groupby(["dataset", "model", "attack",
              "n_test", "n_classes", "attack_binary"]) \
    .agg(['mean', q0, q25, q50, q75, q1, minmax])
attrs_agg_correct_cols = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols.columns = list(attrs_agg_correct_cols.columns.map('_'.join))
attrs_agg_correct_cols = attrs_agg_correct_cols.reset_index()

In [54]:
d_nn = pd.read_csv("results/attacks_diagnoses_nn.csv")
d_nn["attack_binary"] = np.where(d_nn["attack"]=="org", 0, 1)

attrs_cols = d_nn.drop(columns=cols_to_drop)
attrs_agg = attrs_cols \
    .groupby(["dataset", "model", "attack",
              "n_test", "n_classes", "attack_binary"]) \
    .agg(['mean', q0, q25, q50, q75, q1, minmax])
attrs_agg_correct_cols_nn = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols_nn.columns = list(attrs_agg_correct_cols_nn.columns.map('_'.join))
attrs_agg_correct_cols_nn = attrs_agg_correct_cols_nn.reset_index()

In [55]:
attrs_agg_correct_cols = pd.concat([attrs_agg_correct_cols, attrs_agg_correct_cols_nn])

In [56]:
np.unique(attrs_agg_correct_cols["attack_binary"], return_counts=True)

(array([0, 1]), array([ 88, 284]))

In [57]:
data_counts = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "model"]).size().reset_index(name='counts')
data_counts

Unnamed: 0,dataset,model,counts
0,Bioresponse,lin,4
1,Bioresponse,nn,5
2,Bioresponse,svm,4
3,Bioresponse,xgb,4
4,churn,lin,4
...,...,...,...
83,wdbc,xgb,4
84,wilt,lin,4
85,wilt,nn,5
86,wilt,svm,4


In [58]:
data_counts_model_type = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "attack"]).size().reset_index(name='counts')
data_counts_model_type

Unnamed: 0,dataset,attack,counts
0,Bioresponse,bim,1
1,Bioresponse,fgm,1
2,Bioresponse,hsj,3
3,Bioresponse,noise,1
4,Bioresponse,org,4
...,...,...,...
171,wilt,noise,1
172,wilt,org,4
173,wilt,per,3
174,wilt,pgd,1


In [59]:
le = LabelEncoder()
x_train = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack", "attack_binary"])
y_train = attrs_agg_correct_cols["attack_binary"]
y_train_enc = le.fit_transform(y_train)
model = RandomForestClassifier(random_state=123)
model.fit(x_train, y_train_enc)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [60]:
fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
fi_df = pd.DataFrame(fi_dict)
fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)

In [61]:
attrs_agg_correct_cols.to_csv("results/attr_attacks_binary_agr_nn_bacc.csv", index=False)

In [62]:
def create_hyperparams_grid(model, param_grid, exp_function, crit="kappa"):

    quality_measure = []
    params_list = []
    results_table = []
    results_fi = []

    for param1 in param_grid[list(param_grid.keys())[0]]:
        for param2 in param_grid[list(param_grid.keys())[1]]:
            for param3 in param_grid[list(param_grid.keys())[2]]:
                params = {list(param_grid.keys())[0]: param1,
                          list(param_grid.keys())[1]: param2,
                          list(param_grid.keys())[2]: param3}

                model.set_params(**params)
                summary, fi = exp_function(model=model)
                results_table.append(summary)
                results_fi.append(fi)
                params_list.append(params)
                crit_value = np.mean(summary["kappa"])
                quality_measure.append(crit_value)
                print(f"{crit} = {crit_value}, params: {params}")

    best_params_idx = np.argmax(quality_measure)
    best_params = params_list[best_params_idx]

    print(f"Params: {best_params} give best {crit} equal {quality_measure[best_params_idx]}")

    return results_table[best_params_idx], results_fi[best_params_idx]

# Leave-one-data-set-out

In [63]:
def leave_dataset_out(model=xgb.XGBClassifier(random_state=123)):

    datasets = np.unique(attrs_agg_correct_cols["dataset"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for dataset in tqdm(datasets):

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] != dataset]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] == dataset]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["dataset"] = dataset
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'dataset': datasets, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

### Default parameters

In [64]:
one_dataset_out_xgb, one_dataset_out_fi_xgb = leave_dataset_out()
one_dataset_out_xgb

100%|██████████| 22/22 [00:05<00:00,  4.23it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.923077,0.721311,1.0,0.846154,0.916667,0.153846
1,churn,1.0,1.0,1.0,1.0,1.0,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.961538,0.849558,1.0,0.923077,0.96,0.076923
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,1.0,1.0,1.0,1.0,1.0,0.0
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,1.0,1.0,1.0,1.0,1.0,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


In [65]:
print(one_dataset_out_xgb.to_latex(index=False,
                  formatters={"name": str.upper},
                  float_format="{:.1f}".format,
))

\begin{tabular}{lrrrrrr}
\toprule
dataset & bacc & kappa & precision & recall & f1 & fnr \\
\midrule
Bioresponse & 0.9 & 0.7 & 1.0 & 0.8 & 0.9 & 0.2 \\
churn & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
cmc & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
cnae-9 & 1.0 & 0.8 & 1.0 & 0.9 & 1.0 & 0.1 \\
dna & 1.0 & 0.8 & 1.0 & 0.9 & 1.0 & 0.1 \\
har & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
madelon & 0.5 & 0.0 & 0.8 & 1.0 & 0.9 & 0.0 \\
mfeat-factors & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-fourier & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-karhunen & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
mfeat-zernike & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
nomao & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
optdigits & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
pendigits & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
phoneme & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
qsar-biodeg & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
satimage & 1.0 & 1.0 & 1.0 & 1.0 & 1.0 & 0.0 \\
semeion & 0.9 & 0.7 & 1.0 & 0.8 & 0.9 & 0.2 \\
spambase & 1.0 & 1.0 & 1.0 & 1.0

### Hyperparameters tuning

In [66]:
model=xgb.XGBClassifier(random_state=123)

param_grid = {
    'max_depth': [6, 9, 12],
    'learning_rate': [0.1, 0.3, 0.5],
    'n_estimators': [100, 200, 500]
}

one_dataset_out_xgb, one_dataset_out_fi_xgb = create_hyperparams_grid(model=model, param_grid=param_grid, exp_function=leave_dataset_out, crit="kappa")
one_dataset_out_xgb

100%|██████████| 22/22 [00:08<00:00,  2.74it/s]


kappa = 0.8861032471497832, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:16<00:00,  1.30it/s]


kappa = 0.9006734053701991, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:27<00:00,  1.25s/it]


kappa = 0.8938351109212853, params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  3.75it/s]


kappa = 0.9006734053701991, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:07<00:00,  2.78it/s]


kappa = 0.8938351109212853, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:12<00:00,  1.71it/s]


kappa = 0.9006734053701991, params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:03<00:00,  6.77it/s]


kappa = 0.8996644766810152, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:05<00:00,  4.30it/s]


kappa = 0.8996644766810152, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:11<00:00,  1.92it/s]


kappa = 0.8996644766810152, params: {'max_depth': 6, 'learning_rate': 0.5, 'n_estimators': 500}


100%|██████████| 22/22 [00:06<00:00,  3.56it/s]


kappa = 0.8861032471497832, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  2.00it/s]


kappa = 0.9006734053701991, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:32<00:00,  1.49s/it]


kappa = 0.8938351109212853, params: {'max_depth': 9, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:08<00:00,  2.46it/s]


kappa = 0.9006734053701991, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.28it/s]


kappa = 0.8938351109212853, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:15<00:00,  1.39it/s]


kappa = 0.9006734053701991, params: {'max_depth': 9, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:04<00:00,  5.23it/s]


kappa = 0.8996644766810152, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:06<00:00,  3.50it/s]


kappa = 0.8996644766810152, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:13<00:00,  1.59it/s]


kappa = 0.8996644766810152, params: {'max_depth': 9, 'learning_rate': 0.5, 'n_estimators': 500}


100%|██████████| 22/22 [00:05<00:00,  3.83it/s]


kappa = 0.8861032471497832, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 100}


100%|██████████| 22/22 [00:08<00:00,  2.48it/s]


kappa = 0.9006734053701991, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 200}


100%|██████████| 22/22 [00:17<00:00,  1.26it/s]


kappa = 0.8938351109212853, params: {'max_depth': 12, 'learning_rate': 0.1, 'n_estimators': 500}


100%|██████████| 22/22 [00:06<00:00,  3.60it/s]


kappa = 0.9006734053701991, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  1.93it/s]


kappa = 0.8938351109212853, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 200}


100%|██████████| 22/22 [00:22<00:00,  1.02s/it]


kappa = 0.9006734053701991, params: {'max_depth': 12, 'learning_rate': 0.3, 'n_estimators': 500}


100%|██████████| 22/22 [00:06<00:00,  3.36it/s]


kappa = 0.8996644766810152, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 100}


100%|██████████| 22/22 [00:07<00:00,  2.98it/s]


kappa = 0.8996644766810152, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 200}


100%|██████████| 22/22 [00:13<00:00,  1.68it/s]

kappa = 0.8996644766810152, params: {'max_depth': 12, 'learning_rate': 0.5, 'n_estimators': 500}
Params: {'max_depth': 6, 'learning_rate': 0.1, 'n_estimators': 200} give best kappa equal 0.9006734053701991





Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.923077,0.721311,1.0,0.846154,0.916667,0.153846
1,churn,1.0,1.0,1.0,1.0,1.0,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.961538,0.849558,1.0,0.923077,0.96,0.076923
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,1.0,1.0,1.0,1.0,1.0,0.0
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,1.0,1.0,1.0,1.0,1.0,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


In [67]:
one_dataset_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std']).round(2)

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.96,0.99,0.98,0.98,0.02
std,0.11,0.06,0.05,0.04,0.05


In [68]:
def format_mean_std(dataset):   
    metrics = ["bacc", "precision", "recall", "f1", "fnr"] 
    stats = dataset[metrics].agg(['mean', 'std']).round(2)
    formatted = {col: f"{stats.loc['mean',col]:.2f} ({stats.loc['std',col]:.2f})" for col in metrics}
    df = pd.DataFrame([formatted])
    print(df.to_latex(index=False, header=True, escape=False))

format_mean_std(one_dataset_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.96 (0.11) & 0.99 (0.06) & 0.98 (0.05) & 0.98 (0.04) & 0.02 (0.05) \\
\bottomrule
\end{tabular}



In [69]:
fi_agg = one_dataset_out_fi_xgb[["var", "fi_rank"]].groupby(["var"]).agg(['mean'])
fi_agg.columns = fi_agg.columns.to_flat_index()
fi_agg.columns = list(fi_agg.columns.map('_'.join))
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
bacc_test_mean,1.0
target_approx_consistency_in_neighborhood_mean,3.863636
neighborhood_size_mean,6.409091
uncertainty_q0,7.454545
neighborhood_size_q1,14.409091
target_approx_consistency_in_neighborhood_q50,14.409091
neighborhood_size_q0,14.545455
uncertainty_mean,15.545455
target_diversity_in_neighborhood_q25,16.0
pred_targets_consistency_in_neighborhood_q50,16.795455


## Random forest

### Default parameters

In [70]:
one_dataset_out_rf, one_dataset_out_fi_rf = leave_dataset_out(model=RandomForestClassifier(random_state=123))
one_dataset_out_rf

100%|██████████| 22/22 [00:06<00:00,  3.38it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.923077,0.721311,1.0,0.846154,0.916667,0.153846
1,churn,0.875,0.821053,0.928571,1.0,0.962963,0.0
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.961538,0.849558,1.0,0.923077,0.96,0.076923
4,dna,0.961538,0.849558,1.0,0.923077,0.96,0.076923
5,har,1.0,1.0,1.0,1.0,1.0,0.0
6,madelon,0.5,0.0,0.75,1.0,0.857143,0.0
7,mfeat-factors,1.0,1.0,1.0,1.0,1.0,0.0
8,mfeat-fourier,1.0,1.0,1.0,1.0,1.0,0.0
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


### Hyperparameters tuning

In [71]:
model=RandomForestClassifier(random_state=123)

param_grid = {
    'max_depth': [50, 80, 110],
    'min_samples_split': [2, 5, 8],
    'n_estimators': [100, 200, 500]
}

one_dataset_out_rf, one_dataset_out_fi_rf = create_hyperparams_grid(model=model, param_grid=param_grid, exp_function=leave_dataset_out, crit="kappa")

100%|██████████| 22/22 [00:06<00:00,  3.39it/s]


kappa = 0.8725387841325953, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:13<00:00,  1.65it/s]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:48<00:00,  2.21s/it]


kappa = 0.8844007880421831, params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:10<00:00,  2.16it/s]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:12<00:00,  1.73it/s]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:28<00:00,  1.30s/it]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:04<00:00,  4.63it/s]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:09<00:00,  2.43it/s]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:27<00:00,  1.25s/it]


kappa = 0.8762668167503171, params: {'max_depth': 50, 'min_samples_split': 8, 'n_estimators': 500}


100%|██████████| 22/22 [00:12<00:00,  1.80it/s]


kappa = 0.8725387841325953, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:14<00:00,  1.48it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:23<00:00,  1.09s/it]


kappa = 0.8844007880421831, params: {'max_depth': 80, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:04<00:00,  4.80it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:08<00:00,  2.56it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:17<00:00,  1.26it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:03<00:00,  6.03it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:07<00:00,  2.96it/s]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:23<00:00,  1.07s/it]


kappa = 0.8762668167503171, params: {'max_depth': 80, 'min_samples_split': 8, 'n_estimators': 500}


100%|██████████| 22/22 [00:06<00:00,  3.34it/s]


kappa = 0.8725387841325953, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 100}


100%|██████████| 22/22 [00:11<00:00,  1.87it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 200}


100%|██████████| 22/22 [00:21<00:00,  1.04it/s]


kappa = 0.8844007880421831, params: {'max_depth': 110, 'min_samples_split': 2, 'n_estimators': 500}


100%|██████████| 22/22 [00:03<00:00,  6.07it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 100}


100%|██████████| 22/22 [00:06<00:00,  3.20it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 200}


100%|██████████| 22/22 [00:18<00:00,  1.20it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 5, 'n_estimators': 500}


100%|██████████| 22/22 [00:03<00:00,  5.52it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 100}


100%|██████████| 22/22 [00:07<00:00,  2.78it/s]


kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 200}


100%|██████████| 22/22 [00:22<00:00,  1.02s/it]

kappa = 0.8762668167503171, params: {'max_depth': 110, 'min_samples_split': 8, 'n_estimators': 500}
Params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 500} give best kappa equal 0.8844007880421831





In [72]:
one_dataset_out_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.94799,0.979329,0.975524,0.975223,0.024476
std,0.117676,0.06015,0.059991,0.043934,0.059991


In [73]:
format_mean_std(one_dataset_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.95 (0.12) & 0.98 (0.06) & 0.98 (0.06) & 0.98 (0.04) & 0.02 (0.06) \\
\bottomrule
\end{tabular}



In [74]:
fi_agg = one_dataset_out_fi_rf[["var", "fi_rank"]].groupby(["var"]).agg(['mean'])
fi_agg.columns = fi_agg.columns.to_flat_index()
fi_agg.columns = list(fi_agg.columns.map('_'.join))
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
bacc_test_q0,1.0
bacc_test_q75,2.090909
bacc_test_q50,3.227273
bacc_test_q25,4.227273
bacc_test_mean,4.590909
bacc_test_q1,5.863636
target_approx_consistency_in_neighborhood_mean,7.0
target_approx_consistency_in_neighborhood_q25,8.181818
target_approx_consistency_in_neighborhood_q50,8.818182
target_diversity_in_neighborhood_mean,11.545455


# Leave-one-model-out

In [75]:
def leave_model_out(model=xgb.XGBClassifier(random_state=123)):

    models = np.unique(attrs_agg_correct_cols["model"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for selected_model in models:

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] != selected_model]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] == selected_model]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["model"] = selected_model
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'model': models, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [76]:
one_model_out_xgb, one_model_out_fi_xgb = leave_model_out()
one_model_out_xgb

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.977273,0.969231,0.985075,1.0,0.992481,0.0
1,nn,0.926136,0.810811,0.976471,0.943182,0.959538,0.056818
2,svm,0.984848,0.941176,1.0,0.969697,0.984615,0.030303
3,xgb,0.96875,0.884718,1.0,0.9375,0.967742,0.0625


In [77]:
one_model_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.964252,0.990386,0.962595,0.976094,0.037405
std,0.026247,0.011643,0.028614,0.015111,0.028614


In [78]:
format_mean_std(one_model_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.96 (0.03) & 0.99 (0.01) & 0.96 (0.03) & 0.98 (0.02) & 0.04 (0.03) \\
\bottomrule
\end{tabular}



## Random forest

In [79]:
one_model_out_rf, one_model_out_fi_rf = leave_model_out(model=RandomForestClassifier(random_state=123))
one_model_out_rf

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.886364,0.836066,0.929577,1.0,0.963504,0.0
1,nn,0.960227,0.89011,0.988372,0.965909,0.977011,0.034091
2,svm,0.962121,0.910448,0.984615,0.969697,0.977099,0.030303
3,xgb,0.960938,0.857898,1.0,0.921875,0.95935,0.078125


In [80]:
one_model_out_rf[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.942412,0.87363
std,0.037374,0.033094


In [81]:
format_mean_std(one_model_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.94 (0.04) & 0.98 (0.03) & 0.96 (0.03) & 0.97 (0.01) & 0.04 (0.03) \\
\bottomrule
\end{tabular}



# 10-fold cross validation

In [82]:
def cross_validation(model=xgb.XGBClassifier(random_state=123)):

    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    iter_cv = []
    fi_all = pd.DataFrame()

    le = LabelEncoder()
    x = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack", "attack_binary"])
    y = attrs_agg_correct_cols["attack_binary"]
    y_enc = le.fit_transform(y)

    skf = StratifiedKFold(n_splits=10, random_state=None)
    cv = 0
    for train, test in skf.split(x, y_enc):

        x_train = x.iloc[train]
        y_train = y_enc[train]

        x_test = x.iloc[test]
        y_test = y_enc[test]

        model.fit(x_train, y_train)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["cv"] = cv
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test, preds))
        kappa_list.append(cohen_kappa_score(y_test, preds))
        precision_list.append(precision_score(y_test, preds))
        recall_list.append(recall_score(y_test, preds))
        f1_list.append(f1_score(y_test, preds))
        fnr_list.append(false_negative_rate(y_test, preds))
        iter_cv.append(cv)
        cv = cv + 1

    results_dict = {'cv': iter_cv, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [83]:
cv_xgb, cv_fi_xgb = cross_validation()
cv_xgb

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.982759,0.929889,1.0,0.965517,0.982456,0.034483
1,1,0.948276,0.804124,1.0,0.896552,0.945455,0.103448
2,2,0.875,0.824645,0.935484,1.0,0.966667,0.0
3,3,1.0,1.0,1.0,1.0,1.0,0.0
4,4,1.0,1.0,1.0,1.0,1.0,0.0
5,5,0.946429,0.802139,1.0,0.892857,0.943396,0.107143
6,6,0.855159,0.617241,0.958333,0.821429,0.884615,0.178571
7,7,0.833333,0.751678,0.903226,1.0,0.949153,0.0
8,8,0.944444,0.923711,0.965517,1.0,0.982456,0.0
9,9,1.0,1.0,1.0,1.0,1.0,0.0


In [84]:
cv_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.93854,0.976256,0.957635,0.96542,0.042365
std,0.062838,0.034665,0.064386,0.036281,0.064386


In [85]:
format_mean_std(cv_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.94 (0.06) & 0.98 (0.03) & 0.96 (0.06) & 0.97 (0.04) & 0.04 (0.06) \\
\bottomrule
\end{tabular}



## Random forest

In [86]:
cv_rf, cv_fi_rf = cross_validation(model=RandomForestClassifier(random_state=123))
cv_rf

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.927203,0.854406,0.965517,0.965517,0.965517,0.034483
1,1,0.965517,0.864769,1.0,0.931034,0.964286,0.068966
2,2,0.8125,0.723192,0.90625,1.0,0.95082,0.0
3,3,1.0,1.0,1.0,1.0,1.0,0.0
4,4,1.0,1.0,1.0,1.0,1.0,0.0
5,5,0.946429,0.802139,1.0,0.892857,0.943396,0.107143
6,6,0.90873,0.787763,0.962963,0.928571,0.945455,0.071429
7,7,0.888889,0.841202,0.933333,1.0,0.965517,0.0
8,8,0.944444,0.923711,0.965517,1.0,0.982456,0.0
9,9,1.0,1.0,1.0,1.0,1.0,0.0


In [87]:
cv_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.939371,0.973358,0.971798,0.971745,0.028202
std,0.059178,0.033142,0.04024,0.022548,0.04024


In [88]:
format_mean_std(cv_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.94 (0.06) & 0.97 (0.03) & 0.97 (0.04) & 0.97 (0.02) & 0.03 (0.04) \\
\bottomrule
\end{tabular}



# Leave-one-attack-out

In [89]:
np.unique(attrs_agg_correct_cols["attack"])

array(['bim', 'fgm', 'hsj', 'noise', 'org', 'per', 'pgd', 'zoo'],
      dtype=object)

In [90]:
def leave_attack_out(model=xgb.XGBClassifier(random_state=123)):

    # attacks = np.unique(attrs_agg_correct_cols["attack"])
    attacks = ['bim', 'fgm', 'hsj', 'noise', 'per', 'pgd', 'zoo'] # ["hsj", "per", "zoo"]
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_all = pd.DataFrame()

    for selected_attack in attacks:

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["attack"] != selected_attack]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["attack"] == selected_attack]

        # y = attrs_agg_correct_cols["attack_binary"]

        x_train = train.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_train = train["attack_binary"]

        x_test = test.drop(columns=["dataset", "model", "attack", "attack_binary"])
        y_test = test["attack_binary"]

        le = LabelEncoder()
        # y_enc = le.fit(y)
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_dict = {'var': x_train.columns, 'fi': model.feature_importances_}
        fi_df = pd.DataFrame(fi_dict)
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["attack"] = selected_attack
        fi_all = pd.concat([fi_all, fi_df])

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds))
        recall_list.append(recall_score(y_test_enc, preds))
        f1_list.append(f1_score(y_test_enc, preds))
        fnr_list.append(false_negative_rate(y_test_enc, preds))

    results_dict = {'attack': attacks, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    return results_df, fi_all

## XGBoost

In [91]:
one_attack_out_xgb, one_attack_out_fi_xgb = leave_attack_out()
one_attack_out_xgb

Unnamed: 0,attack,bacc,kappa,precision,recall,f1,fnr
0,bim,1.0,,1.0,1.0,1.0,
1,fgm,1.0,,1.0,1.0,1.0,
2,hsj,1.0,,1.0,1.0,1.0,
3,noise,0.863636,0.0,1.0,0.863636,0.926829,0.136364
4,per,0.954545,0.0,1.0,0.954545,0.976744,0.045455
5,pgd,1.0,,1.0,1.0,1.0,
6,zoo,0.939394,0.0,1.0,0.939394,0.96875,0.060606


In [92]:
one_attack_out_xgb[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.965368,1.0,0.965368,0.98176,0.080808
std,0.051541,0.0,0.051541,0.027515,0.048705


In [93]:
format_mean_std(one_attack_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.97 (0.05) & 1.00 (0.00) & 0.97 (0.05) & 0.98 (0.03) & 0.08 (0.05) \\
\bottomrule
\end{tabular}



## Random forest

In [94]:
one_attack_out_rf, one_attack_out_fi_rf = leave_attack_out(model=RandomForestClassifier(random_state=123))
one_attack_out_rf

Unnamed: 0,attack,bacc,kappa,precision,recall,f1,fnr
0,bim,1.0,,1.0,1.0,1.0,
1,fgm,1.0,,1.0,1.0,1.0,
2,hsj,1.0,,1.0,1.0,1.0,
3,noise,0.818182,0.0,1.0,0.818182,0.9,0.181818
4,per,0.939394,0.0,1.0,0.939394,0.96875,0.060606
5,pgd,1.0,,1.0,1.0,1.0,
6,zoo,0.939394,0.0,1.0,0.939394,0.96875,0.060606


In [95]:
one_attack_out_rf[["bacc", "precision", "recall", "f1", "fnr"]].agg(['mean', 'std'])

Unnamed: 0,bacc,precision,recall,f1,fnr
mean,0.95671,1.0,0.95671,0.976786,0.10101
std,0.067436,0.0,0.067436,0.036925,0.069982


In [96]:
format_mean_std(one_attack_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.96 (0.07) & 1.00 (0.00) & 0.96 (0.07) & 0.98 (0.04) & 0.10 (0.07) \\
\bottomrule
\end{tabular}



## Save data

In [97]:
def preprocess_output(df, scenario, mod):
    df["scenario"] = scenario
    df["model_class"] = mod

    return df

one_attack_out_rf = preprocess_output(one_attack_out_rf, "one-attack-out", "RF")
one_attack_out_xgb = preprocess_output(one_attack_out_xgb, "one-attack-out", "XGB")
one_dataset_out_rf = preprocess_output(one_dataset_out_rf, "one-data-set-out", "RF")
one_dataset_out_xgb = preprocess_output(one_dataset_out_xgb, "one-data-set-out", "XGB")
one_model_out_rf = preprocess_output(one_model_out_rf, "one-model-out", "RF")
one_model_out_xgb = preprocess_output(one_model_out_xgb, "one-model-out", "XGB")
cv_rf = preprocess_output(cv_rf, "10-fold cross-validation", "RF")
cv_xgb = preprocess_output(cv_xgb, "10-fold cross-validation", "XGB")

all_bacc = pd.concat([one_attack_out_rf, one_attack_out_xgb,
                      one_dataset_out_rf, one_dataset_out_xgb,
                      one_model_out_rf, one_model_out_xgb,
                      cv_rf, cv_xgb])

In [98]:
all_bacc.to_csv("results/detection_bacc_with_bacc.csv", index=False)

In [99]:
one_attack_out_fi_rf = preprocess_output(one_attack_out_fi_rf, "one-attack-out", "RF")
one_attack_out_fi_xgb = preprocess_output(one_attack_out_fi_xgb, "one-attack-out", "XGB")
one_dataset_out_fi_rf = preprocess_output(one_dataset_out_fi_rf, "one-data-set-out", "RF")
one_dataset_out_fi_xgb = preprocess_output(one_dataset_out_fi_xgb, "one-data-set-out", "XGB")
one_model_out_fi_rf = preprocess_output(one_model_out_fi_rf, "one-model-out", "RF")
one_model_out_fi_xgb = preprocess_output(one_model_out_fi_xgb, "one-model-out", "XGB")
cv_fi_rf = preprocess_output(cv_fi_rf, "10-fold cross-validation", "RF")
cv_fi_xgb = preprocess_output(cv_fi_xgb, "10-fold cross-validation", "XGB")

all_fi = pd.concat([one_attack_out_fi_rf, one_attack_out_fi_xgb,
                      one_dataset_out_fi_rf, one_dataset_out_fi_xgb,
                      one_model_out_fi_rf, one_model_out_fi_xgb,
                      cv_fi_rf, cv_fi_xgb])

all_fi.to_csv("results/detection_fi_without_bacc.csv", index=False)