In [42]:
import copy
import itertools
from typing import List

import numpy as np
import pandas as pd
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score, cohen_kappa_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.model_selection import StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import LabelEncoder
from tqdm import tqdm

import warnings
warnings.filterwarnings("ignore")

In [43]:
# Read the per-observation attack diagnoses, then keep only rows that belong
# neither to the "mfeat-morphological" data set nor to the "lpf" attack.
d = pd.read_csv("results/attacks_diagnoses.csv")
keep_dataset = d["dataset"] != "mfeat-morphological"
keep_attack = d["attack"] != "lpf"
d = d[keep_dataset & keep_attack]

In [44]:
def multiclass_false_negative_rate(y_true: List[str], y_pred: List[str]) -> float:
    """
    Calculates the macro-averaged False Negative Rate (FNR) of a multiclass
    classification.

    The FNR of a class is the share of samples that truly belong to the class
    but were predicted as something else:
    FNR = False Negatives / (False Negatives + True Positives)

    Only classes that occur in ``y_true`` are considered, so the denominator
    (the number of true samples of the class) is always at least one. The
    per-class values are averaged with equal weight.

    Args:
        y_true (List[str]): True labels (any array-like of comparable labels).
        y_pred (List[str]): Predicted labels, same length as ``y_true``.

    Returns:
        float: The average False Negative Rate across all classes present in
               ``y_true``. Returns 0.0 if ``y_true`` is empty.

    Raises:
        ValueError: If ``y_true`` and ``y_pred`` differ in length.
    """
    if len(y_true) != len(y_pred):
        raise ValueError("Input arrays y_true and y_pred must have the same length.")

    # Convert once up front instead of on every loop iteration.
    true_arr = np.asarray(y_true)
    pred_arr = np.asarray(y_pred)

    fnr_values = []
    for c in np.unique(true_arr):
        # Samples whose true label is the current class 'c'
        actual_positives = true_arr == c

        # False negatives: truly 'c' but predicted as another class
        false_negatives = np.sum(actual_positives & (pred_arr != c))

        # 'c' comes from y_true, so there is at least one actual positive and
        # the division is always defined.
        fnr_values.append(false_negatives / np.sum(actual_positives))

    if not fnr_values:
        return 0.0
    # Return a builtin float, as the annotation promises.
    return float(np.mean(fnr_values))

In [45]:
def q0(x: pd.Series) -> float:
    """Return the 0-quantile (minimum) of ``x``.

    ``x`` must offer ``.quantile`` (e.g. a pandas Series), not a plain float.
    The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.quantile(0)


def q25(x: pd.Series) -> float:
    """Return the 0.25-quantile (first quartile) of ``x``.

    ``x`` must offer ``.quantile`` (e.g. a pandas Series), not a plain float.
    The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.quantile(0.25)


def q50(x: pd.Series) -> float:
    """Return the 0.5-quantile (median) of ``x``.

    ``x`` must offer ``.quantile`` (e.g. a pandas Series), not a plain float.
    The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.quantile(0.5)


def q75(x: pd.Series) -> float:
    """Return the 0.75-quantile (third quartile) of ``x``.

    ``x`` must offer ``.quantile`` (e.g. a pandas Series), not a plain float.
    The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.quantile(0.75)


def q1(x: pd.Series) -> float:
    """Return the 1-quantile (maximum) of ``x``.

    Note that the name means "quantile 1", not "first quartile".
    ``x`` must offer ``.quantile`` (e.g. a pandas Series), not a plain float.
    The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.quantile(1)


def minmax(x: pd.Series) -> float:
    """Return the range of ``x``, i.e. ``x.max() - x.min()``.

    ``x`` must offer ``.max`` and ``.min`` (e.g. a pandas Series), not a plain
    float. The function name is what ``.agg`` uses to label the resulting column.
    """
    return x.max() - x.min()

def false_negative_rate(y_true, y_pred):
    """Binary false negative rate, FN / (FN + TP).

    Args:
        y_true: True labels.
        y_pred: Predicted labels.

    Returns:
        The false negative rate when the confusion matrix of the inputs is
        2x2; ``np.nan`` when it has any other shape or when there are no
        positive samples (FN + TP == 0).
    """
    # Build the confusion matrix once and reuse it for the shape check and
    # for the counts.
    cm = confusion_matrix(y_true, y_pred)
    if cm.shape != (2, 2):
        return np.nan

    tn, fp, fn, tp = cm.ravel()
    positives = fn + tp
    if positives == 0:
        # Undefined rate; state it explicitly instead of relying on a 0/0
        # division warning to produce nan.
        return np.nan
    return fn / positives

# Columns removed from the diagnoses table before it is aggregated.
cols_to_drop = [
    "approx", "target", "pred", "error", "name",
    "overall_mean_target", "scores",
    "mean_target_in_neighborhood",
    "mean_approx_in_neighborhood",
    "neighborhood_size_div_model_avg",
    "neighborhood_size_pct",
    "r_centered_entropy",
    "entropy",
    "logk_r_centered_entropy",
]
# One output row per combination of these keys.
group_keys = ["dataset", "model", "attack", "n_test", "n_classes"]
# Summary statistics computed for every remaining column.
summary_funcs = ['mean', q0, q25, q50, q75, q1, minmax]

attrs_con = d
attrs_cols = attrs_con.drop(columns=cols_to_drop)
attrs_agg = attrs_cols.groupby(group_keys).agg(summary_funcs)

# Flatten the (column, statistic) header into "<column>_<statistic>" names and
# turn the group keys back into ordinary columns.
attrs_agg_correct_cols = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols.columns = ['_'.join(pair) for pair in attrs_agg_correct_cols.columns]
attrs_agg_correct_cols = attrs_agg_correct_cols.reset_index()

In [46]:
# Apply the same drop / group / summarise steps to the nn diagnoses file.
d_nn = pd.read_csv("results/attacks_diagnoses_nn.csv")

attrs_cols = d_nn.drop(columns=cols_to_drop)
attrs_agg = (
    attrs_cols
    .groupby(["dataset", "model", "attack", "n_test", "n_classes"])
    .agg(['mean', q0, q25, q50, q75, q1, minmax])
)

# Flatten the (column, statistic) header into "<column>_<statistic>" names and
# turn the group keys back into ordinary columns.
attrs_agg_correct_cols_nn = copy.deepcopy(attrs_agg)
attrs_agg_correct_cols_nn.columns = ['_'.join(pair) for pair in attrs_agg_correct_cols_nn.columns]
attrs_agg_correct_cols_nn = attrs_agg_correct_cols_nn.reset_index()

In [47]:
# Stack the nn aggregates under the aggregates computed from `d`.
# NOTE(review): this cell rebinds its own input, so running it a second time
# without re-running the aggregation cells appends the nn rows again. The
# original row labels of both frames are kept (no ignore_index).
attrs_agg_correct_cols = pd.concat([attrs_agg_correct_cols, attrs_agg_correct_cols_nn])

In [48]:
np.unique(attrs_agg_correct_cols["attack"], return_counts=True)

(array(['bim', 'fgm', 'hsj', 'noise', 'org', 'per', 'pgd', 'zoo'],
       dtype=object),
 array([22, 22, 64, 22, 88, 66, 22, 66]))

In [49]:
data_counts = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "model"]).size().reset_index(name='counts')
data_counts

Unnamed: 0,dataset,model,counts
0,Bioresponse,lin,4
1,Bioresponse,nn,5
2,Bioresponse,svm,4
3,Bioresponse,xgb,4
4,churn,lin,4
...,...,...,...
83,wdbc,xgb,4
84,wilt,lin,4
85,wilt,nn,5
86,wilt,svm,4


In [50]:
data_counts_model_type = attrs_agg_correct_cols[["dataset", "model", "attack"]].groupby(["dataset", "attack"]).size().reset_index(name='counts')
data_counts_model_type

Unnamed: 0,dataset,attack,counts
0,Bioresponse,bim,1
1,Bioresponse,fgm,1
2,Bioresponse,hsj,3
3,Bioresponse,noise,1
4,Bioresponse,org,4
...,...,...,...
171,wilt,noise,1
172,wilt,org,4
173,wilt,per,3
174,wilt,pgd,1


In [51]:
# Fit a random forest on the whole aggregated table: every column except the
# identifiers is a feature, the attack name is the target.
x_train = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack"])
y_train = attrs_agg_correct_cols["attack"]

# Encode the attack names as integers.
le = LabelEncoder()
le.fit(y_train)
y_train_enc = le.transform(y_train)

model = RandomForestClassifier(random_state=123)
model.fit(x_train, y_train_enc)

0,1,2
,n_estimators,100
,criterion,'gini'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,'sqrt'
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


In [52]:
# Feature importances of the fitted forest; rank 1 = most important.
fi_dict = dict(var=x_train.columns, fi=model.feature_importances_)
fi_df = pd.DataFrame(fi_dict).assign(
    fi_rank=lambda frame: frame["fi"].rank(ascending=False)
)

In [53]:
attrs_agg_correct_cols.to_csv("results/attr_attacks_type_agr_nn_bacc.csv", index=False)

In [54]:
def create_hyperparams_grid(model, param_grid, exp_function, crit="kappa"):
    """
    Evaluate every combination of ``param_grid`` and return the results of the
    best one.

    Args:
        model: Estimator exposing ``set_params(**params)``. It is reconfigured
            in place for each combination and is left with the LAST
            combination set, not the best one.
        param_grid (dict): Maps a parameter name to the list of values to try.
            Any number of parameters is supported; combinations are visited
            with the last parameter varying fastest.
        exp_function (callable): Called as ``exp_function(model=model)``; must
            return ``(summary, fi)`` where ``summary[crit]`` holds the scores
            to average.
        crit (str): Key/column of ``summary`` whose mean is maximised.

    Returns:
        tuple: The ``(summary, fi)`` pair of the combination with the highest
        mean ``crit``.

    Raises:
        ValueError: If the grid yields no parameter combination.
    """
    param_names = list(param_grid.keys())

    quality_measure = []
    params_list = []
    results_table = []
    results_fi = []

    for values in itertools.product(*(param_grid[name] for name in param_names)):
        params = dict(zip(param_names, values))

        model.set_params(**params)
        summary, fi = exp_function(model=model)
        results_table.append(summary)
        results_fi.append(fi)
        params_list.append(params)
        # Select on the requested criterion (previously hard-coded to "kappa").
        crit_value = np.mean(summary[crit])
        quality_measure.append(crit_value)
        print(f"{crit} = {crit_value}, params: {params}")

    if not quality_measure:
        raise ValueError("param_grid must define at least one parameter combination.")

    best_params_idx = int(np.argmax(quality_measure))
    best_params = params_list[best_params_idx]

    print(f"Params: {best_params} give best {crit} equal {quality_measure[best_params_idx]}")

    return results_table[best_params_idx], results_fi[best_params_idx]

# Leave-one-data-set-out

In [55]:
def leave_dataset_out(model=None, params=None):
    """
    Leave-one-data-set-out evaluation of an attack-type classifier.

    For each distinct value of ``attrs_agg_correct_cols["dataset"]`` the rows of
    that data set are held out, ``model`` is fitted on all other rows and
    scored on the held-out rows. Features are all columns except "dataset",
    "model" and "attack"; the target is "attack".

    Args:
        model: Classifier exposing ``fit``, ``predict``, ``set_params`` and
            ``feature_importances_``. When omitted, a fresh
            ``xgb.XGBClassifier(random_state=123)`` is created for this call.
            A passed instance is reconfigured and refitted in place.
        params (dict, optional): Forwarded to ``model.set_params`` before any
            fitting.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            * one row per held-out data set with the columns dataset, bacc,
              kappa, precision, recall, f1 and fnr;
            * the feature importances (var, fi, fi_rank, dataset) of every
              fitted model, stacked.
    """
    if model is None:
        # Build the default per call: a default created in the signature is a
        # single shared instance that set_params/fit would mutate across calls.
        model = xgb.XGBClassifier(random_state=123)
    if params is not None:
        # Same parameters for every split, so configure the model once.
        model.set_params(**params)

    id_cols = ["dataset", "model", "attack"]
    datasets = np.unique(attrs_agg_correct_cols["dataset"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_frames = []

    for dataset in tqdm(datasets):

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] != dataset]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["dataset"] == dataset]

        x_train = train.drop(columns=id_cols)
        y_train = train["attack"]

        x_test = test.drop(columns=id_cols)
        y_test = test["attack"]

        # The encoder is fitted on the training labels only; transform() raises
        # if the held-out data set contains an attack absent from training.
        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)
        y_test_enc = le.transform(y_test)

        model.fit(x_train, y_train_enc)

        fi_df = pd.DataFrame({'var': x_train.columns, 'fi': model.feature_importances_})
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["dataset"] = dataset
        fi_frames.append(fi_df)

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test_enc, preds))
        kappa_list.append(cohen_kappa_score(y_test_enc, preds))
        precision_list.append(precision_score(y_test_enc, preds, average='weighted'))
        recall_list.append(recall_score(y_test_enc, preds, average='weighted'))
        f1_list.append(f1_score(y_test_enc, preds, average='weighted'))
        fnr_list.append(multiclass_false_negative_rate(y_test_enc, preds))

    results_dict = {'dataset': datasets, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}

    results_df = pd.DataFrame(results_dict)
    # Concatenate once instead of growing a frame inside the loop.
    fi_all = pd.concat(fi_frames) if fi_frames else pd.DataFrame()
    return results_df, fi_all

## XGBoost

### Default parameters

In [56]:
one_dataset_out_xgb, one_dataset_out_fi_xgb = leave_dataset_out()
one_dataset_out_xgb

100%|██████████| 22/22 [00:12<00:00,  1.73it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.833333,0.857143,0.862745,0.882353,0.858824,0.166667
1,churn,0.666667,0.719008,0.752941,0.764706,0.728431,0.333333
2,cmc,0.875,0.929752,0.911765,0.941176,0.921569,0.125
3,cnae-9,0.75,0.717842,0.717647,0.764706,0.730719,0.25
4,dna,0.541667,0.650206,0.747059,0.705882,0.70719,0.458333
5,har,0.791667,0.647303,0.6,0.705882,0.646125,0.208333
6,madelon,0.5,0.342466,0.384821,0.4375,0.344345,0.5
7,mfeat-factors,0.875,0.929752,0.911765,0.941176,0.921569,0.125
8,mfeat-fourier,0.916667,0.859504,0.882353,0.882353,0.882353,0.083333
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


### Hyperparameter tuning

In [57]:
# Set to True to rerun the full grid search; otherwise the best combination
# recorded from an earlier search is evaluated directly.
retrain = False

if retrain:
    model = xgb.XGBClassifier(random_state=123)

    param_grid = {
        'max_depth': [6, 9, 12],
        'learning_rate': [0.1, 0.3, 0.5],
        'n_estimators': [100, 200, 500]
    }

    one_dataset_out_xgb, one_dataset_out_fi_xgb = create_hyperparams_grid(model=model, param_grid=param_grid, exp_function=leave_dataset_out, crit="kappa")
else:
    # Params: {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 500} give best kappa equal 0.7297931298863016
    params = {'max_depth': 6, 'learning_rate': 0.3, 'n_estimators': 500, 'random_state': 123}
    one_dataset_out_xgb, one_dataset_out_fi_xgb = leave_dataset_out(model=xgb.XGBClassifier(random_state=123), params=params)

100%|██████████| 22/22 [01:20<00:00,  3.67s/it]


In [58]:
one_dataset_out_xgb[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.736742,0.729793
std,0.190349,0.201127


In [59]:
def format_mean_std(dataset):   
    metrics = ["bacc", "precision", "recall", "f1", "fnr"] 
    stats = dataset[metrics].agg(['mean', 'std']).round(2)
    formatted = {col: f"{stats.loc['mean',col]:.2f} ({stats.loc['std',col]:.2f})" for col in metrics}
    df = pd.DataFrame([formatted])
    print(df.to_latex(index=False, header=True, escape=False))

format_mean_std(one_dataset_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.74 (0.19) & 0.76 (0.18) & 0.77 (0.17) & 0.74 (0.19) & 0.26 (0.19) \\
\bottomrule
\end{tabular}



In [60]:
# Mean importance rank of each feature across the leave-one-data-set-out
# XGBoost fits; show the 20 features with the smallest mean rank.
rank_by_var = one_dataset_out_fi_xgb[["var", "fi_rank"]].groupby(["var"])
fi_agg = rank_by_var.agg(['mean'])
fi_agg.columns = ['_'.join(col) for col in fi_agg.columns.to_flat_index()]
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
uncertainty_q25,2.227273
bacc_test_mean,3.454545
uncertainty_minmax,4.318182
uncertainty_q75,5.954545
uncertainty_mean,7.5
target_targets_consistency_in_neighborhood_q75,7.863636
target_diversity_in_neighborhood_q1,9.318182
uncertainty_q0,9.318182
uncertainty_q50,10.0
target_approx_consistency_in_neighborhood_q25,12.636364


## Random forest

### Default parameters

In [61]:
one_dataset_out_rf, one_dataset_out_fi_rf = leave_dataset_out(model=RandomForestClassifier(random_state=123))
one_dataset_out_rf

100%|██████████| 22/22 [00:07<00:00,  3.07it/s]


Unnamed: 0,dataset,bacc,kappa,precision,recall,f1,fnr
0,Bioresponse,0.583333,0.571429,0.598039,0.647059,0.59888,0.416667
1,churn,0.666667,0.719008,0.752941,0.764706,0.728431,0.333333
2,cmc,1.0,1.0,1.0,1.0,1.0,0.0
3,cnae-9,0.708333,0.647303,0.776471,0.705882,0.660131,0.291667
4,dna,0.666667,0.717842,0.751961,0.764706,0.730999,0.333333
5,har,0.625,0.506224,0.464706,0.588235,0.513072,0.375
6,madelon,0.541667,0.30131,0.329861,0.375,0.274621,0.458333
7,mfeat-factors,0.875,0.929752,0.911765,0.941176,0.921569,0.125
8,mfeat-fourier,0.791667,0.789256,0.841176,0.823529,0.789216,0.208333
9,mfeat-karhunen,1.0,1.0,1.0,1.0,1.0,0.0


### Hyperparameter tuning

In [62]:
# Set to True to rerun the full grid search; otherwise the best combination
# recorded from an earlier search is evaluated directly.
retrain = False

if not retrain:
    # Params: {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 500} give best kappa equal 0.7182473851325019
    params = {'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 500, 'random_state': 123}
    one_dataset_out_rf, one_dataset_out_fi_rf = leave_dataset_out(
        model=RandomForestClassifier(random_state=123),
        params=params,
    )
else:
    model = RandomForestClassifier(random_state=123)

    param_grid = {
        'max_depth': [50, 80, 110],
        'min_samples_split': [2, 5, 8],
        'n_estimators': [100, 200, 500]
    }

    one_dataset_out_rf, one_dataset_out_fi_rf = create_hyperparams_grid(
        model=model,
        param_grid=param_grid,
        exp_function=leave_dataset_out,
        crit="kappa",
    )

100%|██████████| 22/22 [00:30<00:00,  1.38s/it]


In [63]:
one_dataset_out_rf[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.74053,0.718247
std,0.187084,0.212155


In [64]:
format_mean_std(one_dataset_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.74 (0.19) & 0.74 (0.20) & 0.76 (0.18) & 0.72 (0.21) & 0.26 (0.19) \\
\bottomrule
\end{tabular}



In [65]:
# Mean importance rank of each feature across the leave-one-data-set-out
# random-forest fits; show the 20 features with the smallest mean rank.
rank_by_var = one_dataset_out_fi_rf[["var", "fi_rank"]].groupby(["var"])
fi_agg = rank_by_var.agg(['mean'])
fi_agg.columns = ['_'.join(col) for col in fi_agg.columns.to_flat_index()]
fi_agg.nsmallest(n=20, columns=["fi_rank_mean"])

Unnamed: 0_level_0,fi_rank_mean
var,Unnamed: 1_level_1
bacc_test_q0,1.181818
bacc_test_q75,3.136364
bacc_test_mean,3.772727
bacc_test_q50,3.863636
bacc_test_q1,4.5
bacc_test_q25,4.545455
uncertainty_mean,7.227273
uncertainty_minmax,8.863636
uncertainty_q75,9.318182
uncertainty_q50,9.681818


# Leave-one-model-out

In [66]:
def leave_model_out(model=None):
    """
    Leave-one-model-out evaluation of an attack-type classifier.

    For each distinct value of ``attrs_agg_correct_cols["model"]`` the rows of
    that attacked model are held out, ``model`` is fitted on all other rows
    and scored on the held-out rows. Features are all columns except
    "dataset", "model" and "attack"; the target is "attack".

    Args:
        model: Classifier exposing ``fit``, ``predict`` and
            ``feature_importances_``. When omitted, a fresh
            ``xgb.XGBClassifier(random_state=123)`` is created for this call.
            A passed instance is refitted in place.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            * one row per held-out model with the columns model, bacc, kappa,
              precision, recall, f1 and fnr;
            * the feature importances (var, fi, fi_rank, model) of every
              fitted classifier, stacked.
    """
    if model is None:
        # Build the default per call: a default created in the signature is a
        # single shared instance that fit() would mutate across calls.
        model = xgb.XGBClassifier(random_state=123)

    id_cols = ["dataset", "model", "attack"]
    models = np.unique(attrs_agg_correct_cols["model"])
    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    fi_frames = []

    for selected_model in models:

        train = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] != selected_model]
        test = attrs_agg_correct_cols[attrs_agg_correct_cols["model"] == selected_model]

        x_train = train.drop(columns=id_cols)
        y_train = train["attack"]

        x_test = test.drop(columns=id_cols)
        y_test = test["attack"]

        # Only the training labels are encoded; predictions are decoded back
        # to attack names and compared with the raw test labels.
        # NOTE(review): presumably because a held-out model can contain attack
        # names that never occur in training, which the encoder could not
        # transform — confirm.
        le = LabelEncoder()
        y_train_enc = le.fit_transform(y_train)

        model.fit(x_train, y_train_enc)

        fi_df = pd.DataFrame({'var': x_train.columns, 'fi': model.feature_importances_})
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["model"] = selected_model
        fi_frames.append(fi_df)

        # Decode once and reuse for every metric.
        pred_labels = le.inverse_transform(model.predict(x_test))
        bacc_list.append(balanced_accuracy_score(y_test, pred_labels))
        kappa_list.append(cohen_kappa_score(y_test, pred_labels))
        precision_list.append(precision_score(y_test, pred_labels, average='weighted'))
        recall_list.append(recall_score(y_test, pred_labels, average='weighted'))
        f1_list.append(f1_score(y_test, pred_labels, average='weighted'))
        fnr_list.append(multiclass_false_negative_rate(y_test, pred_labels))

    results_dict = {'model': models, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}

    results_df = pd.DataFrame(results_dict)
    # Concatenate once instead of growing a frame inside the loop.
    fi_all = pd.concat(fi_frames) if fi_frames else pd.DataFrame()
    return results_df, fi_all

## XGBoost

In [67]:
one_model_out_xgb, one_model_out_fi_xgb = leave_model_out()
one_model_out_xgb

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.829545,0.772727,0.840909,0.829545,0.831269,0.170455
1,nn,0.181818,0.14611,0.173913,0.181818,0.177778,0.818182
2,svm,0.738636,0.651515,0.75869,0.738636,0.737994,0.261364
3,xgb,0.55,0.412653,0.540335,0.55814,0.519806,0.45


In [68]:
one_model_out_xgb[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.575,0.495751
std,0.286814,0.276967


In [69]:
format_mean_std(one_model_out_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.57 (0.29) & 0.58 (0.30) & 0.58 (0.29) & 0.57 (0.29) & 0.42 (0.29) \\
\bottomrule
\end{tabular}



## Random forest

In [70]:
one_model_out_rf, one_model_out_fi_rf = leave_model_out(model=RandomForestClassifier(random_state=123))
one_model_out_rf

Unnamed: 0,model,bacc,kappa,precision,recall,f1,fnr
0,lin,0.829545,0.772727,0.845014,0.829545,0.832216,0.170455
1,nn,0.190909,0.153992,0.175,0.190909,0.182609,0.809091
2,svm,0.772727,0.699248,0.803601,0.772727,0.781889,0.227273
3,xgb,0.630682,0.522393,0.689169,0.639535,0.616291,0.369318


In [71]:
one_model_out_rf[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.605966,0.53709
std,0.289068,0.276164


In [72]:
format_mean_std(one_model_out_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.61 (0.29) & 0.63 (0.31) & 0.61 (0.29) & 0.60 (0.30) & 0.39 (0.29) \\
\bottomrule
\end{tabular}



# 10-fold cross validation

In [73]:
def cross_validation(model=None, n_splits=10):
    """
    Stratified k-fold cross-validation of an attack-type classifier on
    ``attrs_agg_correct_cols``.

    Features are all columns except "dataset", "model" and "attack"; the
    target is the label-encoded "attack" column. Folds come from
    ``StratifiedKFold(n_splits=n_splits)`` without shuffling, so they follow
    the row order of the table.

    Args:
        model: Classifier exposing ``fit``, ``predict`` and
            ``feature_importances_``. When omitted, a fresh
            ``xgb.XGBClassifier(random_state=123)`` is created for this call.
            A passed instance is refitted in place.
        n_splits (int): Number of folds. Defaults to 10.

    Returns:
        tuple[pd.DataFrame, pd.DataFrame]:
            * one row per fold with the columns cv, bacc, kappa, precision,
              recall, f1 and fnr;
            * the feature importances (var, fi, fi_rank, cv) of every fitted
              classifier, stacked.
    """
    if model is None:
        # Build the default per call: a default created in the signature is a
        # single shared instance that fit() would mutate across calls.
        model = xgb.XGBClassifier(random_state=123)

    kappa_list = []
    bacc_list = []
    precision_list = []
    recall_list = []
    f1_list = []
    fnr_list = []
    iter_cv = []
    fi_frames = []

    le = LabelEncoder()
    x = attrs_agg_correct_cols.drop(columns=["dataset", "model", "attack"])
    y = attrs_agg_correct_cols["attack"]
    y_enc = le.fit_transform(y)

    skf = StratifiedKFold(n_splits=n_splits)
    for cv, (train, test) in enumerate(skf.split(x, y_enc)):

        # split() yields positional indices, hence iloc / array indexing.
        x_train = x.iloc[train]
        y_train = y_enc[train]

        x_test = x.iloc[test]
        y_test = y_enc[test]

        model.fit(x_train, y_train)

        fi_df = pd.DataFrame({'var': x_train.columns, 'fi': model.feature_importances_})
        fi_df["fi_rank"] = fi_df["fi"].rank(ascending=False)
        fi_df["cv"] = cv
        fi_frames.append(fi_df)

        preds = model.predict(x_test)
        bacc_list.append(balanced_accuracy_score(y_test, preds))
        kappa_list.append(cohen_kappa_score(y_test, preds))
        precision_list.append(precision_score(y_test, preds, average='weighted'))
        recall_list.append(recall_score(y_test, preds, average='weighted'))
        f1_list.append(f1_score(y_test, preds, average='weighted'))
        fnr_list.append(multiclass_false_negative_rate(y_test, preds))
        iter_cv.append(cv)

    results_dict = {'cv': iter_cv, 'bacc': bacc_list, 'kappa': kappa_list, 'precision': precision_list,
                    'recall': recall_list, 'f1': f1_list, 'fnr': fnr_list}
    results_df = pd.DataFrame(results_dict)
    # Concatenate once instead of growing a frame inside the loop.
    fi_all = pd.concat(fi_frames) if fi_frames else pd.DataFrame()
    return results_df, fi_all

## XGBoost

In [74]:
cv_xgb, cv_fi_xgb = cross_validation()
cv_xgb

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.779762,0.810316,0.873604,0.842105,0.838712,0.220238
1,1,0.712302,0.655116,0.739035,0.710526,0.703538,0.287698
2,2,0.571429,0.545614,0.610401,0.621622,0.587688,0.428571
3,3,0.696429,0.772208,0.812162,0.810811,0.794852,0.303571
4,4,0.895833,0.935484,0.954955,0.945946,0.945946,0.104167
5,5,0.58631,0.675439,0.725676,0.72973,0.71564,0.41369
6,6,0.643849,0.581739,0.685907,0.648649,0.656269,0.356151
7,7,0.624008,0.611888,0.72973,0.675676,0.680657,0.375992
8,8,0.780754,0.773601,0.823423,0.810811,0.812654,0.219246
9,9,0.300595,0.384952,0.495946,0.486486,0.480566,0.699405


In [75]:
cv_xgb[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.659127,0.674636
std,0.160762,0.15609


In [76]:
format_mean_std(cv_xgb)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.66 (0.16) & 0.75 (0.13) & 0.73 (0.13) & 0.72 (0.13) & 0.34 (0.16) \\
\bottomrule
\end{tabular}



## Random forest

In [77]:
cv_rf, cv_fi_rf = cross_validation(model=RandomForestClassifier(random_state=123))
cv_rf

Unnamed: 0,cv,bacc,kappa,precision,recall,f1,fnr
0,0,0.699405,0.683069,0.768049,0.736842,0.734589,0.300595
1,1,0.709325,0.68543,0.720395,0.736842,0.725641,0.290675
2,2,0.575893,0.548387,0.657248,0.621622,0.606514,0.424107
3,3,0.678571,0.739437,0.794895,0.783784,0.766818,0.321429
4,4,0.833333,0.903141,0.927928,0.918919,0.917117,0.166667
5,5,0.627976,0.707638,0.761776,0.756757,0.752396,0.372024
6,6,0.649802,0.583189,0.665073,0.648649,0.618406,0.350198
7,7,0.561508,0.57807,0.710425,0.648649,0.652244,0.438492
8,8,0.759921,0.741259,0.788288,0.783784,0.77973,0.240079
9,9,0.467262,0.449694,0.457138,0.540541,0.484217,0.532738


In [78]:
cv_rf[["bacc", "kappa"]].agg(['mean', 'std'])

Unnamed: 0,bacc,kappa
mean,0.6563,0.661931
std,0.105026,0.126816


In [79]:
format_mean_std(cv_rf)

\begin{tabular}{lllll}
\toprule
bacc & precision & recall & f1 & fnr \\
\midrule
0.66 (0.11) & 0.73 (0.12) & 0.72 (0.11) & 0.70 (0.12) & 0.34 (0.11) \\
\bottomrule
\end{tabular}



## Save data

In [80]:
def preprocess_output(df, scenario, mod):
    """Return a copy of ``df`` tagged with its evaluation scenario and model class.

    Args:
        df (pd.DataFrame): Table to tag. It is not modified, so re-running a
            cell that calls this function leaves earlier objects untouched.
        scenario (str): Value written to every row of the "scenario" column.
        mod (str): Value written to every row of the "model_class" column.

    Returns:
        pd.DataFrame: New frame with the "scenario" and "model_class" columns
        added (or overwritten if they already exist).
    """
    return df.assign(scenario=scenario, model_class=mod)

# Tag each result table with its validation scenario and classifier family.
one_dataset_out_rf = preprocess_output(one_dataset_out_rf, "one-data-set-out", "RF")
one_dataset_out_xgb = preprocess_output(one_dataset_out_xgb, "one-data-set-out", "XGB")
one_model_out_rf = preprocess_output(one_model_out_rf, "one-model-out", "RF")
one_model_out_xgb = preprocess_output(one_model_out_xgb, "one-model-out", "XGB")
cv_rf = preprocess_output(cv_rf, "10-fold cross-validation", "RF")
cv_xgb = preprocess_output(cv_xgb, "10-fold cross-validation", "XGB")

# Stack all six tables. Their identifier columns differ between scenarios
# (dataset / model / cv), so each row is missing the identifiers that belong
# to the other scenarios.
all_bacc = pd.concat([one_dataset_out_rf, one_dataset_out_xgb,
                      one_model_out_rf, one_model_out_xgb,
                      cv_rf, cv_xgb])

In [81]:
all_bacc.to_csv("results/isolation_bacc_with_bacc.csv", index=False)

In [82]:
# Tag each feature-importance table with its validation scenario and
# classifier family.
one_dataset_out_fi_rf = preprocess_output(one_dataset_out_fi_rf, "one-data-set-out", "RF")
one_dataset_out_fi_xgb = preprocess_output(one_dataset_out_fi_xgb, "one-data-set-out", "XGB")
one_model_out_fi_rf = preprocess_output(one_model_out_fi_rf, "one-model-out", "RF")
one_model_out_fi_xgb = preprocess_output(one_model_out_fi_xgb, "one-model-out", "XGB")
cv_fi_rf = preprocess_output(cv_fi_rf, "10-fold cross-validation", "RF")
cv_fi_xgb = preprocess_output(cv_fi_xgb, "10-fold cross-validation", "XGB")

# Stack all six tables. Their identifier columns differ between scenarios
# (dataset / model / cv), so each row is missing the identifiers that belong
# to the other scenarios.
all_fi = pd.concat([one_dataset_out_fi_rf, one_dataset_out_fi_xgb,
                      one_model_out_fi_rf, one_model_out_fi_xgb,
                      cv_fi_rf, cv_fi_xgb])

# Persist the stacked importances without the row index.
all_fi.to_csv("results/isolation_fi_with_bacc.csv", index=False)