In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sdgym
from sdgym.synthesizers import UniformSynthesizer, DataIdentity
from sklearn.preprocessing import LabelEncoder
import sys
sys.path.append('..')
from ctabganplus.model.evaluation import get_utility_metrics

In [2]:
np.random.seed(42)

In [3]:
unf_syn = UniformSynthesizer()
data_id = DataIdentity()

### Get the benchmarking baseline

In [4]:
from sdgym import load_dataset

In [5]:
train_data, metadata = load_dataset(modality="single-table", dataset='bm_datasets/train_data/')

In [6]:
hpo_data, metadata = load_dataset(modality="single-table", dataset='bm_datasets/hpo_data/')

In [7]:
train_model_unf = unf_syn.get_trained_synthesizer(train_data, metadata)
train_sample_data_unf = unf_syn.sample_from_synthesizer(train_model_unf, train_data.shape[0])
train_model_id = data_id.get_trained_synthesizer(train_data, metadata)
train_sample_data_ind = data_id.sample_from_synthesizer(train_model_id, train_data.shape[0])

In [8]:
hpo_model_unf = unf_syn.get_trained_synthesizer(hpo_data, metadata)
hpo_sample_data_unf = unf_syn.sample_from_synthesizer(hpo_model_unf, hpo_data.shape[0])
hpo_model_id = data_id.get_trained_synthesizer(hpo_data, metadata)
hpo_sample_data_id = data_id.sample_from_synthesizer(hpo_model_id, hpo_data.shape[0])

### Get Baseline Metrics

In [9]:
test_data = pd.read_csv('bm_datasets/test_data/test_data.csv')

In [10]:
hpo_data = pd.read_csv('bm_datasets/hpo_data/hpo_data.csv')

In [11]:
real_data = pd.read_csv('bm_datasets/train_data/train_data.csv')

In [12]:
le_dict = {"attack_type": "le_attack_type", "label": "le_label", "proto": "le_proto", "tos": "le_tos"}
for c in le_dict.keys():
    le_dict[c] = LabelEncoder()
    test_data[c] = le_dict[c].fit_transform(test_data[c])
    hpo_data[c] = le_dict[c].fit_transform(hpo_data[c])
    train_sample_data_unf[c] = le_dict[c].fit_transform(train_sample_data_unf[c])
    train_sample_data_ind[c] = le_dict[c].fit_transform(train_sample_data_ind[c])
    hpo_sample_data_unf[c] = le_dict[c].fit_transform(hpo_sample_data_unf[c])
    hpo_sample_data_id[c] = le_dict[c].fit_transform(hpo_sample_data_id[c])
    real_data[c] = le_dict[c].fit_transform(real_data[c])

In [13]:
bm_id_results, cr_id = get_utility_metrics(hpo_data, test_data, hpo_sample_data_id, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [14]:
bm_id_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,65.548447,0.874391,0.612742,0.082394,0.000549,0.000835
Real,61.350125,0.851963,0.560196,0.081251,0.000562,0.000786
Difference,-4.198321,-0.022428,-0.052545,-0.001142,1.2e-05,-4.9e-05


In [15]:
bm_unf_results, cr_unf = get_utility_metrics(hpo_data, test_data, hpo_sample_data_unf, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [15]:
bm_unf_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Real,61.350125,0.851963,0.560196,0.081251,0.000562,0.000786
Difference,41.629886,0.396527,0.485659,0.013051,-0.000303,0.00037
Fake,19.72024,0.455436,0.074537,0.068201,0.000865,0.000416


In [16]:
hpo_data.attack_type.value_counts()

0    253721
2     64172
4      1983
1       169
3         5
Name: attack_type, dtype: int64

In [17]:
train_id_results, cr_id_train = get_utility_metrics(real_data, test_data, train_sample_data_ind, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [18]:
train_id_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,86.706537,0.959955,0.783915,0.057149,0.000331,0.000668
Real,83.390822,0.937712,0.736659,0.059537,0.000369,0.000634
Difference,-3.315715,-0.022243,-0.047255,0.002389,3.8e-05,-3.4e-05


In [19]:
train_unf_results, cr_unf_train = get_utility_metrics(real_data, test_data, train_sample_data_unf, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [20]:
train_unf_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Real,83.390822,0.937712,0.736659,0.059537,0.000369,0.000634
Difference,63.192901,0.413944,0.634805,-0.009508,-0.000503,0.000115
Fake,20.197921,0.523768,0.101854,0.069045,0.000872,0.000518


### Train and HPO+test data as test

In [21]:
test_hpo = pd.concat([hpo_data, test_data])
test_hpo.reset_index(drop=True, inplace=True)

In [22]:
test_hpo.drop_duplicates(inplace=True)
test_hpo.attack_type.value_counts()

0    515381
2    126913
4      4613
1       422
3        11
Name: attack_type, dtype: int64

In [23]:
le_dict = {"attack_type": "le_attack_type", "label": "le_label", "proto": "le_proto", "tos": "le_tos"}
for c in le_dict.keys():
    le_dict[c] = LabelEncoder()
    test_hpo[c] = le_dict[c].fit_transform(test_hpo[c])

In [24]:
train_id_hpo_test_results, cr_id_hpo_test = get_utility_metrics(real_data, test_hpo, train_sample_data_ind, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [25]:
train_id_hpo_test_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,81.375894,0.926241,0.705966,0.047321,0.000312,0.000551
Real,79.661273,0.908148,0.63422,0.047276,0.000329,0.000545
Difference,-1.71462,-0.018094,-0.071747,-4.5e-05,1.6e-05,-7e-06


In [26]:
train_unf_hpo_test_results, cr_unf_hpo_test = get_utility_metrics(real_data, test_hpo, train_sample_data_unf, scaler="MinMax",type={"Classification":["xgb","lr","dt","rf","mlp"]})

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [27]:
train_unf_hpo_test_results.drop(["Model"],axis=1).groupby(["Type"]).mean().sort_values(by="F1_Score", ascending=False).head(100)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Real,79.661273,0.908148,0.63422,0.047276,0.000329,0.000545
Difference,58.1696,0.37734,0.533521,-0.003591,-0.000291,0.000177
Fake,21.491673,0.530807,0.100699,0.050867,0.000619,0.000368
