In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sdgym
from sdgym.synthesizers import UniformSynthesizer, DataIdentity
from sklearn.preprocessing import LabelEncoder
import sys
sys.path.append('..')
from ctabganplus.model.evaluation import get_utility_metrics

In [2]:
# Instantiate the two SDGym baseline synthesizers used by the cells below.
unf_syn, data_id = UniformSynthesizer(), DataIdentity()

### Get the benchmarking baseline

In [3]:
from sdgym import load_dataset

In [4]:
# Load the training split together with its metadata via SDGym.
train_data, metadata = load_dataset(
    modality="single-table",
    dataset='bm_datasets/train_data/',
)

In [None]:
# Load the HPO split via SDGym. Note that `metadata` is rebound by this call.
hpo_data, metadata = load_dataset(
    modality="single-table",
    dataset='bm_datasets/hpo_data/',
)

In [5]:
# Baselines on the training split: each synthesizer is trained on `train_data`
# and then asked for as many rows as `train_data` contains.

# Uniform baseline.
train_model_unf = unf_syn.get_trained_synthesizer(train_data, metadata)
train_sample_data_unf = unf_syn.sample_from_synthesizer(
    train_model_unf,
    len(train_data),
)

# Identity baseline.
train_model_id = data_id.get_trained_synthesizer(train_data, metadata)
train_sample_data_ind = data_id.sample_from_synthesizer(
    train_model_id,
    len(train_data),
)

In [7]:
# Baselines on the HPO split: each synthesizer is trained on `hpo_data`
# and then asked for as many rows as `hpo_data` contains.

# Uniform baseline.
hpo_model_unf = unf_syn.get_trained_synthesizer(hpo_data, metadata)
hpo_sample_data_unf = unf_syn.sample_from_synthesizer(
    hpo_model_unf,
    len(hpo_data),
)

# Identity baseline.
hpo_model_id = data_id.get_trained_synthesizer(hpo_data, metadata)
hpo_sample_data_id = data_id.sample_from_synthesizer(
    hpo_model_id,
    len(hpo_data),
)

### Get Baseline Metrics

In [6]:
# Held-out test split, read directly from its CSV file.
test_data = pd.read_csv(
    filepath_or_buffer='bm_datasets/test_data/test_data.csv',
)

In [7]:
# HPO split, read directly from its CSV file. This rebinds `hpo_data`.
hpo_data = pd.read_csv(
    filepath_or_buffer='bm_datasets/hpo_data/hpo_data.csv',
)

In [8]:
# Training split read directly from its CSV file; used below as the "real" data.
real_data = pd.read_csv(
    filepath_or_buffer='bm_datasets/train_data/train_data.csv',
)

In [10]:
# Integer-encode the categorical columns of every frame that is passed to the
# utility-metric evaluation.
#
# The encoder for a column is fitted ONCE on the union of that column's values
# across all frames and then applied to each frame. Re-fitting the encoder on
# each frame separately would assign codes from that frame's own sorted set of
# values, so the same category could receive different integers in different
# frames whenever one frame lacks a category that another one has.
#
# NOTE(review): hpo_sample_data_unf / hpo_sample_data_id were commented out of
# the original encoding loop and are still not encoded here, although later
# cells pass them to get_utility_metrics together with the encoded hpo_data.
# TODO confirm whether they should be added to `frames_to_encode`.
categorical_cols = ["attack_type", "label", "proto", "tos"]
frames_to_encode = [
    test_data,
    hpo_data,
    train_sample_data_unf,
    train_sample_data_ind,
    real_data,
]

# Maps column name -> fitted LabelEncoder (kept so codes can be mapped back).
le_dict = {}
for c in categorical_cols:
    encoder = LabelEncoder()
    # Fit on every value that occurs in any frame so the mapping is shared.
    encoder.fit(pd.concat([frame[c] for frame in frames_to_encode], ignore_index=True))
    for frame in frames_to_encode:
        # Column assignment mutates the original DataFrame objects in place.
        frame[c] = encoder.transform(frame[c])
    le_dict[c] = encoder

In [None]:
# Utility metrics for the identity baseline sampled from the HPO split,
# evaluated with five classifier types and MinMax scaling.
bm_id_results, cr_id = get_utility_metrics(
    hpo_data,
    test_data,
    hpo_sample_data_id,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

In [23]:
# Drop the `Model` column, average the remaining metrics within each `Type`,
# and list the groups by mean F1 score, highest first.
(
    bm_id_results
    .drop(columns=["Model"])
    .groupby(["Type"])
    .mean()
    .sort_values(by="F1_Score", ascending=False)
    .head(100)
)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,65.516171,0.874539,0.612798,0.082403,0.000549,0.000835
Real,61.350125,0.851963,0.560196,0.081251,0.000562,0.000786
Difference,-4.166045,-0.022576,-0.052602,-0.001151,1.3e-05,-4.9e-05


In [12]:
# Utility metrics for the uniform baseline sampled from the HPO split,
# evaluated with five classifier types and MinMax scaling.
bm_unf_results, cr_unf = get_utility_metrics(
    hpo_data,
    test_data,
    hpo_sample_data_unf,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [16]:
# Drop the `Model` column, average the remaining metrics within each `Type`,
# and list the groups by mean F1 score, highest first.
(
    bm_unf_results
    .drop(columns=["Model"])
    .groupby(["Type"])
    .mean()
    .sort_values(by="F1_Score", ascending=False)
    .head(100)
)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Real,61.350125,0.851963,0.560196,0.081251,0.000562,0.000786
Difference,37.02826,0.348872,0.448018,0.00828,-0.000302,0.000256
Fake,24.321865,0.503091,0.112178,0.072971,0.000864,0.00053


In [14]:
# Row count per `attack_type` value in the HPO split.
hpo_data["attack_type"].value_counts()

0    253721
2     64172
4      1983
1       169
3         5
Name: attack_type, dtype: int64

In [15]:
# Utility metrics for the identity baseline sampled from the training split,
# evaluated with five classifier types and MinMax scaling.
train_id_results, cr_id_train = get_utility_metrics(
    real_data,
    test_data,
    train_sample_data_ind,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [17]:
# Drop the `Model` column, average the remaining metrics within each `Type`,
# and list the groups by mean F1 score, highest first.
(
    train_id_results
    .drop(columns=["Model"])
    .groupby(["Type"])
    .mean()
    .sort_values(by="F1_Score", ascending=False)
    .head(100)
)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,86.709319,0.959889,0.784177,0.057144,0.000332,0.000668
Real,83.348174,0.936337,0.735378,0.059556,0.000372,0.000633
Difference,-3.361145,-0.023552,-0.048798,0.002412,4.1e-05,-3.5e-05


In [18]:
# Utility metrics for the uniform baseline sampled from the training split,
# evaluated with five classifier types and MinMax scaling.
train_unf_results, cr_unf_train = get_utility_metrics(
    real_data,
    test_data,
    train_sample_data_unf,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data
Model:  rf trained on fake data
Model:  mlp trained on fake data


In [19]:
# Drop the `Model` column, average the remaining metrics within each `Type`,
# and list the groups by mean F1 score, highest first.
(
    train_unf_results
    .drop(columns=["Model"])
    .groupby(["Type"])
    .mean()
    .sort_values(by="F1_Score", ascending=False)
    .head(100)
)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Real,83.348174,0.936337,0.735378,0.059556,0.000372,0.000633
Difference,63.045393,0.424436,0.634904,-0.01044,-0.000497,0.00014
Fake,20.30278,0.511901,0.100475,0.069996,0.00087,0.000492


### Train on the training data; evaluate on the combined HPO + test data

In [11]:
# Stack the HPO and test splits into one evaluation frame and replace the
# carried-over row labels with a fresh 0..n-1 index.
test_hpo = pd.concat([hpo_data, test_data]).reset_index(drop=True)

In [12]:
# Remove fully duplicated rows in place (the first occurrence is kept), then
# show the row count per `attack_type` value in the combined frame.
test_hpo.drop_duplicates(keep="first", inplace=True)
test_hpo["attack_type"].value_counts()

0    515381
2    126913
4      4613
1       422
3        11
Name: attack_type, dtype: int64

In [13]:
# Fit a fresh LabelEncoder on each categorical column of `test_hpo` and keep
# the fitted encoders in `le_dict`, keyed by column name.
# NOTE(review): this rebinds `le_dict`; if `test_hpo` was built from frames
# that were already integer-encoded, these encoders are fitted on the integer
# codes rather than on the original category values. TODO confirm intent.
le_dict = {c: LabelEncoder() for c in ("attack_type", "label", "proto", "tos")}
for c, encoder in le_dict.items():
    test_hpo[c] = encoder.fit_transform(test_hpo[c])

In [None]:
# Utility metrics for the identity baseline sampled from the training split,
# this time scored against the combined HPO + test frame.
train_id_hpo_test_results, cr_id_hpo_test = get_utility_metrics(
    real_data,
    test_hpo,
    train_sample_data_ind,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

In [21]:
# Drop the `Model` column, average the remaining metrics within each `Type`,
# and list the groups by mean F1 score, highest first.
(
    train_id_hpo_test_results
    .drop(columns=["Model"])
    .groupby(["Type"])
    .mean()
    .sort_values(by="F1_Score", ascending=False)
    .head(100)
)

Unnamed: 0_level_0,Acc,AUC,F1_Score,SE_Acc,SE_AUC,SE_F1
Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Fake,81.378509,0.925396,0.706223,0.047319,0.000314,0.000551
Real,79.632853,0.907248,0.633137,0.047282,0.00033,0.000544
Difference,-1.745656,-0.018148,-0.073086,-3.7e-05,1.6e-05,-7e-06


In [15]:
# Utility metrics for the uniform baseline sampled from the training split,
# scored against the combined HPO + test frame.
train_unf_hpo_test_results, cr_unf_hpo_test = get_utility_metrics(
    real_data,
    test_hpo,
    train_sample_data_unf,
    scaler="MinMax",
    type={"Classification": ["xgb", "lr", "dt", "rf", "mlp"]},
)

Model:  xgb trained on real data
Model:  lr trained on real data
Model:  dt trained on real data
Model:  rf trained on real data
Model:  mlp trained on real data
Model:  xgb trained on fake data
Model:  lr trained on fake data
Model:  dt trained on fake data


KeyboardInterrupt: 