### SynthEval Benchmark guide

This notebook demonstrates how SynthEval can be used to benchmark synthetic tabular datasets, evaluating them in bulk and ranking them against each other. The benchmark helps the user choose among several candidate synthetic datasets, for example those produced during a hyperparameter search or by different generative models. This is a practical use case: it reflects a decision that an analyst working with synthetic data generation methods regularly has to make.

This example is based on the "Hepatitis C Virus (HCV) for Egyptian patients" dataset, available from the UCI Machine Learning Repository (<https://archive.ics.uci.edu/dataset/503>).

In [1]:
### Imports
import pandas as pd 
from syntheval import SynthEval

### Access datasets
# File locations: the train/test splits of the hepatitis data, and the
# directory that is later handed to the benchmark call.
TRAIN_CSV = 'example/hepatitis_train.csv'
TEST_CSV  = 'example/hepatitis_test.csv'
SYN_PATH  = 'example/ex_data_dir/'

# Only the two CSV splits are loaded here; nothing under SYN_PATH is read in this cell.
df_train = pd.read_csv(TRAIN_CSV)
df_test  = pd.read_csv(TEST_CSV)

In [2]:
### Metric configuration: each key names a metric and maps to that metric's options
### (an empty mapping supplies no options). In practice this could live in a json
### file instead of taking up space in a script.
metrics = dict(
    corr_diff=dict(mixed_corr=True),
    mi_diff={},
    ks_test=dict(sig_lvl=0.05, do_permutation=True, n_perms=1000),
    p_mse=dict(k_folds=5, max_iter=1000, solver="liblinear"),
    cls_acc=dict(F1_type="micro", k_folds=5),
    dcr={},
    eps_risk={},
    mia_risk=dict(num_eval_iter=5),
    attr_discl_cats={},
)

In [3]:
# Target column; passed to the benchmark as its second positional argument.
predict_class = 'Baselinehistological staging'

# Columns declared categorical via `cat_cols`; the target column is the last entry.
# NOTE(review): 'Nausea/Vomting' is spelled as in the original cell, presumably to
# match the CSV header; verify before "correcting" it.
class_cat_col = [
    'Gender', 'Fever', 'Nausea/Vomting', 'Headache', 'Diarrhea',
    'Fatigue & generalized bone ache', 'Jaundice', 'Epigastric pain',
    'WBC', 'RBC', 'Plat',
    'RNA Base', 'RNA 4', 'RNA 12', 'RNA EOT', 'RNA EF',
    predict_class,
]

# Evaluator built on the training split, with the test split supplied as hold-out data.
SE = SynthEval(df_train, hold_out=df_test, cat_cols=class_cat_col)

# Run the configured metrics against SYN_PATH using the 'linear' rank strategy.
# The call returns two frames, which are displayed in the following cells.
df_vals, df_rank = SE.benchmark(
    SYN_PATH,
    predict_class,
    rank_strategy='linear',
    **metrics
)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
SynthEval: benchmark: 100%|██████████| 11/11 [05:49<00:00, 31.77s/it]


In [4]:
# Display the first frame returned by SE.benchmark: one row per synthetic dataset,
# with the metric values followed by the rank, u_rank and p_rank columns.
df_vals

metric,corr_mat_diff,mutual_inf_diff,avg_ks_stat,frac_ks_sigs,avg_pMSE,cls_F1_diff,cls_F1_diff_hout,median_DCR,eps_identif_risk,mia_cls_risk,att_discl_risk,rank,u_rank,p_rank
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
a_hepatitis_sampling_baseline,2.466638,2.367049,0.017103,0.0,0.0,0.018939,0.030303,1.135284,0.279523,1.0,0.455522,7.149517,5.335021,1.814496
b_hepatitis_smote_baseline,0.410143,2.973446,0.068139,0.448276,0.0,0.019774,0.012987,0.819172,0.527627,0.693506,0.602309,5.505258,5.167995,0.337263
c_hepatitis_daib_baseline,2.071261,2.751713,0.213072,0.931034,0.0,0.185598,0.183442,1.283728,0.049837,0.0,0.396779,5.264583,1.515381,3.749202
d_hepatitis_datasynthesizer_syn,0.404462,0.169534,0.019763,0.068966,0.002616,0.023258,0.010823,0.973568,0.513543,1.0,0.578679,7.259431,6.775168,0.484263
e_hepatitis_synthpop_syn_best,0.440553,0.263388,0.017622,0.0,0.00065,0.026771,0.021104,0.969873,0.542795,1.0,0.586063,7.149371,6.767009,0.382362
f_hepatitis_CTGAN_syn_best,0.812515,1.87706,0.052095,0.37931,0.0,0.028428,0.035173,0.997596,0.508126,0.858874,0.564087,6.15175,5.402111,0.749639
g_hepatitis_ADSGAN_syn_best,0.830577,1.879755,0.046416,0.482759,0.0,0.035206,0.031926,0.969887,0.533044,0.844156,0.56358,5.961339,5.293937,0.667403
h_hepatitis_synthpop_1_syn,0.560022,0.198939,0.020006,0.0,0.000707,0.034965,0.022186,0.955216,0.539545,1.0,0.56689,7.140109,6.684555,0.455554
i_hepatitis_BN_7_syn,2.33441,2.219916,0.127864,0.413793,0.062103,0.016799,0.012446,1.580542,0.062839,0.203463,0.49424,6.833919,3.537947,3.295972
j_hepatitis_CTGAN_9_syn,2.459151,2.801471,0.06339,0.551724,0.0,0.006206,0.041667,1.13644,0.290358,0.889177,0.456978,6.256843,4.358979,1.897864


In [5]:
# Display the second frame returned by SE.benchmark. In the saved output its metric
# columns lie in [0, 1], and its rank, u_rank and p_rank columns match those in df_vals.
df_rank

metric,corr_mat_diff,mutual_inf_diff,avg_ks_stat,frac_ks_sigs,avg_pMSE,cls_F1_diff,cls_F1_diff_hout,median_DCR,eps_identif_risk,mia_cls_risk,att_discl_risk,rank,u_rank,p_rank
dataset,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
a_hepatitis_sampling_baseline,0.302587,0.216268,1.0,1.0,1.0,0.929018,0.887147,0.566241,0.534066,0.0,0.714189,7.149517,5.335021,1.814496
b_hepatitis_smote_baseline,0.998079,0.0,0.73957,0.518519,1.0,0.924367,0.987461,0.0,0.030769,0.306494,0.0,5.505258,5.167995,0.337263
c_hepatitis_daib_baseline,0.436301,0.07908,0.0,0.0,1.0,0.0,0.0,0.749202,1.0,1.0,1.0,5.264583,1.515381,3.749202
d_hepatitis_datasynthesizer_syn,1.0,1.0,0.986425,0.925926,0.957871,0.904946,1.0,0.309951,0.059341,0.0,0.114971,7.259431,6.775168,0.484263
e_hepatitis_synthpop_syn_best,0.987794,0.966528,0.997349,1.0,0.989539,0.88536,0.940439,0.303316,0.0,0.0,0.079046,7.149371,6.767009,0.382362
f_hepatitis_CTGAN_syn_best,0.861999,0.39102,0.821441,0.592593,1.0,0.876124,0.858934,0.352213,0.07033,0.141126,0.185971,6.15175,5.402111,0.749639
g_hepatitis_ADSGAN_syn_best,0.855891,0.390059,0.850418,0.481481,1.0,0.838344,0.877743,0.303342,0.01978,0.155844,0.188437,5.961339,5.293937,0.667403
h_hepatitis_synthpop_1_syn,0.947391,0.989513,0.985182,1.0,0.988613,0.839687,0.934169,0.276631,0.006593,0.0,0.17233,7.140109,6.684555,0.455554
i_hepatitis_BN_7_syn,0.347306,0.268742,0.4348,0.555556,0.0,0.940948,0.990596,1.0,0.973626,0.796537,0.525809,6.833919,3.537947,3.295972
j_hepatitis_CTGAN_9_syn,0.305119,0.061334,0.763801,0.407407,1.0,1.0,0.821317,0.567847,0.512088,0.110823,0.707107,6.256843,4.358979,1.897864
