In [1]:
# Enable IPython's autoreload extension in mode 2, so that edits to imported
# project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import random

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier

from utils.evaluation_framework import EvaluationFramework

# Single source of truth for every random number generator seeded in this notebook.
SEED = 42

# Seed Python's and NumPy's global generators so stochastic steps are repeatable
# on a fresh kernel.
random.seed(SEED)
np.random.seed(SEED)

# NOTE: PYTHONHASHSEED is read by the interpreter at start-up, so setting it here
# does not change string hashing in the running kernel; it is only inherited by
# subprocesses spawned afterwards.
os.environ["PYTHONHASHSEED"] = str(SEED)

### Data


In [3]:
# Load the real (reference) dataset
df_real_data = pd.read_csv("./Data/Real/clean_data.csv")

# Load one synthetic dataset per synthesizer. The keys are the synthesizer names
# that appear in the evaluation reports; insertion order is preserved.
synthetic_files = {
    "CTGAN": "ctgan_data.csv",
    "TVAE": "tvae_data.csv",
    "COP": "cop_synthetic_data.csv",
    "Gauss": "gauss_data.csv",
    "GReaT": "great_data.csv",
}
d_synthetic_data = {
    name: pd.read_csv(f"./Data/Synthetic/{filename}")
    for name, filename in synthetic_files.items()
}

# Names of the categorical features, i.e. the object-dtype columns of the real data
categorical_features = df_real_data.select_dtypes(include="object").columns.tolist()

### Pre-processing

In [4]:
# Create a dictionary to store the label -> integer code mapping of each feature
mappings = {}

# Sentinel code assigned to values that are missing from a feature's mapping
# (e.g. labels that occur only in a synthetic dataset).
UNSEEN_LABEL_CODE = -1

for feature in categorical_features:
    # Fit the LabelEncoder on the real data only, so that the real and all
    # synthetic datasets share one and the same coding per feature.
    label_encoder = preprocessing.LabelEncoder()
    label_encoder.fit(df_real_data[feature])

    # Create the mapping dictionary
    mappings[feature] = dict(
        zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_))
    )

    # Apply the mapping to the real data. Values without a mapping become NaN
    # after .map() and are replaced by the sentinel. The result is assigned back
    # explicitly: `df[col].fillna(..., inplace=True)` acts on a temporary Series
    # and is not guaranteed to update the DataFrame (no-op under Copy-on-Write).
    df_real_data[feature] = (
        df_real_data[feature].map(mappings[feature]).fillna(UNSEEN_LABEL_CODE)
    )

    # Apply the same mapping to each synthetic dataset
    for dataset in d_synthetic_data:
        d_synthetic_data[dataset][feature] = (
            d_synthetic_data[dataset][feature]
            .map(mappings[feature])
            .fillna(UNSEEN_LABEL_CODE)
        )


### Evaluation


In [5]:
# Build the evaluation object from the encoded real data, the dictionary of
# encoded synthetic datasets and the list of categorical feature names.
evaluation = EvaluationFramework(
    df_real_data,
    d_synthetic_data,
    categorical_features,
    verbose=True,
)

In [6]:
# Wasserstein/Cramers-v test
score_wasserstein_cramers_v = evaluation.wasserstein_cramers_v_test()

# Novelty test
score_novelty = evaluation.novelty_test()

# Anomaly detection test
score_anomaly = evaluation.anomaly_detection()

# Domain classifier test
# NOTE: despite its name, `xgbc` is a random forest, not an XGBoost model; the
# name is kept in case other cells refer to it.
# random_state is fixed so the result does not depend on how much of NumPy's
# global random state the previous tests have consumed.
# NOTE(review): the recorded run reports an AUC of ~100 for every synthesizer;
# worth confirming that no column trivially separates real from synthetic rows.
xgbc = RandomForestClassifier(n_estimators=50, max_depth=5, random_state=42)
score_classification = evaluation.domain_classifier(model=xgbc, n_folds=5)

  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))
  cramers_v = np.sqrt(chi2_stat / (n * min(k - 1, r - 1)))


Wasserstein/Cramers-v test
--------------------------------------------------
GReaT score: 27.5500
TVAE score: 46.7000
Gauss score: 58.7500
CTGAN score: 59.1500
COP score: 60.3500


Novelty test
--------------------------------------------------
CTGAN score: 0.0000
TVAE score: 0.0000
COP score: 0.0000
Gauss score: 0.0000
GReaT score: 0.0864


Anomaly detection test
--------------------------------------------------
CTGAN score: 0.0000
TVAE score: 0.0000
COP score: 0.0000
Gauss score: 0.0000
GReaT score: 0.0001


Method: CTGAN
> (Train) AUC score: 100.00
> (Test) AUC score: 100.00
Method: TVAE
> (Train) AUC score: 100.00
> (Test) AUC score: 100.00
Method: COP
> (Train) AUC score: 100.00
> (Test) AUC score: 100.00
Method: Gauss
> (Train) AUC score: 100.00
> (Test) AUC score: 100.00
Method: GReaT
> (Train) AUC score: 100.00
> (Test) AUC score: 99.99


Domain classifier test
--------------------------------------------------
CTGAN score: 100.00
TVAE score: 100.00
COP score: 100.00
Gauss sc

In [7]:
# Statistical analysis results: rank the synthesizers using the evaluation object
# built above. get_synthesizers_ranking() is defined in the project's
# EvaluationFramework; per the recorded output it prints an [INFO] hypothesis-test
# summary and returns a table with Methods / FAR / APV / Null hypothesis columns
# (presumably aligned ranks and adjusted p-values - TODO confirm against the class).
Ranking = evaluation.get_synthesizers_ranking()
display(Ranking)

[INFO] H0: {All methods exhibited similar results with no statistical differences}
[INFO] FAR: 2.171 (p-value: 0.70426) - H0 is failed to be rejected)


Unnamed: 0,Methods,FAR,APV,Null hypothesis
0,TVAE,7.25,-,-
1,GReaT,9.25,0.000002,Rejected
2,Gauss,11.75,0.000004,Rejected
3,CTGAN,12.0,0.000009,Rejected
4,COP,12.25,0.0455,Rejected
