In [1]:
# Re-import edited project modules automatically before each cell executes,
# so changes to local code are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import os
import random

import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier

from utils.evaluation_framework import EvaluationFramework

# Single source of truth for the seed used by the global RNGs below.
SEED = 42

# Seed Python's and NumPy's global generators so stochastic steps repeat on a
# fresh Restart & Run All.
random.seed(SEED)
np.random.seed(SEED)

# NOTE: the running interpreter fixed its hash seed at startup, so this
# assignment only reaches processes spawned from this kernel afterwards.
os.environ["PYTHONHASHSEED"] = str(SEED)

### Data


In [3]:
# Load the real dataset that the synthetic datasets are evaluated against
df_real_data = pd.read_csv("./Data/Real/dataset.csv")

# Load one synthetic dataset per method, keyed by method name. The name also
# fills the file name, so adding a method only means extending this list.
synthetic_methods = ["Method1", "Method2", "Method3"]
d_synthetic_data = {
    method: pd.read_csv(f"./Data/Synthetic/Synthetic_{method}.csv")
    for method in synthetic_methods
}

# Names of the categorical features: the columns of the real data that
# pandas read with `object` dtype
categorical_features = df_real_data.select_dtypes(include="object").columns.tolist()

### Pre-processing

In [4]:
# Encode every categorical feature as integer codes. Each encoder is fitted on
# the real data only and then applied to the synthetic data, so real and
# synthetic columns share one code mapping per feature.
label_encoders = {}  # feature name -> fitted encoder, kept so codes can be decoded later
for feature in categorical_features:
    label_encoder = preprocessing.LabelEncoder()
    label_encoders[feature] = label_encoder

    # Real data
    df_real_data[feature] = label_encoder.fit_transform(df_real_data[feature])

    # Synthetic data
    for dataset, df_synthetic in d_synthetic_data.items():
        try:
            df_synthetic[feature] = label_encoder.transform(df_synthetic[feature])
        except ValueError as error:
            # transform() raises ValueError e.g. for categories that never
            # occur in the real data; name the offending dataset and feature
            # before re-raising the same exception type.
            raise ValueError(
                f"Cannot encode feature {feature!r} of synthetic dataset "
                f"{dataset!r}: {error}"
            ) from error

### Evaluation


In [5]:
# Set up the evaluation framework with the real data, the dictionary of
# synthetic datasets and the names of the categorical features
evaluation = EvaluationFramework(
    df_real_data,
    d_synthetic_data,
    categorical_features,
    verbose=True,
)

In [13]:
# Wasserstein/Cramers-v test
score_wasserstein_cramers_v = evaluation.wasserstein_cramers_v_test()

# Novelty test
score_novelty = evaluation.novelty_test()

# Anomaly detection test
score_anomaly = evaluation.anomaly_detection()

# Domain classifier test (the saved output reports train/test AUC per method).
# random_state is pinned on the estimator itself so its result does not depend
# on how much of the global NumPy RNG earlier cells or re-runs have consumed.
domain_classifier = RandomForestClassifier(
    n_estimators=50,
    max_depth=5,
    random_state=42,
)
score_classification = evaluation.domain_classifier(model=domain_classifier, n_folds=5)

Wasserstein/Cramers-v test
--------------------------------------------------
Method2 score: 18.0588
Method1 score: 29.6471
Method3 score: 30.2941


Anomaly detection test
--------------------------------------------------
Method1 score: 0.0090
Method2 score: 0.0060
Method3 score: 0.0030


Method: Method1
> (Train) AUC score: 99.20
> (Test) AUC score: 97.63
Method: Method2
> (Train) AUC score: 99.22
> (Test) AUC score: 96.77
Method: Method3
> (Train) AUC score: 99.06
> (Test) AUC score: 98.22


Domain classifier test
--------------------------------------------------
Method1 score: 97.63
Method2 score: 96.77
Method3 score: 98.22




In [14]:
# Statistical analysis results: rank the synthesizers (presumably from the
# scores of the tests already run on `evaluation` — confirm in utils).
# The saved output shows a table with Methods / FAR / APV / Null hypothesis
# columns; FAR/APV presumably stand for Friedman aligned rank and adjusted
# p-value — TODO confirm against utils.evaluation_framework.
Ranking = evaluation.get_synthesizers_ranking()
display(Ranking)

[INFO] H0: {All methods exhibited similar results with no statistical differences}
[INFO] FAR: 3.556 (p-value: 0.16901) - H0 is failed to be rejected)


Unnamed: 0,Methods,FAR,APV,Null hypothesis
0,Method2,2.333333,-,-
1,Method1,6.333333,0.000127,Rejected
2,Method3,6.333333,0.000127,Rejected
