In [None]:
import logging
import numpy as np
import pandas as pd

import sdgym
from sdgym import benchmark
from sdgym import load_dataset
from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers import (
    CLBNSynthesizer, CTGANSynthesizer, IdentitySynthesizer, IndependentSynthesizer,
    MedganSynthesizer, PrivBNSynthesizer, TableganSynthesizer, TVAESynthesizer,
    UniformSynthesizer, VEEGANSynthesizer
)

from synthsonic.models.kde_utils import kde_smooth_peaks_1dim, kde_smooth_peaks
from synthsonic.models.kde_copula_nn_pdf import KDECopulaNNPdf

In [None]:
# Configure the root logger at INFO level so INFO-and-above records are emitted.
# Note: basicConfig is a no-op if the root logger already has handlers attached.
logging.basicConfig(level=logging.INFO)

In [None]:
# Name of the dataset used throughout this notebook; it is passed to
# load_dataset() and to sdgym.run() in later cells.
dataset_name = 'adult'

In [None]:
# Load the dataset and the two column lists that load_dataset returns with it.
# NOTE(review): the column lists are presumably integer column indices into
# `data` -- confirm against the sdgym version in use.
data, categorical_columns, ordinal_columns = load_dataset(dataset_name)

In [None]:
# Display the shape of the loaded data as a quick sanity check.
data.shape

In [None]:
# Display the categorical and ordinal column lists returned by load_dataset.
categorical_columns, ordinal_columns

# Run the SDGym benchmark

In [None]:
class KDECopulaNNPdf_Synthesizer(BaseSynthesizer):
    """SDGym synthesizer that wraps synthsonic's ``KDECopulaNNPdf`` model.

    ``fit`` trains a ``KDECopulaNNPdf`` on the data cast to float64 and
    ``sample`` draws rows from the fitted model, rounding the categorical and
    ordinal columns and returning the result as float32.

    The model hyper-parameters and the random seed are hard-coded in this class.
    """

    def __init__(self, iterations=None):
        """Create the synthesizer.

        Args:
            iterations: Accepted so the class can be constructed with an
                ``iterations`` argument; this synthesizer does not use it.
        """
        self.random_state = 42

    @staticmethod
    def _as_index_list(columns):
        """Return ``columns`` as a plain list (``None`` becomes an empty list)."""
        return [] if columns is None else list(columns)

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        """Fit a ``KDECopulaNNPdf`` model on ``data``.

        Args:
            data: Array-like training data; it is converted with ``np.float64``.
            categorical_columns: Columns to treat as categorical. Any sequence
                type is accepted.
            ordinal_columns: Columns to treat as ordinal. Any sequence type is
                accepted.

        Note:
            Both column collections are handed to the model together as its
            ``categorical_columns`` and are rounded after sampling.
            NOTE(review): they are presumably integer column indices, since
            ``sample`` uses them to index array columns -- confirm with caller.
        """
        self.categorical_columns = categorical_columns
        self.ordinal_columns = ordinal_columns

        # Normalise before concatenating: ``list + tuple`` raises TypeError, which
        # happened whenever one argument was a list and the other was left at its
        # tuple default.
        self._discrete_columns = (
            self._as_index_list(categorical_columns)
            + self._as_index_list(ordinal_columns)
        )

        xdata = np.float64(data)

        kde = KDECopulaNNPdf(
            use_KDE=False,
            categorical_columns=self._discrete_columns,
            n_uniform_bins=30,
            n_calibration_bins=100,
            test_size=0.25,
            random_state=self.random_state,
        )
        # Keeps whatever ``fit`` returns; assumes it returns the fitted
        # estimator itself -- TODO confirm.
        self.kde = kde.fit(xdata)

    def sample(self, samples):
        """Draw ``samples`` rows from the fitted model.

        Args:
            samples: Number of rows to request from the model.

        Returns:
            The generated data as float32, with the categorical and ordinal
            columns rounded to the nearest integer value.
        """
        # The same seed (random_state + 10) is passed on every call.
        X_gen = self.kde.sample_no_weights(
            samples,
            random_state=self.random_state + 10,
        )

        # Skip the assignment entirely when there are no discrete columns.
        if self._discrete_columns:
            X_gen[:, self._discrete_columns] = np.round(
                X_gen[:, self._discrete_columns]
            )

        return np.float32(X_gen)

In [None]:
# Synthesizers to benchmark. Only the KDECopulaNNPdf wrapper is active;
# uncomment a baseline below to include it in the run.
all_synthesizers = [
    KDECopulaNNPdf_Synthesizer,
    # IdentitySynthesizer,
    # IndependentSynthesizer,
]

In [None]:
# Benchmark the selected synthesizers on the chosen dataset with a single
# iteration; '.' (the current working directory) is passed as the cache dir.
scores = sdgym.run(
    synthesizers=all_synthesizers,
    datasets=[dataset_name],
    iterations=1,
    cache_dir='.',
)

In [None]:
# Display the result returned by sdgym.run.
scores