In [1]:
import numpy as np
from sklearn.mixture import GaussianMixture

from sdgym.constants import CONTINUOUS
from sdgym.synthesizers.base import BaseSynthesizer
from sdgym.synthesizers.utils import Transformer

class SampleSynthesizer(BaseSynthesizer):
    """Baseline synthesizer that resamples every column independently.

    ``fit`` records, for each column, the relative frequency of every
    integer code found in the training data. ``sample`` then draws each
    column on its own from those frequencies, so relationships between
    columns are not reproduced.
    """

    def __init__(self):
        # Not read by fit() or sample(); kept so the attribute stays available.
        self.div_param = 2

    def fit(self, data, categorical_columns=tuple(), ordinal_columns=tuple()):
        """Estimate one discrete distribution per column.

        Args:
            data: 2-D array indexed as ``data[:, column]``. Its ``dtype`` is
                reused for generated samples. Values are cast to ``int`` and
                counted with ``np.bincount``, which only accepts
                non-negative integers.
            categorical_columns: Forwarded to ``Transformer.get_metadata``.
            ordinal_columns: Forwarded to ``Transformer.get_metadata``.

        Raises:
            ValueError: If a column contains no values (``data`` has no
                rows), because no frequencies can be estimated from it.
        """
        self.dtype = data.dtype
        self.meta = Transformer.get_metadata(data, categorical_columns, ordinal_columns)

        self.models = []
        for column, info in enumerate(self.meta):
            # Each metadata entry is expected to expose a 'size' key giving
            # the minimum number of bins for that column.
            counts = np.bincount(data[:, column].astype('int'), minlength=info['size'])
            total = counts.sum()
            if total == 0:
                # Dividing by zero would store NaN probabilities and only
                # fail later, inside sample(); fail here with a clear message.
                raise ValueError(
                    "Cannot fit SampleSynthesizer: column %d contains no values." % column
                )
            self.models.append(counts / total)

    def sample(self, samples):
        """Draw ``samples`` rows, each column independently of the others.

        Args:
            samples: Number of rows to generate.

        Returns:
            Array of shape ``(samples, len(self.meta))`` with the dtype
            recorded during ``fit``; entry ``[r, c]`` is an integer code
            drawn from the fitted frequencies of column ``c``.
        """
        data = np.zeros([samples, len(self.meta)], self.dtype)

        for column, probabilities in enumerate(self.models):
            codes = np.arange(len(probabilities))
            data[:, column] = np.random.choice(codes, samples, p=probabilities)
        return data

In [5]:
# NOTE(review): `load_data` and `CTGANSynthesizer` are not imported or defined
# in the visible cells — confirm where they come from so this cell survives
# Restart & Run All.
data = load_data("iris", .1)

# Treat every column as ordinal. Column indices run along the second axis, so
# the count comes from data.shape[1]; len(data) would give the number of rows
# (assumes `data` is a 2-D array or DataFrame — TODO confirm).
categorical_columns = []
ordinal_columns = range(data.shape[1])

synthesizer = CTGANSynthesizer()
synthesizer.fit(data, categorical_columns, ordinal_columns)
sampled = synthesizer.sample(3)
sampled

array([[32., 11., 21., 25.,  0.],
       [17., 29.,  2., 15., 20.],
       [17., 24.,  0., 35., 15.]])

In [1]:
from sdgym import benchmark
from mwem.mwem import MWEMSynthesizer

# Benchmark MWEM on the "asia" dataset, running a single repetition.
mwem_1 = MWEMSynthesizer()
# NOTE(review): this comment used to read "PrivBNSynthesizer is DP", but the
# cell instantiates MWEMSynthesizer — confirm which synthesizer was meant.

scores_asia = benchmark(mwem_1.fit_sample, ["asia"], repeat=1)

# A second, separate instance is used for the "grid" dataset
# (presumably so no fitted state carries over from the "asia" run — verify).
mwem_2 = MWEMSynthesizer()
scores_grid = benchmark(mwem_2.fit_sample, ["grid"], repeat=1)

Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29
Iteration: 0
Iteration: 1
Iteration: 2
Iteration: 3
Iteration: 4
Iteration: 5
Iteration: 6
Iteration: 7
Iteration: 8
Iteration: 9
Iteration: 10
Iteration: 11
Iteration: 12
Iteration: 13
Iteration: 14
Iteration: 15
Iteration: 16
Iteration: 17
Iteration: 18
Iteration: 19
Iteration: 20
Iteration: 21
Iteration: 22
Iteration: 23
Iteration: 24
Iteration: 25
Iteration: 26
Iteration: 27
Iteration: 28
Iteration: 29


In [7]:
# Show the benchmark results collected for the "asia" dataset.
scores_asia

Unnamed: 0,name,syn_likelihood,test_likelihood,distance,dataset,iter
0,Bayesian Likelihood,-3.823316,-2.489065,0.0,asia,0
