## Benchmarking SmartNoise synthesizers with SDGym

In [1]:
import numpy as np
import pandas as pd

import sdmetrics

from snsynth import MWEMSynthesizer
from snsynth.pytorch.nn import DPCTGAN, PATECTGAN
from snsynth.preprocessors import GeneralTransformer
from snsynth.pytorch import PytorchDPSynthesizer

import utils

In [2]:
# Load the PUMS data through the local `utils` helper. The call yields three
# values: `df` (its 'pid' column is reused below as the sample id),
# `df_non_continuous` (the frame the synthesizers are fitted on) and
# `sample_size` (the number of rows requested from each synthesizer).
df, df_non_continuous, sample_size = utils.retrieve_PUMS_data_categorical()

In [3]:
# Fetch the PUMS metadata from the local `utils` helper.
# NOTE(review): presumably consumed by the SDMetrics/SDGym calls in later
# cells -- confirm against the rest of the notebook.
meta = utils.return_PUMS_metadata()

In [4]:
# Columns that both CTGAN variants are told to treat as categorical.
PUMS_CATEGORICAL_COLUMNS = ('sex', 'educ', 'race', 'married')

# One wrapper per GAN variant; each receives 1.0 as its first argument and
# None as its third.
patectgan = PytorchDPSynthesizer(1.0, PATECTGAN(), None)
dpctgan = PytorchDPSynthesizer(1.0, DPCTGAN(), None)

# Train on the same frame: DPCTGAN first, then PATECTGAN. A fresh list is
# handed to each call so neither fit can see changes made by the other.
dpctgan.fit(
    df_non_continuous,
    categorical_columns=list(PUMS_CATEGORICAL_COLUMNS),
)
patectgan.fit(
    df_non_continuous,
    categorical_columns=list(PUMS_CATEGORICAL_COLUMNS),
)

# Draw `sample_size` synthetic rows from each trained synthesizer.
synth_data_dpctgan = dpctgan.sample(sample_size)
synth_data_patectgan = patectgan.sample(sample_size)

# Most SDMetrics metrics require a unique sample id, so the 'pid' column of
# the original frame is copied onto both synthetic frames and onto the
# training frame once synthesis is done.
for frame in (synth_data_dpctgan, synth_data_patectgan, df_non_continuous):
    frame['pid'] = df['pid']

# Wrap each frame in a single-table mapping keyed by the table name 'pums'.
data_real = dict(pums=df_non_continuous)
data_synth_patectgan = dict(pums=synth_data_patectgan)
data_synth_dpctgan = dict(pums=synth_data_dpctgan)



Epoch 1, Loss G: 0.6701, Loss D: 1.3862
epsilon is 0.08429801659035999, alpha is 63.0
Epoch 2, Loss G: 0.6677, Loss D: 1.3897
epsilon is 0.8159572645684117, alpha is 17.0


### Benchmarking against other synthesizers

In [7]:
def patectgan_synthesizer_function(
    real_data,
    metadata,
    *,
    epsilon=1.0,
    categorical_columns=('sex', 'educ', 'race', 'married'),
):
    """Fit a PATECTGAN synthesizer on ``real_data`` and return a synthetic sample.

    The function keeps the ``(real_data, metadata)`` positional signature so it
    can still be passed to a benchmark harness unchanged. The two keyword-only
    parameters expose values that used to be hard-coded; their defaults
    reproduce the previous behaviour exactly.

    Args:
        real_data: Data to train on. It is passed straight to ``fit`` and its
            ``len`` decides how many rows are sampled.
        metadata: Accepted for interface compatibility; not used here.
        epsilon: Value forwarded as the first argument of
            ``PytorchDPSynthesizer`` (the privacy budget). Defaults to 1.0.
        categorical_columns: Names of the columns to declare as categorical
            when fitting. Defaults to the PUMS columns used in this notebook.

    Returns:
        Whatever ``PytorchDPSynthesizer.sample`` returns for
        ``len(real_data)`` rows.
    """
    synthesizer = PytorchDPSynthesizer(epsilon, PATECTGAN(), None)
    # Hand `fit` its own list so the (immutable) default is never shared.
    synthesizer.fit(real_data, categorical_columns=list(categorical_columns))
    # Produce as many synthetic rows as there are entries in the input.
    return synthesizer.sample(len(real_data))