# Comparing our method with the SynthCity TVAE

This notebook is run against the forked version of `synthcity` on my GitHub, installed from a local checkout with `pip install ".[all]"` inside a virtual environment.

In [22]:
import pandas as pd
import sys
from synthcity.plugins import Plugins
import synthcity.logger as log
from synthcity.plugins.core.dataloader import GenericDataLoader
from synthcity.benchmark import Benchmarks

In [11]:
# Clear any sinks left over from a previous execution of this cell before
# registering a new one. Without this, every re-run stacks another stderr sink
# and each log record is emitted once per sink (duplicated log lines).
log.remove()
log.add(sink=sys.stderr, level="INFO")

In [12]:
# Load the SUPPORT dataset (pycox export) from the local, non-synced data folder.
support_csv_path = "../data/.nosync/support_pycox.csv"
df = pd.read_csv(support_csv_path)

In [13]:
# Show which generators are registered under the "privacy" category.
privacy_plugins = Plugins(categories=["privacy"])
privacy_plugins.list()

['pategan', 'privbayes', 'decaf', 'adsgan', 'dpgan']

In [20]:
# Wrap the real data in a synthcity loader, flagging every column as sensitive.
all_columns = list(df.columns)
loader = GenericDataLoader(df, sensitive_features=all_columns)

In [21]:
# Sanity check: display the feature names the loader has recorded as sensitive.
loader.sensitive_features

['x0',
 'x1',
 'x2',
 'x3',
 'x4',
 'x5',
 'x6',
 'x7',
 'x8',
 'x9',
 'x10',
 'x11',
 'x12',
 'x13',
 'duration',
 'event']

In [23]:
# Build one `dpgan` test case per epsilon value, named after the epsilon used.
# NOTE(review): the run log reports "task type: classification" although the
# frame carries `duration`/`event` columns — confirm this is the intended setup.
dpgan_testcases = []
for budget in [0.1, 1, 10]:
    dpgan_testcases.append((f"test_eps_{budget}", "dpgan", {"epsilon": budget}))

# Benchmark every test case on the loader: 1000 synthetic rows, 2 repeats each.
score = Benchmarks.evaluate(
    dpgan_testcases,
    loader,
    synthetic_size=1000,
    repeats=2,
)

[2023-04-20T19:49:14.149360+0100][46773][INFO] Testcase : test_eps_0.1
[2023-04-20T19:49:14.149360+0100][46773][INFO] Testcase : test_eps_0.1
[2023-04-20T19:49:14.151301+0100][46773][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 3373739518293110150
[2023-04-20T19:49:14.151301+0100][46773][INFO] [testcase] Experiment repeat: 0 task type: classification Train df hash = 3373739518293110150
[2023-04-20T19:49:14.177436+0100][46773][INFO] Encoding x0 5528808244182675421
[2023-04-20T19:49:14.177436+0100][46773][INFO] Encoding x0 5528808244182675421
[2023-04-20T19:49:14.574660+0100][46773][INFO] Encoding x1 3545159164392961763
[2023-04-20T19:49:14.574660+0100][46773][INFO] Encoding x1 3545159164392961763
[2023-04-20T19:49:14.576940+0100][46773][INFO] Encoding x2 1054521425829868034
[2023-04-20T19:49:14.576940+0100][46773][INFO] Encoding x2 1054521425829868034
[2023-04-20T19:49:14.578836+0100][46773][INFO] Encoding x3 5733022348409430233
[2023-04-20T19:49:14.57

In [None]:
# Render the benchmark results collected in `score` by the evaluation cell above.
Benchmarks.print(score)

In [14]:
# Look up the TVAE generator in the plugin registry (default constructor arguments).
plugin_registry = Plugins()
syn_model = plugin_registry.get("tvae")

In [15]:
# Train the TVAE plugin on the real dataframe. The call is the cell's last
# expression, so the returned plugin object's repr is shown as the output.
syn_model.fit(df)

 40%|████      | 400/1000 [10:47<16:11,  1.62s/it]


<synthcity.plugins.generic.plugin_tvae.TVAEPlugin at 0x2c112ece0>

In [22]:
# Draw as many synthetic rows as there are real rows from the fitted model.
n_real_rows = len(df)
synthetic_loader = syn_model.generate(n_real_rows)
syn_df = synthetic_loader.dataframe()

# Persist the samples as CSV (without the index column) and as a pickle.
syn_df.to_csv("./support_pycox_synthetic.csv", index=False)
syn_df.to_pickle("./support_pycox_synthetic.pkl")

In [6]:
# Look up the GOGGLE generator in the plugin registry (default constructor arguments).
plugin_registry = Plugins()
syn_model = plugin_registry.get("goggle")

In [7]:
# Train the GOGGLE plugin on the real dataframe. The call is the cell's last
# expression, so the returned plugin object's repr is shown as the output.
syn_model.fit(df)

  8%|▊         | 84/1000 [45:17<8:13:49, 32.35s/it]


<synthcity.plugins.generic.plugin_goggle.GOGGLEPlugin at 0x2c0778130>

In [8]:
# Draw one synthetic row per real row from the fitted GOGGLE model.
syn_df = syn_model.generate(len(df)).dataframe()

# Save under GOGGLE-specific names. The TVAE cell earlier in the notebook writes
# to ./support_pycox_synthetic.{csv,pkl}; reusing those paths here would
# silently overwrite the TVAE samples on a top-to-bottom run.
syn_df.to_csv("./support_pycox_synthetic_goggle.csv", index=False)
syn_df.to_pickle("./support_pycox_synthetic_goggle.pkl")