# Adaptive and Iterative Mechanism for Differentially Private Synthetic Data

This method is designed for categorical data.

In [1]:
# stdlib
import warnings
import sys

# Suppress all Python warnings. This runs before the synthcity imports below,
# so the filter is already active while those modules are being loaded.
warnings.filterwarnings("ignore")


# synthcity absolute
from synthcity.plugins import Plugins
from synthcity.utils.datasets.categorical.categorical_adult import CategoricalAdultDataloader
import synthcity.logger as log
# Send synthcity log records of level INFO and above to stderr.
log.add(sink=sys.stderr, level="INFO")

# Name of the plugin used throughout this notebook: it is looked up through
# `Plugins().get(...)` and passed to `Benchmarks.evaluate(...)` in later cells.
eval_plugin = "aim"

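[KeOps] Warning : omp.h header is not in the path, disabling OpenMP. To fix this, you can set the environment
                  variable OMP_PATH to the location of the header before importing keopscore or pykeops,
                  e.g. using os.environ: import os; os.environ['OMP_PATH'] = '/path/to/omp/header'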


### Load dataset

In [2]:
# synthcity absolute
from synthcity.plugins.core.dataloader import GenericDataLoader

# Fetch the data exposed by CategoricalAdultDataloader.
X = CategoricalAdultDataloader().load()

# Wrap the data in a GenericDataLoader, marking "income>50K" as the target
# column and "sex" / "race" as sensitive columns.
loader = GenericDataLoader(
    X,
    target_column="income>50K",
    sensitive_columns=["sex", "race"],
)

# Last expression of the cell: display the wrapped data as a DataFrame.
loader.dataframe()

Unnamed: 0,age,workclass,fnlwgt,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,income>50K
0,23,5,4,12,2,8,3,0,1,2,0,39,0,0
1,34,1,4,12,0,4,2,0,1,0,0,12,0,0
2,22,0,13,8,1,6,3,0,1,0,0,39,0,0
3,37,0,15,6,0,6,2,4,1,0,0,39,0,0
4,12,0,22,12,0,5,0,4,0,0,0,39,12,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
48837,23,0,13,12,1,5,3,0,0,0,0,35,0,0
48838,48,8,20,8,4,14,4,4,1,0,0,39,0,0
48839,22,0,24,12,0,5,2,0,1,0,0,49,0,0
48840,28,0,4,12,1,8,1,1,1,5,0,39,0,0


### Train the generator

In [None]:
# `Plugins` is already imported in the setup cell at the top of the notebook,
# so it is not imported a second time here.
# Look up the plugin named by `eval_plugin` ("aim"); no extra plugin
# arguments are passed to `get`.
syn_model = Plugins().get(eval_plugin)


In [None]:

# Train the "aim" generator on the data wrapped by `loader`.
syn_model.fit(loader)

### Generate new samples

In [None]:
# Request 10 generated samples from the fitted model and display them as a DataFrame.
syn_model.generate(count=10).dataframe()

In [None]:
# third party
import matplotlib.pyplot as plt

# Ask the plugin to draw its plots with matplotlib, passing the original
# `loader` and count=100.
# NOTE(review): presumably this compares the real data against 100 generated
# samples — confirm against the plugin's `plot` documentation.
syn_model.plot(plt, loader, count=100)

# Render the figure(s) produced above.
plt.show()

### Benchmarks

In [None]:
# synthcity absolute
from synthcity.benchmark import Benchmarks

# Each entry in the list is a (testname, plugin, plugin_args) tuple. The
# plugin_args given here simply illustrate some of the parameters that can be
# passed to the plugin.
score = Benchmarks.evaluate(
    [
        (
            eval_plugin,
            eval_plugin,
            {
                "epsilon": 1.0,
                "delta": 1e-7,
                "max_model_size": 80,
                "degree": 2,
                "num_marginals": None,
                "max_cells": 1000,
            },
        ),
    ],
    loader,
    repeats=2,
    metrics={
        "detection": ["detection_mlp"],
        "privacy": ["distinct l-diversity", "k-anonymization", "k-map"],
    },
)

In [None]:
# Print the results returned by `Benchmarks.evaluate` in the previous cell.
Benchmarks.print(score)