# Differential Privacy (DP) Aggregate Seeded Synthesizer

> Example based on: https://github.com/microsoft/synthetic-data-showcase/blob/main/packages/lib-pacsynth/samples/dp_aggregate_seeded_short_example.ipynb

In [7]:
from snsynth.aggregate_seeded import \
    AggregateSeededSynthesizer, \
    AccuracyMode, \
    FabricationMode, \
    AggregateSeededDataset

from utils import gen_data_frame

## Generating an example data frame with random data

In [8]:
# Size of the example data set produced by the local `gen_data_frame` helper.
number_of_records_to_generate = 6_000

# This frame plays the role of the sensitive input for the rest of the notebook.
sensitive_df = gen_data_frame(number_of_records_to_generate)

## Generating the synthetic data

In [9]:
# Reporting length handed to the synthesizer below.
reporting_length = 4

# Configure the DP aggregate seeded synthesizer.
synth = AggregateSeededSynthesizer(
    reporting_length=reporting_length,
    epsilon=4.0,
    accuracy_mode=AccuracyMode.prioritize_long_combinations(),
    fabrication_mode=FabricationMode.uncontrolled(),
    use_synthetic_counts=True,
)

# Fit the synthesizer on the sensitive data frame.
synth.fit(sensitive_df)

# Sample as many synthetic records as the fitted synthesizer reports through
# `get_dp_number_of_records()`, rather than reusing the sensitive record count.
dp_number_of_records = synth.get_dp_number_of_records()
synthetic_df = synth.sample(dp_number_of_records)

## Generating/exporting aggregate data

This illustrates how to generate aggregates directly from the sensitive and synthetic data, as well as how to access the DP aggregates.

In [10]:
# Aggregates of the sensitive data, as exposed by the fitted synthesizer.
# ';' is the delimiter argument passed to the aggregate getters.
sensitive_aggregates = synth.get_sensitive_aggregates(';')

# Differentially private aggregates exposed by the fitted synthesizer.
dp_aggregates = synth.get_dp_aggregates(';')

# Aggregates computed directly from the synthetic data: wrap the sampled frame
# in a dataset and aggregate it with the same reporting length and delimiter.
# `from_data_frame` on its own only returns the dataset object, not aggregates.
synthetic_aggregates = AggregateSeededDataset.from_data_frame(
    synthetic_df
).get_aggregates(reporting_length, ';')

## Evaluating

In [11]:
# Empty strings are replaced with '0' so every column can be cast to int
# before computing the summary statistics.
(
    sensitive_df
    .replace('', '0')
    .astype('int')
    .describe()
)

Unnamed: 0,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10
count,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0,6000.0
mean,1.002833,2.651667,4.603167,0.495333,0.498667,0.501833,0.486333,0.5015,0.496167,0.5005
std,0.813594,2.116225,3.346275,0.50002,0.50004,0.500038,0.499855,0.500039,0.500027,0.500041
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,3.0,5.0,0.0,0.0,1.0,0.0,1.0,0.0,1.0
75%,2.0,5.0,8.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,2.0,6.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [12]:
# Empty strings are replaced with '0' so every column can be cast to int
# before computing the summary statistics.
(
    synthetic_df
    .replace('', '0')
    .astype('int')
    .describe()
)

Unnamed: 0,H1,H2,H3,H4,H5,H6,H7,H8,H9,H10
count,6005.0,6005.0,6005.0,6005.0,6005.0,6005.0,6005.0,6005.0,6005.0,6005.0
mean,0.956536,2.538884,4.314738,0.453622,0.461282,0.463447,0.450291,0.474604,0.475437,0.461615
std,0.821883,2.139076,3.41693,0.497886,0.49854,0.498704,0.497564,0.499396,0.499438,0.498566
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,2.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
75%,2.0,4.0,7.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
max,2.0,6.0,10.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
