In [9]:
import itertools

import luigi
import dill
import numpy as np
from scipy.stats import binom

from experiment_framework.helpers import AllAsymptotics, load_completed_AAs_into_dataframe
from empirical_privacy import one_bit_sum


In [2]:
n = 1
p = 0.9
ds = {
    'n_trials': n, 'prob_success': p, 'gen_distr_type': 'multidim_binom',
}
asys = {
    'gen_sample_kwargs'  : {'generate_in_batch': True,
                            'x_concatenator': 'numpy.vstack'
                           },
    'fitter'             : 'knn',
    'fitter_kwargs'      : {'neighbor_method': 'gyorfi'},
    'n_docs'                : 30,
    'n_trials_per_training_set_size': 10,
    'n_max'              : 2**12,
    'validation_set_size': 2**11,
    'p'                  : 0.9,
    't'                  : 0.01
}

All = AllAsymptotics(
        gen_sample_path='empirical_privacy.one_bit_sum.GenSampleOneBitSum',
        dataset_settings=ds,
        asymptotic_settings=asys)
luigi.build([All], local_scheduler=True, workers=4, log_level='ERROR')



True

In [3]:
AAs = All.requires()
DF = load_completed_AAs_into_dataframe(AAs)
n_docs = DF.doc_ind.nunique()
print(n_docs)

30


In [10]:
DF = DF[DF.n_max==DF.n_max.max()]

def binom_pmf(k, n, p):
    return binom(n, p).pmf(k)

def sd2(p1, p2, d):
    combos = itertools.product(*[range(2) for _ in range(d)])
    rv = 0
    nk=0
    for combo in combos:
        k = np.sum(combo)
        nk+=1
        prob1 = binom_pmf(k, d, p1)
        prob2 = binom_pmf(k, d, p2)
        delta = abs(prob1 - prob2)
        rv += delta
    return 0.5 * rv / nk
pc = 0.5+0.5*sd2(0.9, 0.1, 3)

In [11]:
pc

0.58599999999999997

In [12]:
DF[DF['upper_bound']>=pc].shape

(30, 15)

In [13]:
DF

Unnamed: 0,confidence_interval_prob,confidence_interval_width,doc_ind,gen_distr_type,mean,median,n_bootstraps,n_max,n_trials,n_trials_per_training_set_size,p,prob_success,std,upper_bound,validation_set_size
0,0.9,0.01,0,multidim_binom,0.976572,0.976531,2879,4096,1,10,0.9,0.9,0.003075,0.986572,2048
1,0.9,0.01,1,multidim_binom,0.976731,0.976759,2879,4096,1,10,0.9,0.9,0.002111,0.986731,2048
2,0.9,0.01,2,multidim_binom,0.977695,0.977598,2879,4096,1,10,0.9,0.9,0.00238,0.987695,2048
3,0.9,0.01,3,multidim_binom,0.975275,0.97528,2879,4096,1,10,0.9,0.9,0.001779,0.985275,2048
4,0.9,0.01,4,multidim_binom,0.980127,0.980094,2879,4096,1,10,0.9,0.9,0.002465,0.990127,2048
5,0.9,0.01,5,multidim_binom,0.976092,0.975952,2879,4096,1,10,0.9,0.9,0.00142,0.986092,2048
6,0.9,0.01,6,multidim_binom,0.977725,0.977713,2879,4096,1,10,0.9,0.9,0.001546,0.987725,2048
7,0.9,0.01,7,multidim_binom,0.975342,0.975386,2879,4096,1,10,0.9,0.9,0.001597,0.985342,2048
8,0.9,0.01,8,multidim_binom,0.975743,0.975711,2879,4096,1,10,0.9,0.9,0.002134,0.985743,2048
9,0.9,0.01,9,multidim_binom,0.97426,0.97433,2879,4096,1,10,0.9,0.9,0.002063,0.98426,2048
