In [1]:
import numpy as np
from scipy.special import erf
import os
import sys
import time
import copy
import logging
import matplotlib.pyplot as plt

# Resolve the project root, taken to be two directory levels above the
# notebook's current working directory, and derive the standard sub-folders.
project_dir = os.path.abspath(os.path.join(os.getcwd(), '../../'))
src_dir = os.path.join(project_dir, 'src')
log_dir = os.path.join(project_dir, 'log')
fig_dir = os.path.join(project_dir, 'fig')

# Create the output folders up front; exist_ok keeps re-runs harmless.
os.makedirs(log_dir, exist_ok=True)
os.makedirs(fig_dir, exist_ok=True)

# Make the packages under src importable. The membership check keeps this cell
# idempotent: re-running it no longer appends duplicate entries to sys.path.
if src_dir not in sys.path:
    sys.path.append(src_dir)

from analysis.multivariate_gaussian_privacy import compute_gaussian_mechanism_privacy_over_query, standard_gaussian_BOC
from analysis.multivariate_gaussian_privacy import generate_sparse_linear_classifier_for_standard_gaussian_robust
from analysis.multivariate_gaussian_privacy import generate_sparse_linear_classifier_for_standard_gaussian
from mech.GaussianMechanism import GaussianSampleGenerator
from classifier.kNN import stack_samples
from utils.commons import accuracy_to_delta


In [2]:
# Two query outputs in `dim` dimensions that differ only in the first coordinate.
dim = 50
query1, query2 = np.zeros(dim), np.zeros(dim)
query2[0] = 1

# Privacy parameters used to calibrate the noise.
epsilon = 1.0
delta1 = 0.00001

# L2 distance between the two query outputs.
sensitivity = np.linalg.norm(query2 - query1, ord=2)

# Noise variance 2 * ln(1.25 / delta) * sensitivity^2 / epsilon^2, and the
# matching standard deviation.
gaussian_scale = 2 * np.log(1.25 / delta1) * sensitivity ** 2 / epsilon ** 2
sigma = np.sqrt(gaussian_scale)

# Evaluate the project's analytic privacy routine at a claimed epsilon of 0;
# the bare expression is left last so the notebook displays its value.
# NOTE(review): presumably this is the exact delta at claimed_epsilon - confirm
# against analysis.multivariate_gaussian_privacy.
claimed_epsilon = 0
compute_gaussian_mechanism_privacy_over_query(claimed_epsilon, sigma, query1, query2)


0.08219839698441395

In [3]:
def generate_default_configuration(database_0=None, database_1=None, epsilon=1,
                                   claimed_epsilon=0, gamma=0.01, delta=0.00001,
                                   sensitivity=None, training_set_size=10**6,
                                   validation_set_size=10**6):
    """Build the configuration dictionary passed to ``GaussianSampleGenerator``.

    Called without arguments it reproduces the notebook's default setup: the
    notebook-level ``query1`` / ``query2`` arrays are used as the two
    databases and all other settings keep their original values.

    Args:
        database_0: First database / query output (array-like). Defaults to
            the notebook-level ``query1``.
        database_1: Second database / query output (array-like). Defaults to
            the notebook-level ``query2``.
        epsilon: Value stored under ``'epsilon'``.
        claimed_epsilon: Value stored under ``'claimed_epsilon'``.
        gamma: Value stored under the top-level ``'gamma'`` key.
        delta: Value stored under ``'delta'``.
        sensitivity: Value stored under ``'sensitivity'``. When ``None`` it is
            derived as the L2 distance between the two databases, so it cannot
            drift out of sync with them (this is 1.0 for the notebook's
            default queries, matching the previously hard-coded value).
        training_set_size: Value stored under ``'training_set_size'``.
        validation_set_size: Value stored under ``'validation_set_size'``.

    Returns:
        dict with keys ``'dataset_settings'``, ``'gamma'``,
        ``'training_set_size'`` and ``'validation_set_size'``.
    """
    # Fall back to the notebook-level queries so the zero-argument call keeps working.
    if database_0 is None:
        database_0 = query1
    if database_1 is None:
        database_1 = query2
    database_0 = np.asarray(database_0)
    database_1 = np.asarray(database_1)

    if sensitivity is None:
        # Same definition the notebook uses when calibrating the noise scale.
        sensitivity = float(np.linalg.norm(database_1 - database_0, ord=2))

    dataset_settings = {
        'database_0': database_0.tolist(),
        'database_1': database_1.tolist(),
        'sensitivity': sensitivity,
        'epsilon': epsilon,
        'claimed_epsilon': claimed_epsilon,
        'delta': delta
    }

    kwargs = {
        'dataset_settings': dataset_settings,
        'gamma': gamma,
        'training_set_size': training_set_size,
        'validation_set_size': validation_set_size
    }
    return kwargs

# Build the generator from the default configuration.
kwargs = generate_default_configuration()
sample_generator = GaussianSampleGenerator(kwargs)

# One million samples per class.
num_positive_samples = num_negative_samples = 1000000

# Positive samples are requested first, then negative ones, and the two sets
# are combined into a single labelled collection.
positive_samples = sample_generator.gen_samples(
    num_positive_samples, generate_positive_sample=True
)
negative_samples = sample_generator.gen_samples(
    num_negative_samples, generate_positive_sample=False
)
samples = stack_samples(
    positive_samples=positive_samples,
    negative_samples=negative_samples,
)

In [4]:
X, y = samples['X'], samples['y']

# Accumulate the absolute difference between standard_gaussian_BOC's output
# and the label over every sample, visiting the indices in order.
error = sum(
    np.abs(standard_gaussian_BOC(X[idx], query1, query2) - y[idx])
    for idx in range(len(y))
)

# Fraction of samples the rule gets right.
accuracy = 1 - error / len(y)

# Convert the empirical accuracy to a delta value.
# NOTE(review): the second argument (0) is presumably the claimed epsilon -
# confirm against utils.commons.accuracy_to_delta.
delta = accuracy_to_delta(accuracy, 0)

print(delta)

0.081793


In [5]:
# Draw a second batch from the same generator (positive first, then negative)
# and stack it into the set handed to the classifier builder.
training_positive_samples = sample_generator.gen_samples(
    num_positive_samples, generate_positive_sample=True
)
training_negative_samples = sample_generator.gen_samples(
    num_negative_samples, generate_positive_sample=False
)
training_samples = stack_samples(
    positive_samples=training_positive_samples,
    negative_samples=training_negative_samples,
)


In [6]:
# Build the classifier from the training samples.
sparse_linear_classifier = generate_sparse_linear_classifier_for_standard_gaussian_robust(training_samples)

# NOTE(review): the classifier is scored below on the same samples it was
# generated from; the separately drawn `samples` set could give a held-out
# estimate instead - confirm which is intended.
X, y = training_samples['X'], training_samples['y']

# Accumulate the absolute difference between the classifier's output and the
# label over every sample, visiting the indices in order.
error = sum(
    np.abs(sparse_linear_classifier(X[idx]) - y[idx])
    for idx in range(len(y))
)

# Fraction of samples the classifier gets right.
accuracy = 1 - error / len(y)

# Convert the empirical accuracy to a delta value.
# NOTE(review): the second argument (0) is presumably the claimed epsilon -
# confirm against utils.commons.accuracy_to_delta.
delta = accuracy_to_delta(accuracy, 0)

print(delta)

0.08290599999999992
