### Experimenting with Differential Privacy and Bayesian Reasoning

This notebook explores how we can develop Bayesian priors when working with differential privacy. It is a work in progress!

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from math import e, exp, log, sqrt
from scipy.stats import norm
# Apply matplotlib's 'ggplot' style sheet to figures drawn in this notebook.
plt.style.use('ggplot')

# Seed NumPy's global random state so the np.random draws below repeat on a fresh run.
np.random.seed(42)
# IPython magic: render matplotlib figures inline in the cell output.
%matplotlib inline

In [4]:
# Function definitions

# Relative tolerance used by sigma_for_gaussian's binary search: the search stops once
# upper_bound - lower_bound <= gaussianSigmaAccuracy * lower_bound.
gaussianSigmaAccuracy = 1e-3

def delta_for_gaussian(sigma, l0_sensitivity, linf_sensitivity, epsilon):
    """Return the delta achieved by Gaussian noise of scale ``sigma`` at a given epsilon.

    Evaluates the analytic Gaussian mechanism expression

        delta = Phi(a - b) - exp(epsilon) * Phi(-a - b)

    where ``Phi`` is the standard normal CDF,
    ``s = linf_sensitivity * sqrt(l0_sensitivity)`` is the L2 sensitivity,
    ``a = s / (2 * sigma)`` and ``b = epsilon * sigma / s``.

    Args:
        sigma: Standard deviation of the Gaussian noise (must be non-zero).
        l0_sensitivity: Combined with ``linf_sensitivity`` as
            ``linf_sensitivity * sqrt(l0_sensitivity)`` to form the L2 sensitivity.
        linf_sensitivity: See ``l0_sensitivity``.
        epsilon: Privacy parameter epsilon.

    Returns:
        The delta value as a float, or ``0`` when ``epsilon`` or ``sigma`` is so
        large that ``b`` or ``exp(epsilon)`` is not representable as a finite float.
    """
    l2_sensitivity = linf_sensitivity * sqrt(l0_sensitivity)
    a = l2_sensitivity / (2 * sigma)
    b = epsilon * sigma / l2_sensitivity

    # epsilon or sigma is too large: both CDF terms vanish, so delta is 0.
    if np.isinf(b):
        return 0

    try:
        c = exp(epsilon)
    except OverflowError:
        # math.exp raises for large finite arguments instead of returning inf;
        # treat it the same way as an infinite c.
        return 0
    if np.isinf(c):
        return 0

    # The second term must be weighted by exp(epsilon); without that factor the
    # result overestimates delta for every epsilon > 0.
    return norm.cdf(a - b) - c * norm.cdf(-a - b)

def sigma_for_gaussian(l0_sensitivity, linf_sensitivity, epsilon, delta):
    """Find a Gaussian noise scale sigma whose delta does not exceed ``delta``.

    Brackets the answer by doubling an upper bound (starting from the L2
    sensitivity) until ``delta_for_gaussian`` at that bound is at most ``delta``,
    then bisects the bracket until its width is within the relative tolerance
    ``gaussianSigmaAccuracy`` of the lower bound.

    Args:
        l0_sensitivity: Combined with ``linf_sensitivity`` as
            ``linf_sensitivity * sqrt(l0_sensitivity)`` to form the L2 sensitivity.
        linf_sensitivity: See ``l0_sensitivity``.
        epsilon: Privacy parameter epsilon, forwarded to ``delta_for_gaussian``.
        delta: Target delta.

    Returns:
        ``0`` when ``delta >= 1``; otherwise the upper end of the final bracket,
        i.e. a sigma for which ``delta_for_gaussian`` is at most ``delta``.
    """
    if delta >= 1:
        return 0

    l2_sensitivity = linf_sensitivity * sqrt(l0_sensitivity)
    # The lower end of the bracket must exist even when the doubling loop below
    # never runs (sigma = l2_sensitivity already meets the target delta);
    # otherwise the bisection condition raises UnboundLocalError.
    lower_bound = 0.0
    upper_bound = l2_sensitivity

    # Grow the bracket until upper_bound is a valid upper bound for sigma.
    while delta_for_gaussian(upper_bound, l0_sensitivity, linf_sensitivity, epsilon) > delta:
        lower_bound = upper_bound
        upper_bound = upper_bound * 2

    # Bisect: lower_bound always violates the target delta, upper_bound always meets it.
    while upper_bound - lower_bound > gaussianSigmaAccuracy * lower_bound:
        middle = lower_bound * 0.5 + upper_bound * 0.5
        if delta_for_gaussian(middle, l0_sensitivity, linf_sensitivity, epsilon) > delta:
            lower_bound = middle
        else:
            upper_bound = middle

    return upper_bound

def gaussian_dp_mechanism(value, epsilon, delta, linf_sensitivity, l0_sensitivity=1):
    """Return ``value`` perturbed with Gaussian noise and print the noise that was added.

    The noise standard deviation comes from ``sigma_for_gaussian`` for the given
    sensitivities, ``epsilon`` and ``delta``; the sample is drawn with
    ``np.random.normal`` centred on ``value``.
    """
    # Please do not use this function in real life - it is susceptible to well known attacks
    # instead, use a well-known and audited open-source DP library
    noise_stddev = sigma_for_gaussian(l0_sensitivity, linf_sensitivity, epsilon, delta)
    noisy_value = np.random.normal(value, noise_stddev)
    print("Noise: {}".format(noisy_value - value))
    return noisy_value

def laplace_dp_mechanism(value, epsilon, linf_sensitivity):
    """Return ``value`` perturbed with Laplace noise of scale ``linf_sensitivity / epsilon``.

    The sample is drawn with ``np.random.laplace`` centred on ``value``.
    """
    # Please do not use this function in real life - it is susceptible to well known attacks
    # instead, use a well-known and audited open-source DP library
    laplace_scale = linf_sensitivity / epsilon
    return np.random.laplace(value, laplace_scale)

In [6]:
# Means of the normal distributions the synthetic ages and salaries are drawn from.
mid_level_age = 45
mid_level_salary = 50000

# Standard deviations of those normal distributions.
age_scale = 10 #scale represents one standard deviation
salary_scale = 10000

# Draw 1000 samples per attribute, one np.random.normal call at a time, rounding each draw.
# Salaries are drawn before ages from NumPy's global random state, so reordering these
# two lines changes the generated values.
salaries = [round(np.random.normal(mid_level_salary,salary_scale)) for _ in range(1000)]
ages = [round(np.random.normal(mid_level_age,age_scale)) for _ in range(1000)]

In [None]:
#TODO: run experiments!

In [None]:
#TODO: data chart on relative error

In [None]:
#TODO: Bayesian prior development