## Day 11

In [1]:
import numpy as np
import torch


In [2]:
def parallel_db(db, remove_entry):
    return torch.cat((db[:remove_entry], db[remove_entry + 1:]))
    

In [3]:
# parallel_db(db, remove_entry).shape

In [4]:
def get_parallel_dbs(db):
    
    parallel_dbs = []

    for i in range(len(db)):
        parallel_dbs.append((parallel_db(db, i)))
        
    return parallel_dbs

In [5]:
 def create_db_and_parallel(num_entries):
#         num_entries = 5000
        db = torch.randn(num_entries) > 0.5
        
        pdbs = get_parallel_dbs(db)
        
        return db, pdbs
    

In [6]:
def query(db):
#     return db.sum()
    return db.float().mean() #Gives emperical sensitivity

> Find difference between db.sum() and sum(db)

In [7]:
def sensitivity_fn(query, num_entries = 500):
    
    db, pdbs = create_db_and_parallel(num_entries)
    
    full_db_result = query(db)
    print(full_db_result)
    
    sensitivity = 0

    for pdb in pdbs:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_db_result)

        if (db_distance > sensitivity):
            sensitivity = db_distance
            
    return sensitivity

In [8]:
sensitivity_fn(query)

tensor(0.3240)


tensor(0.0014)

In [9]:
# while removing/ calculating parallel db, each entry corresponds to the unique person... 

In [10]:
def query(db, threshold=5):
    return (db.sum() > threshold).float() #L1 sensitivity

In [11]:
db, queries = create_db_and_parallel(10)
print(query(db), db.sum())

tensor(0.) tensor(2)


In [12]:
for i in range(10):
    sens_f = sensitivity_fn(query, num_entries = 100)
    print(sens_f)

tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0


# perform defferencing attack on row 10

In [13]:
db, _ = create_db_and_parallel(100)
print(db, db[10])

tensor([0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 1,
        1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0,
        1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1,
        0, 0, 0, 0], dtype=torch.uint8) tensor(1, dtype=torch.uint8)


In [14]:
pdb = parallel_db(db, 10)

In [15]:
db[10]

tensor(1, dtype=torch.uint8)

In [16]:
#differencing attack using sum
db.sum() - pdb.sum()

tensor(1)

In [17]:
#differencing attack using mean

(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(0.0064)

# local differential privacy adds noise to input of query function and global adds noise to output of query function

In [18]:
def query(db, noise = 2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > 0.5).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [19]:
db, pdbs = create_db_and_parallel(10)
private_result, true_result = query(db, 2.5)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.)
Without Noise:tensor(0.2000)


In [20]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.0800)
Without Noise:tensor(0.1800)


In [21]:
db, pdbs = create_db_and_parallel(5000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.2896)
Without Noise:tensor(0.2914)


In [22]:
db, pdbs = create_db_and_parallel(20000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.3126)
Without Noise:tensor(0.3059)


In [23]:
#varying the amount of noise
def query(db, noise = 0.2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [24]:
def query(db, noise=0.2):
    
    true_result = torch.mean(db.float())

    first_coin_flip = (torch.rand(len(db)) < noise).float()
    second_coin_flip = (torch.rand(len(db)) < 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    sk_result = augmented_database.float().mean()

    private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

    return private_result, true_result

In [25]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .1)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4778)
Without Noise:tensor(0.4000)


In [26]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .4)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4167)
Without Noise:tensor(0.4100)


In [27]:
for i in range(3):
    db, pdbs = create_db_and_parallel(1000)
    private_result, true_result = query(db, 0.8)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

With Noise:tensor(-0.1900)
Without Noise:tensor(0.3150)
With Noise:tensor(-0.2300)
Without Noise:tensor(0.3310)
With Noise:tensor(-0.2300)
Without Noise:tensor(0.2980)


In [28]:
#larger the data more the chance of privacy preserve

In [29]:
epsilon = 0.0001

In [30]:
db, pdbs = create_db_and_parallel(1000)

In [31]:
def sum_query(db):
    return db.sum()

In [32]:
def laplacian_mechanism(db, query, sensitivity):
    
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))
    
    return query(db) + noise

In [33]:
def mean_query(db):
    return torch.mean(db.float())

In [34]:
laplacian_mechanism(db, sum_query, 1)

tensor([849.2267], dtype=torch.float64)

In [35]:
laplacian_mechanism(db, mean_query, 1/100)

tensor([205.8019], dtype=torch.float64)

# Deep Learning
## Perfect Privacy
> A query to database returns the same value even if we remove the person from the database.

In [36]:
import numpy as np

In [47]:
num_teachers = 10 # cuz we're working with 10 partners hospitals
num_examples = 1000 # size of our dataset
num_labels = 10 # number of labels for our classifier


In [48]:
preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int) # fake predictions

In [None]:
an_image = preds[:, 0]

In [51]:
new_labels = list()
for an_image in preds:
    label_counts = np.bincount(an_image, minlength = num_labels)
     
    epsilon = 0.1
    beta = 1 / epsilon

    for i in range(len(label_counts)):
        label_counts[i] += np.random.laplace(0, beta, 1)
        
    new_label = np.argmax(label_counts)
    new_labels.append(new_label)

In [52]:
new_labels

[1, 1, 9, 1, 8, 6, 5, 5, 1, 3]

# PATE Analysis

In [55]:
labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
counts = np.bincount(labels, minlength=10)
print(counts)
query_result = np.argmax(counts)
query_result

[0 0 1 1 0 0 1 0 1 6]


9

In [54]:
from syft.frameworks.torch.differential_privacy import pate



In [56]:
num_teachers, num_examples, num_labels = (100, 100, 10)
preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int) #fake preds
indices = (np.random.rand(num_examples) * num_labels).astype(int) # true answers

preds[:,0:10] *= 0

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)

assert data_dep_eps < data_ind_eps





In [57]:
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 11.756462732485115
Data Dependent Epsilon: 1.52655213289881


In [58]:
preds[:,0:50] *= 0

In [59]:
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5, moments=20)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 11.756462732485115
Data Dependent Epsilon: 0.9029013677789843
