## Day 7

In [30]:
import numpy as np
import torch


In [31]:
def parallel_db(db, remove_entry):
    return torch.cat((db[:remove_entry], db[remove_entry + 1:]))
    

In [32]:
# parallel_db(db, remove_entry).shape

In [33]:
def get_parallel_dbs(db):
    
    parallel_dbs = []

    for i in range(len(db)):
        parallel_dbs.append((parallel_db(db, i)))
        
    return parallel_dbs

In [34]:
 def create_db_and_parallel(num_entries):
#         num_entries = 5000
        db = torch.randn(num_entries) > 0.5
        
        pdbs = get_parallel_dbs(db)
        
        return db, pdbs
    

In [35]:
def query(db):
#     return db.sum()
    return db.float().mean() #Gives emperical sensitivity

> Find difference between db.sum() and sum(db)

In [36]:
def sensitivity_fn(query, num_entries = 500):
    
    db, pdbs = create_db_and_parallel(num_entries)
    
    full_db_result = query(db)
    print(full_db_result)
    
    sensitivity = 0

    for pdb in pdbs:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_db_result)

        if (db_distance > sensitivity):
            sensitivity = db_distance
            
    return sensitivity

In [37]:
sensitivity_fn(query)

tensor(0.2860)


tensor(0.0014)

In [38]:
# while removing/ calculating parallel db, each entry corresponds to the unique person... 

In [39]:
def query(db, threshold=5):
    return (db.sum() > threshold).float() #L1 sensitivity

In [40]:
db, queries = create_db_and_parallel(10)
print(query(db), db.sum())

tensor(0.) tensor(1)


In [41]:
for i in range(10):
    sens_f = sensitivity_fn(query, num_entries = 100)
    print(sens_f)

tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0


# perform defferencing attack on row 10

In [42]:
db, _ = create_db_and_parallel(100)
print(db, db[10])

tensor([0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0,
        0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0,
        0, 0, 0, 0], dtype=torch.uint8) tensor(0, dtype=torch.uint8)


In [43]:
pdb = parallel_db(db, 10)

In [44]:
db[10]

tensor(0, dtype=torch.uint8)

In [45]:
#differencing attack using sum
db.sum() - pdb.sum()

tensor(0)

In [46]:
#differencing attack using mean

(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(-0.0034)

# local differential privacy adds noise to input of query function and global adds noise to output of query function

In [47]:
def query(db, noise = 2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > 0.5).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [48]:
db, pdbs = create_db_and_parallel(10)
private_result, true_result = query(db, 2.5)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.5000)
Without Noise:tensor(0.2000)


In [49]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.1600)
Without Noise:tensor(0.2800)


In [50]:
db, pdbs = create_db_and_parallel(5000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.3096)
Without Noise:tensor(0.3178)


In [51]:
db, pdbs = create_db_and_parallel(20000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.2985)
Without Noise:tensor(0.3076)


In [52]:
#varying the amount of noise
def query(db, noise = 0.2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [53]:
def query(db, noise=0.2):
    
    true_result = torch.mean(db.float())

    first_coin_flip = (torch.rand(len(db)) < noise).float()
    second_coin_flip = (torch.rand(len(db)) < 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    sk_result = augmented_database.float().mean()

    private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

    return private_result, true_result

In [54]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .1)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4333)
Without Noise:tensor(0.2600)


In [55]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .4)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4333)
Without Noise:tensor(0.3500)


In [56]:
for i in range(3):
    db, pdbs = create_db_and_parallel(1000)
    private_result, true_result = query(db, 0.8)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

With Noise:tensor(-0.2000)
Without Noise:tensor(0.3120)
With Noise:tensor(-0.3950)
Without Noise:tensor(0.2670)
With Noise:tensor(-0.2400)
Without Noise:tensor(0.3040)


In [57]:
#larger the data more the chance of privacy preserve

In [66]:
epsilon = 0.0001

In [71]:
db, pdbs = create_db_and_parallel(1000)

In [72]:
def sum_query(db):
    return db.sum()

In [73]:
def laplacian_mechanism(db, query, sensitivity):
    
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))
    
    return query(db) + noise

In [74]:
def mean_query(db):
    return torch.mean(db.float())

In [77]:
laplacian_mechanism(db, sum_query, 1)

tensor([14275.2942], dtype=torch.float64)

In [78]:
laplacian_mechanism(db, mean_query, 1/100)

tensor([-23.4976], dtype=torch.float64)

ModuleNotFoundError: No module named 'syft'