## Day 6

In [1]:
import numpy as np
import torch


In [2]:
def parallel_db(db, remove_entry):
    return torch.cat((db[:remove_entry], db[remove_entry + 1:]))
    

In [3]:
# parallel_db(db, remove_entry).shape

In [4]:
def get_parallel_dbs(db):
    
    parallel_dbs = []

    for i in range(len(db)):
        parallel_dbs.append((parallel_db(db, i)))
        
    return parallel_dbs

In [5]:
 def create_db_and_parallel(num_entries):
#         num_entries = 5000
        db = torch.randn(num_entries) > 0.5
        
        pdbs = get_parallel_dbs(db)
        
        return db, pdbs
    

In [6]:
def query(db):
#     return db.sum()
    return db.float().mean() #Gives emperical sensitivity

> Find difference between db.sum() and sum(db)

In [7]:
def sensitivity_fn(query, num_entries = 500):
    
    db, pdbs = create_db_and_parallel(num_entries)
    
    full_db_result = query(db)
    print(full_db_result)
    
    sensitivity = 0

    for pdb in pdbs:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - full_db_result)

        if (db_distance > sensitivity):
            sensitivity = db_distance
            
    return sensitivity

In [8]:
sensitivity_fn(query)

tensor(0.3000)


tensor(0.0014)

In [9]:
# while removing/ calculating parallel db, each entry corresponds to the unique person... 

In [10]:
def query(db, threshold=5):
    return (db.sum() > threshold).float() #L1 sensitivity

In [11]:
db, queries = create_db_and_parallel(10)
print(query(db), db.sum())

tensor(0.) tensor(5)


In [12]:
for i in range(10):
    sens_f = sensitivity_fn(query, num_entries = 100)
    print(sens_f)

tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0
tensor(1.)
0


# perform defferencing attack on row 10

In [13]:
db, _ = create_db_and_parallel(100)
print(db, db[10])

tensor([0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
        1, 0, 0, 0], dtype=torch.uint8) tensor(0, dtype=torch.uint8)


In [14]:
pdb = parallel_db(db, 10)

In [15]:
db[10]

tensor(0, dtype=torch.uint8)

In [16]:
#differencing attack using sum
db.sum() - pdb.sum()

tensor(0)

In [17]:
#differencing attack using mean

(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(-0.0025)

# local differential privacy adds noise to input of query function and global adds noise to output of query function

In [18]:
def query(db, noise = 2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > 0.5).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [19]:
db, pdbs = create_db_and_parallel(10)
private_result, true_result = query(db, 2.5)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.)
Without Noise:tensor(0.4000)


In [20]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.3800)
Without Noise:tensor(0.3800)


In [21]:
db, pdbs = create_db_and_parallel(5000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.3168)
Without Noise:tensor(0.3118)


In [36]:
db, pdbs = create_db_and_parallel(20000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4509)
Without Noise:tensor(0.3133)


In [23]:
#varying the amount of noise
def query(db, noise = 0.2):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * noise - 0.5
    
    return db_result, true_result

In [31]:
def query(db, noise=0.2):
    
    true_result = torch.mean(db.float())

    first_coin_flip = (torch.rand(len(db)) < noise).float()
    second_coin_flip = (torch.rand(len(db)) < 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    sk_result = augmented_database.float().mean()

    private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

    return private_result, true_result

In [32]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .1)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4111)
Without Noise:tensor(0.2900)


In [33]:
db, pdbs = create_db_and_parallel(100)
private_result, true_result = query(db, .4)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4167)
Without Noise:tensor(0.3600)


In [41]:
for i in range(3):
    db, pdbs = create_db_and_parallel(1000)
    private_result, true_result = query(db, 0.8)
    print("With Noise:" + str(private_result))
    print("Without Noise:" + str(true_result))

With Noise:tensor(-0.3950)
Without Noise:tensor(0.2890)
With Noise:tensor(-0.3050)
Without Noise:tensor(0.3060)
With Noise:tensor(-0.1950)
Without Noise:tensor(0.3160)


In [42]:
#larger the data more the chance of privacy preserve