### Imports and device selection

In [1]:
import torch

# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device  # bare last expression so the notebook displays the selected device

device(type='cpu')

### Generate Parallel Databases
"When querying a database, if I removed someone from the database, would the output of the query be any different?"
— the guiding question of Differential Privacy

In [2]:
def get_parallel_db(db, remove_index):
    """Return a copy of ``db`` with the entry at ``remove_index`` left out.

    The entries before and after ``remove_index`` are sliced out and joined
    with ``torch.cat``; ``db`` itself is not modified.
    """
    before = db[:remove_index]
    after = db[remove_index + 1:]
    return torch.cat([before, after])

def get_parallel_dbs(db):
    """Build every parallel database of ``db``.

    Returns a list with ``len(db)`` items; item ``i`` is
    ``get_parallel_db(db, i)``, i.e. ``db`` with entry ``i`` removed.
    """
    return [get_parallel_db(db, index) for index in range(len(db))]

def create_db_and_parallels(num_entries):
    """Create a random boolean database together with all its parallel databases.

    Each of the ``num_entries`` entries is True when a uniform ``torch.rand``
    sample exceeds 0.5. Returns ``(db, parallel_dbs)`` where ``parallel_dbs``
    is the result of ``get_parallel_dbs(db)``.
    """
    samples = torch.rand(num_entries)
    db = samples > 0.5
    parallel_dbs = get_parallel_dbs(db)
    return db, parallel_dbs

In [51]:
# Build a 5000-entry random database plus one parallel database per entry.
db, pdbs = create_db_and_parallels(5000)

### Evaluating Privacy
Evaluating whether the output of a query changes when we remove someone from the database
* with default sum()
* with mean()
* with threshold 5

In [76]:
def query1(db):
    """Sum query: return the total of all entries in ``db`` as a 0-dim tensor."""
    return torch.sum(db)

def query2(db):
    """Mean query: cast ``db`` to float and return the average of its entries."""
    as_float = db.float()
    return torch.mean(as_float)

def query3(db, threshold=5):
    """Threshold query: 1.0 if the sum of ``db`` is strictly above ``threshold``, else 0.0.

    The answer is returned as a 0-dim float tensor.
    """
    total = db.sum()
    exceeds = total > threshold
    return exceeds.float()

In [None]:
def sensitivity(query, num_entries=10):
    """Empirically estimate the sensitivity of ``query`` on a random database.

    A fresh random database with ``num_entries`` entries is created together
    with all of its parallel databases (each one missing a single entry).
    The sensitivity is the largest absolute difference between the query
    result on the full database and the result on any parallel database.

    Args:
        query: callable taking a database tensor and returning a 0-dim tensor.
        num_entries: size of the random database to generate.

    Returns:
        The maximum distance as a 0-dim tensor, or the integer ``0`` when no
        parallel database changes the query result.
    """
    db, pdbs = create_db_and_parallels(num_entries)
    full_db_result = query(db)
    # Named max_distance so the accumulator does not shadow this function.
    max_distance = 0
    for pdb in pdbs:
        pdb_result = query(pdb)
        # Distance is measured against the full-database result, not against
        # the running maximum.
        db_distance = torch.abs(pdb_result - full_db_result)
        if db_distance > max_distance:
            max_distance = db_distance
    return max_distance

In [89]:
# Build a fresh 10-entry database and display how many of its entries are True.
db, pdbs = create_db_and_parallels(10)
db.sum()



# Disabled experiment: print the estimated sensitivity of each query ten times.
# for _ in range(10):
#     print(f"db{_} sensitivity "
#           f"sum(): {sensitivity(query1)}, "
#           f"mean(): {sensitivity(query2):.3f}, "
#           f"threshold: {sensitivity(query3)}")

tensor(6)

In [92]:
# Mean query on the current db: the fraction of entries that are True.
query2(db)


tensor(0.6000)

In [99]:
# Estimate the sensitivity of the threshold query ten times; every call to
# sensitivity() draws a new random 10-entry database, so results can differ.
for i in range(10):
    print(sensitivity(query3, num_entries=10))


0
tensor(1.)
tensor(1.)
tensor(1.)
0
0
0
0
0
0


### Perform Differencing attack

In [107]:
# Differencing attack: run the same query on the full database and on the
# database with entry 10 removed; the difference exposes that entry's value.
db, _ = create_db_and_parallels(100)
# NOTE: despite the plural name, `pdbs` holds a single parallel database here.
pdbs = get_parallel_db(db, remove_index=10)
# Use the tensors' vectorized .sum() rather than Python's built-in sum(),
# which would add the entries one at a time.
db.sum() - pdbs.sum()

tensor(1)

In [109]:
# Differencing attack using sum()
query1(db) - query1(pdbs)


tensor(1)

In [110]:
# Differencing attack using mean()
query2(db) - query2(pdbs)


tensor(0.0056)

In [112]:
# Differencing attack using the threshold query
query3(db) - query3(pdbs)

tensor(0.)

### Implement Local DP
* Flip a coin 2 times.
* If the first coin flip is heads, answer honestly.
* If the first coin flip is tails, answer according to the second coin flip (heads for yes, tails for no).

In [5]:
def query(db):
    """Answer a mean query over ``db`` with coin-flip randomized response.

    For every entry a first coin decides whether the true value is reported;
    otherwise a second coin supplies a random 0/1 answer. Because half of the
    reported answers are random with mean 0.5, the expected reported mean is
    ``0.5 * true_mean + 0.25``, which is inverted with ``mean * 2 - 0.5``.

    Returns:
        ``(db_result, true_res)``: the de-skewed noisy mean and the true mean,
        both as 0-dim float tensors.
    """
    values = db.float()
    true_res = values.mean()

    count = len(db)
    # Draw order matters for reproducibility: first coin, then second coin.
    answer_honestly = torch.rand(count) > 0.5
    random_answer = (torch.rand(count) > 0.5).float()

    # Keep the true value where the first coin says so, else the random answer.
    augmented_db = torch.where(answer_honestly, values, random_answer)
    db_result = augmented_db.mean() * 2 - 0.5

    return db_result, true_res

def query_with_noise(db, noise):
    """Answer a mean query over ``db`` using randomized response with tunable noise.

    Each entry is reported honestly with probability ``1 - noise``; otherwise
    it is replaced by a fair coin flip. The expected reported mean is
    ``(1 - noise) * true_mean + noise * 0.5``, which is inverted to de-skew
    the result.

    Args:
        db: database tensor of 0/1 (or boolean) entries.
        noise: probability in ``[0, 1)`` that an entry is replaced by a random
            answer. ``0`` adds no noise.

    Returns:
        ``(db_result, true_res)``: the de-skewed noisy mean and the true mean,
        both as 0-dim float tensors.

    Raises:
        ValueError: if ``noise`` is outside ``[0, 1)``; at ``noise == 1`` no
            true answers remain, so the mean cannot be recovered.
    """
    if not 0 <= noise < 1:
        raise ValueError("noise must satisfy 0 <= noise < 1")

    values = db.float()
    true_res = torch.mean(values)

    # torch.rand samples from [0, 1), so `>= noise` is always True for noise == 0.
    answer_honestly = (torch.rand(len(db)) >= noise).float()
    random_answer = (torch.rand(len(db)) > 0.5).float()
    augmented_db = values * answer_honestly + (1 - answer_honestly) * random_answer

    # Invert E[mean(augmented_db)] = (1 - noise) * true_mean + noise * 0.5.
    db_result = (torch.mean(augmented_db) - 0.5 * noise) / (1 - noise)

    return db_result, true_res

In [15]:
# Only the database itself is needed here, so draw it directly instead of
# calling create_db_and_parallels(), which would also build 100 unused
# parallel databases.
dbs = torch.rand(100) > 0.5
private_result, true_result = query(dbs)
# str() keeps the tensor(...) form of each value in the printed text.
print(f"With noise: {str(private_result)}, without noise: {str(true_result)}")

With noise: tensor(0.5800), without noise: tensor(0.5100)


In [16]:
# Only the database itself is needed here, so draw it directly instead of
# calling create_db_and_parallels(), which would also build 1000 unused
# parallel databases.
dbs = torch.rand(1000) > 0.5
private_result, true_result = query(dbs)
# str() keeps the tensor(...) form of each value in the printed text.
print(f"With noise: {str(private_result)}, without noise: {str(true_result)}")


With noise: tensor(0.5360), without noise: tensor(0.5200)


In [17]:
# Only the database itself is needed here, so draw it directly instead of
# calling create_db_and_parallels(), which would also build 10000 unused
# parallel databases of 9999 entries each.
dbs = torch.rand(10000) > 0.5
private_result, true_result = query(dbs)
# str() keeps the tensor(...) form of each value in the printed text.
print(f"With noise: {str(private_result)}, without noise: {str(true_result)}")


With noise: tensor(0.5130), without noise: tensor(0.5006)


In [18]:
# Only the database itself is needed here, so draw it directly instead of
# calling create_db_and_parallels(), which would also build 100000 unused
# parallel databases of 99999 entries each (on the order of 10**10 elements).
dbs = torch.rand(100000) > 0.5
private_result, true_result = query(dbs)
# str() keeps the tensor(...) form of each value in the printed text.
print(f"With noise: {str(private_result)}, without noise: {str(true_result)}")


With noise: tensor(0.5004), without noise: tensor(0.5027)
