<a href="https://colab.research.google.com/github/shreyas269/programming/blob/master/differential_privacy/differential_privacy_basics.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import torch

num_entries = 5000

# Initializing a db of size num_entries with random 0-1 entries
db = torch.rand(num_entries) > 0.5

In [0]:
db[0:5]

tensor([False,  True,  True, False, False])

In [0]:
# Get an adjacent db which differ at a specific index
def get_parallel_db(db, remove_index):
  return torch.cat((db[0:remove_index], 
                   db[remove_index+1:]))

In [0]:
get_parallel_db(db, 2)

tensor([False,  True, False,  ..., False, False, False])

In [0]:
# Get a list of adjacent db's w.r.t a specific db
def get_parallel_dbs(db):
  parallel_dbs = list()

  for i in range(len(db)):
    pdb = get_parallel_db(db, i)
    parallel_dbs.append(pdb)

  return parallel_dbs

In [0]:
# Generates a random db of specified size along with its adjacent db's as well
def create_db_and_parallels(num_entries):
  db = torch.rand(num_entries) > 0.5
  pdbs = get_parallel_dbs(db)
  return db,pdbs

In [0]:
db, pdbs = create_db_and_parallels(20)

In [0]:
# Returns sum of elements in a db
def query(db):
  return db.sum()

In [0]:
query(db)

tensor(6)

In [0]:
full_db_result = query(db)

In [0]:
max_distance = 0

for pdb in pdbs:
  pdb_result = query(pdb)
  
  db_distance = torch.abs(pdb_result - full_db_result)
  if(db_distance > max_distance):
    max_distance = db_distance

In [0]:
max_distance

tensor(1)

In [0]:
# Returns sensitivity of a query on a randomly generated db
def sensitivity(query, n_entries = 100):
  db, pdbs = create_db_and_parallels(n_entries)

  full_db_result = query(db)

  max_distance = 0

  for pdb in pdbs:
    pdb_result = query(pdb)
    
    db_distance = torch.abs(pdb_result - full_db_result)
    if(db_distance > max_distance):
      max_distance = db_distance

  return max_distance

In [0]:
sensitivity(query)

tensor(1)

In [0]:
# Returns the mean of the elements in a db
def query_mean(db):
  return db.float().mean()

In [0]:
sensitivity(query_mean)

tensor(0.0056)

In [0]:
threshold = 5

# Threshold query which returns a bollean if the threshold is exceeded
def query_sum_threshold(db):
  return (db.sum() > threshold).float()

In [0]:
for i in range(10):
  sens_f = sensitivity(query_sum_threshold, n_entries = 10)
  print(sens_f)

0
tensor(1.)
0
0
0
0
0
0
0
0


In [0]:
# Provide a noisy answer with the randomized response mechanism on a db
def randomized_response(db):

  true_result = query_mean(db)

  first_coin_flip = (torch.rand(len(db)) > 0.5).float()
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  skewed_db_result = db.float() * first_coin_flip + second_coin_flip.float() * (1 - first_coin_flip)
  noised_db_result = torch.mean(skewed_db_result.float()) * 2 - 0.5

  return noised_db_result, true_result

In [0]:
db, pdbs = create_db_and_parallels(10000)

private_result, true_result = randomized_response(db)

print("True result: ", true_result)
print("Private_result: ", private_result)

True result:  tensor(0.5020)
Private_result:  tensor(0.5054)


In [0]:
# Randomized response mechanism with biased coin flips on the answers of db
def randomized_response(db, p):

  true_result = query_mean(db)

  first_coin_flip = (torch.rand(len(db)) > 1-p).float()
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  skewed_db_result = db.float() * first_coin_flip + second_coin_flip.float() * (1 - first_coin_flip)
  noised_db_result = torch.mean(skewed_db_result.float()) / p - 0.5 * (1-p) / p

  return noised_db_result, true_result

In [0]:
db, pdbs = create_db_and_parallels(10000)

first_coin_bias = 0.75

private_result, true_result = randomized_response(db, first_coin_bias)

print("True result: ", true_result)
print("Private_result: ", private_result)

True result:  tensor(0.5034)
Private_result:  tensor(0.5071)


In [0]:
epsilon = 0.5

import numpy as np

db ,pdbs = create_db_and_parallels(100)

In [0]:
# Provides noisy answer using the laplace mechanism
def laplacian_mechanism(db, query, sensitivity):
  beta = sensitivity / epsilon
  noise = torch.tensor(np.random.laplace(0, beta, 1))

  return query(db) + noise

In [0]:
print(query_mean(db))
print(laplacian_mechanism(db, query_mean, 1/100))

tensor(0.4800)
tensor([0.4991], dtype=torch.float64)
