<a href="https://colab.research.google.com/github/myidispg/PytorchProjects/blob/master/Private%20AI/Differential_Privacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import numpy as np
import torch

In [3]:
db = torch.randn(5000) > 0.5
db

tensor([1, 0, 1,  ..., 1, 0, 1], dtype=torch.uint8)

In [4]:
def generate_pdb(db, index):
  
  return torch.cat((db[:index], db[index+1:]))



def get_parallel_dbs(db):
  
  pdbs = torch.zeros((len(db), len(db)-1))
  
  for i in range(len(db)):
    pdb = generate_pdb(db, i)
    pdbs[i] = pdb
    
  return pdbs
   
def get_db_pdbs(num_elems = 10):
  
  db = torch.randn(num_elems) > 0.5
  
  pdbs = get_parallel_dbs(db)
  
  return db, pdbs

db, pdbs = get_db_pdbs(5000)
print(db.shape)
print(pdbs.shape)

torch.Size([5000])
torch.Size([5000, 4999])


In [5]:
def query(db):
  return db.sum()

db, pdbs = get_db_pdbs(10)
query(db)

tensor(4)

In [6]:
def calculate_sensitivity(db, pdbs, query):
  
  db_result = query(db)
  max_result = 0
  
  for pdb in pdbs:
    pdb_result = query(pdb)
    
    db_distance = torch.abs(pdb_result - db_result)
    if db_distance > max_result:
      max_result = db_distance
  
  return max_result

print(f'The sensitivity of the database is: {calculate_sensitivity(db, pdbs, query)}')
  

The sensitivity of the database is: 1.0


In [7]:
def mean_query(db):
  return db.float().mean()

db, pdbs = get_db_pdbs(10)

print(f'The sensitivity of the database is: {calculate_sensitivity(db, pdbs, mean_query)}')

The sensitivity of the database is: 0.07777778804302216


### Threshold sensitvity

In [8]:
def threshold_query(db, threshold=5):
  return (db.sum() > threshold).float()

for i in range(10):
  db, pdb = get_db_pdbs(10)
  print(threshold_query(db, 5))


tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)
tensor(0.)


### Performing differencing attack

In [9]:
db, _ = get_db_pdbs(10)
pdb = generate_pdb(db, 9)

print('Performing differencing attacks...\nIf the results are not 0, then the value at index was 1.')
print(f'Using sum query: {query(db) - query(pdb)}')
print(f'Using mean query: {mean_query(db) - mean_query(pdb)}')
print(f'Using threshold query: {threshold_query(db, threshold=2) - threshold_query(pdb,threshold=2)}')

Performing differencing attacks...
If the results are not 0, then the value at index was 1.
Using sum query: 0
Using mean query: -0.04444444179534912
Using threshold query: 0.0


### Apply Local Differential Privacy

In [18]:
def randomize_db(db, noise=0.5):
  
  for i in range(len(db)):
    
    first_flip = np.random.rand(1) > noise
    
    if first_flip:
      pass
    else:
      db[i] = int(np.random.rand(1) > 0.5)

db, _ = get_db_pdbs(100)
print(f'Before randomizing:\n{mean_query(db)}')
randomize_db(db, noise=0.6)
print(f'\nAfter randomizing: \n{mean_query(db)}')

Before randomizing:
0.25

After randomizing: 
0.44999998807907104
