## Differential Privacy Project

In [2]:
import torch

### Create a Randomized Boolean Database

In [3]:
DATABASE_LENGTH = 5000
DATABASE_SEED = 0  # fixed seed so Restart & Run All reproduces the same database

# Draw from a dedicated generator rather than calling torch.manual_seed, so the
# global RNG state used by any other cell is left untouched.
database_rng = torch.Generator().manual_seed(DATABASE_SEED)

# Uniform samples thresholded at 0.5 give a boolean tensor of DATABASE_LENGTH entries.
database = torch.rand(DATABASE_LENGTH, generator=database_rng) > 0.5

In [5]:
FIRST_INDEX = 0
database[FIRST_INDEX]  # show the first entry of the database

tensor(True)

In [6]:
database[2:10]  # show the entries at indices 2 through 9

tensor([ True,  True, False, False,  True, False,  True,  True])

In [7]:
database.size()  # shape of the database tensor

torch.Size([5000])

### Build Parallel Database

In [8]:
def build_parallel_database(database, remove_index):
    """
    Return a copy of ``database`` with the entry at ``remove_index`` removed.

    The result is the concatenation of the entries before ``remove_index``
    and the entries after it, so it is one entry shorter than the input.

    Args:
        database: tensor of entries, indexed along its first dimension.
        remove_index: position of the entry to drop; negative values count
            from the end, as in ordinary Python indexing.

    Returns:
        A new tensor holding every entry of ``database`` except the removed one.

    Raises:
        IndexError: if ``remove_index`` does not refer to an existing entry.
    """
    length = len(database)
    if not -length <= remove_index < length:
        raise IndexError(
            f"remove_index {remove_index} is out of range for a database of length {length}"
        )
    # Normalise negative indices: without this, database[0:-1] followed by
    # database[0:] would silently produce a tensor longer than the input.
    if remove_index < 0:
        remove_index += length
    return torch.cat((database[:remove_index], database[remove_index + 1:]))

REMOVE_INDEX = 1000  # index of the entry left out of the parallel database

parallel_database = build_parallel_database(database, REMOVE_INDEX)
parallel_database



tensor([ True,  True,  True,  ..., False,  True,  True])

In [9]:
parallel_database.size()  # shape of the parallel database

torch.Size([4999])

### Build Multiple Databases

In [12]:
def build_parallel_databases(database):
    """
    Build every parallel database of ``database``.

    Element ``i`` of the returned list is ``build_parallel_database(database, i)``,
    so the list holds one parallel database per row of ``database``.
    """
    return [
        build_parallel_database(database, removed_row)
        for removed_row in range(len(database))
    ]

parallel_databases = build_parallel_databases(database)
# NOTE(review): displaying the whole list prints one tensor repr per database;
# consider showing len(parallel_databases) or a short slice instead.
parallel_databases

[tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]),
 tensor([ True,  True,  True,  ..., False,  True,  True]

### Determine if Query Leaks Private Data

Evaluate whether a query preserves privacy by comparing its result on the full database with its result on each parallel database.

In [15]:
def sum_query(database):
    """Return the sum of all entries in ``database``, using its ``.sum()`` method."""
    total = database.sum()
    return total
# Query result on the full database; the baseline each parallel result is compared against.
initial_result = sum_query(database)

In [18]:
sum_query(parallel_databases[2])  # query result on the database with the entry at index 2 removed

tensor(2474)

#### Calculate Sensitivity of Query

Sensitivity measures the maximum amount the query result changes when a single entry is removed from the database.

In [19]:
sensitivity = 0
sensitivity_scale = []  # each value `sensitivity` took when the running maximum grew

# Use a dedicated loop name: iterating with `parallel_database` would overwrite
# the notebook-level variable of that name built from REMOVE_INDEX.
for candidate_database in parallel_databases:
    parallel_result = sum_query(candidate_database)
    # How far the query result moved relative to the full-database result.
    distance = torch.abs(parallel_result - initial_result)

    # Keep the largest distance seen so far.
    if distance > sensitivity:
        sensitivity_scale.append(distance)
        sensitivity = distance
sensitivity

tensor(1)

In [20]:
sensitivity_scale  # distances recorded each time the running maximum increased

[tensor(1)]

### Finding the Sensitivity of the Average Function

In [21]:
def average_query(database):
    """Return the mean of ``database`` after converting its entries to floats."""
    as_float = database.float()
    return as_float.mean()

average_query(database)  # mean of the full database

tensor(0.4950)