# **Section Project : Differential Privacy**

In [1]:
import torchvision.datasets as datasets
from torchvision import transforms
import torch
from torch.utils.data import Subset

transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
#trainloader = torch.utils.data.DataLoader(mnist_trainset, batch_size=64, shuffle = True)

mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
#testloader = torch.utils.data.DataLoader(mnist_testset, batch_size=64, shuffle=True)


  0%|          | 16384/9912422 [00:00<01:27, 112479.71it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 28297833.11it/s]                           


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 439735.70it/s]
  1%|          | 16384/1648877 [00:00<00:11, 144991.87it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 6532134.40it/s]                           
8192it [00:00, 179908.05it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


Private Datasets

In [0]:
num_teachers = 100
teacher_loaders = []
data_size = len(mnist_trainset) // num_teachers

for i in range(data_size):
  indices = list(range(i*data_size, (i+1)*data_size))
  subset_data = Subset(mnist_trainset, indices)
  loader = torch.utils.data.DataLoader(subset_data, batch_size=32, shuffle=True)
  teacher_loaders.append(loader)

Public Datasets

In [0]:
student_train_data = Subset(mnist_testset, list(range(9000)))
student_test_data = Subset(mnist_testset, list(range(9000, 10000)))

student_trainloader = torch.utils.data.DataLoader(student_train_data, batch_size=32, shuffle=True)
student_testloader = torch.utils.data.DataLoader(student_test_data, batch_size=32, shuffle=True)


Training

In [0]:
from torch import nn, optim
def fit(trainloader):
  model = nn.Sequential(nn.Linear(784,128),
                       nn.ReLU(),
                       nn.Dropout(p=0.2),
                       nn.Linear(128,64),
                       nn.ReLU(),
                       nn.Dropout(p=0.2),
                       nn.Linear(64,10),
                       nn.LogSoftmax(dim=1))
  
  criterion = nn.NLLLoss()
  optimizer = optim.SGD(model.parameters(), lr=0.003)
  
  epochs = 10
  for e in range(epochs):
    running_loss = 0
    accuracy =0
    model.train()
    for images, labels in trainloader:
      images = images.view(images.shape[0], -1)
      optimizer.zero_grad()
      logps = model(images)
      loss = criterion(logps, labels)
      loss.backward()
      optimizer.step()
      running_loss += loss.item()
      
      ps = torch.exp(logps)
      top_p, top_class = ps.topk(1, dim=1)
      equals = top_class == labels.view(*top_class.shape)
      accuracy += torch.mean(equals.type(torch.FloatTensor)).item()
      print(f"Epoch {e+1}/{epochs}.. "
          f"Training loss: {running_loss/len(trainloader)}"
           f"Training Accuracy: {accuracy/len(trainloader)}")
      
      
  return model
 


Testing

In [0]:
def predict(model, dataloader):
  outputs = torch.zeros(0, dtype=torch.long)
  model.eval
  
  for images, labels in dataloader:
    images = images.view(images.shape[0], -1)
    logps = model.forward(images)
    ps = torch.argmax(torch.exp(logps), dim=1)
    outputs = torch.cat((outputs, ps))
    
  return outputs

In [6]:
models = []

for i in range(num_teachers):
  print("For Teacher " + str(i))
  model = fit(teacher_loaders[i])
  models.append(model)

For Teacher 0
Epoch 1/10.. Training loss: 0.12104791089108116Training Accuracy: 0.009868421052631578
Epoch 1/10.. Training loss: 0.24140223703886332Training Accuracy: 0.013157894736842105
Epoch 1/10.. Training loss: 0.36169758595918355Training Accuracy: 0.02138157894736842
Epoch 1/10.. Training loss: 0.4839438388222142Training Accuracy: 0.02138157894736842
Epoch 1/10.. Training loss: 0.6068425304011295Training Accuracy: 0.023026315789473683
Epoch 1/10.. Training loss: 0.7284213116294459Training Accuracy: 0.029605263157894735
Epoch 1/10.. Training loss: 0.8483263191423918Training Accuracy: 0.039473684210526314
Epoch 1/10.. Training loss: 0.9702903596978438Training Accuracy: 0.044407894736842105
Epoch 1/10.. Training loss: 1.091093289224725Training Accuracy: 0.049342105263157895
Epoch 1/10.. Training loss: 1.2130902566407855Training Accuracy: 0.05592105263157895
Epoch 1/10.. Training loss: 1.334546816976447Training Accuracy: 0.06085526315789474
Epoch 1/10.. Training loss: 1.4554408224005

In [0]:
import numpy as np
epsilon = 0.2

def aggregated_teacher(models, dataloader, epsilon):
  preds = torch.torch.zeros((len(models), 9000), dtype=torch.long)
  for i, model in enumerate(models):
    results = predict(model, dataloader)
    preds[i] = results
    
  labels = np.array([]).astype(int)
  for image_preds in np.transpose(preds):
    label_counts = np.bincount(image_preds, minlength=10)
    beta = 1/epsilon
    
    for i in range(len(label_counts)):
      label_counts[i] += np.random.laplace(0, beta, 1)
      
    new_label = np.argmax(label_counts)
    labels = np.append(labels, new_label)
    
  return preds.numpy(), labels
    

In [0]:
teacher_models = models
preds, student_labels = aggregated_teacher(teacher_models, student_trainloader, epsilon)

In [0]:
def student_loader(student_train_loader, labels):
    for i, (data, _) in enumerate(iter(student_train_loader)):
        yield data, torch.from_numpy(labels[i*len(data): (i+1)*len(data)])

In [18]:
student_model = nn.Sequential(nn.Linear(784,128),
                       nn.ReLU(),
                       nn.Dropout(p=0.2),
                       nn.Linear(128,64),
                       nn.ReLU(),
                       nn.Dropout(p=0.2),
                       nn.Linear(64,10),
                       nn.LogSoftmax(dim=1))
  
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)
  
steps = 0
epochs = 10
for e in range(epochs):
  running_loss = 0
  accuracy =0
  model.train()
  train_loader = student_loader(student_trainloader, student_labels)
  for images, labels in train_loader:
    images = images.view(images.shape[0], -1)
    steps += 1
    optimizer.zero_grad()
    logps = model(images)
    loss = criterion(logps, labels)
    loss.backward()
    optimizer.step()
    running_loss += loss.item()
      
    if steps % 50 == 0:
      test_loss = 0
      accuracy = 0
      student_model.eval()
      with torch.no_grad():
        for images, labels in student_testloader:
          images = images.view(images.shape[0], -1)
          log_ps = student_model(images)
          test_loss += criterion(log_ps, labels).item()

           # Accuracy
          ps = torch.exp(log_ps)
          top_p, top_class = ps.topk(1, dim=1)
          equals = top_class == labels.view(*top_class.shape)
          accuracy += torch.mean(equals.type(torch.FloatTensor))
      student_model.train()
      print("Epoch: {}/{}.. ".format(e+1, epochs),
                    "Training Loss: {:.3f}.. ".format(running_loss/len(student_trainloader)),
                    "Test Loss: {:.3f}.. ".format(test_loss/len(student_testloader)),
                    "Test Accuracy: {:.3f}".format(accuracy/len(student_testloader)))
      running_loss = 0

Epoch: 1/10..  Training Loss: 0.197..  Test Loss: 2.336..  Test Accuracy: 0.131
Epoch: 1/10..  Training Loss: 0.210..  Test Loss: 2.336..  Test Accuracy: 0.128
Epoch: 1/10..  Training Loss: 0.190..  Test Loss: 2.335..  Test Accuracy: 0.134
Epoch: 1/10..  Training Loss: 0.213..  Test Loss: 2.337..  Test Accuracy: 0.128
Epoch: 1/10..  Training Loss: 0.214..  Test Loss: 2.337..  Test Accuracy: 0.125
Epoch: 2/10..  Training Loss: 0.068..  Test Loss: 2.334..  Test Accuracy: 0.125
Epoch: 2/10..  Training Loss: 0.198..  Test Loss: 2.336..  Test Accuracy: 0.131
Epoch: 2/10..  Training Loss: 0.205..  Test Loss: 2.335..  Test Accuracy: 0.134
Epoch: 2/10..  Training Loss: 0.196..  Test Loss: 2.337..  Test Accuracy: 0.125
Epoch: 2/10..  Training Loss: 0.212..  Test Loss: 2.334..  Test Accuracy: 0.134
Epoch: 2/10..  Training Loss: 0.214..  Test Loss: 2.336..  Test Accuracy: 0.131
Epoch: 3/10..  Training Loss: 0.140..  Test Loss: 2.338..  Test Accuracy: 0.125
Epoch: 3/10..  Training Loss: 0.206..  T

In [19]:
!pip install syft



In [21]:
from syft.frameworks.torch.differential_privacy import pate
preds[:,0:50] *= 0
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=student_labels, noise_eps=epsilon, delta=1e-5)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 1451.5129254649705
Data Dependent Epsilon: 1.4422683692548723
