<a href="https://colab.research.google.com/github/karankishinani/Training-MNIST-using-Differential-Privacy-and-PATE-Analysis/blob/master/PATE_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Private Aggregation of Teacher Ensembles (PATE)

![title](pate.jpeg)

## Importing Libraries

In [1]:
!pip install syft
import torch
from torch.utils.data import Subset

from torchvision import datasets, transforms

import numpy as np

from syft.frameworks.torch.differential_privacy import pate

Collecting syft
[?25l  Downloading https://files.pythonhosted.org/packages/38/2e/16bdefc78eb089e1efa9704c33b8f76f035a30dc935bedd7cbb22f6dabaa/syft-0.1.21a1-py3-none-any.whl (219kB)
[K     |████████████████████████████████| 225kB 2.8MB/s 
[?25hCollecting websocket-client>=0.56.0 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/29/19/44753eab1fdb50770ac69605527e8859468f3c0fd7dc5a76dd9c4dbd7906/websocket_client-0.56.0-py2.py3-none-any.whl (200kB)
[K     |████████████████████████████████| 204kB 41.2MB/s 
[?25hCollecting zstd>=1.4.0.0 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/22/37/6a7ba746ebddbd6cd06de84367515d6bc239acd94fb3e0b1c85788176ca2/zstd-1.4.1.0.tar.gz (454kB)
[K     |████████████████████████████████| 460kB 48.3MB/s 
[?25hCollecting tf-encrypted>=0.5.4 (from syft)
[?25l  Downloading https://files.pythonhosted.org/packages/55/ff/7dbd5fc77fcec0df1798268a6b72a2ab0150b854761bc39c77d566798f0b/tf_encrypted-0.5.7-py3-none-manyli

W0726 02:07:57.878336 139943764256640 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0726 02:07:57.894934 139943764256640 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



## Loading the Dataset

In [2]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                              ])

# Load the dataset
train_data = datasets.MNIST(root='./data', train=True, 
                                download=True, transform=transform)
test_data = datasets.MNIST(root='./data', train=False,
                                  download=True, transform=transform)

  0%|          | 16384/9912422 [00:00<01:10, 140100.30it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


9920512it [00:00, 31677725.80it/s]                          


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz


32768it [00:00, 452002.20it/s]
  1%|          | 16384/1648877 [00:00<00:11, 140918.22it/s]

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


1654784it [00:00, 7419475.59it/s]                           
8192it [00:00, 185271.65it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz
Processing...
Done!


## Create Data Loaders for each teacher

In [0]:
num_teachers = 100 # we're working with x teachers
num_examples = len(train_data) // num_teachers # the size of each teacher's dataset
num_workers = 0 # number of subprocesses for data loading
batch_size = 64 # number of samples per batch

# Split the data among all teachers
teacher_loaders = []

for i in range(num_teachers):
    indices = list(range(i * num_examples, (i+1) * num_examples))
    data = Subset(train_data, indices)
    loader = torch.utils.data.DataLoader(data, batch_size=batch_size, num_workers=num_workers)
    teacher_loaders.append(loader)

## Create Student Training and Test Set

In [0]:
student_train_size = int(len(test_data) * 0.1) # 90% of data for training
student_test_size = int(len(test_data) * 0.1)  # 10% of data for testing

student_train_data = Subset(test_data, list(range(student_train_size)))
student_test_data = Subset(test_data, list(range(student_train_size, student_train_size + student_test_size)))

student_train_loader = torch.utils.data.DataLoader(student_train_data, batch_size=batch_size, num_workers=num_workers)
student_test_loader = torch.utils.data.DataLoader(student_test_data, batch_size=batch_size, num_workers=num_workers)

## Defining the Neural Network model

In [0]:
from torch import nn
from torch import optim
import torch.nn.functional as F

'''
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(784, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 10)

    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        x = F.relu(x)
        x = F.log_softmax(x, dim=1)
        return x
'''
      
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)

In [0]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train(model, trainloader, criterion, optimizer, epochs=10):
    
    model.to(device)
    running_loss = 0
    
    for e in range(epochs):
        
        model.train()
        
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)

            # 1) erase previous gradients (if they exist)
            optimizer.zero_grad()

            # 2) make a prediction
            pred = model.forward(images)

            # 3) calculate how much we missed
            loss = criterion(pred, labels)

            # 4) figure out which weights caused us to miss
            loss.backward()

            # 5) change those weights
            optimizer.step()

            # 6) log our progress
            running_loss += loss.item()

In [0]:
def predict(model, dataloader):
    outputs = torch.zeros(0, dtype=torch.long).to(device)
    model.to(device)
    model.eval()
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        output = model.forward(images)
        ps = torch.argmax(torch.exp(output), dim=1)
        outputs = torch.cat((outputs, ps))
    
    return outputs

## Training the Teachers

In [8]:
teacher_models = []

for i in range(num_teachers):
    print("Training Teacher #", i+1)
    model = Net()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    train(model, teacher_loaders[i], criterion, optimizer)
    teacher_models.append(model)

Training Teacher # 1




Training Teacher # 2
Training Teacher # 3
Training Teacher # 4
Training Teacher # 5
Training Teacher # 6
Training Teacher # 7
Training Teacher # 8
Training Teacher # 9
Training Teacher # 10
Training Teacher # 11
Training Teacher # 12
Training Teacher # 13
Training Teacher # 14
Training Teacher # 15
Training Teacher # 16
Training Teacher # 17
Training Teacher # 18
Training Teacher # 19
Training Teacher # 20
Training Teacher # 21
Training Teacher # 22
Training Teacher # 23
Training Teacher # 24
Training Teacher # 25
Training Teacher # 26
Training Teacher # 27
Training Teacher # 28
Training Teacher # 29
Training Teacher # 30
Training Teacher # 31
Training Teacher # 32
Training Teacher # 33
Training Teacher # 34
Training Teacher # 35
Training Teacher # 36
Training Teacher # 37
Training Teacher # 38
Training Teacher # 39
Training Teacher # 40
Training Teacher # 41
Training Teacher # 42
Training Teacher # 43
Training Teacher # 44
Training Teacher # 45
Training Teacher # 46
Training Teacher #

## Aggregate Teacher
Make a prediction for each of the teacher's models, add noise, and return the majority vote

In [0]:
# Define epsilon
epsilon = 0.2

In [10]:
preds = torch.torch.zeros((len(teacher_models), student_train_size), dtype=torch.long)

for i, model in enumerate(teacher_models):
    results = predict(model, student_train_loader)
    preds[i] = results

labels = np.array([]).astype(int)
for image_preds in np.transpose(preds):
    label_counts = np.bincount(image_preds, minlength=10)
    beta = 1 / epsilon

    for i in range(len(label_counts)):
        label_counts[i] += np.random.laplace(0, beta, 1)

    new_label = np.argmax(label_counts)
    labels = np.append(labels, new_label)



In [11]:
PATE_labels = labels
PATE_preds = preds

print(preds.shape)
print(labels.shape)
print(preds)
print(labels)

torch.Size([100, 1000])
(1000,)
tensor([[7, 2, 1,  ..., 0, 9, 9],
        [7, 6, 1,  ..., 0, 9, 9],
        [7, 2, 1,  ..., 0, 9, 9],
        ...,
        [7, 2, 1,  ..., 0, 9, 9],
        [7, 8, 1,  ..., 0, 7, 9],
        [7, 0, 1,  ..., 0, 7, 9]])
[7 2 1 0 4 1 4 9 6 9 0 6 9 0 1 5 9 7 3 4 9 6 6 5 4 0 7 4 0 1 3 1 3 6 7 2 7
 1 2 1 1 7 4 2 3 5 3 2 4 4 6 3 5 5 6 0 4 1 9 5 7 8 4 3 7 4 2 4 3 0 7 0 2 8
 1 7 3 7 9 7 9 6 2 7 8 4 7 3 6 1 3 6 4 3 1 4 1 7 6 9 6 0 5 4 9 9 2 1 9 4 8
 1 3 9 7 4 4 4 9 2 5 6 7 6 9 9 0 5 8 5 6 6 5 7 8 1 0 1 6 4 6 7 3 1 7 1 8 2
 0 4 9 9 5 5 1 5 6 0 3 4 4 6 5 4 6 5 4 4 1 4 4 7 2 3 2 1 1 8 1 8 1 8 5 0 8
 4 2 5 0 1 1 1 0 9 0 3 1 6 4 2 3 6 1 1 1 3 9 5 2 9 4 5 9 3 9 0 3 6 5 5 7 2
 2 7 1 2 8 4 1 7 3 3 8 7 7 9 2 2 4 1 5 9 8 9 2 5 0 6 4 2 9 1 9 5 7 7 2 1 2
 0 8 1 7 7 9 1 8 1 3 0 3 0 1 9 9 4 1 8 2 1 2 9 7 5 9 2 6 4 1 5 4 2 9 2 0 4
 0 0 2 8 6 7 1 2 9 0 2 7 4 3 3 0 0 5 1 9 6 5 3 5 1 7 9 3 0 4 2 0 7 1 1 2 1
 5 3 3 9 7 8 6 3 4 1 3 8 1 0 5 1 7 1 5 0 6 1 8 5 1 7 9 4 6 7 2 5 0 6 5 6 3


## PATE Analysis

In [0]:

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=PATE_preds, indices=np.array(list(range(1000))), noise_eps=epsilon, delta=1e-5)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)
'''

PATE_indices = np.array(list(range(1024)))

total_data_dep_eps = 0
total_data_ind_eps = 0
for i in range(int((9000-1)/1024)):
  print(i/1024*100, "%")
  data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=PATE_preds[:,i*1024:(i+1)*1024], indices=PATE_indices, noise_eps=epsilon, delta=1e-5)
  total_data_dep_eps += data_dep_eps
  total_data_ind_eps += data_ind_eps
  
print("Data Independent Epsilon:", total_data_ind_eps)
print("Data Dependent Epsilon:", total_data_dep_eps)
'''

0.0 %
0.09765625 %


## Training the Student

In [0]:
data = torch.FloatTensor(test_data.data.clone().detach().numpy()[:student_train_size])
targets = torch.tensor(labels)   

test_data.targets[:student_train_size] = targets


student_train_data = Subset(test_data, list(range(student_train_size)))
student_train_loader = torch.utils.data.DataLoader(student_train_data, batch_size=batch_size, num_workers=num_workers)

In [22]:
print(targets)

tensor([7, 2, 1,  ..., 6, 9, 0])


In [23]:
def train_student(model, train_loader, test_loader, criterion, optimizer, epochs=10):
    
    model.to(device)
    running_loss = 0
    steps = 0
    
    for e in range(epochs):
        
        model.train()
        
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            steps += 1
            
            # 1) erase previous gradients (if they exist)
            optimizer.zero_grad()

            # 2) make a prediction
            pred = model.forward(images)

            # 3) calculate how much we missed
            loss = criterion(pred, labels)

            # 4) figure out which weights caused us to miss
            loss.backward()

            # 5) change those weights
            optimizer.step()

            # 6) log our progress
            running_loss += loss.item()
            
            if steps % 50 == 0:
                test_loss = 0
                accuracy = 0
                model.eval()
                with torch.no_grad():
                    for images, labels in test_loader:
                        images, labels = images.to(device), labels.to(device)
                        log_ps = model(images)
                        test_loss += criterion(log_ps, labels).item()

                        # Accuracy
                        ps = torch.exp(log_ps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor))
                model.train()
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/len(train_loader)),
                      "Test Loss: {:.3f}.. ".format(test_loss/len(test_loader)),
                      "Test Accuracy: {:.3f}".format(accuracy/len(test_loader)))
                running_loss = 0
             
            


student_model = Net()
criterion = nn.NLLLoss()
optimizer = optim.Adam(student_model.parameters(), lr=0.001)
epochs = 10
train_student(student_model, student_train_loader, student_test_loader, criterion, optimizer, epochs)



Epoch: 1/10..  Training Loss: 0.717..  Test Loss: 1.178..  Test Accuracy: 0.703
Epoch: 1/10..  Training Loss: 0.327..  Test Loss: 0.547..  Test Accuracy: 0.826
Epoch: 2/10..  Training Loss: 0.192..  Test Loss: 0.386..  Test Accuracy: 0.886
Epoch: 2/10..  Training Loss: 0.197..  Test Loss: 0.378..  Test Accuracy: 0.890
Epoch: 2/10..  Training Loss: 0.136..  Test Loss: 0.350..  Test Accuracy: 0.894
Epoch: 3/10..  Training Loss: 0.126..  Test Loss: 0.327..  Test Accuracy: 0.906
Epoch: 3/10..  Training Loss: 0.158..  Test Loss: 0.348..  Test Accuracy: 0.911
Epoch: 3/10..  Training Loss: 0.112..  Test Loss: 0.314..  Test Accuracy: 0.910
Epoch: 4/10..  Training Loss: 0.111..  Test Loss: 0.313..  Test Accuracy: 0.913
Epoch: 4/10..  Training Loss: 0.139..  Test Loss: 0.327..  Test Accuracy: 0.905
Epoch: 4/10..  Training Loss: 0.094..  Test Loss: 0.320..  Test Accuracy: 0.912
Epoch: 5/10..  Training Loss: 0.109..  Test Loss: 0.290..  Test Accuracy: 0.918
Epoch: 5/10..  Training Loss: 0.125..  T

KeyboardInterrupt: ignored

## Testing the Student model

In [0]:
test_loss = 0
accuracy = 0
student_model.eval()
with torch.no_grad():
    for images, labels in student_test_loader:
        images, labels = images.to(device), labels.to(device)
        log_ps = student_model(images)
        test_loss += criterion(log_ps, labels).item()

        # Accuracy
        ps = torch.exp(log_ps)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor))
student_model.train()
print("Test Loss: {:.3f}.. ".format(test_loss/len(student_test_loader)),
      "Test Accuracy: {:.3f}".format(accuracy/len(student_test_loader)))
running_loss = 0
