# Setting Up Enviroment. 

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from sklearn.model_selection import KFold
import utils

## Load MNIST Data

In [2]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                              ])

# Download and load the training data`b
teacher_data = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
student_data = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)

Divide the public data to train/test sets

In [3]:
kf = KFold(n_splits=2)

In [4]:
student_train = None
student_test = None
for i, (train_index, test_index) in enumerate(kf.split(student_data)):
    student_train = torch.utils.data.Subset(student_data, train_index)
    student_test = torch.utils.data.Subset(student_data, test_index)

## Hyper Parameters

In [5]:
num_teachers = 200
num_labels = 10
num_examples = 2048
batch_size = 2048

For training

In [6]:
epochs = 15
steps = 0
running_loss = 0
print_every = 5

## CUDA device

In [7]:
# Use GPU if it's available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Training Teachers

Teachers

In [6]:
class Teacher(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x
    
classifiers = [None for _ in range(num_teachers)]
for i in range(num_teachers):
    classifiers[i] = Teacher()

Training teachers

In [9]:
kf = KFold(n_splits=num_teachers)

In [10]:
for t_i, (_, data_index) in enumerate(kf.split(teacher_data)):
    print ("Training model: {}".format(t_i))
    classifier = classifiers[t_i]
    
    data = torch.utils.data.Subset(teacher_data, data_index)
    
    kf_train = KFold(n_splits=2)
    for i, (train_index, test_index) in enumerate(kf_train.split(data)):
        train = torch.utils.data.Subset(data, train_index)
        test = torch.utils.data.Subset(data, test_index)
        
        trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
        testloader = torch.utils.data.DataLoader(test, batch_size=batch_size)

        criterion = nn.NLLLoss()

        # Only train the classifier parameters, feature parameters are frozen
        optimizer = optim.Adam(classifier.parameters(), lr=0.003)

        classifier.to(device);

        for epoch in range(epochs):
            for inputs, labels in trainloader:
                steps += 1
                # Move input and label tensors to the default device
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                logps = classifier.forward(inputs)
                loss = criterion(logps, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            if (epoch+1) % print_every == 0:
                test_loss = 0
                accuracy = 0
                classifier.eval()
                with torch.no_grad():
                    for inputs, labels in testloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = classifier.forward(inputs)
                        batch_loss = criterion(logps, labels)

                        test_loss += batch_loss.item()

                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(testloader):.3f}.. "
                      f"Test accuracy: {accuracy/len(testloader):.3f}")
                running_loss = 0
                classifier.train()
        break

Training model: 0
Epoch 5/30.. Train loss: 2.203.. Test loss: 2.020.. Test accuracy: 0.533
Epoch 10/30.. Train loss: 1.613.. Test loss: 1.339.. Test accuracy: 0.667
Epoch 15/30.. Train loss: 0.846.. Test loss: 0.854.. Test accuracy: 0.760
Epoch 20/30.. Train loss: 0.361.. Test loss: 0.760.. Test accuracy: 0.800
Epoch 25/30.. Train loss: 0.117.. Test loss: 0.849.. Test accuracy: 0.813
Epoch 30/30.. Train loss: 0.029.. Test loss: 0.993.. Test accuracy: 0.820
Training model: 1
Epoch 5/30.. Train loss: 2.168.. Test loss: 2.032.. Test accuracy: 0.247
Epoch 10/30.. Train loss: 1.638.. Test loss: 1.566.. Test accuracy: 0.553
Epoch 15/30.. Train loss: 0.998.. Test loss: 0.981.. Test accuracy: 0.713
Epoch 20/30.. Train loss: 0.494.. Test loss: 0.772.. Test accuracy: 0.753
Epoch 25/30.. Train loss: 0.203.. Test loss: 0.786.. Test accuracy: 0.760
Epoch 30/30.. Train loss: 0.057.. Test loss: 0.843.. Test accuracy: 0.760
Training model: 2
Epoch 5/30.. Train loss: 2.202.. Test loss: 2.051.. Test acc

Epoch 15/30.. Train loss: 0.832.. Test loss: 0.796.. Test accuracy: 0.667
Epoch 20/30.. Train loss: 0.454.. Test loss: 0.623.. Test accuracy: 0.787
Epoch 25/30.. Train loss: 0.229.. Test loss: 0.610.. Test accuracy: 0.800
Epoch 30/30.. Train loss: 0.100.. Test loss: 0.594.. Test accuracy: 0.847
Training model: 36
Epoch 5/30.. Train loss: 2.143.. Test loss: 1.964.. Test accuracy: 0.253
Epoch 10/30.. Train loss: 1.491.. Test loss: 1.309.. Test accuracy: 0.620
Epoch 15/30.. Train loss: 0.804.. Test loss: 0.864.. Test accuracy: 0.747
Epoch 20/30.. Train loss: 0.397.. Test loss: 0.745.. Test accuracy: 0.793
Epoch 25/30.. Train loss: 0.168.. Test loss: 0.752.. Test accuracy: 0.760
Epoch 30/30.. Train loss: 0.059.. Test loss: 0.897.. Test accuracy: 0.780
Training model: 37
Epoch 5/30.. Train loss: 2.170.. Test loss: 1.990.. Test accuracy: 0.520
Epoch 10/30.. Train loss: 1.582.. Test loss: 1.428.. Test accuracy: 0.513
Epoch 15/30.. Train loss: 0.841.. Test loss: 1.022.. Test accuracy: 0.667
Ep

Epoch 25/30.. Train loss: 0.090.. Test loss: 1.024.. Test accuracy: 0.793
Epoch 30/30.. Train loss: 0.032.. Test loss: 1.094.. Test accuracy: 0.773
Training model: 71
Epoch 5/30.. Train loss: 2.106.. Test loss: 1.822.. Test accuracy: 0.433
Epoch 10/30.. Train loss: 1.296.. Test loss: 1.038.. Test accuracy: 0.653
Epoch 15/30.. Train loss: 0.629.. Test loss: 0.736.. Test accuracy: 0.747
Epoch 20/30.. Train loss: 0.281.. Test loss: 0.579.. Test accuracy: 0.827
Epoch 25/30.. Train loss: 0.113.. Test loss: 0.686.. Test accuracy: 0.800
Epoch 30/30.. Train loss: 0.041.. Test loss: 0.712.. Test accuracy: 0.820
Training model: 72
Epoch 5/30.. Train loss: 2.104.. Test loss: 1.968.. Test accuracy: 0.433
Epoch 10/30.. Train loss: 1.287.. Test loss: 1.210.. Test accuracy: 0.620
Epoch 15/30.. Train loss: 0.553.. Test loss: 0.870.. Test accuracy: 0.720
Epoch 20/30.. Train loss: 0.203.. Test loss: 0.827.. Test accuracy: 0.753
Epoch 25/30.. Train loss: 0.065.. Test loss: 0.900.. Test accuracy: 0.767
Ep

Epoch 5/30.. Train loss: 2.143.. Test loss: 1.905.. Test accuracy: 0.387
Epoch 10/30.. Train loss: 1.502.. Test loss: 1.314.. Test accuracy: 0.600
Epoch 15/30.. Train loss: 0.745.. Test loss: 0.985.. Test accuracy: 0.693
Epoch 20/30.. Train loss: 0.318.. Test loss: 1.064.. Test accuracy: 0.713
Epoch 25/30.. Train loss: 0.122.. Test loss: 1.283.. Test accuracy: 0.733
Epoch 30/30.. Train loss: 0.035.. Test loss: 1.429.. Test accuracy: 0.713
Training model: 107
Epoch 5/30.. Train loss: 2.200.. Test loss: 2.055.. Test accuracy: 0.327
Epoch 10/30.. Train loss: 1.610.. Test loss: 1.427.. Test accuracy: 0.547
Epoch 15/30.. Train loss: 0.913.. Test loss: 1.049.. Test accuracy: 0.633
Epoch 20/30.. Train loss: 0.465.. Test loss: 0.875.. Test accuracy: 0.760
Epoch 25/30.. Train loss: 0.201.. Test loss: 0.862.. Test accuracy: 0.753
Epoch 30/30.. Train loss: 0.069.. Test loss: 1.020.. Test accuracy: 0.753
Training model: 108
Epoch 5/30.. Train loss: 2.156.. Test loss: 2.058.. Test accuracy: 0.313
E

Epoch 15/30.. Train loss: 0.893.. Test loss: 1.083.. Test accuracy: 0.700
Epoch 20/30.. Train loss: 0.367.. Test loss: 1.193.. Test accuracy: 0.693
Epoch 25/30.. Train loss: 0.136.. Test loss: 1.329.. Test accuracy: 0.713
Epoch 30/30.. Train loss: 0.047.. Test loss: 1.533.. Test accuracy: 0.707
Training model: 142
Epoch 5/30.. Train loss: 2.113.. Test loss: 1.916.. Test accuracy: 0.347
Epoch 10/30.. Train loss: 1.446.. Test loss: 1.428.. Test accuracy: 0.467
Epoch 15/30.. Train loss: 0.799.. Test loss: 1.056.. Test accuracy: 0.647
Epoch 20/30.. Train loss: 0.441.. Test loss: 1.017.. Test accuracy: 0.667
Epoch 25/30.. Train loss: 0.223.. Test loss: 0.962.. Test accuracy: 0.760
Epoch 30/30.. Train loss: 0.095.. Test loss: 1.022.. Test accuracy: 0.747
Training model: 143
Epoch 5/30.. Train loss: 2.185.. Test loss: 2.008.. Test accuracy: 0.287
Epoch 10/30.. Train loss: 1.561.. Test loss: 1.475.. Test accuracy: 0.447
Epoch 15/30.. Train loss: 0.871.. Test loss: 1.163.. Test accuracy: 0.660


Epoch 25/30.. Train loss: 0.198.. Test loss: 1.461.. Test accuracy: 0.640
Epoch 30/30.. Train loss: 0.077.. Test loss: 1.732.. Test accuracy: 0.627
Training model: 177
Epoch 5/30.. Train loss: 2.154.. Test loss: 2.105.. Test accuracy: 0.293
Epoch 10/30.. Train loss: 1.487.. Test loss: 1.575.. Test accuracy: 0.467
Epoch 15/30.. Train loss: 0.811.. Test loss: 1.272.. Test accuracy: 0.640
Epoch 20/30.. Train loss: 0.365.. Test loss: 1.248.. Test accuracy: 0.727
Epoch 25/30.. Train loss: 0.147.. Test loss: 1.291.. Test accuracy: 0.713
Epoch 30/30.. Train loss: 0.048.. Test loss: 1.357.. Test accuracy: 0.740
Training model: 178
Epoch 5/30.. Train loss: 2.169.. Test loss: 1.964.. Test accuracy: 0.293
Epoch 10/30.. Train loss: 1.574.. Test loss: 1.318.. Test accuracy: 0.620
Epoch 15/30.. Train loss: 0.859.. Test loss: 0.833.. Test accuracy: 0.733
Epoch 20/30.. Train loss: 0.402.. Test loss: 0.722.. Test accuracy: 0.740
Epoch 25/30.. Train loss: 0.175.. Test loss: 0.743.. Test accuracy: 0.780


# Saving/Loading

Save model's parameters

In [11]:
all_points = []

for i in range(len(classifiers)):
    all_points += [classifiers[i].state_dict()]

checkpoint = {'classifiers_list': all_points,}
torch.save(checkpoint, 'checkpoints/teachers_checkpoint.pth') ###############################################	Save

Load model's parameters

In [8]:
class Teacher(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x

classifiers = [None for _ in range(num_teachers)]
for i in range(num_teachers):
    classifiers[i] = Teacher()

checkpoint = torch.load('checkpoints/teachers_checkpoint.pth')
classifiers_list = checkpoint['classifiers_list']
for i in range(len(classifiers_list)):
    classifiers[i].load_state_dict(classifiers_list[i])

In [9]:
batch_size = 64

# PATE Analysis

In [10]:
teacher_preds = np.zeros((num_teachers, num_examples)).astype(int)
truths = np.zeros((num_examples)).astype(int)

In [11]:
student_train_loader = torch.utils.data.DataLoader(student_train, batch_size=batch_size)
student_test_loader = torch.utils.data.DataLoader(student_test, batch_size=batch_size)

Get teachers' predictions for 2048 samples in student_train

In [12]:
for i in range(num_teachers):
    classifier = classifiers[i]
    criterion = nn.NLLLoss()
    classifier.to(device)
    
    test_loss = 0
    accuracy = 0
    classifier.eval()
    with torch.no_grad():
        preds_l1 = 0
        for inputs, labels in student_train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = classifier.forward(inputs)
            batch_loss = criterion(logps, labels)

            test_loss += batch_loss.item()

            # Calculate accuracy
            ps = torch.exp(logps)
            top_p, top_class = ps.topk(1, dim=1)
            
            teacher_preds[i,preds_l1:(preds_l1 + batch_size)] = top_class.cpu().numpy().squeeze(1).astype(int)
            truths[preds_l1:(preds_l1 + batch_size)] = labels.cpu().numpy()
            if (preds_l1 + batch_size) >= num_examples:
                break
            preds_l1 += batch_size
            
    classifier.train()

Compute most votes

In [13]:
indices = utils.cal_max(teacher_preds, num_labels)

Analysis

In [14]:
from syft.frameworks.torch.differential_privacy import pate

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=teacher_preds, indices=indices, noise_eps=2.5, delta=1e-6, moments=140)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 10240.098682218271
Data Dependent Epsilon: 0.8644983575706948


#  Training student model

Choose privacy loss level same as noise_eps in the Analysis

In [15]:
privacy_loss_lv = 2.5

Apply Report Noisy Max algorithm

In [16]:
top_class = utils.noisy_max(teacher_preds, privacy_loss_lv, num_labels)
equals = top_class == truths
accuracy = np.mean(equals)
accuracy

0.8447265625

Student

In [17]:
class Student(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x
    
student_model = Student()

Training student

In [18]:
print_every = 100

criterion = nn.NLLLoss()

# Only train the classifier parameters, feature parameters are frozen
optimizer = optim.Adam(student_model.parameters(), lr=0.003)
student_model.to(device);

for epoch in range(epochs):
    steps = 0
    preds_l1 = 0
    for inputs, true_labels in student_train_loader:
        # Move input and label tensors to the default device
        inputs = inputs.to(device)
        
        # Return Report Noisy Max outputs
        labels = torch.tensor(utils.noisy_max(teacher_preds=teacher_preds[:,steps*batch_size:(steps+1)*batch_size]
                                              , privacy_loss_lv=privacy_loss_lv, n_labels=num_labels)).long()
        
        labels = labels.to(device)
        
        if steps % print_every != 0:
            optimizer.zero_grad()

            logps = student_model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        else:
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. ")
            running_loss = 0
            
        if (preds_l1 + batch_size) >= num_examples:
            break
        preds_l1 += batch_size
            
        steps += 1

Epoch 1/15.. Train loss: 0.000.. 
Epoch 2/15.. Train loss: 0.503.. 
Epoch 3/15.. Train loss: 0.168.. 
Epoch 4/15.. Train loss: 0.104.. 
Epoch 5/15.. Train loss: 0.068.. 
Epoch 6/15.. Train loss: 0.056.. 
Epoch 7/15.. Train loss: 0.048.. 
Epoch 8/15.. Train loss: 0.041.. 
Epoch 9/15.. Train loss: 0.049.. 
Epoch 10/15.. Train loss: 0.041.. 
Epoch 11/15.. Train loss: 0.048.. 
Epoch 12/15.. Train loss: 0.052.. 
Epoch 13/15.. Train loss: 0.028.. 
Epoch 14/15.. Train loss: 0.033.. 
Epoch 15/15.. Train loss: 0.042.. 


Accuracy on the test set

In [19]:
criterion = nn.NLLLoss()
student_model.to(device);

test_loss = 0
accuracy = 0
student_model.eval()
with torch.no_grad():
    for inputs, labels in student_test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        logps = student_model.forward(inputs)
        batch_loss = criterion(logps, labels)

        test_loss += batch_loss.item()

        # Calculate accuracy
        ps = torch.exp(logps)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

print(f"Test loss: {test_loss/len(student_test_loader):.3f}.. "
      f"Test accuracy: {accuracy/len(student_test_loader):.3f}")

Test loss: 0.704.. Test accuracy: 0.871
