# Setting Up Enviroment. 

In [28]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import numpy as np
import matplotlib.pyplot as plt

import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from sklearn.model_selection import KFold
import utils, aggregation

## Load MNIST Data

In [2]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,)),
                              ])

# Download and load the training data`b
teacher_data = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=True, transform=transform)
student_data = datasets.MNIST('~/.pytorch/MNIST_data/', download=True, train=False, transform=transform)

Divide the public data to train/test sets

In [3]:
kf = KFold(n_splits=2)

In [4]:
student_train = None
student_test = None
for i, (train_index, test_index) in enumerate(kf.split(student_data)):
    student_train = torch.utils.data.Subset(student_data, train_index)
    student_test = torch.utils.data.Subset(student_data, test_index)

## Hyper Parameters

In [5]:
num_teachers = 250
num_labels = 10
num_examples = 2048
batch_size = 64

For training

In [6]:
epochs = 15
steps = 0
running_loss = 0
print_every = 5

## CUDA device

In [7]:
# Use GPU if it's available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cuda')

# Training Teachers

Teachers

In [10]:
class Teacher(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x
    
classifiers = [None for _ in range(num_teachers)]
for i in range(num_teachers):
    classifiers[i] = Teacher()

Training teachers

In [11]:
kf = KFold(n_splits=num_teachers)

In [None]:
for t_i, (_, data_index) in enumerate(kf.split(teacher_data)):
    print ("Training model: {}".format(t_i))
    classifier = classifiers[t_i]
    
    data = torch.utils.data.Subset(teacher_data, data_index)
    
    kf_train = KFold(n_splits=2)
    for i, (train_index, test_index) in enumerate(kf.split(data)):
        train = torch.utils.data.Subset(data, train_index)
        test = torch.utils.data.Subset(data, test_index)
        
        trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
        testloader = torch.utils.data.DataLoader(test, batch_size=batch_size)

        criterion = nn.NLLLoss()

        # Only train the classifier parameters, feature parameters are frozen
        optimizer = optim.Adam(classifier.parameters(), lr=0.003)

        classifier.to(device);

        for epoch in range(epochs):
            for inputs, labels in trainloader:
                steps += 1
                # Move input and label tensors to the default device
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()

                logps = classifier.forward(inputs)
                loss = criterion(logps, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()

            if (epoch+1) % print_every == 0:
                test_loss = 0
                accuracy = 0
                classifier.eval()
                with torch.no_grad():
                    for inputs, labels in testloader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        logps = classifier.forward(inputs)
                        batch_loss = criterion(logps, labels)

                        test_loss += batch_loss.item()

                        # Calculate accuracy
                        ps = torch.exp(logps)
                        top_p, top_class = ps.topk(1, dim=1)
                        equals = top_class == labels.view(*top_class.shape)
                        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

                print(f"Epoch {epoch+1}/{epochs}.. "
                      f"Train loss: {running_loss/print_every:.3f}.. "
                      f"Test loss: {test_loss/len(testloader):.3f}.. "
                      f"Test accuracy: {accuracy/len(testloader):.3f}")
                running_loss = 0
                classifier.train()
        break

# Saving/Loading

Save model's parameters

In [None]:
all_points = []

for i in range(len(classifiers)):
    all_points += [classifiers[i].state_dict()]

checkpoint = {'classifiers_list': all_points,}
torch.save(checkpoint, 'checkpoints/teachers_checkpoint.pth') ###############################################	Save

Load model's parameters

In [11]:
classifiers = [None for _ in range(num_teachers)]
for i in range(num_teachers):
    classifiers[i] = Teacher()

checkpoint = torch.load('checkpoints/teachers_checkpoint.pth')
classifiers_list = checkpoint['classifiers_list']
for i in range(len(classifiers_list)):
    classifiers[i].load_state_dict(classifiers_list[i])

# PATE Analysis

In [12]:
teacher_preds = np.zeros((num_teachers, num_examples)).astype(int)
truths = np.zeros((num_examples)).astype(int)

In [13]:
student_train_loader = torch.utils.data.DataLoader(student_train, batch_size=batch_size)
student_test_loader = torch.utils.data.DataLoader(student_test, batch_size=batch_size)

Get teachers' predictions for 2048 samples in student_train

In [51]:
for i in range(num_teachers):
    classifier = classifiers[i]
    criterion = nn.NLLLoss()
    classifier.to(device)
    
    test_loss = 0
    accuracy = 0
    classifier.eval()
    with torch.no_grad():
        preds_l1 = 0
        for inputs, labels in student_train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            logps = classifier.forward(inputs)
            batch_loss = criterion(logps, labels)

            test_loss += batch_loss.item()

            # Calculate accuracy
            ps = torch.exp(logps)
            top_p, top_class = ps.topk(1, dim=1)
            
            teacher_preds[i,preds_l1:(preds_l1 + batch_size)] = top_class.cpu().numpy().squeeze(1).astype(int)
            truths[preds_l1:(preds_l1 + batch_size)] = labels.cpu().numpy()
            if (preds_l1 + batch_size) >= num_examples:
                break
            preds_l1 += batch_size
            
    classifier.train()

Compute most votes

In [52]:
indices = utils.cal_max(teacher_preds, num_labels)

Analysis

In [53]:
from syft.frameworks.torch.differential_privacy import pate

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=teacher_preds, indices=indices, noise_eps=0.3, delta=1e-6, moments=140)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 751.0955105579642
Data Dependent Epsilon: 0.7707510458439465


#  Training student model

Choose privacy loss level same as noise_eps in the Analysis

In [57]:
privacy_loss_lv = 0.3

Apply Report Noisy Max algorithm

In [60]:
top_class = utils.noisy_max(teacher_preds, privacy_loss_lv, num_labels)
equals = top_class == truths
accuracy = np.mean(equals)
accuracy

0.875

Student

In [61]:
class Student(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(784, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, 10)
        
    def forward(self, x):
        # make sure input tensor is flattened
        x = x.view(x.shape[0], -1)
        
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.log_softmax(self.fc4(x), dim=1)
        
        return x
    
student_model = Student()

Training student

In [62]:
print_every = 100

criterion = nn.NLLLoss()

# Only train the classifier parameters, feature parameters are frozen
optimizer = optim.Adam(student_model.parameters(), lr=0.003)
student_model.to(device);

for epoch in range(epochs):
    steps = 0
    preds_l1 = 0
    for inputs, true_labels in student_train_loader:
        # Move input and label tensors to the default device
        inputs = inputs.to(device)
        
        # Return Report Noisy Max outputs
        labels = torch.tensor(utils.noisy_max(teacher_preds=teacher_preds[:,steps*batch_size:(steps+1)*batch_size]
                                              , privacy_loss_lv=privacy_loss_lv, n_labels=num_labels)).long()
        
        labels = labels.to(device)
        
        if steps % print_every != 0:
            optimizer.zero_grad()

            logps = student_model.forward(inputs)
            loss = criterion(logps, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

        else:
            print(f"Epoch {epoch+1}/{epochs}.. "
                  f"Train loss: {running_loss/print_every:.3f}.. ")
            running_loss = 0
            
        if (preds_l1 + batch_size) >= num_examples:
            break
        preds_l1 += batch_size
            
        steps += 1

Epoch 1/15.. Train loss: 0.000.. 
Epoch 2/15.. Train loss: 0.451.. 
Epoch 3/15.. Train loss: 0.137.. 
Epoch 4/15.. Train loss: 0.082.. 
Epoch 5/15.. Train loss: 0.075.. 
Epoch 6/15.. Train loss: 0.052.. 
Epoch 7/15.. Train loss: 0.036.. 
Epoch 8/15.. Train loss: 0.050.. 
Epoch 9/15.. Train loss: 0.051.. 
Epoch 10/15.. Train loss: 0.058.. 
Epoch 11/15.. Train loss: 0.038.. 
Epoch 12/15.. Train loss: 0.014.. 
Epoch 13/15.. Train loss: 0.013.. 
Epoch 14/15.. Train loss: 0.012.. 
Epoch 15/15.. Train loss: 0.016.. 


Accuracy on the test set

In [63]:
criterion = nn.NLLLoss()
student_model.to(device);

test_loss = 0
accuracy = 0
student_model.eval()
with torch.no_grad():
    for inputs, labels in student_test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        logps = student_model.forward(inputs)
        batch_loss = criterion(logps, labels)

        test_loss += batch_loss.item()

        # Calculate accuracy
        ps = torch.exp(logps)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

print(f"Test loss: {test_loss/len(student_test_loader):.3f}.. "
      f"Test accuracy: {accuracy/len(student_test_loader):.3f}")

Test loss: 0.757.. Test accuracy: 0.888
