# PATE application on Deep Learning (MNIST data set)

## Load Data

In [1]:
# import libraries
import torch
import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler # for validation

# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 20
# percentage of training set to use as validation
valid_size = 0.2

# transforms
train_transforms = transforms.Compose([#transforms.RandomRotation(30),
                                       # transforms.RandomResizedCrop(224),
                                       # transforms.RandomHorizontalFlip(),
                                       transforms.ToTensor(),
                                       transforms.Normalize([0.5,], [0.5,])]) # mean, std
 

test_transforms = transforms.Compose([#transforms.Resize(255),
                                      #transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize([0.5,], [0.5,])]) # mean, std


# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=train_transforms)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=test_transforms)



In [2]:
# helper function for prepare_teachers()
# return dataloader for each teacher
def partition_teachers(train_data, partition_indices, batch_size, num_workers=0):
    num_train = len(partition_indices)
    indices = list(range(num_train))
    split = int(np.floor(valid_size * num_train))
    train_idx, valid_idx = indices[split:], indices[:split]

    # define samplers for obtaining training and validation batches
    train_sampler = SubsetRandomSampler(train_idx)
    valid_sampler = SubsetRandomSampler(valid_idx)

    # prepare data loaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
        sampler=train_sampler, num_workers=num_workers) # for validation: sampler = train_sampler
    valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
        sampler=valid_sampler, num_workers=num_workers) # for validation: sampler = train_sampler
    return train_loader, valid_loader
    
# create dataloaders
# param train_data: dataset object for train & validation set
# param test_data: dataset object for test set
# param batch_size: batch size
# param num_teachers: number of teachers
# reutrn dataloader array for teachers, dataloader array for valid set, dataloader for one test set
def prepare_teachers(train_data, test_data, batch_size, num_teachers, num_workers=0):
    # obtain training indices that will be used for validation
    length = len(train_data)
    length_per_teachers = int(length / num_teachers)
    train_loaders = []
    valid_loaders = []
    
    indices = list(range(length))
    np.random.shuffle(indices)
    
    for i in range(num_teachers):
        start = i * length_per_teachers
        end = (i+1) * length_per_teachers
        train_loader, valid_loader = partition_teachers(train_data, indices[start:end], batch_size)
        train_loaders.append(train_loader)
        valid_loaders.append(valid_loader)
    
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
        num_workers=num_workers)
    
    return train_loaders, valid_loaders

## Define Network - CONV 2 layers, MLP 2 layers

In [4]:
import torch.nn as nn
import torch.nn.functional as F

# define the CNN architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
       # convolutional layer (sees 28x28x1 image tensor)
        self.conv1 = nn.Conv2d(1, 4, 3, padding=1)
        # convolutional layer (sees 14x14x4 tensor after MaxPool)
        self.conv2 = nn.Conv2d(4, 16, 3, padding=1)
        # max pooling layer
        self.pool = nn.MaxPool2d(2, 2)
        # linear layer (7 * 7 * 16)
        self.fc1 = nn.Linear(7 * 7 * 16, 512)
        # linear layer (512 -> 10)
        self.fc2 = nn.Linear(512, 10)
        # dropout layer (p=0.20)
        self.dropout = nn.Dropout(0.20)
        
        
    def forward(self, x):
        # add sequence of convolutional and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten image input
        x = x.view(-1, 7 * 7 * 16)
        # add dropout layer
        x = self.dropout(x)
        # add 1st hidden layer, with relu activation function
        x = F.relu(self.fc1(x))
        # add dropout layer
        x = self.dropout(x)
        # add 2nd hidden layer, with relu activation function
        x = self.fc2(x)
        # LogSoftMax
        return F.log_softmax(x, dim=1)

In [5]:
from torch import optim

def train(train_loader, valid_loader, filename):
    # create a complete CNN
    model = Net()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.003)

    train_on_gpu = torch.cuda.is_available()

    if train_on_gpu:
        print('CUDA is available!  Training on GPU ...')
        model.cuda()

    valid_loss_min = np.Inf # track change in validation loss
    epochs = 5

    train_losses, valid_losses = [], []
    for epoch in range(epochs):

        train_loss = 0.0
        valid_loss = 0.0

        model.train()

        for images, labels in train_loader:

            if train_on_gpu:
                images, labels = images.cuda(), labels.cuda()

            optimizer.zero_grad()

            log_ps = model(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        model.eval()
        with torch.no_grad():
            for images, labels in valid_loader:
                # move tensors to GPU if CUDA is available
                if train_on_gpu:
                    images, labels = images.cuda(), labels.cuda()

                # forward pass: compute predicted outputs by passing inputs to the model
                output = model(images)
                # calculate the batch loss
                loss = criterion(output, labels)
                # update average validation loss 
                valid_loss += loss.item()

        # calculate average losses
        train_loss = train_loss/len(train_loader.sampler)
        valid_loss = valid_loss/len(valid_loader.sampler)

        train_losses.append(train_loss)
        valid_losses.append(valid_loss)

        # print training/validation statistics 
        print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
            epoch, train_loss, valid_loss))
        
            # save model if validation loss has decreased
        if valid_loss <= valid_loss_min:
            print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
            torch.save(model.state_dict(), filename)
            valid_loss_min = valid_loss    

In [6]:
num_teachers = 100
batch_size = 20

train_loaders, valid_loaders = prepare_teachers(train_data, test_data, batch_size, num_teachers)


for i in range(num_teachers):
    filename = 'teacher' + str(i)
    train(train_loaders[i], valid_loaders[i], filename)


Epoch: 0 	Training Loss: 0.020020 	Validation Loss: 0.008309
Validation loss decreased (inf --> 0.008309).  Saving model ...
Epoch: 1 	Training Loss: 0.007747 	Validation Loss: 0.006081
Validation loss decreased (0.008309 --> 0.006081).  Saving model ...
Epoch: 2 	Training Loss: 0.005485 	Validation Loss: 0.005231
Validation loss decreased (0.006081 --> 0.005231).  Saving model ...
Epoch: 3 	Training Loss: 0.004490 	Validation Loss: 0.006396
Epoch: 4 	Training Loss: 0.003621 	Validation Loss: 0.004928
Validation loss decreased (0.005231 --> 0.004928).  Saving model ...
Epoch: 0 	Training Loss: 0.019387 	Validation Loss: 0.008474
Validation loss decreased (inf --> 0.008474).  Saving model ...
Epoch: 1 	Training Loss: 0.006946 	Validation Loss: 0.005447
Validation loss decreased (0.008474 --> 0.005447).  Saving model ...
Epoch: 2 	Training Loss: 0.005052 	Validation Loss: 0.005190
Validation loss decreased (0.005447 --> 0.005190).  Saving model ...
Epoch: 3 	Training Loss: 0.004044 	Vali

## Aggregation

In [108]:
def predict(model, criterion, optimizer, test_loader):
    
    
    pred_results = np.array([])
    accuracy = 0
    test_loss = 0
    
    with torch.no_grad(): 
        model.eval()       
        for image,label in test_loader:
            #image = image.view(image.shape[0],-1)
            output = model(image)
            loss = criterion(output,label)
            ps = torch.exp(output) # logSoftMax -> probability
            top_p,top_class = ps.topk(1,dim=1) #find the class that the model predicted
            equals = top_class == label.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor))
            pred_results = np.concatenate( (pred_results, top_class.view(1,-1).numpy()), axis=None)
                # make a row of array
            test_loss+=loss.item()
        #print(f'Accuracy: {accuracy/len(test_loader)}')
        #print(f"Loss:{test_loss/len(test_loader)}")
        
        return pred_results
        

    
def get_pred_teachers(num_teachers, test_data, batch_size, num_workers=0 ):
    model = Net()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.003)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                num_workers=num_workers)
    result = np.zeros( (num_teachers, len(test_data)) )
    for i in range(num_teachers):
        filename = 'teacher'+str(i)
        model.load_state_dict(torch.load(filename))
        result[i] = predict(model, criterion, optimizer, test_loader)
    return result

preds = get_pred_teachers(5, test_data, 20)

In [104]:
def aggregation(preds, num_labels, epsilon = 0.1):

    preds = preds.transpose(1,0)
    
    new_labels = []
    
    for an_image in preds:
        an_image = an_image.astype(int)

        label_counts = np.bincount(an_image, minlength=num_labels)

        beta = 1 / epsilon

        for i in range(len(label_counts)):
            label_counts[i] += np.random.laplace(0, beta, 1)

        new_label = np.argmax(label_counts)

        new_labels.append(new_label)    
    
    return new_labels

new_labels_small_epsilon = aggregation(preds , 10, 0.1)
print(new_labels_small_epsilon[:10])
new_labels_large_epsilon = aggregation(preds , 10, 10)

print(new_labels_large_epsilon[:10])

[2, 8, 0, 3, 0, 3, 0, 3, 0, 6]
[7, 2, 1, 0, 4, 1, 4, 9, 5, 9]


In [105]:
true_labels = test_data.targets.clone()
print(true_labels[:10])

tensor([7, 2, 1, 0, 4, 1, 4, 9, 5, 9])


In [92]:
def train_student(test_data, student_labels, batch_size):

    test_data.targets = torch.tensor(student_labels)
    student_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
                                                 num_workers=0)
    model = Net()
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.003)

    
    epochs = 5
    steps = 0

    
    for e in range(epochs):
        for images, labels in student_loader:
            # images = images.to(device)
            # labels = labels.to(device)
            #images = images.view(images.shape[0], -1)

            optimizer.zero_grad()

            log_ps = model(images)
            loss = criterion(log_ps, labels)
            loss.backward()
            optimizer.step()

    return predict(model, criterion, optimizer, student_loader)

student_pred_small_epsilon = train_student(test_data, new_labels_small_epsilon, 20)
print(student_pred_small_epsilon)
student_pred_large_epsilon = train_student(test_data, new_labels_large_epsilon, 20)
print(student_pred_large_epsilon)

Accuracy: 0.1697000116109848
Loss:2.2592882080078125
[2. 2. 1. ... 4. 5. 5.]
Accuracy: 0.9807003140449524
Loss:0.05552749804426548
[7. 2. 1. ... 4. 5. 6.]


In [93]:
from syft.frameworks.torch.differential_privacy import pate

W0630 15:17:05.829890 140009956505408 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/home/sijoonlee/anaconda3/envs/pysyft/lib/python3.7/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0630 15:17:05.845176 140009956505408 deprecation_wrapper.py:119] From /home/sijoonlee/anaconda3/envs/pysyft/lib/python3.7/site-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [113]:
teacher_preds = preds
indices = true_labels

#data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=teacher_preds, indices=indices, noise_eps=0.1, delta=1e-5)

#assert data_dep_eps < data_ind_eps

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=teacher_preds, indices=indices, noise_eps=0.1, delta=1e-5, moments=20)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 411.5129254649703
Data Dependent Epsilon: 411.51292546502725
