In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import SubsetRandomSampler

import numpy as np
from syft.frameworks.torch.differential_privacy import pate

In [2]:
# Train 10 models, each on its own slice of the training data
# Use the ten models to label the test data
# Train a new model on the test data using those model-generated labels
# Evaluate it

In [3]:
def create_data_loaders(num_training_loaders):
    """Build MNIST data loaders: several training partitions plus one test loader.

    The training set is cut into ``num_training_loaders`` contiguous,
    non-overlapping index ranges of equal size; any remainder samples at the
    end of the training set are left unused. Each partition gets its own
    loader (batch size 32) that samples randomly within the partition.

    Args:
        num_training_loaders: Number of training partitions/loaders to create.
            Must be a positive integer.

    Returns:
        A tuple ``(trainloaders, testloader, test_targets)`` where
        ``trainloaders`` is a list of training loaders, ``testloader`` iterates
        the test set in its stored order (batch size 1000, no shuffling) and
        ``test_targets`` is the tensor of test-set labels.

    Raises:
        ValueError: If ``num_training_loaders`` is smaller than 1.
    """
    if num_training_loaders < 1:
        raise ValueError("num_training_loaders must be a positive integer")

    # Convert images to tensors and normalize them with mean 0.1307 / std 0.3081.
    transform = transforms.Compose([transforms.ToTensor(),
                                    transforms.Normalize((0.1307,), (0.3081,))])

    # Download (if needed) and load the training data.
    mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

    # Integer division keeps the partition size exact without a float round-trip.
    data_per_set = len(mnist_trainset) // num_training_loaders
    trainloaders = []
    for i in range(num_training_loaders):
        start = i * data_per_set
        train_indices = range(start, start + data_per_set)
        trainloaders.append(torch.utils.data.DataLoader(mnist_trainset,
                                                         batch_size=32,
                                                         sampler=SubsetRandomSampler(train_indices)))

    # Download (if needed) and load the test data.
    mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    # shuffle=False keeps batches in dataset order, so predictions collected by
    # iterating this loader line up index-for-index with ``test_targets``.
    testloader = torch.utils.data.DataLoader(mnist_testset, batch_size=1000, shuffle=False)
    # ``test_labels`` is a deprecated alias; ``targets`` is the supported attribute.
    test_targets = mnist_testset.targets

    return trainloaders, testloader, test_targets

In [4]:
class Net(nn.Module):
    """Fully connected classifier with layer widths 784 -> 256 -> 128 -> 64 -> 10.

    The forward pass returns log-probabilities (log-softmax over dim 1).
    """

    def __init__(self):
        super().__init__()
        # Register fc1..fc4 in order; consecutive widths give (in, out) sizes.
        widths = (784, 256, 128, 64, 10)
        for idx, (n_in, n_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{idx}", nn.Linear(n_in, n_out))

    def forward(self, x):
        """Flatten each sample, apply three ReLU layers, then a log-softmax head."""
        # Collapse everything after the batch dimension into one feature axis.
        hidden = x.view(x.shape[0], -1)

        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = F.relu(layer(hidden))

        return F.log_softmax(self.fc4(hidden), dim=1)

In [5]:
def create_models(n):
    """Return a list containing ``n`` newly constructed ``Net`` instances."""
    nets = []
    for _ in range(n):
        nets.append(Net())
    return nets

# Train the models

In [14]:
# Ten training partitions, plus the test loader and its labels.
trainloaders, testloader, test_target = create_data_loaders(num_training_loaders=10)

In [7]:
# Negative log-likelihood loss, matching the log-softmax output of Net.
criterion = nn.NLLLoss()
# Ten independently initialised networks.
models = create_models(n=10)

In [13]:
def train(model,trainloader,epochs=4,loss_fn=None):
    """Train ``model`` in place with Adam (lr=1e-2) and print the mean loss per epoch.

    Args:
        model: Module to optimise; its parameters are updated in place.
        trainloader: Sized iterable yielding ``(images, labels)`` batches.
        epochs: Number of full passes over ``trainloader``.
        loss_fn: Callable ``loss_fn(output, labels)`` returning a scalar tensor.
            When omitted, the notebook-level ``criterion`` is used.

    Returns:
        None. Progress is reported through ``print``.
    """
    if loss_fn is None:
        # Fall back to the loss defined at notebook level.
        loss_fn = criterion

    optimizer = optim.Adam(model.parameters(),lr=1e-2)
    # Make sure train-time layer behaviour is active.
    model.train()
    for e in range(epochs):
        running_loss = 0.0
        for images,labels in trainloader:
            optimizer.zero_grad()

            out = model(images)
            loss = loss_fn(out,labels)
            loss.backward()
            optimizer.step()

            # .item() yields a plain float; summing the tensor itself would keep
            # every batch's autograd graph alive for the whole epoch.
            running_loss += loss.item()

        print(f'Epoch : {e} loss : {running_loss/len(trainloader)}')


In [9]:
# Train every model on its own loader, printing a separator and index first.
banner = '-' * 20
for i, (model, trainloader) in enumerate(zip(models, trainloaders)):
    print(f"{banner}\nModel {i}")
    train(model, trainloader)

--------------------
Model 0
Epoch : 0 loss : 0.711211085319519
Epoch : 1 loss : 0.33079639077186584
Epoch : 2 loss : 0.3247036337852478
Epoch : 3 loss : 0.3064037561416626
--------------------
Model 1
Epoch : 0 loss : 0.7082605361938477
Epoch : 1 loss : 0.3686869740486145
Epoch : 2 loss : 0.2889716923236847
Epoch : 3 loss : 0.2659831643104553
--------------------
Model 2
Epoch : 0 loss : 0.773611843585968
Epoch : 1 loss : 0.37768498063087463
Epoch : 2 loss : 0.32594940066337585
Epoch : 3 loss : 0.24650651216506958
--------------------
Model 3
Epoch : 0 loss : 0.6658878326416016
Epoch : 1 loss : 0.33114922046661377
Epoch : 2 loss : 0.2833079397678375
Epoch : 3 loss : 0.2809716761112213
--------------------
Model 4
Epoch : 0 loss : 0.6730855107307434
Epoch : 1 loss : 0.36100196838378906
Epoch : 2 loss : 0.27570703625679016
Epoch : 3 loss : 0.27351415157318115
--------------------
Model 5
Epoch : 0 loss : 0.6354440450668335
Epoch : 1 loss : 0.33787599205970764
Epoch : 2 loss : 0.31433707

# Label test set

In [10]:
def label_data(models,dataloader):
    """Collect each model's arg-max class prediction for every sample.

    Args:
        models: Iterable of callables mapping an input batch to a 2-D score
            tensor (one row per sample, one column per class).
        dataloader: Iterable yielding ``(inputs, labels)`` batches; the labels
            are ignored. It is iterated once per model, so it must give the
            same sample order every time for the rows to be comparable.

    Returns:
        ``np.ndarray`` with one row per model and one column per sample,
        holding the predicted class indices.
    """
    labels = []
    # Inference only: skip autograd bookkeeping so no graphs are built.
    with torch.no_grad():
        for model in models:
            preds = []
            for img, _ in dataloader:
                preds.extend(torch.argmax(model(img), dim=1).tolist())
            labels.append(preds)
    return np.array(labels)

In [11]:
def DP(labels,eps,num_classes=10):
    """Aggregate per-model predictions into one noisy majority label per sample.

    For every sample the votes for each class are counted, Laplace noise with
    scale ``1 / eps`` is added to each count, and the class with the largest
    noisy count is selected.

    Args:
        labels: 2-D array-like of non-negative integer class indices with one
            row per model and one column per sample.
        eps: Positive noise parameter; larger values mean less noise.
        num_classes: Minimum number of vote bins per sample (defaults to 10).

    Returns:
        1-D ``np.ndarray`` with one selected class index per sample.

    Raises:
        ValueError: If ``labels`` is not 2-D or ``eps`` is not positive.
    """
    labels = np.asarray(labels)
    if labels.ndim != 2:
        raise ValueError("labels must be a 2-D array of shape (num_models, num_samples)")
    if eps <= 0:
        raise ValueError("eps must be positive")

    beta = 1 / eps
    # Work sample by sample: one row of model votes per sample.
    votes_per_sample = labels.transpose(1, 0)
    new_labels = []
    for votes in votes_per_sample:
        # Count the votes for each class.
        label_counts = np.bincount(votes, minlength=num_classes).astype(np.float64)
        # Perturb every count independently before picking the winner.
        label_counts += np.random.laplace(0, beta, len(label_counts))
        new_labels.append(np.argmax(label_counts))

    return np.array(new_labels, dtype=np.intp)


In [12]:
# One row of predicted classes per model, one column per test sample.
labels = label_data(models=models, dataloader=testloader)

In [18]:
# Fraction of all (model, sample) predictions that equal the true test label;
# the 1-D target array broadcasts across the model rows.
(labels == test_target.numpy()).mean()

0.91172

In [20]:
# Noise parameter shared by DP() (Laplace scale is 1/eps) and by
# pate.perform_analysis (passed as noise_eps).
eps = 1

In [21]:
# Noisy majority vote over the models' predictions, one label per test sample.
noise_labels = DP(labels=labels, eps=eps)

In [22]:
# Fraction of noisy aggregated labels that equal the true test label.
(noise_labels == test_target.numpy()).mean()

0.9465

In [19]:
# labels = labels[:,:9000]
# noise_labels = noise_labels[:9000]

In [23]:
# Run syft's PATE analysis on the per-model predictions and the noisy
# aggregated labels, using the same eps that generated the noise.
# NOTE(review): the displayed pair is presumably (data-dependent epsilon,
# data-independent epsilon) — confirm against the syft documentation.
pate.perform_analysis(teacher_preds=labels,indices=noise_labels,noise_eps=eps)

l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compute sensitivity
l too large to compu

(468.6354750574952, 20001.439115683123)

In [21]:
# A single fresh network to be trained on the labelled test data.
(model_test,) = create_models(1)

In [None]:
# Train on the test images paired with the noisy aggregated labels.
# Iterating `testloader` directly would feed the true test labels to the model
# and bypass the labels produced by the other models entirely.
# `testloader` is not shuffled, so concatenating its batches gives the images
# in the same order in which `noise_labels` was produced.
student_images = torch.cat([images for images, _ in testloader])
student_labels = torch.as_tensor(noise_labels, dtype=torch.long)
student_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(student_images, student_labels),
    batch_size=32,
    shuffle=True,
)
train(model_test, student_loader)