In [1]:
import numpy as np
import torch
from torchvision import datasets, transforms
from torch.utils.data import Subset
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
data_transformer = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

train_data = datasets.MNIST('/content/drive/MyDrive/ESL/data',
                            train = True,
                            transform = data_transformer,
                            target_transform = None,
                            download = True)

test_data = datasets.MNIST('/content/drive/MyDrive/ESL/data',
                            train = False,
                            transform = data_transformer,
                            target_transform = None,
                            download = True)

In [4]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /content/drive/MyDrive/ESL/data
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [5]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /content/drive/MyDrive/ESL/data
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [6]:
n_teachers = 200
batch_size = 50

In [7]:
def make_data_loaders(data, n_teach):
    data_loaders = []
    data_len = len(train_data) // n_teach

    for i in range(data_len):
        indices = list(range(i*data_len, (i+1)*data_len))
        sub_data = Subset(train_data, indices)
        loader = torch.utils.data.DataLoader(sub_data, batch_size = batch_size)
        data_loaders.append(loader)

    return data_loaders

In [8]:
teacher_loaders = make_data_loaders(train_data, n_teachers)

In [9]:
train_data_student = Subset(test_data, list(range(9000)))
test_data_student = Subset(test_data, list(range(9000, 10000)))

train_loader_student = torch.utils.data.DataLoader(train_data_student, batch_size=batch_size)
test_loader_student = torch.utils.data.DataLoader(test_data_student, batch_size=batch_size)

In [10]:
class MNISTClassifier(nn.Module):

    def __init__(self):
        super().__init__()

        self.c1 = nn.Conv2d(1, 10, kernel_size = 5)
        self.c2 = nn.Conv2d(10, 20, kernel_size = 5)
        self.c2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.c1(x), 2))
        x = F.relu(F.max_pool2d(self.c2_drop(self.c2(x)), 2))
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training = self.training)
        x = self.fc2(x)

        return F.log_softmax(x)

In [11]:
def trainer(model, trainloader, criterion, optim, epochs = 10):
    cum_loss = 0

    for e in range(epochs):
        model.train()

        for data, labels in trainloader:
            optim.zero_grad()
            pred = model.forward(data)
            loss = criterion(pred, labels)
            loss.backward()
            optim.step()
            cum_loss += loss.item()

In [12]:
def predict(model, dataloader):
    outputs = torch.zeros(0, dtype = torch.long)
    model.eval()

    for data, labels in dataloader:
        pred = model.forward(data)
        vote = torch.argmax(torch.exp(pred), dim = 1)
        outputs = torch.cat((outputs, vote))

    return outputs

In [13]:
def model_trainer(num_teachers):
    model_list = []

    for i in range(num_teachers):
        model = MNISTClassifier()
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.003)
        trainer(model, teacher_loaders[i], criterion, optimizer)
        model_list.append(model)

    return model_list

In [14]:
models = model_trainer(n_teachers)

  return F.log_softmax(x)


In [15]:
gamma = 0.2

In [16]:
def noisy_aggregation(teacher_models, dataloader, gamma):

    preds = torch.torch.zeros((len(teacher_models), 9000), dtype = torch.long)

    for i, model in enumerate(teacher_models):
        res = predict(model, dataloader)
        preds[i] = res

    labels = np.array([]).astype(int)

    for j in np.transpose(preds):
        n_labels = np.bincount(j, minlength = 10)

        beta = 1 / gamma
        for k in range(len(n_labels)):
            n_labels[k] += np.random.laplace(0, beta, 1)

        vote = np.argmax(n_labels)
        labels = np.append(labels, vote)

    return preds.numpy(), labels

In [17]:
teacher_models = models
preds, student_labels = noisy_aggregation(teacher_models, train_loader_student, gamma)

  return F.log_softmax(x)


In [18]:
def student_data_generator(student_train_loader, labels):
    for batch_index, (data, _) in enumerate(iter(student_train_loader)):
        start_index = batch_index * len(data)
        end_index = (batch_index + 1) * len(data)
        yield data, torch.from_numpy(labels[start_index:end_index])

In [19]:
model_student = MNISTClassifier()
criterion = nn.NLLLoss()
optimizer = optim.Adam(model_student.parameters(), lr = 0.001)
epochs = 10
steps = 0
cum_loss = 0

In [20]:
for e in range(epochs):
    model_student.train()
    train_loader = student_data_generator(train_loader_student, student_labels)
    for input, labels in train_loader:
        steps += 1

        optimizer.zero_grad()

        pred = model_student.forward(input)
        loss = criterion(pred, labels)

        loss.backward()
        optimizer.step()

        cum_loss += loss.item()

        if steps % 50 == 0:
            test_loss = 0
            accuracy = 0
            model_student.eval()
            with torch.no_grad():
                for input, labels in test_loader_student:
                    log_vote = model_student(input)
                    test_loss += criterion(log_vote, labels).item()

                    ps = torch.exp(log_vote)
                    top_p, top_class = ps.topk(1, dim = 1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor))

            model_student.train()

            print("Epoch: {}/{} | ".format(e+1, epochs),
                  "Train Loss: {:.3f} | ".format(cum_loss / len(train_loader_student)),
                  "Test Loss: {:.3f} | ".format(test_loss / len(test_loader_student)),
                  "Accuracy: {:.3f}".format(accuracy / len(test_loader_student)))

            cum_loss = 0

  return F.log_softmax(x)


Epoch: 1/10 |  Train Loss: 0.566 |  Test Loss: 1.349 |  Accuracy: 0.654
Epoch: 1/10 |  Train Loss: 0.301 |  Test Loss: 0.661 |  Accuracy: 0.790
Epoch: 1/10 |  Train Loss: 0.187 |  Test Loss: 0.449 |  Accuracy: 0.866
Epoch: 2/10 |  Train Loss: 0.153 |  Test Loss: 0.353 |  Accuracy: 0.904
Epoch: 2/10 |  Train Loss: 0.144 |  Test Loss: 0.374 |  Accuracy: 0.886
Epoch: 2/10 |  Train Loss: 0.120 |  Test Loss: 0.355 |  Accuracy: 0.901
Epoch: 2/10 |  Train Loss: 0.100 |  Test Loss: 0.338 |  Accuracy: 0.906
Epoch: 3/10 |  Train Loss: 0.111 |  Test Loss: 0.302 |  Accuracy: 0.912
Epoch: 3/10 |  Train Loss: 0.119 |  Test Loss: 0.319 |  Accuracy: 0.906
Epoch: 3/10 |  Train Loss: 0.086 |  Test Loss: 0.312 |  Accuracy: 0.902
Epoch: 4/10 |  Train Loss: 0.082 |  Test Loss: 0.288 |  Accuracy: 0.921
Epoch: 4/10 |  Train Loss: 0.103 |  Test Loss: 0.300 |  Accuracy: 0.915
Epoch: 4/10 |  Train Loss: 0.099 |  Test Loss: 0.322 |  Accuracy: 0.913
Epoch: 4/10 |  Train Loss: 0.074 |  Test Loss: 0.294 |  Accuracy

In [21]:
def calculate_data_dependent_epsilon(teacher_predictions, private_labels):
  np_labels = []

  for i in np.transpose(teacher_predictions):
          lab_freq = np.bincount(i, minlength = 10)
          vote = np.argmax(lab_freq)
          np_labels.append(vote)

  print(f'The data dependent epsilon for {n_teachers} teachers and laplace parameter {1/gamma} is {np.max(np.abs(np_labels - private_labels))}.')

In [22]:
calculate_data_dependent_epsilon(preds, student_labels)

The data dependent epsilon for 200 teachers and laplace parameter 5.0 is 7.
