In [25]:
# Number of passes the training loop makes over the distributed training data.
epochs = 5

# Example - Simple Vertically Partitioned Split Neural Network

- <b>client_1</b> and <b>client_2</b> (data owners)
    - Each has its own model segment
    - Each has one part of every handwritten image
- <b>server</b>
    - Has the final model segment
    - Receives the image labels for training
    
Based on [SplitNN - Tutorial 3](https://github.com/OpenMined/PySyft/blob/master/examples/tutorials/advanced/split_neural_network/Tutorial%203%20-%20Folded%20Split%20Neural%20Network.ipynb) from Adam J Hall - Twitter: [@AJH4LL](https://twitter.com/AJH4LL) · GitHub:  [@H4LL](https://github.com/H4LL)

Authors:
- Pavlos Papadopoulos · GitHub:  [@pavlos-p](https://github.com/pavlos-p)
- Tom Titcombe · GitHub:  [@TTitcombe](https://github.com/TTitcombe)
- Robert Sandmann · GitHub: [@rsandmann](https://github.com/rsandmann)


In [3]:

import sys
sys.path.append('../')

import torch
from torchvision import datasets, transforms
from torch import nn, optim
from torchvision.datasets import MNIST
from torchvision.transforms import ToTensor

import syft as sy

from src.dataloader import VerticalDataLoader
from src.psi.util import Client, Server
from src.splitnn import SplitNN
from src.utils import add_ids
from src.distribute_data import Distribute_MNIST

# Create the PySyft hook for torch; it is passed to every VirtualWorker created below.
hook = sy.TorchHook(torch)




In [28]:
# Data preprocessing

# Seed torch's global RNG *before* the shuffled DataLoader is built and handed
# to Distribute_MNIST. Without this, the batch order depends on whatever RNG
# state the kernel happens to be in, because the only other seed call lives in
# the later model-definition cell.
torch.manual_seed(0)

# Convert each image to a tensor, then normalise it with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])
trainset = datasets.MNIST('mnist', download=True, train=True, transform=transform)
# Mini-batches of 64 images, reshuffled every time the loader is iterated.
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Create the virtual workers: two data owners and one server.
client_1 = sy.VirtualWorker(hook, id="client_1")
client_2 = sy.VirtualWorker(hook, id="client_2")
server = sy.VirtualWorker(hook, id="server")

# Workers that hold image data, and every worker that hosts a model segment.
data_owners = (client_1, client_2)
model_locations = [client_1, client_2, server]

# Split each image and send one part to client_1, and the other to client_2
distributed_trainloader = Distribute_MNIST(data_owners=data_owners, data_loader=trainloader)

14.5%

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./VerticalDataset/raw/train-images-idx3-ubyte.gz


100.1%

Extracting ./VerticalDataset/raw/train-images-idx3-ubyte.gz to ./VerticalDataset/raw


100.4%

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./VerticalDataset/raw/train-labels-idx1-ubyte.gz
Extracting ./VerticalDataset/raw/train-labels-idx1-ubyte.gz to ./VerticalDataset/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./VerticalDataset/raw/t10k-images-idx3-ubyte.gz


180.4%

Extracting ./VerticalDataset/raw/t10k-images-idx3-ubyte.gz to ./VerticalDataset/raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./VerticalDataset/raw/t10k-labels-idx1-ubyte.gz
Extracting ./VerticalDataset/raw/t10k-labels-idx1-ubyte.gz to ./VerticalDataset/raw
Processing...
Done!


In [33]:
# Fix the RNG state so the segment weights are initialised identically on every run.
torch.manual_seed(0)

# Define our model segments

# Flattened input width for each client's segment (client_1, client_2).
# NOTE(review): 28*14 presumably corresponds to half of a 28x28 image per
# client -- confirm against Distribute_MNIST.
input_size = [28*14, 28*14]
# Layer widths for every segment, keyed by worker id.
# NOTE(review): the server's first width (128) equals the sum of the clients'
# final widths (64 + 64); presumably SplitNN concatenates the client outputs
# before the server segment -- confirm against SplitNN.
hidden_sizes = {"client_1": [32, 64], "client_2": [32, 64], "server": [128, 64]}
# Number of output classes produced by the server segment.
output_size = 10

models = {
    "client_1": nn.Sequential(
                nn.Linear(input_size[0], hidden_sizes["client_1"][0]),
                nn.ReLU(),
                nn.Linear(hidden_sizes["client_1"][0], hidden_sizes["client_1"][1]),
                nn.ReLU(),
    ),
    "client_2":  nn.Sequential(
                nn.Linear(input_size[1], hidden_sizes["client_2"][0]),
                nn.ReLU(),
                nn.Linear(hidden_sizes["client_2"][0], hidden_sizes["client_2"][1]),
                nn.ReLU(),
    ),
    "server": nn.Sequential(
                nn.Linear(hidden_sizes["server"][0], hidden_sizes["server"][1]),
                nn.ReLU(),
                # Use output_size rather than a literal so that changing the
                # constant above actually changes the model.
                nn.Linear(hidden_sizes["server"][1], output_size),
                nn.LogSoftmax(dim=1)
    )
}

# Create one optimiser per segment, in the same order as model_locations.
# They are built before the segments are sent to their workers below.
optimizers = [
    optim.SGD(models[location.id].parameters(), lr=0.05)
    for location in model_locations
]

# Send each segment to the worker whose id it is keyed by.
for location in model_locations:
    models[location.id].send(location)


# Instantiate a SplitNN class with our distributed segments and their respective optimizers
splitNN = SplitNN(models, server, data_owners, optimizers)

In [10]:
# Train for `epochs` passes and report the mean per-batch loss of each pass.
for i in range(epochs):
    running_loss = 0
    # Count the batches actually yielded by distributed_trainloader, so the
    # average is correct even if it yields a different number of batches
    # than the original trainloader.
    num_batches = 0

    # iterate over each batch of distributed data and its labels
    for data_ptr, label in distributed_trainloader:

        # send labels to server's location for training
        label = label.send(server)

        loss = splitNN.train(data_ptr, label)
        running_loss += loss
        num_batches += 1

    # Printed unconditionally after every epoch: the previous `for ... else`
    # had no `break`, so its `else` branch always ran anyway.
    # max(..., 1) guards against a ZeroDivisionError when no batch was yielded.
    print("Epoch {} - Training loss: {}".format(i, running_loss / max(num_batches, 1)))

Epoch 0 - Training loss: 1.141 - Accuracy: 73.202
Epoch 1 - Training loss: 0.384 - Accuracy: 89.162
Epoch 2 - Training loss: 0.317 - Accuracy: 90.818
Epoch 3 - Training loss: 0.281 - Accuracy: 91.948
Epoch 4 - Training loss: 0.254 - Accuracy: 92.738
