**Vertically Partitioned Split Learning**

To demonstrate this structure, the input features of an MNIST model are split vertically into the number of parts given by the integer assigned to the variable 'partition'.

In [20]:
import syft, torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

Create hook and virtual workers - alice and bob

In [21]:
# Hook torch with PySyft, then create the two simulated parties in one pass
# (alice first, then bob).
hook = syft.TorchHook(torch)
alice, bob = (syft.VirtualWorker(hook, id=name) for name in ('alice', 'bob'))

**Creating class SplitNN**

Create a class SplitNN that contains all the functionality.

In [34]:
class SplitNN(nn.Module):
    """Vertically partitioned split network.

    ``models[:-1]`` are the bottom segments, one per feature partition, and
    ``models[-1]`` is the top segment that consumes the concatenation of all
    bottom outputs. ``optimizers`` holds the optimizers stepped by
    :meth:`step` and cleared by :meth:`zero_grads`.

    The class also works on plain local tensors (objects without a
    ``location`` attribute); in that case nothing is moved between workers.

    Args:
        models: list of segments; the last entry is the top segment.
        optimizers: list of optimizers, typically one per segment.
        partition: number of bottom segments / feature partitions.
    """

    def __init__(self, models, optimizers, partition):
        super().__init__()
        self.models = models
        self.optimizers = optimizers
        self.partition = partition
        self.output = [None] * partition
        # Detached input of the top segment; set by forward(), read by backward().
        self.second_layer_inp = None

    @staticmethod
    def _location_of(obj):
        """Return ``obj.location``, or None for objects that have no location."""
        return getattr(obj, "location", None)

    @staticmethod
    def _copy_of(tensor):
        """Copy a tensor with ``copy()`` when it exists, else with ``clone()``."""
        return tensor.copy() if hasattr(tensor, "copy") else tensor.clone()

    def zero_grads(self):
        """Clear the gradients held by every optimizer."""
        for opt in self.optimizers:
            opt.zero_grad()

    def forward(self, x):
        """Run all bottom segments, then the top segment.

        Args:
            x: list with one batch per feature partition, in the same order
                as the bottom segments.

        Returns:
            The prediction of the top segment.

        Raises:
            ValueError: if ``len(x)`` differs from the number of partitions.
        """
        if len(x) != len(self.output):
            raise ValueError(
                f"expected {len(self.output)} partitioned inputs, got {len(x)}"
            )

        for i, (model, part) in enumerate(zip(self.models, x)):
            self.output[i] = model(part)

        # Concatenate the bottom outputs feature-wise where they were computed.
        total_out = torch.cat(tuple(self.output), dim=1)

        # Cut the autograd graph between bottom and top segments; the
        # gradient is carried across this cut by hand in backward().
        top_model = self.models[-1]
        second_layer_inp = total_out.detach()
        if self._location_of(self.output[-1]) != self._location_of(top_model):
            second_layer_inp = second_layer_inp.move(top_model.location)
        second_layer_inp = second_layer_inp.requires_grad_()

        self.second_layer_inp = second_layer_inp
        return top_model(second_layer_inp)

    def backward(self):
        """Propagate the top segment's input gradient into the bottom segments.

        Must be called after ``forward`` and after ``backward()`` has been
        called on the loss, so that the top segment's input holds a gradient.

        Raises:
            RuntimeError: if no gradient is available yet.
        """
        second_layer_inp = self.second_layer_inp
        if second_layer_inp is None:
            raise RuntimeError("backward() called before forward()")
        grad = second_layer_inp.grad
        if grad is None:
            raise RuntimeError(
                "no gradient on the top segment's input; "
                "call loss.backward() before SplitNN.backward()"
            )

        grad = self._copy_of(grad)
        bottom_location = self._location_of(self.output[-1])
        if bottom_location != self._location_of(second_layer_inp):
            grad = grad.move(bottom_location)

        # Hand each bottom segment the gradient columns that belong to its
        # own output. Widths are read from the outputs themselves, so this
        # does not depend on any global configuration and also supports
        # bottom segments with different output widths.
        start = 0
        for out in self.output:
            width = int(out.shape[1])
            out.backward(grad[:, start:start + width])
            start += width

    def step(self):
        """Apply one update step with every optimizer."""
        for opt in self.optimizers:
            opt.step()

The function create_models creates one model per partition for the bottom part (alice's models), plus the top model.
  
First we iterate over one fewer than the number of partitions, because those models share the same structure. Since the number of input features may not divide evenly by the number of partitions, the last partition may have a different number of features. So we calculate the remaining number of features and create the last model accordingly.

Then the model for bob's machine, which holds the labels, is created.

In [23]:
def create_models(partition, input_size, hidden_sizes, output_size):
    """Build the bottom segments and the top segment of the split network.

    The first ``partition - 1`` bottom segments each take
    ``input_size // partition`` features; the last bottom segment takes the
    remaining features, so all ``input_size`` features are covered even when
    the division is not exact.

    Args:
        partition: number of feature partitions (bottom segments); must be >= 1.
        input_size: total number of input features.
        hidden_sizes: ``[0]`` and ``[1]`` are the hidden and output widths of
            each bottom segment, ``[2]`` is the hidden width of the top segment.
        output_size: number of outputs of the top segment.

    Returns:
        A list of ``partition + 1`` ``nn.Sequential`` models: the bottom
        segments in partition order, followed by the top segment.

    Raises:
        ValueError: if ``partition`` is smaller than 1.
    """
    if partition < 1:
        raise ValueError("partition must be a positive integer")

    def bottom_segment(in_features):
        # All bottom segments share this structure; only the input width varies.
        return nn.Sequential(nn.Linear(in_features, hidden_sizes[0]),
                             nn.ReLU(),
                             nn.Linear(hidden_sizes[0], hidden_sizes[1]),
                             nn.ReLU())

    # Every partition except the last has the same width; the last one
    # absorbs the remainder of the integer division.
    width = input_size // partition
    widths = [width] * (partition - 1) + [input_size - width * (partition - 1)]
    models = [bottom_segment(in_features) for in_features in widths]

    # The top segment consumes the concatenated outputs of all bottom segments.
    models.append(nn.Sequential(nn.Linear(hidden_sizes[1] * partition, hidden_sizes[2]),
                                nn.ReLU(),
                                nn.Linear(hidden_sizes[2], output_size),
                                nn.LogSoftmax(dim=1)))
    return models

Assign an integer to the partition variable indicating number of partitions available.

In [24]:
# Number of vertical feature partitions; create_models builds one bottom
# model per partition.
# NOTE(review): the data-partitioning and training cells below are written
# for exactly two partitions (image_set1 / image_set2).
partition = 2

Load the dataset 

In [25]:
# Convert each image to a tensor, then normalise it with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
])

# NOTE(review): '..output/mnist' names a directory called "..output" in the
# working directory; confirm whether '../output/mnist' was intended.
trainset = datasets.MNIST(root='..output/mnist', train=True, download=True, transform=transform)
trainloader = DataLoader(dataset=trainset, batch_size=64, shuffle=True)

Initialize the sizes

In [26]:
# Total number of features per flattened image; sizes the bottom models' inputs.
input_size = 784
# [0], [1]: hidden and output widths of each bottom model;
# [2]: hidden width of the top model.
hidden_sizes = [128, 256, 512]
# Number of outputs of the top model's final layer.
output_size = 10

Partition the features of each image as needed, to imitate separate datasets that hold different features of the same image.

In [28]:
# Width of the first feature slice; the second slice takes everything after it.
distr = input_size // partition
image_set1, image_set2, labels = [], [], []
# NOTE(review): exactly two slices are produced, so this cell matches partition == 2.
for image, label in trainloader:
    image = image.view(image.size(0), -1)      # flatten every image into one row
    image_set1.append(image[:, :distr])
    image_set2.append(image[:, distr:])
    labels.append(label)

Here we assign output from create_models to a variable models.

Next, we create a separate optimizer for each of these models.

In [29]:
models = create_models(
    partition=partition,
    input_size=input_size,
    hidden_sizes=hidden_sizes,
    output_size=output_size,
)

# One SGD optimizer per segment, all with the same learning rate.
optimizers = [optim.SGD(segment.parameters(), lr=0.01) for segment in models]

Now build the list of model locations. All the partitioned models are located on alice's machine, and the model that works with the labels is on bob's machine.

After this, send each model to its location.

In [30]:
# One location per model: every bottom model goes to alice, the top model to bob.
model_locations = [alice] * partition + [bob]

for model, location in zip(models, model_locations):
    model.send(location)

Create an object of class SplitNN.

In [31]:
# Bundle the models and their optimizers into one split network.
splitNN = SplitNN(models=models, optimizers=optimizers, partition=partition)

Define a train function.

In [32]:
def train(x, target, splitnn):
    """Run one optimisation step of the split network on a single batch.

    Args:
        x: input handed unchanged to ``splitnn.forward``.
        target: labels compared against the prediction with ``nn.NLLLoss``.
        splitnn: object providing ``zero_grads``, ``forward``, ``backward``
            and ``step``.

    Returns:
        The loss tensor for this batch.
    """
    splitnn.zero_grads()
    log_probs = splitnn.forward(x)
    loss = nn.NLLLoss()(log_probs, target)
    # Backward pass in two stages: first through the loss, then through
    # whatever splitnn.backward() propagates itself.
    loss.backward()
    splitnn.backward()
    splitnn.step()
    return loss

Since we have 2 partitions, we iterate in lockstep over the two lists of image slices and the list of labels that were built from trainloader. Each batch is sent to its respective machine, i.e. the image slices to alice's machine and the labels to bob's machine.

In [35]:
epochs = 20

for i in range(epochs):
    total_loss = 0
    for x1, x2, y in zip(image_set1, image_set2, labels):
        # Image slices go to where the bottom models live, labels to the top model's location.
        x1 = x1.send(models[0].location)
        x2 = x2.send(models[0].location)
        y = y.send(models[-1].location)
        loss = train([x1, x2], y, splitNN)
        total_loss += loss.get()

    # Report the mean loss per batch once the epoch is finished.
    print(f"Epoch: {i+1}... Training Loss: {total_loss/len(image_set1)}")

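RuntimeError: Mismatch in shape: grad_output[0] has a shape of torch.Size([64, 512]) and output[0] has a shape of torch.Size([64, 256]).

Note: the execution counters show that the SplitNN class cell (In [34]) was run after the splitNN object was created (In [31]), so this error may come from an instance of an earlier class definition. Re-run all cells from top to bottom before relying on this output.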