In [1]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import copy
from torch import nn
from torch import optim
import torch.nn.functional as F
import syft as sy
import torch as th
from helpers import Model, connect_to_workers
from sklearn.metrics import confusion_matrix
from tqdm import tqdm, tqdm_notebook
# BEWARE, ignoreing warnings is not always a good idea
# I am doing it for presentation

W0828 20:18:32.488130 139805837973312 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/home/mkucz/p_venv/lib/python3.6/site-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0828 20:18:32.496996 139805837973312 deprecation_wrapper.py:119] From /home/mkucz/p_venv/lib/python3.6/site-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [2]:
features = np.load('../data/features.npy')
labels = np.load('../data/labels_dim.npy')
data = th.tensor(features, dtype=th.float32, requires_grad=True)
target = th.tensor(labels, dtype=th.float32, requires_grad=False).reshape(-1,2)

hook = sy.TorchHook(th)

In [3]:
class Arguments():
    def __init__(self, in_size, out_size, hidden_layers,
                       activation=F.softmax, dim=-1):
        self.batch_size = 1
        self.drop_p = None
        self.epochs = 1
        self.lr = 0.001
        self.in_size = in_size
        self.out_size = out_size
        self.hidden_layers = hidden_layers
        self.precision_fractional=10
        self.activation = activation
        self.dim = dim

In [4]:
checkpoint = th.load('base_model.pt')  # use model trained earlier to save time

dataset = [(data[i], target[i]) for i in range(len(data))]

# instantiate model
in_size = checkpoint['in_size']
out_size = checkpoint['out_size']
hidden_layers = checkpoint['hidden_layers']

# for MSE loss, we want to use softmax and not log_softmax
args = Arguments(in_size, out_size, hidden_layers,
                 activation=F.softmax, dim=1)
# PyTorch's softmax activation only works with floats
workers, trusted_aggregator = connect_to_workers(len(dataset), hook, True)

<a id="fl_model_avg"></a>
### Federated Learning with Model Averaging
We can perform federated learning in a way that trains a model on the data of each remote worker, and uses a *'trusted aggregator'* to combine the models into one. In this way, the non-trusted party, me for example, cannot tell which remote worker has updated gradients in what way. Gradient updates can be reverse engineered to understand what data has been passed through the network. This is an added layer of privacy protection in federated learning. The downside of this approach, however, is that it requires all parties to trust said aggregator.

A couple of quick things to note before starting this notebook, is that based on the way model averaging is performed, we cannot use [NLLLoss](https://pytorch.org/docs/stable/nn.html#nllloss), and therefore we are using [MSELoss](https://pytorch.org/docs/stable/nn.html#mseloss). This also means that we have to get the labels in a little differently (one-hot-encoded labels versus just the label). This also has an implication in terms of the activation function (if any) that we want to use. Instead of using [LogSoftmax](https://pytorch.org/docs/stable/nn.html#logsoftmax) which returns the log of the softmax function output, we would want to use the normal [Softmax](https://pytorch.org/docs/stable/nn.html#softmax).

#### Send Data to Remote Worker
In this step we need to send a copy of the model to each remote worker, as well as a new optimizer object

In [5]:
# Send data to remote workers
# Cast the result in BaseDatasets
remote_dataset_list = []
for i in range(len(dataset)):

    d, t = data[i], target[i]
    # send to worker before adding to dataset
    r_d = d.reshape(1, -1).send(workers[i])
    r_t = t.reshape(1, -1).send(workers[i])

    dtset = sy.BaseDataset(r_d, r_t)
    remote_dataset_list.append(dtset)

# Build the FederatedDataset object
n_train_items = int(len(dataset)*0.7)
n_test_items = len(dataset) - n_train_items
# split into train/test
train_remote_dataset = sy.FederatedDataset(remote_dataset_list[:n_train_items])
test_remote_dataset = sy.FederatedDataset(remote_dataset_list[n_train_items:])

print(train_remote_dataset.workers[:5])

['w_0', 'w_1', 'w_2', 'w_3', 'w_4']


In [6]:
model = Model(args)
model.load_state_dict(checkpoint['model_state'])
#send copy of model to remote client worker
models = [model.copy().send(w) for w in workers] 
optimizers = [optim.SGD(params=m.parameters(), lr=args.lr) for m in models]

In [50]:
# PARALLEL federated learning with trusted aggregator
def federated_train_trusted_agg(models, datasets, optimizers):
    for e in range(1, args.epochs+1):
        running_loss = 0
        for i in range(n_train_items):  # train each model concurrently
            model = models[i] # choose remote model to use
            opt = optimizers[i] #choose remote optimizer to use
            
            # get remote dataset loc (by worker id)
            _d = datasets.datasets[model.location.id]  

            # NB the steps below all happen remotely
            opt.zero_grad()  
            # zero out gradients so that one forward pass
            # doesnt pick up previous forward's gradients
            outputs = model.forward(_d.data)  # make prediction
            # get shape of (1,2) as we need at least two dimension
            outputs = outputs.reshape(1, -1)
            
            # NllLoss does not work well with federation...
            loss = ((outputs - _d.targets)**2).sum()
            
            #or
            #loss = F.mse_loss(outputs, _d.targets)
            loss.backward()
            opt.step()

            # FEDERATION STEP
            _loss = loss.get().data  # get loss from remote worker
            if th.isnan(_loss) or _loss > 10:
                print(model.location.id, outputs.get(), _d.targets.get(), _loss)
                continue

            running_loss += _loss
        print('Epoch: {} \tLoss: {:.6f}'.format(
            e, running_loss/i))

    # move trained models to trusted thrid party
    for m in models:
        m.move(trusted_aggregator)

In [51]:
federated_train_trusted_agg(models, train_remote_dataset, optimizers)

Epoch: 1 	Loss: 0.142975


Now that we have parallel training implemented, we want to add logic that averages the models of each remote worker after each iteration.

In [22]:
def set_model_avg(base_model, models):
    '''
    Average weights and biases of models on trusted aggregator

    Parameters
    ::models - list: pointers to remote models, should be on trusted aggregator

    Returns
    ::avg_weights
    ::avg_bias
    '''
    # average out each hidden layer individually
    for i in range(len(base_model.hidden_layers)):
        weights, biases = zip(*[(m.hidden_layers[i].weight.data,
                                 m.hidden_layers[i].bias.data) for m in models])
        base_model.hidden_layers[i].weight\
                                   .set_((sum(weights)/len(models)).get())
        base_model.hidden_layers[i].bias.set_((sum(biases)/len(models)).get())

    # average out output layer
    weights, biases = zip(*[(m.output.weight.data,
                             m.output.bias.data) for m in models])
    base_model.output.weight.set_((sum(weights)/len(models)).get())
    base_model.output.bias.set_((sum(biases)/len(models)).get())

In [23]:
# Average the model on trusted aggregator
with th.no_grad():
    set_model_avg(model, models)

#### Putting it Together
Now put together the training and averaging step into one, where the overall model is averaged on a trusted aggregator after every epoch.

In [27]:
%%time
print((f'Federated Training \n {len(workers)} remote workers'
       f'\n {len(remote_dataset)} datum'
        '\n 1 Trusted Aggregator'))
model = Model(args)
model.load_state_dict(checkpoint['model_state'])

for i in range(1, args.epochs+1):
    models = [model.copy().send(w) for w in workers]
    optimizers = [optim.SGD(params=m.parameters(), lr=args.lr) for m in models]

    federated_train_trusted_agg(models, train_remote_dataset, optimizers)

    # Average the model on trusted aggregator
    with th.no_grad():
        set_model_avg(model, models)

Federated Training 
 653 remote workers
 1 Trusted Aggregator
 653 datum
Epoch: 1 	Loss: 0.177108
CPU times: user 9.56 s, sys: 127 ms, total: 9.68 s
Wall time: 9.68 s


We have now trained a deep learning model using federated learning with a trusted aggregator! Make sure to test the model on a hold-out dataset. For the purpose of these examples, I will exclude testing sets for the sake of time.
Nevertheless, this **data is not yet encrypted** and we could deduce things specific to the applicant just by getting or looking at the remote data. <br>
In comes **encrypted deep learning**! Here we want to encrypt gradients such that no trusted aggregator is needed! To check out this exciting code [click here](https://github.com/mkucz95/private_ai_finance#encrypted-deep-learning).
***