In [1]:
from dataloader import DatasetLoader
from preprocessing import Imputer
from models import Net
import syft as sy
import torch
from config import cfg
import torch.optim as optim
from models import update_model
from models import test_model
from models import train_on_devices
import time
import copy

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Preprocessing and conversion to PyTorch format

In [2]:
# Toggle for re-running the imputation step; when False, previously imputed
# data is used instead.
impute_data = False

# Create the DatasetLoader instance. This object holds the dataset and is the
# handle every later preprocessing step operates on.
data = DatasetLoader()

# Optionally recompute the imputation and store the result on the loader.
if impute_data:
    imputer = Imputer(data)
    data.df = imputer.transform()

# Load the imputed data (saved from the imputation process).
# NOTE(review): this call runs even when impute_data is True - presumably
# transform() also persists its result so that it is what gets loaded here;
# confirm it does not simply discard the data.df assigned above.
data.load_imputed()

# Split the data into a training set and a testing set.
data.train_test_split()

# The classes are imbalanced, so the training data is oversampled.
data.oversample_training_data()

# Scale the dataset (scikit-learn's standard scaler, per the original note).
data.standard_scale()

# Convert the pandas DataFrame to PyTorch form (tensors, data loaders, etc.).
data.to_PyTorch()

### Devices for training
Hook PySyft into PyTorch and create the virtual workers that act as the training devices.

In [3]:
# Hook PySyft into torch, then create one virtual worker per participant.
hook = sy.TorchHook(torch)

# Workers are created in a fixed order (bob first, then alice) and bound to
# their own names so later cells can still refer to them individually.
bob_device, alice_device = (
    sy.VirtualWorker(hook, id=worker_id) for worker_id in ('bob', 'alice')
)

# Ordered list of devices; positions in this list are used as device indices.
devices = [bob_device, alice_device]

### Sending the training data to the devices for on-device training

In [None]:
# One list of (data, target) pairs per device, in the same order as `devices`.
# Sized from `devices` so adding a worker does not require editing this cell.
remote_dataset = tuple([] for _ in devices)
# Not used in this cell; kept because other cells may still reference it.
train_distributed_dataset = []

# Distribute the training batches round-robin over the devices.
#
# The loop variables are deliberately NOT called `data`/`target`: in a notebook
# every name is global, so binding `data` here would overwrite the
# DatasetLoader instance created in the preprocessing cell. That would break
# any later use of `data` (e.g. passing it to test_model) and would make this
# cell fail on re-run, because the rebound `data` has no `train_loader`.
for batch_idx, (batch_data, batch_target) in enumerate(data.train_loader):
    device_id = batch_idx % len(devices)
    worker = devices[device_id]
    # Send the features first, then the labels, to the selected device and
    # keep what `.send` returns as this device's view of the batch.
    remote_dataset[device_id].append(
        (batch_data.send(worker), batch_target.send(worker))
    )

### Create models and optimizers for each device

In [None]:
# Each device gets its own freshly initialised network (bob first, then alice).
model_bob_device, model_alice_device = Net(), Net()
models = [model_bob_device, model_alice_device]

# One plain SGD optimizer per model, all sharing the configured learning rate.
# Built in the same order as `models`, so index i of `optimizers` drives
# index i of `models`.
optimizer_bob, optimizer_alice = (
    optim.SGD(device_model.parameters(), lr=cfg.ARGS.LR) for device_model in models
)
optimizers = [optimizer_bob, optimizer_alice]

### Train models on devices and test

In [None]:
# Per-epoch metric histories, one entry appended after every epoch.
epoch_loss, precision, accuracy, recall = [], [], [], []

for epoch in range(cfg.ARGS.EPOCHS):
    start_time = time.time()
    print("Epoch Number = {}".format(epoch+1))

    # Train on the devices; the returned model is kept under both names.
    model = federated_model = train_on_devices(remote_dataset, devices, models, optimizers)

    # Evaluate the returned model.
    # NOTE(review): `data` is presumably expected to be the DatasetLoader from
    # the preprocessing cell - verify no intermediate cell has rebound the name.
    test_loss, avg_precision, avg_accuracy, avg_recall = test_model(model, data)

    # Record this epoch's metrics in their respective histories.
    for metric_log, metric_value in (
        (epoch_loss, test_loss),
        (precision, avg_precision),
        (accuracy, avg_accuracy),
        (recall, avg_recall),
    ):
        metric_log.append(metric_value)

    # The measured span covers both the training call and the evaluation call.
    total_time = time.time() - start_time
    print('Communication time over the network {} s\n'.format(round(total_time, 2)))