In [1]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to the local .py files take
# effect without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from arguments import Arguments
from cnn import CNN
from collections import defaultdict
from distributor import get_cluster_sizes, get_distributed_data, get_fog_graph
from train import fog_train as train
from numpy import array
from numpy.random import permutation, randint
import os
from sklearn.model_selection import train_test_split
import syft as sy
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms


Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.15.3.so'





In [3]:
# Setups
args = Arguments()
# Hook torch with PySyft; later cells call .send()/.get() on tensors and models.
hook = sy.TorchHook(torch)
USE_CUDA = not args.no_cuda and torch.cuda.is_available()

# Seed torch's global RNG from the configured seed.
torch.manual_seed(args.seed)
device = torch.device("cuda" if USE_CUDA else "cpu")

# Extra keyword arguments forwarded to the DataLoaders.
# NOTE(review): this cell used to build {'num_workers': 1, 'pin_memory': True}
# when CUDA was enabled and then immediately overwrite it with {}. The dead
# assignment is removed here; the effective value is unchanged (always {}).
# Confirm whether the CUDA loader options were disabled on purpose.
kwargs = {}

In [4]:
# Checkpoint locations. The model name is "<dataset>_<classifier>_<paradigm>".
ckpt_path = '../ckpts'
dataset = 'mnist'
clf_type = 'cnn'
paradigm = 'fog'
model_name = '_'.join((dataset, clf_type, paradigm))

# The initial-weights file has a fixed name that is not derived from
# model_name; the best/stop checkpoints are named after this model.
init_path = os.path.join(ckpt_path, 'mnist_cnn_fl.init')
best_path, stop_path = (
    os.path.join(ckpt_path, model_name + suffix) for suffix in ('.best', '.stop')
)

In [5]:
# Training split: downloaded on demand, converted to tensors and normalised.
train_set = datasets.MNIST(
    '../data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=args.num_train, shuffle=True, **kwargs)

# Test split: same preprocessing, read from the same data directory.
test_set = datasets.MNIST(
    '../data',
    train=False,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]),
)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=args.test_batch_size, shuffle=True, **kwargs)

# Materialise each split as plain tensors. Each loop keeps only the LAST batch
# the loader yields, so the result covers the whole split only when the batch
# size is at least the split size.
for data, target in train_loader:
    X_train, y_train = data, target

for data, target in test_loader:
    X_test, y_test = data, target

print('X_train: {}'.format(X_train.shape))
print('y_train: {}'.format(y_train.shape))
print('X_test: {}'.format(X_test.shape))
print('y_test: {}'.format(y_test.shape))

X_train: torch.Size([60000, 1, 28, 28])
y_train: torch.Size([60000])
X_test: torch.Size([10000, 1, 28, 28])
y_test: torch.Size([10000])


In [6]:
# prepare graph and data
# get_fog_graph returns the aggregation hierarchy and the worker collection.
# fog_graph maps each aggregator name to the names of its children (see the
# displayed value below). `workers` is later passed to train() as `nodes`,
# where it is indexed by node name.
fog_graph, workers = get_fog_graph(hook, args.num_workers, args.num_clusters,
                                 args.shuffle_workers, args.uniform_clusters)
# Split the training tensors into per-worker shards.
# NOTE(review): presumably one shard per worker (args.num_workers) - confirm
# against distributor.get_distributed_data.
X_trains, y_trains = get_distributed_data(X_train, y_train, args.num_workers)
# Bare expression so the notebook displays the hierarchy.
fog_graph

{'L1_W0': ['L0_W20', 'L0_W28', 'L0_W42', 'L0_W41', 'L0_W29'],
 'L1_W1': ['L0_W36', 'L0_W22', 'L0_W13', 'L0_W12', 'L0_W30'],
 'L1_W2': ['L0_W34', 'L0_W18', 'L0_W15', 'L0_W43', 'L0_W24'],
 'L1_W3': ['L0_W8', 'L0_W16', 'L0_W44', 'L0_W46', 'L0_W11'],
 'L1_W4': ['L0_W37', 'L0_W48', 'L0_W5', 'L0_W7', 'L0_W9'],
 'L1_W5': ['L0_W3', 'L0_W26', 'L0_W35', 'L0_W23', 'L0_W47'],
 'L1_W6': ['L0_W17', 'L0_W25', 'L0_W2', 'L0_W10', 'L0_W40'],
 'L1_W7': ['L0_W21', 'L0_W1', 'L0_W19', 'L0_W38', 'L0_W0'],
 'L1_W8': ['L0_W27', 'L0_W4', 'L0_W31', 'L0_W6', 'L0_W14'],
 'L1_W9': ['L0_W45', 'L0_W32', 'L0_W39', 'L0_W49', 'L0_W33'],
 'L2_W0': ['L1_W3', 'L1_W0'],
 'L2_W1': ['L1_W7', 'L1_W8'],
 'L2_W2': ['L1_W5', 'L1_W9'],
 'L2_W3': ['L1_W2', 'L1_W6'],
 'L2_W4': ['L1_W1', 'L1_W4'],
 'L3_W0': ['L2_W1', 'L2_W4', 'L2_W2'],
 'L3_W1': ['L2_W0', 'L2_W3'],
 'L4_W0': ['L3_W1', 'L3_W0']}

In [8]:
# Train
def train(args, model, graph, nodes, X_trains, y_trains, device, epoch):
    """Run one round of hierarchical (fog) training and update ``model`` in place.

    Each leaf node (name prefixed ``L0_``) receives a copy of ``model`` plus
    one data shard and takes a single SGD step on that whole shard. The
    resulting weights are then summed level by level up ``graph``: leaf
    weights are scaled by their sample count, intermediate sums by 1. The
    root's sum is divided by the total number of samples, giving the
    sample-weighted average, which is loaded back into ``model``.

    Args:
        args: settings object; ``args.lr`` and ``args.num_clusters`` are read.
        model: the global model; copied to every node and overwritten with
            the aggregated weights at the end.
        graph: mapping from aggregator name to the list of its child names.
        nodes: mapping from node name to the worker object that tensors and
            models are sent or moved to.
        X_trains: iterable of per-leaf input shards, paired with the leaves
            in the iteration order of ``nodes``.
        y_trains: iterable of per-leaf target shards, paired the same way.
        device: device that the remote data and targets are moved to.
        epoch: epoch number, used only in the printed log line.

    Raises:
        AssertionError: if the top aggregation level does not consist of
            exactly one node (including when ``args.num_clusters`` is empty).

    NOTE(review): ``get_model_weights`` and ``add_model_weights`` are not
    defined or imported in the visible cells of this notebook, so on a fresh
    kernel this function depends on a cell that is missing. Import them
    explicitly from the module that defines them. This definition also
    shadows ``from train import fog_train as train``.
    """
    model.train()

    worker_data = {}
    worker_targets = {}
    worker_num_samples = {}
    worker_models = {}
    worker_optims = {}
    worker_losses = {}

    # send data, model to workers
    # setup optimizer for each worker

    # Match on the full "L<level>_" prefix: a substring test such as
    # 'L1' in name would also match 'L10_...'.
    workers = [name for name in nodes.keys() if name.startswith('L0_')]
    for w, x, y in zip(workers, X_trains, y_trains):
        worker_data[w] = x.send(nodes[w])
        worker_targets[w] = y.send(nodes[w])
        worker_num_samples[w] = x.shape[0]

    # Sum of the weights applied to the leaf models below. Dividing the root
    # sum by this (rather than by a separately configured count) is what makes
    # the final result a true weighted average.
    total = sum(worker_num_samples.values())

    for w in workers:
        worker_models[w] = model.copy().send(nodes[w])
        worker_optims[w] = optim.SGD(params=worker_models[w].parameters(), lr=args.lr)

        data = worker_data[w]
        target = worker_targets[w]
        data, target = data.to(device), target.to(device)
        worker_optims[w].zero_grad()
        output = worker_models[w](data)
        loss = F.nll_loss(output, target)
        loss.backward()
        worker_optims[w].step()
        worker_losses[w] = loss.get().data

    # Aggregate bottom-up, one level at a time. After the loop `aggregators`
    # holds the nodes of the top level.
    aggregators = []
    for l in range(1, len(args.num_clusters)+1):
        prefix = 'L{}_'.format(l)
        aggregators = [name for name in nodes.keys() if name.startswith(prefix)]
        for a in aggregators:
            worker_models[a] = model.copy().send(nodes[a])
            # Children's contributions are already sample-weighted sums, so an
            # aggregator's own sum is passed upward with weight 1.
            worker_num_samples[a] = 1
            # Use the `graph` argument, not the notebook-level global.
            children = graph[a]

            for child in children:
                worker_models[child].move(nodes[a])

            with torch.no_grad():
                weighted_models = [
                    get_model_weights(worker_models[child], worker_num_samples[child])
                    for child in children
                ]
                model_sum = weighted_models[0]
                for m in weighted_models[1:]:
                    model_sum = add_model_weights(model_sum, m)
                worker_models[a].load_state_dict(model_sum)

    assert len(aggregators) == 1, \
        'expected exactly one root aggregator, found {}'.format(len(aggregators))
    master = get_model_weights(worker_models[aggregators[0]].get(), 1/total)
    model.load_state_dict(master)

    loss = array([worker_loss.cpu().numpy() for worker_loss in worker_losses.values()])
    print('Train Epoch: {} \tLoss: {:.6f} +- {:.6f}'.format(
        epoch,
        loss.mean(), loss.std()
    ))


# Test
def test(args, model, device, test_loader, best):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = correct / len(test_loader.dataset)
    if accuracy > best:
        best = accuracy

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%) ==> {:.2f}%'.format(
        test_loss, correct, len(test_loader.dataset), 100.*accuracy, 100.*best))

    return best

In [9]:
# Best test accuracy seen so far; updated by test() in the training loop.
best = 0
# Fire the engines
model = CNN().to(device)
if args.load_init:
    # map_location remaps the stored tensors onto the current device, so a
    # checkpoint written on a GPU machine still loads on a CPU-only one.
    model.load_state_dict(torch.load(init_path, map_location=device))
    print('Load init: {}'.format(init_path))
elif args.save_init:
    # Create the checkpoint directory on first use; torch.save does not.
    init_dir = os.path.dirname(init_path)
    if init_dir:
        os.makedirs(init_dir, exist_ok=True)
    torch.save(model.state_dict(), init_path)
    print('Save init: {}'.format(init_path))


Load init: ../ckpts/mnist_cnn_fl.init


In [10]:
# Main loop: one fog-training round followed by an evaluation per epoch.
for epoch in range(1, args.epochs + 1):
    train(args, model, fog_graph, workers, X_trains, y_trains, device, epoch)
    previous_best = best
    best = test(args, model, device, test_loader, best)
    # Write the "best" checkpoint only when this epoch actually improved on
    # the previous best accuracy. Saving unconditionally would leave the file
    # holding the latest weights rather than the best ones.
    if args.save_model and best > previous_best:
        torch.save(model.state_dict(), best_path)
        print('Model best: {}\n'.format(best_path))

# Always keep the weights from the final epoch as the "stop" checkpoint.
if args.save_model:
    torch.save(model.state_dict(), stop_path)
    print('Model stop: {}'.format(stop_path))


Train Epoch: 1 	Loss: 2.306139 +- 0.000868



Test set: Average loss: 2.2681, Accuracy: 2650/10000 (26.50%) ==> 26.50%


Train Epoch: 2 	Loss: 2.268448 +- 0.000971



Test set: Average loss: 2.2310, Accuracy: 4168/10000 (41.68%) ==> 41.68%


Train Epoch: 3 	Loss: 2.232030 +- 0.001229



Test set: Average loss: 2.1905, Accuracy: 5562/10000 (55.62%) ==> 55.62%


Train Epoch: 4 	Loss: 2.192104 +- 0.001600



Test set: Average loss: 2.1421, Accuracy: 6443/10000 (64.43%) ==> 64.43%


Train Epoch: 5 	Loss: 2.144583 +- 0.002072



Test set: Average loss: 2.0814, Accuracy: 6920/10000 (69.20%) ==> 69.20%


Train Epoch: 6 	Loss: 2.085014 +- 0.002663



Test set: Average loss: 2.0029, Accuracy: 7220/10000 (72.20%) ==> 72.20%


Train Epoch: 7 	Loss: 2.008063 +- 0.003426



Test set: Average loss: 1.9001, Accuracy: 7369/10000 (73.69%) ==> 73.69%


Train Epoch: 8 	Loss: 1.907259 +- 0.004428



Test set: Average loss: 1.7661, Accuracy: 7437/10000 (74.37%) ==> 74.37%


Train Epoch: 9 	Loss: 1.775962 +- 0.005710



Test set: Average loss: 1.5957, Accuracy: 7637/10000 (76.37%) ==> 76.37%


Train Epoch: 10 	Loss: 1.609271 +- 0.007225



Test set: Average loss: 1.3928, Accuracy: 7879/10000 (78.79%) ==> 78.79%
