In [1]:
import torch
import torch.nn.functional as F
import torch.nn as nn

import torch.optim as optim
import time
from opacus import PrivacyEngine
from vantage6.tools.util import info, warn
from torchvision import transforms
import argparse
from torchvision import datasets, transforms

# Own modules
# import v6simplemodel as sm
# import util.parser as parser
import parser as parse
# import db as db

In [2]:
# simple model 

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


In [3]:
# initialises training

def RPC_initialize_training(data, gamma=0.7, learning_rate=0.01, local_dp=True):
    """
    Initializes the model, optimizer and scheduler and shares the parameters
    with all the workers in the group.

    This should be sent from server to all nodes.

    Args:
        data: contains the local data from the node
        gamma: Learning rate step gamma (default: 0.7)
        learning_rate: The learning rate for training.
        cuda: Should we use CUDA?
        local_dp: bool whether to apply local_dp or not.

    Returns:
        Returns the device, model, optimizer and scheduler.
    """
    # Load local dataset
    # first:
        # torch.save(dataset_train, './dataset.pt')
#     data = torch.load('./training.pt')

    # Determine the device to train on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
#     print("\033[0;{};49m Rank {} is training on {}".format(color, rank, device))

    # Initialize model and send parameters of server to all workers
    model = Net()
    model.to(device)

    # TODO: load local? train_loader, test_loader from locally stored local

    # use Opacus for DP: Opacus is a library that enables training PyTorch models
    # with differential privacy. Taken from: https://github.com/pytorch/opacus

    # intializing optimizer and scheduler
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    
    # adding DP if true
    if local_dp == True:
        privacy_engine = PrivacyEngine(model, batch_size=64,
            sample_size=60000, alphas=range(2,32), noise_multiplier=1.3,
            max_grad_norm=1.0,)
        privacy_engine.attach(optimizer)

    # returns device, model, optimizer which will be needed in train and test
    return device, model, optimizer

In [4]:
# basic training of the model

# Question: train gets model, device, optimizer from initialize_training, which is specified within train function, 
# why do I need to call it again before executing the function? Because in vantage6 when I sent the tasks I cannot define that but only in the master function


def RPC_train(data, model, device, optimizer, epoch=1, delta=1e-5, local_dp=True):
    """
    Training the model on all batches.
    Args:
        model: A model to run training on.
        device: The device to run training on.
        train_loader: Data loader for training local.
        optim: Optimization algorithm used for training. (not optimizer because double use)
        epoch: The number of the epoch the training is in.
        round: The number of the round the training is in.
        local_dp: Training with local DP?
        delta: The delta value of DP to aim for (default: 1e-5).
    """
    # TODO: define train_loader again from local local
#     data = torch.load('./dataset.pt')
#     train_loader, test_loader, data_size = data

#     train = torch.load('/Users/simontokloth/PycharmProjects/torch-vantage6/v6-ppsdg-py/local/MNIST/processed/training.pt')
#     test = torch.load('/Users/simontokloth/PyCharmProjects/torch-vantage6/v6-ppsdg-py/local/MNIST/processed/test.pt')
#     train_loader, test_loader = train, test
    
#     transform = transforms.Compose([transforms.ToTensor(),
#     transforms.Normalize((0.1307,), (0.3081,))])

    train_loader = torch.utils.data.DataLoader(datasets.MNIST('../mnist_data', 
                                                          download=True, 
                                                          train=True,
                                                          transform=transforms.Compose([
                                                              transforms.ToTensor(), # first, convert image to PyTorch tensor
                                                              transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                                                          ])), 
                                           batch_size=10, 
                                           shuffle=True)

    device, model, optimizer = RPC_initialize_training(data) # is this allowed in vantage6? calling one RPC_method in another?
    
    model.train()

    for batch_idx, (data, target) in enumerate(train_loader):
#     for batch_idx, sample in enumerate(train_loader): 
#         data, target = sample['data'].cuda(), sample['target'].cuda()
        
        # Send the local and target to the device (cpu/gpu) the model is at (either send to the cpu or to the gpu, but the local is already on the worker node); model.send(local.location)
        data, target = data.to(device), target.to(device)
        
        batch = (data, target)
        # Clear gradient buffers
        optimizer.zero_grad()
        # Run the model on the local
        output = model(data)
        # Calculate the loss
        loss = F.nll_loss(output, target)
        # Calculate the gradients
        loss.backward()
        optimizer.step()


   
    
    # Logging needed if want the same output as torch.dist
#     print('\033[0;{};49m Train on Rank {}, Round {}, Epoch {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
#         round, epoch, batch_idx * len(batch[0]), len(train_loader.dataset),
#         100. * batch_idx / len(train_loader), loss.item()))
    print(loss.item())
    
#     optimizer = optim.SGD(model.parameters(), lr=0.1)
#     if local_dp==True:
#         privacy_engine = PrivacyEngine(model, batch_size=64,
#             sample_size=60000, alphas=range(2,32), noise_multiplier=1.3,
#             max_grad_norm=1.0,)
#         privacy_engine.attach(optimizer)
        
    epsilon, alpha = optimizer.privacy_engine.get_privacy_spent(delta)
#     print("\033[0;{};49m Epsilon {}, best alpha {}".format(epsilon, alpha))

In [5]:
# Model Evaluation

def RPC_test(data, model, device):
    """
    Tests the model.

    Args:
        color: The color for the terminal output for this worker.
        model: The model to test.
        device: The device to test the model on.
        test_loader: The local loader for test local. -> no inside function
    """
    # TODO: load local dataset as test_loader
#     data = torch.load('./dataset.pt')
#     train_loader, test_loader, data_size = data

#     train = torch.load('/Users/simontokloth/PycharmProjects/torch-vantage6/v6-ppsdg-py/local/MNIST/processed/training.pt')
#     test = torch.load('/Users/simontokloth/PyCharmProjects/torch-vantage6/v6-ppsdg-py/local/MNIST/processed/test.pt')
#     train_loader, test_loader = train, test

#     transform = transforms.Compose([transforms.ToTensor(),
#     transforms.Normalize((0.1307,), (0.3081,))])

    test_loader = torch.utils.data.DataLoader(datasets.MNIST('../mnist_data', 
                                                          download=True, 
                                                          train=False,
                                                          transform=transforms.Compose([
                                                              transforms.ToTensor(), # first, convert image to PyTorch tensor
                                                              transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                                                          ])), 
                                           batch_size=10, 
                                           shuffle=True)
    
    device, model, optimizer = RPC_initialize_training(data)
    
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Send the local and target to the device (cpu/gpu) the model is at
            data, target = data.to(device), target.to(device)
            # Run the model on the local
            output = model(data)
            # Calculate the loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            # Check whether prediction was correct
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(test_loss)
#     print('\033[0;{};49m \nAverage loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
#         test_loss, correct, len(test_loader.dataset),
#         100. * correct / len(test_loader.dataset)))

In [None]:
# FedAvg gathering of parameters 

def RPC_get_parameters(data, model, parameters, weights):
    """
    Get parameters from nodes
    """
    for parameters in model.parameters():
        return {
        "params": parameters,
        }
    
    '''
    this might need to be combined with training, so that train returns the parameters or that it at least calls the results of training function
    '''

In [None]:
# averaging of returned parameters 

def RPC_average_parameters_weighted(data, model, parameters, weights):
    """
    Get parameters from nodes and calculate the average
    :param model: torch model
    :param parameters: parameters of model
    :param weights:
    :return:
    """
    
    parameters = RPC_get_parameters() # makes returned parameters from RPC_get_parameters the parameters used in this function
    
    # TODO: local: since we usually just get the parameters, this well be an entire task, therefore, we might need to train for each individually
   
    
    with torch.no_grad():
        for parameters in model.parameters():
            average = sum(x * y for x, y in zip(parameters[i], weights)) / sum(weights)
            parameters.data = average
            i = i + 1
        return parameters

In [None]:
# training with those averaged parameters

def RPC_fed_avg(data, args, model, optimizer, train_loader, test_loader, device):
    """
    Training and testing the model on the workers concurrently using federated
    averaging, which means calculating the average of the local model
    parameters after a number of (local) epochs each training round.

    In vantage6, this method will be the training of the model with the average parameters (weighted)

    Returns:
        Returns the final model
    """
    # TODO: local: since we usually just get the parameters, this well be an entire task, therefore, we might need to train for each individually

    for epoch in range(1, args.epochs + 1):
        # Train the model on the workers again
        RPC_train(data, model, device, train_loader, optimizer, epoch, args.local_dp, delta=1e-5)
        # Test the model on the workers
        RPC_test(data, model, device, test_loader)

    gather_params = model.get_parameters() # or model.parameters()

    RPC_train(model.RPC_average_parameters_weighted(gather_params))

    return model


## OR 

    parameters = RPC_average_parameters_weighted(data, model, parameters, weights) # then uses those parameters for training
    
    
    
    # # Gather the parameters after the training round on the server
    #     gather_params = coor.gather_parameters(rank, model, group_size + 1, subgroup)
    #
    #     # If the server
    #     if rank == 0:
    #         # Calculate the average of the parameters and adjust global model
    #         coor.average_parameters_weighted(model, gather_params, weights)
    #
    #     # Send the new model parameters to the workers
    #     coor.broadcast_parameters(model, group)


In [18]:
# why do I have to specify these?

data_loader = torch.utils.data.DataLoader(datasets.MNIST('../mnist_data', 
                                                          download=True, 
                                                          transform=transforms.Compose([
                                                              transforms.ToTensor(), # first, convert image to PyTorch tensor
                                                              transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                                                          ])), 
                                           batch_size=10, 
                                           shuffle=True)
data = data_loader

model = Net()

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

optimizer = optim.SGD(model.parameters(), lr=0.1)

epoch = 10

# RPC_initialize_training(data)

RPC_train(data, model, device, optimizer, epoch)

  "The sample rate will be defined from ``batch_size`` and ``sample_size``."
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "


5.035912990570068


In [95]:
RPC_test(data, model, device)

  "The sample rate will be defined from ``batch_size`` and ``sample_size``."
  "Secure RNG turned off. This is perfectly fine for experimentation as it allows "


2.3067076303482055
