# Import necessary libraries

In [1]:
# General
import os
import cv2
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import pickle
import time
import copy
import pandas as pd


# Pytorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from torchvision import transforms, datasets


# PySyft
import syft as sy
from syft.frameworks.torch.fl import utils
from syft.workers.websocket_client import WebsocketClientWorker

# Pre-processing the Data

In [2]:
# Set the image size Y where Y represents YxY 
IMG_SIZE = 50
BATCH_SIZE = 100
LR = 0.001

In [3]:
train = datasets.MNIST(r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/MNIST", 
                      train = True, download = True, 
                      transform = transforms.Compose([transforms.Resize(IMG_SIZE),
                                                      transforms.ToTensor()]))

test = datasets.MNIST(r"/media/wilfredo/Willie931GB/EURECOM_SLU_Linux/II_SEMESTER/SLU/PAPER_KDD2022/EXPERIMENTS/PySyft/Datasets/MNIST", 
                      train = False, download = True, 
                      transform = transforms.Compose([transforms.Resize(IMG_SIZE),
                                                      transforms.ToTensor()]))

In [4]:
# Load the data from the file it was saved in. Take the ENTIRE dataset!
training_data = torch.utils.data.DataLoader(train, batch_size = int(len(train)/2), shuffle = True)
test_data = torch.utils.data.DataLoader(test, batch_size = int(len(test)/2), shuffle = True)

In [5]:
# # If this line works, then you loaded the data successfully!
# print(next(iter(training_data))[1])

# Define your custom CNN
Ours is baed on VGG-11. Source: Page 3/14, Table 1, Configuration A, https://arxiv.org/pdf/1409.1556.pdf

## NN Mini

In [6]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        IMG_SIZE = 50
        # Define your first convolutional layer: input = 1, output = 32 convolutional features, kernel size = 5
        # Remember that kernel = 5 means that the "window" used to scan for features will be 5x5
        self.conv1 = nn.Conv2d(1, 16, 5)
        self.conv2 = nn.Conv2d(16, 32, 5)
        self.conv3 = nn.Conv2d(32, 64, 5)
#         self.conv4 = nn.Conv2d(128, 256, 3)
        
        
        # Now we need to get the output of the convolution processed to get it into a fully connected layer
        # To know what dimensions to use for our fully connected layers, the only known way in PyTorch is to 
        # pass some data through the convolutional layers, and then examine their output. Then we manually 
        # input the dimensions of this output into the next fully connected layer and voila.
        
        # Create random dummy data, run the convolution layers, check the size of the output of conv3, and then 
        # give that dimension info to the fully connected layers. This will only be done ONCE
        x = torch.randn(IMG_SIZE,IMG_SIZE).view(-1, 1, IMG_SIZE, IMG_SIZE)
        self._to_linear = None
        self.convs(x)
        
        # Run the fully connected layers
#         self.fc1 = nn.Linear(self._to_linear, 512)
#         self.fc2 = nn.Linear(512, 2)
        self.fc1 = nn.Linear(self._to_linear, 32)
        self.fc2 = nn.Linear(32, 2)
#         self.fc3 = nn.Linear(256, 2)

    # Function defining only one part of the forward pass (the convolution layers only). This will also write
    # the output dimensions of the conv layers to self._to_linear ONCE, and this information will then be used 
    # as the input data flattened dimensions of the next fully connected layers 
    def convs(self, x):
        # Convolutional layer 1 + activation + max_pooling
        x = self.conv1(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
        x = self.conv3(x)
        x = F.relu(x)
        x = F.max_pool2d(x, (2, 2))
#         x = self.conv4(x)
#         x = F.relu(x)
#         x = F.max_pool2d(x, (2, 2))
#         x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
#         x = F.max_pool2d(F.relu(self.conv2(x)), (2, 2))
#         x = F.max_pool2d(F.relu(self.conv3(x)), (2, 2))
        
#         print("x[0].shape = ", x[0].shape)
        if self._to_linear is None:
            self._to_linear = x[0].shape[0] * x[0].shape[1] * x[0].shape[2]
        return x
    
    # Function defining the rest of the forward pass
    def forward(self, x):
        # Run the convs layers first
        x = self.convs(x)
        # Reshape the output data from the convs to be flattened
        x = x.view(-1, self._to_linear)
        # Pass the data through the fully connected layers now
        x = F.relu(self.fc1(x))
#         x = F.relu(self.fc2(x))
        # Pass it through the final layer
        x = self.fc2(x)
        # One final softmax function to make the output vector look nicer
        x = F.softmax(x, dim = 1)
        return x

net = Net()

In [7]:
# Take a look at our model
model = net
model

Net(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=256, out_features=32, bias=True)
  (fc2): Linear(in_features=32, out_features=2, bias=True)
)

# Establish your loss function

In [8]:
# Set your loss function (MSE for images!)
loss_function = nn.MSELoss()

# Separate your data into data, labels, training, testing, and scale it

In [9]:
# ########################## TRAIN DATA ##########################
# # Separate the x's and the y's
# X = torch.Tensor([i[0] for i in X_no_img_size]).view(-1, 1, IMG_SIZE, IMG_SIZE)

# # Scale the images. The pixel values are between 0-255, but we want them to be between 0-1
# X = X/255.0

# # Get your y's
# y = torch.Tensor([i[1] for i in training_data])

# ########################## TEST DATA ##########################
# # Separate the x's and the y's
# X_test = torch.Tensor([i[0] for i in test_data]).view(-1, 1, IMG_SIZE, IMG_SIZE)

# # Scale the images. The pixel values are between 0-255, but we want them to be between 0-1
# X_test = X_test/255.0

# # Get your y's
# y_test = torch.Tensor([i[1] for i in test_data])

In [10]:
X = next(iter(training_data))[0]
y_unformatted = next(iter(training_data))[1].type(torch.FloatTensor)
X_test = next(iter(test_data))[0]
y_test_unformatted = next(iter(test_data))[1].type(torch.FloatTensor)

# The two other cases in this paper use 2 dimensional labels (0, 1), not only (0)
# MNIST by default comes with labels in the format (9) instead of (9, 0). To change this:
# Create tensors with all zeros of the same size
y_unformatted_addition = torch.zeros(y_unformatted.size())
y_test_unformatted_addition = torch.zeros(y_test_unformatted.size())
# Then stack them together (0 for vertically, -1 for horizontally)
y = torch.stack((y_unformatted, y_unformatted_addition), -1)
y_test = torch.stack((y_test_unformatted, y_test_unformatted_addition), -1)


In [11]:
# # Check the image size of the MNIST dataset samples to then modify the variable IMG_SIZE
# y_test.size()
# max(y_test)
# # for tensor in y_test:
# #     tensor = [tensor, 0]
# # y_test[0]
# # y_test.size()
# # print(y)
# # print(X[0][0].shape)

In [12]:
# y_test_addition = torch.zeros(y_test.size()).type(torch.LongTensor)


# # y_test_addition[0].type()    
# y_test_2 = torch.stack((y_test, y_test_addition), -1)
# y_test_2


In [13]:
# ########################## TRAIN DATA ##########################
# # Separate the x's and the y's
# X = torch.Tensor([i for i in X]).view(-1, 1, IMG_SIZE, IMG_SIZE)

# # Scale the images. The pixel values are between 0-255, but we want them to be between 0-1
# X = X/255.0

# # Get your y's
# y = torch.Tensor([i[1] for i in training_data])

# ########################## TEST DATA ##########################
# # Separate the x's and the y's
# X_test = torch.Tensor([i[0] for i in test_data]).view(-1, 1, IMG_SIZE, IMG_SIZE)

# # Scale the images. The pixel values are between 0-255, but we want them to be between 0-1
# # X_test = X_test/255.0

# # Get your y's
# y_test = torch.Tensor([i[1] for i in test_data])

In [14]:
# len(y_test)

In [15]:
# Set your validation data percentage
# VAL_PCT = 0.1
# val_size = int(len(X)*VAL_PCT)

In [16]:
# Define your training data
# train_X = X[:-val_size]
# train_y = y[:-val_size]
train_X = X
train_y = y

# Define your testing (validation) data
# test_X = X[-val_size:]
# test_y = y[-val_size:]
test_X = X_test
test_y = y_test

# Federated Learning

## Establish the virtual workers, their data, their NNs, and their optimizers

In [17]:
# Start the hook
hook = sy.TorchHook(torch)

# Create your virtual workers
worker1 = sy.VirtualWorker(hook, id="worker1")
worker2 = sy.VirtualWorker(hook, id="worker2")
worker3 = sy.VirtualWorker(hook, id="worker3")

compute_nodes = [worker1, worker2, worker3]

In [18]:
train_X_worker1 = train_X[:int(len(train_X)/len(compute_nodes))].view(-1, 1, IMG_SIZE, IMG_SIZE)
train_X_worker2 = train_X[int(len(train_X)/len(compute_nodes)):int(2 * len(train_X)/len(compute_nodes))].view(-1, 1, IMG_SIZE, IMG_SIZE)
train_X_worker3 = train_X[int(2 * len(train_X)/len(compute_nodes)):int(3 * len(train_X)/len(compute_nodes))].view(-1, 1, IMG_SIZE, IMG_SIZE)

train_y_worker1 = train_y[:int(len(train_X)/len(compute_nodes))]
train_y_worker2 = train_y[int(len(train_X)/len(compute_nodes)):int(2 * len(train_X)/len(compute_nodes))]
train_y_worker3 = train_y[int(2 * len(train_X)/len(compute_nodes)):int(3 * len(train_X)/len(compute_nodes))]

In [19]:
# Clear the workers of any objects, just in case you forgot some were still there from a previous run
worker1.clear_objects()
worker2.clear_objects()
worker3.clear_objects()

<VirtualWorker id:worker3 #objects:0>

In [20]:
# # Establish the NN model for each worker. This is model-centric FL, so it is the same model for all workers
worker1_model = model.copy()
worker2_model = model.copy()
worker3_model = model.copy()

# # Establish the optimizer for each worker
worker1_optimizer = optim.SGD(worker1_model.parameters(), lr=LR)
worker2_optimizer = optim.SGD(worker2_model.parameters(), lr=LR)
worker3_optimizer = optim.SGD(worker3_model.parameters(), lr=LR)

In [21]:
# # Organize the models and optimizers into lists
models = [worker1_model, worker2_model, worker3_model]
optimizers = [worker1_optimizer, worker2_optimizer, worker3_optimizer]

## Functions used for Training

In [22]:
def train():
    batch_count = 0
    total_epoch_time = 0
    batch_times = []
    for i in tqdm(range(0, int(len(train_X)/len(compute_nodes)), BATCH_SIZE)):
        
        # Send the models to the worker. This step is not necessary in real life, as this is only done once. 
        # However, because the .rm_obj() method is not working to remove the batches at the end of each loop,
        # we are forced to remove ALL objects from each worker, including the models, so the batches don't
        # constantly add up, consuming more memory each time!
        worker1_model.send(worker1)
        worker2_model.send(worker2)
        worker3_model.send(worker3)
        
        # Get the batches for each of the workers
        batch_X_1 = train_X_worker1[i : i + BATCH_SIZE]
        batch_y_1 = train_y_worker1[i : i + BATCH_SIZE]
        
        batch_X_2 = train_X_worker2[i : i + BATCH_SIZE]
        batch_y_2 = train_y_worker2[i : i + BATCH_SIZE]
        
        batch_X_3 = train_X_worker3[i : i + BATCH_SIZE]
        batch_y_3 = train_y_worker3[i : i + BATCH_SIZE]
#         print("Got the batches for all workers (not sent yet)")
        
        # Send the data and labels to each of the workers. This does not have to be done in real life, but 
        # must be done in this simulation
        batch_X_1 = batch_X_1.send(worker1)
        batch_y_1 = batch_y_1.send(worker1)
        
        batch_X_2 = batch_X_2.send(worker2)
        batch_y_2 = batch_y_2.send(worker2)
        
        batch_X_3 = batch_X_3.send(worker3)
        batch_y_3 = batch_y_3.send(worker3)
#         print("Sent the batches for all workers")
        
        # Train the models
        # The following in real life would be done in parallel
        FP_start_time = time.time()
        pred1 = worker1_model(batch_X_1)
        pred2 = worker2_model(batch_X_2)
        pred3 = worker3_model(batch_X_3)
        FP_end_time = (time.time() - FP_start_time)/len(compute_nodes)
#         print("FP time = ", round(FP_end_time, 4), " s")
#         print("Trained the models for all workers")
        
        # Calculate the loss functions
        BP_start_time = time.time()
        loss1 = loss_function(pred1, batch_y_1)
        loss2 = loss_function(pred2, batch_y_2)
        loss3 = loss_function(pred3, batch_y_3)
#         print("Calculates the loss function for all workers")
        
        # Do the backwards pass
        loss1.backward()
        loss2.backward()
        loss3.backward()
#         print("Did the backwards propagation on all workers")
        
        # Update the weights
        worker1_optimizer.step()
        worker2_optimizer.step()
        worker3_optimizer.step()
        BP_end_time = (time.time() - BP_start_time)/len(compute_nodes)
#         print("BP time = ", round(BP_end_time, 4), " s")
        
        total_batch_time = FP_end_time + BP_end_time
        batch_times.append(total_batch_time)
        total_epoch_time += total_batch_time
#         print("Total time for this batch = ", round(total_batch_time, 4), " s \n \n")
#         print("Updated all the weights for all workers")
            
        # Save the total training time for this batch
    
        # Get the models back from the workers
        worker1_model.get()
        worker2_model.get()
        worker3_model.get()
        
        # Clear all batches from worker before next batch comes
        worker1.clear_objects()
        worker2.clear_objects()
        worker3.clear_objects()
#         print("Successfully finished training of all workers for batch ", counter)
#         batch_count += 1
#         if batch_count == 25:
#             break
        
    # OUTSIDE THE FOR LOOP: After all workers are done training...
    # Calculate the federated average and save the new model in "fed_avg"
    fed_start_time = time.time()
    fed_avg = utils.federated_avg({
        "worker1": worker1_model,
        "worker2": worker2_model,
        "worker3": worker3_model
    })
    fed_end_time = time.time() - fed_start_time
    total_epoch_time += fed_end_time
    print("TOTAL TIME FOR THIS EPOCH: ", round(total_epoch_time, 4), " s \n")
    
    # Return the new fed_avg
#     print("Obtained the federated model!")
    return fed_avg, batch_times, total_epoch_time

## Function used for testing

In [23]:
def test(federated_model):
    
    # Calculate the accuracy
    correct = 0
    total = 0

    # Do not update your gradients while testing
    with torch.no_grad():
        print("Initiated federated model testing:")
        for i in tqdm(range(len(test_X))):
            
            # Put the model into evaluation mode so it does not update its gradients during this test
            federated_model.eval()

            # Obtain the real class for the sample
            real_class = torch.argmax(test_y[i])

            # Obtain our prediction for said sample (not arg_maxed yet)
            output = federated_model(test_X[i].view(-1, 1, IMG_SIZE, IMG_SIZE))[0]

            # Obtain our arg_maxed prediction for said sample
            predicted_class = torch.argmax(output)

            # Update counters
            if predicted_class == real_class:
                correct += 1
            total += 1

    print("Accuracy of the federated model = ", round(correct/total, 3), "\n \n")

# RUN THE MODEL

In [24]:
# Define your number of epochs
epochs = 5
epoch_times = []

# Train all workers for the set number of epochs
for epoch in range(epochs):
    
    # Start counting the time for this epoch
#     epoch_start_time = time.time()
#     print(f"Epoch Number {epoch + 1}")
    
    # Train the individual models, and then obtain the federated averaged model
#     train_start_time = time.time()
    federated_model, batch_times, epoch_time = train()
    
    # Save your current epoch time
    epoch_times.append(epoch_time)
    
#     train_total_time = time.time() - train_start_time
#     print("Total TRAIN time for epoch ", epoch, " = ", 
#           round(train_total_time/(len(compute_nodes) * 60), 2), " min")
#     print("Finished training the federated model!")
    
    # The model to be send and then trained on all workers from now on is the new federated model
#     for model in models:
#         model = federated_model.copy()
    global worker1_model
    worker1_model = federated_model.copy()
    global worker2_model
    worker2_model = federated_model.copy()
    global worker3_model
    worker3_model = federated_model.copy()
#     print("Replaced original models with the new federated model!")
    
    # Stop counting the time
#     epoch_total_time = time.time() - epoch_start_time
#     print('Time for this epoch', round(epoch_total_time/60, 2), 'min')
    
    # Test your new model to keep a log of how good we're doing per epoch 
    test(federated_model)
#     print("Successfully tested the federated model!")
    
    # Save your batch times for this epoch
    df_batch = pd.DataFrame(batch_times)
    df_batch.to_csv("./Batch_times/batch_times_FL_MINI_epoch_" + str(epoch) + ".csv")

# OUTSIDE THE FOR LOOP
# Save all your epoch times into a CSV
df_epoch = pd.DataFrame(epoch_times)
df_epoch.to_csv("./Epoch_times/epoch_times_FL_MINI.csv")

# Clean the global namespace after run is done
%reset -f

100%|██████████| 100/100 [09:51<00:00,  5.91s/it]


TOTAL TIME FOR THIS EPOCH:  13.6087  s 

Initiated federated model testing:


100%|██████████| 5000/5000 [00:07<00:00, 712.43it/s]


Accuracy of the federated model =  0.898 
 



100%|██████████| 100/100 [08:28<00:00,  5.08s/it]


TOTAL TIME FOR THIS EPOCH:  10.6437  s 

Initiated federated model testing:


100%|██████████| 5000/5000 [00:07<00:00, 700.44it/s]


Accuracy of the federated model =  0.898 
 



 26%|██▌       | 26/100 [02:11<06:15,  5.07s/it]


KeyboardInterrupt: 