# Federated learning: using a PyTorch model

This notebook is a copy of the notebook [Federated learning basic concepts](./federated_learning_basic_concepts.ipynb). The difference is that, here, the model is built using PyTorch. Apart from that, the structure is identical, so the explanatory text has been removed for clarity. Please refer to the original notebook for a detailed description of the experiment. 

## The data

In [None]:
import shfl

# Instantiate shfl's `Emnist` database and load its train/test splits.
database = shfl.data_base.Emnist()
train_data, train_labels, test_data, test_labels = database.load_data()

# Quick inspection: split sizes, the type of a single sample, and its shape.
print(len(train_data))
print(len(test_data))
print(type(train_data[0]))
train_data[0].shape

import matplotlib.pyplot as plt

# Show the first training sample as an image.
plt.imshow(train_data[0])

# Build the federated data over 20 nodes using the IID distribution.
# NOTE: this rebinds `test_data` and introduces `test_label` (singular); these
# are the names passed to `run_rounds` at the end of the notebook.
# NOTE(review): `percent=10` presumably selects 10% of the data - confirm
# against the shfl documentation.
iid_distribution = shfl.data_distribution.IidDataDistribution(database)
federated_data, test_data, test_label = iid_distribution.get_federated_data(num_nodes=20, percent=10)

# Inspect the federated container: its type, node count, and the first node's
# `private_data` attribute.
print(type(federated_data))
print(federated_data.num_nodes())
federated_data[0].private_data

## The model

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score, f1_score

def accuracy(y_pred, y_true):
    """
    Fraction of samples whose predicted class matches the true class.

    # Arguments:
        y_pred: Predictions with shape BxC (B: batch length; C: number of classes).
            Only the per-row argmax is used, so any per-class scores work.
        y_true: Labels for data with One Hot Encoded format

    # Returns:
        The value returned by sklearn's `accuracy_score` for the class indices.
    """
    # NOTE: `np` is imported in a later cell of this notebook; it must be in
    # scope by the time this metric is actually called.
    true_classes = np.argmax(y_true, -1)
    predicted_classes = np.argmax(y_pred, -1)
    # sklearn's documented argument order is (y_true, y_pred).
    return accuracy_score(true_classes, predicted_classes)

def f1(y_pred, y_true):
    """
    Macro-averaged F1 score between predicted and true classes.

    # Arguments:
        y_pred: Predictions with shape BxC (B: batch length; C: number of classes).
            Only the per-row argmax is used, so any per-class scores work.
        y_true: Labels for data with One Hot Encoded format

    # Returns:
        The value returned by sklearn's `f1_score` with `average='macro'`.
    """
    # NOTE: `np` is imported in a later cell of this notebook; it must be in
    # scope by the time this metric is actually called.
    true_classes = np.argmax(y_true, -1)
    predicted_classes = np.argmax(y_pred, -1)
    # sklearn's documented argument order is (y_true, y_pred).
    return f1_score(true_classes, predicted_classes, average='macro')


class Flatten(nn.Module):
    """Module that collapses every dimension after the first into one."""

    def forward(self, input):
        """Return a view of `input` with shape (input.size(0), -1)."""
        leading = input.size(0)
        flat = input.view(leading, -1)
        return flat
    

def model_builder():
    """
    Build a fresh CNN classifier wrapped in shfl's PyTorch model adapter.

    Each call creates a new network, loss and optimizer, so every caller gets
    an independent model.

    # Returns:
        A `shfl.model.DeepLearningModelPyTorch` wrapping the network, a
        cross-entropy criterion, an RMSprop optimizer, the selected device and
        the `accuracy`/`f1` metrics.
    """
    model = nn.Sequential(
        nn.Conv2d(1, 32, kernel_size=(3, 3), stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(.4),
        nn.Conv2d(32, 32, kernel_size=(3, 3), stride=1, padding=1),
        nn.ReLU(),
        nn.MaxPool2d(kernel_size=2, stride=2),
        nn.Dropout(.3),
        Flatten(),
        # 1568 = 32 channels * 7 * 7; assumes 28x28 inputs halved twice by the
        # two max-pool layers - TODO confirm against the dataset.
        nn.Linear(1568, 128),
        nn.ReLU(),
        nn.Dropout(.1),
        nn.Linear(128, 64),
        nn.ReLU(),
        # The network ends with raw logits: nn.CrossEntropyLoss applies
        # log-softmax internally, so a trailing nn.Softmax would apply softmax
        # twice and flatten the gradients. The argmax-based metrics give the
        # same class for logits as for probabilities.
        nn.Linear(64, 10),
    )
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.RMSprop(model.parameters(), lr=0.001, eps=1e-07)

    # Use the first GPU when one is available, otherwise fall back to the CPU.
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    return shfl.model.DeepLearningModelPyTorch(model=model, criterion=criterion, optimizer=optimizer,
                                               device=device, metrics={'accuracy':accuracy, 'f1':f1})

In [None]:
# Aggregator object handed to the federated government below (shfl's FedAvg aggregator).
aggregator = shfl.federated_aggregator.FedAvgAggregator()
# The government receives the model factory itself (`model_builder`, not a
# built model), the federated data and the aggregator.
federated_government = shfl.federated_government.FederatedGovernment(model_builder, federated_data, aggregator)

In [None]:
import numpy as np

class Reshape(shfl.private.FederatedTransformation):
    """Federated transformation that inserts a singleton axis at position 1."""

    def apply(self, labeled_data):
        """Reshape `labeled_data.data` in place from (d0, d1, d2) to (d0, 1, d1, d2)."""
        samples = labeled_data.data
        target_shape = (samples.shape[0], 1, samples.shape[1], samples.shape[2])
        labeled_data.data = np.reshape(samples, target_shape)

class Normalize(shfl.private.FederatedTransformation):
    """Federated transformation that standardizes data with a fixed mean and std."""

    def __init__(self, mean, std):
        """Store the `mean` to subtract and the `std` to divide by."""
        self.__mean, self.__std = mean, std

    def apply(self, labeled_data):
        """Replace `labeled_data.data` with (data - mean) / std."""
        centered = labeled_data.data - self.__mean
        labeled_data.data = centered / self.__std
        
        
# Apply the reshape transformation to the federated data.
shfl.private.federated_operation.apply_federated_transformation(federated_data, Reshape())
# Mean and std are computed once from the training data loaded at the top of
# the notebook, then used to normalize the federated data.
# NOTE(review): if `train_data` is a plain numpy array, `.data` is its raw
# buffer object and `np.mean(train_data)` would be the clearer spelling -
# confirm the type returned by `load_data()`.
mean = np.mean(train_data.data)
std = np.std(train_data.data)
shfl.private.federated_operation.apply_federated_transformation(federated_data, Normalize(mean, std))

## Run the federated learning experiment

In [None]:
# Give the test set the same preprocessing as the federated data: insert a
# singleton axis at position 1, then normalize with the same mean and std.
test_data = np.reshape(test_data, (test_data.shape[0], 1, test_data.shape[1], test_data.shape[2]))
test_data = (test_data - mean) / std
# Run 3 rounds; the test data and labels are passed along (presumably for
# evaluation after each round - confirm against the shfl documentation).
federated_government.run_rounds(3, test_data, test_label)