# Federated Learning

Federated learning in Pytorch without usig the PySyft framework

## Import Packages

In [1]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
import numpy as np
import matplotlib.pyplot as plt

## Data Prep

Download the MNIST dataset

In [2]:
train_dataset = torchvision.datasets.MNIST(
    root='data/',
    train=True, 
    transform=transforms.ToTensor(), 
    download=True
)

In [3]:
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: data/
    Split: Train
    StandardTransform
Transform: ToTensor()

Split data ramdomly in 3 parts. One part each for each of the 2 nodes and retain 1 part for testing

In [4]:
node1_data,node2_data,server_data = torch.utils.data.random_split(train_dataset,[20000,20000,20000])

Sanity checks

In [5]:
len(node2_data)

20000

In [6]:
len(node1_data.indices)

20000

## Build

Build a simple ANN model

In [7]:
model = nn.Sequential(
    nn.Linear(784,128), #flatten 28X28 
    nn.ReLU(),
    nn.Linear(128,10) # 10 classes
)

Initialize remote models

In [8]:
node1_model=model
node2_model=model

Move models to GPU - if GPU is available

In [9]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(device)
model.to(device) # transfer the model parameters and data to the GPU (or RAM in case of CPU)
node1_model.to(device)
node2_model.to(device)

cuda:0


Sequential(
  (0): Linear(in_features=784, out_features=128, bias=True)
  (1): ReLU()
  (2): Linear(in_features=128, out_features=10, bias=True)
)

In [10]:
# Loss and optimizer
criterion = nn.CrossEntropyLoss() 
optimizer = torch.optim.Adam(model.parameters())

## Training

Create dataloaders

In [11]:
batch_size=128
node1_loader = torch.utils.data.DataLoader(dataset=node1_data,
                                            batch_size=batch_size,
                                            shuffle=True)
node2_loader = torch.utils.data.DataLoader(dataset=node2_data,
                                            batch_size=batch_size,
                                            shuffle=True)
server_loader = torch.utils.data.DataLoader(dataset=server_data,
                                            batch_size=batch_size,
                                            shuffle=True)

In [12]:
def train(model,name,train_loader,test_loader):
    n_epochs = 10

    train_losses = np.zeros(n_epochs)
    test_losses = np.zeros(n_epochs)

    for it in range(n_epochs):
        train_loss = [] 
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device) # move data to GPU

            
            inputs = inputs.view(-1,784) 

            optimizer.zero_grad()

            
            outputs = model(inputs)
            loss = criterion(outputs,targets)

            
            loss.backward()
            optimizer.step()

            train_loss.append(loss.item()) 

        
        train_loss=np.mean(train_loss) # The mean can represent the loss for the epoch

        test_loss = []
        for inputs,targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device) 
            
            inputs = inputs.view(-1,784) 

            outputs = model(inputs)
            loss = criterion(outputs,targets)
            test_loss.append(loss.item()) 

        test_loss=np.mean(test_loss) # The mean can represent the loss for the epoch

        # save losses
        train_losses[it] = train_loss
        test_losses[it] = test_loss

        print(f'Epoch {it+1}/{n_epochs}, Train Loss: {train_loss:.4f}, Test Loss: {test_loss:.4f}')
    
        
    torch.save(model.state_dict(), name)
        
    

**Remote Training**

Remote train 1

In [13]:
train(node1_model,"node1.pt",node1_loader,server_loader)

Epoch 1/10, Train Loss: 0.6819, Test Loss: 0.3422
Epoch 2/10, Train Loss: 0.2940, Test Loss: 0.2729
Epoch 3/10, Train Loss: 0.2362, Test Loss: 0.2377
Epoch 4/10, Train Loss: 0.1964, Test Loss: 0.2139
Epoch 5/10, Train Loss: 0.1658, Test Loss: 0.1906
Epoch 6/10, Train Loss: 0.1442, Test Loss: 0.1782
Epoch 7/10, Train Loss: 0.1234, Test Loss: 0.1679
Epoch 8/10, Train Loss: 0.1085, Test Loss: 0.1609
Epoch 9/10, Train Loss: 0.0967, Test Loss: 0.1518
Epoch 10/10, Train Loss: 0.0840, Test Loss: 0.1486


Remote Training 2

In [14]:
train(node2_model,"node2.pt",node1_loader,server_loader)

Epoch 1/10, Train Loss: 0.0738, Test Loss: 0.1477
Epoch 2/10, Train Loss: 0.0646, Test Loss: 0.1382
Epoch 3/10, Train Loss: 0.0565, Test Loss: 0.1391
Epoch 4/10, Train Loss: 0.0506, Test Loss: 0.1382
Epoch 5/10, Train Loss: 0.0445, Test Loss: 0.1322
Epoch 6/10, Train Loss: 0.0388, Test Loss: 0.1339
Epoch 7/10, Train Loss: 0.0334, Test Loss: 0.1333
Epoch 8/10, Train Loss: 0.0299, Test Loss: 0.1323
Epoch 9/10, Train Loss: 0.0250, Test Loss: 0.1385
Epoch 10/10, Train Loss: 0.0223, Test Loss: 0.1317


## Aggregate Models

Take the avarage of the layer weights and biases

In [15]:
state_1 = node1_model.state_dict()
state_2 = node2_model.state_dict()

for layer in state_1:
    state_1[layer] = (state_1[layer] + state_2[layer])/2

Initialize the server modesl and load it with the aggregated remote models

In [17]:
server_model = model 
server_model.load_state_dict(state_1)

<All keys matched successfully>

## Evaluate

In [20]:
def eval(model,test_loader):
        test_loss = []
        for inputs,targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device) # move data to GPU
        
            inputs = inputs.view(-1,784) 

            outputs = model(inputs)
            loss = criterion(outputs,targets)
            test_loss.append(loss.item()) # this will loss of the batch

        test_loss=np.mean(test_loss) # The mean can represent the loss for the epoch

        print(f'Test Loss: {test_loss:.4f}')
    

Evaluate the aggregated global model

In [21]:
eval(server_model,server_loader)

Test Loss: 0.1318


## Subsequent rounds 

Initialize remote models with the aggregated global model

In [22]:
node1_model=server_model
node2_model=server_model

Remote training 1

In [23]:
train(node1_model,"node1a.pt",node1_loader,server_loader)

Epoch 1/10, Train Loss: 0.0190, Test Loss: 0.1323
Epoch 2/10, Train Loss: 0.0162, Test Loss: 0.1328
Epoch 3/10, Train Loss: 0.0138, Test Loss: 0.1364
Epoch 4/10, Train Loss: 0.0125, Test Loss: 0.1350
Epoch 5/10, Train Loss: 0.0107, Test Loss: 0.1384
Epoch 6/10, Train Loss: 0.0097, Test Loss: 0.1378
Epoch 7/10, Train Loss: 0.0084, Test Loss: 0.1405
Epoch 8/10, Train Loss: 0.0078, Test Loss: 0.1438
Epoch 9/10, Train Loss: 0.0065, Test Loss: 0.1435
Epoch 10/10, Train Loss: 0.0057, Test Loss: 0.1434


Remote Training 2

In [24]:
train(node2_model,"node2a.pt",node2_loader,server_loader)

Epoch 1/10, Train Loss: 0.1349, Test Loss: 0.1222
Epoch 2/10, Train Loss: 0.0719, Test Loss: 0.1101
Epoch 3/10, Train Loss: 0.0489, Test Loss: 0.1071
Epoch 4/10, Train Loss: 0.0355, Test Loss: 0.1086
Epoch 5/10, Train Loss: 0.0288, Test Loss: 0.1083
Epoch 6/10, Train Loss: 0.0231, Test Loss: 0.1026
Epoch 7/10, Train Loss: 0.0174, Test Loss: 0.1043
Epoch 8/10, Train Loss: 0.0140, Test Loss: 0.1023
Epoch 9/10, Train Loss: 0.0117, Test Loss: 0.1061
Epoch 10/10, Train Loss: 0.0102, Test Loss: 0.1047


Aggregation

In [25]:
state_1 = node1_model.state_dict()
state_2 = node2_model.state_dict()

for layer in state_1:
    state_1[layer] = (state_1[layer] + state_2[layer])/2
    
server_model = model 
server_model.load_state_dict(state_1)

<All keys matched successfully>

Evaluate 

In [26]:
eval(server_model,server_loader)

Test Loss: 0.1046
