# Federated Learning Project
This notebook demonstrates how to set up and compare Federated Learning (FL) with Centralized Learning (CL) using the CIFAR-100 dataset and the modified version of the LeNet-5 model taken from [Hsu et al., Federated Visual Classification with Real-World Data Distribution, ECCV 2020].

## 1. Setup
We start by importing necessary libraries and setting global constants for the experiments.

In [1]:
import sys
import torch
import torch.nn as nn

from models.model import LeNet5 #import the model
import numpy as np
sys.path.append('../data/cifar100/')
from cifar100_loader import CIFAR100DataLoader

from utils.federated_utils import train_federated,plot_client_selection,test,plot_metrics,save_model,load_model

ModuleNotFoundError: No module named 'sklearn'

# Constants

In [None]:
# Experiment-wide configuration for the federated-learning runs.

# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = 'cpu'
if torch.cuda.is_available():
    DEVICE = 'cuda'
print(DEVICE)

# Federation layout
NUM_CLIENTS = 100        # total number of clients in the federation
FRACTION_CLIENTS = 0.1   # C: fraction of clients selected in each round
LOCAL_STEPS = 4          # J: number of local steps
GLOBAL_ROUNDS = 100      # total number of communication rounds

# Local optimisation settings
BATCH_SIZE = 64          # batch size for local training
LR = 0.001               # initial learning rate for the local optimizers
MOMENTUM = 0.9           # momentum for the SGD optimizer
WEIGHT_DECAY = 1e-4      # regularization term for local training

# Logging
LOG_FREQUENCY = 10       # how often training progress is logged (passed as log_freq)

## 2. Data Loading
We load the CIFAR-100 dataset and split it into training, validation, and test sets. This is done using the `CIFAR100DataLoader` class from the `cifar100_loader.py` module.

In [None]:
# Build the CIFAR-100 loaders; validation_split=0.1 keeps 10% of the data for validation.
data_loader = CIFAR100DataLoader(
    batch_size=32,
    validation_split=0.1,
    download=True,
    num_workers=4,
    pin_memory=True,
)

# Expose the three loaders under the short names used by the rest of the notebook.
trainloader = data_loader.train_loader
validloader = data_loader.val_loader
testloader = data_loader.test_loader

# Report how many samples ended up in each split.
print("Dimension of the training dataset:", len(trainloader.dataset))
print("Dimension of the validation dataset:", len(validloader.dataset))
print("Dimension of the test dataset:", len(testloader.dataset))

## 3. Federated Training
We simulate federated learning by splitting the dataset into shards and training with selected clients in each round.

### Initialize Model & Loss

In [4]:
# Loss function for the CIFAR-100 classification task.
# NOTE(review): NLLLoss expects log-probabilities as input — presumably LeNet5
# ends with a log_softmax layer; confirm against models/model.py.
criterion = nn.NLLLoss()

# Global (server-side) model shared by the federated experiments below.
global_model = LeNet5()

In [9]:
# Candidate learning rates: 3 log-spaced values from 1e-4 to 1e-1.
lr_values = np.logspace(start=-4, stop=-1, num=3)

# Candidate weight decays: 3 log-spaced values from 1e-4 to 1e-1.
wd_values = np.logspace(start=-4, stop=-1, num=3)

# Show both grids so the chosen search ranges are visible in the notebook output.
print("Learning Rate Values (log-uniform):", lr_values)
print("Weight Decay Values (log-uniform):", wd_values)

Learning Rate Values (log-uniform): [0.0001     0.00316228 0.1       ]
Weight Decay Values (log-uniform): [0.0001     0.00316228 0.1       ]


# Hyperparameter tuning for the first federated training baseline

In [None]:
# Grid search over learning rate and weight decay for the federated baseline.
# Each (learning rate, weight decay) pair trains a new LeNet5 instance for
# `rounds` communication rounds. The pair whose run reaches the highest
# validation accuracy in any round is stored in `best_setting`, and the full
# metric history of every run is appended to `results`.
lr = [0.05, 0.01, 0.005, 0.0001]  # candidate learning rates
wd = [0.001, 0.0005, 0.0001]  # candidate weight decays
rounds = 100  # communication rounds per tuning run (lower this for a faster sweep)
results = []
best_val_accuracy = 0
best_setting = None
for l in lr:
    for w in wd:
        print(f"Learning rate: {l}, Weight decay: {w}")
        # New model for every configuration so the runs do not influence each other.
        global_model = LeNet5()
        global_model, val_accuracies, val_losses, train_accuracies, train_losses, client_selection_count = train_federated(
            global_model,
            criterion,
            trainloader,
            validloader,
            num_clients=NUM_CLIENTS,
            num_classes=100,
            rounds=rounds,
            lr=l,  # current candidate value, not the whole `lr` list
            momentum=MOMENTUM,
            batchsize=BATCH_SIZE,
            wd=w,  # current candidate value, not the whole `wd` list
            C=FRACTION_CLIENTS,
            local_steps=LOCAL_STEPS,
            log_freq=LOG_FREQUENCY,
            detailed_print=True,
        )
        print(f"Validation accuracy: {val_accuracies[-1]} with lr: {l} and wd: {w}")
        # Rank configurations by the best validation accuracy seen in any round.
        max_val_accuracy = max(val_accuracies)
        if max_val_accuracy > best_val_accuracy:
            best_val_accuracy = max_val_accuracy
            best_setting = (l, w)
        results.append({
                'learning_rate': l,
                'weight_decay': w,
                'train_accuracies': train_accuracies,
                'train_losses': train_losses,
                'val_accuracies': val_accuracies,
                'val_losses': val_losses,
                'client_selection_count': client_selection_count
        })
print(f"Best setting: {best_setting} with validation accuracy: {best_val_accuracy}")
# TODO: filter `results` down to the entry matching `best_setting`.


# 2000 rounds using lr and wd found in the step before

In [None]:
# Final federated training run followed by evaluation on the test set.

# TODO: replace these placeholders with the best (lr, wd) pair found by the
# hyperparameter search; for now the defaults from the constants cell are used.
lr = LR
wd = WEIGHT_DECAY

# Start from a new model. Without this, `global_model` would still be the
# network left over from the last hyperparameter-tuning run, and the final
# training would continue from those weights.
global_model = LeNet5()

# Run Federated Learning
global_model, val_accuracies, val_losses, train_accuracies, train_losses, client_selection_count = train_federated(
    global_model,
    criterion,
    trainloader,
    validloader,
    num_clients=NUM_CLIENTS,
    num_classes=100,
    rounds=GLOBAL_ROUNDS,
    lr=lr,
    momentum=MOMENTUM,
    batchsize=BATCH_SIZE,
    wd=wd,
    C=FRACTION_CLIENTS,
    local_steps=LOCAL_STEPS,
    log_freq=LOG_FREQUENCY,
    detailed_print=True,
)

# Plot the client-selection counts returned by training.
plot_client_selection(client_selection_count, "clientDistribution_iid")

# Evaluate the trained global model on the test loader.
test_accuracy = test(global_model, testloader)
print(f"Test accuracy: {test_accuracy}")

# Plot training vs. validation accuracy and training vs. validation loss.
plot_metrics(train_accuracies, val_accuracies, train_losses, val_losses, "iid")

# Save the model for later use.
save_model(global_model, "iid")