## Importing Libraries

In [1]:
# Standard library imports
import os
import sys
import json
import random
from collections import OrderedDict

# Third-party library imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Google Colab specific imports
from google.colab import drive

# Set the working directory
DIR_DATA = '/content/'
os.chdir(DIR_DATA)


## Checkpoints

In [3]:
DIR_DATA = "./data"
CHECKPOINT_DIR = './checkpoints/'

os.makedirs(CHECKPOINT_DIR, exist_ok=True)

def save_checkpoint(model, optimizer, epoch, hyperparameters, subfolder=""):
    """Salva il checkpoint del modello e rimuove quello precedente."""
    subfolder_path = os.path.join(CHECKPOINT_DIR, subfolder)
    os.makedirs(subfolder_path, exist_ok=True)

    # File corrente e precedente
    filename = f"model_epoch_{epoch}_params_{hyperparameters}.pth"
    filepath = os.path.join(subfolder_path, filename)

    previous_filename = f"model_epoch_{epoch -1}_params_{hyperparameters}.pth"
    previous_filepath = os.path.join(subfolder_path, previous_filename)

    # Rimuove il checkpoint precedente
    if epoch > 1 and os.path.exists(previous_filepath):
        os.remove(previous_filepath)

    # Salva il nuovo checkpoint
    if optimizer is not None:
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),  # Salvataggio dello stato dell'ottimizzatore
            'epoch': epoch
        }, filepath)
    else:
        torch.save({
            'model_state_dict': model.state_dict(),
            'epoch': epoch
        }, filepath)
    print(f"Checkpoint salvato: {filepath}")


def load_checkpoint(model, optimizer, hyperparameters, subfolder=""):
    """Carica l'ultimo checkpoint disponibile basato sugli iperparametri."""
    subfolder_path = os.path.join(CHECKPOINT_DIR, subfolder)
    if not os.path.exists(subfolder_path):
        print("No checkpoint found, Starting now...")
        return 1  # Le epoche iniziano da 1

    # Cerca i file con gli iperparametri specificati
    files = [f for f in os.listdir(subfolder_path) if f"params_{hyperparameters}" in f]
    if files:
        # Trova il file con l'epoca più alta
        latest_file = max(files, key=lambda x: int(x.split('_')[2]))
        filepath = os.path.join(subfolder_path, latest_file)
        checkpoint = torch.load(filepath)

        model.load_state_dict(checkpoint['model_state_dict'])
        if optimizer is not None:
            optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
        print(f"Checkpoint found: Resume epoch {checkpoint['epoch'] + 1}")
        return checkpoint['epoch'] + 1

    print("No checkpoint found, Starting now...")
    return 1  # Le epoche iniziano da 1

## Shakespeare Dataset

In [58]:
class ShakespeareDataset(Dataset):
    def __init__(self, root, split, preprocess_params=None):
        """
        Args:
            root (str): Path to the dataset directory.
            split (str): Dataset split, either 'train' or 'test'.
            preprocess_params (dict, optional): Parameters for running preprocess.sh script. Keys include:
                - sharding (str): 'iid' or 'niid' for data partitioning.
                - iu (float): Fraction of users if i.i.d. sampling.
                - sf (float): Fraction of data to sample.
                - k (int): Minimum number of samples per user.
                - t (str): 'user' or 'sample' for train-test partition.
                - tf (float): Fraction of data in training set.
                - raw (bool): Include raw text data.
                - smplseed (int): Seed for sampling.
                - spltseed (int): Seed for splitting.
        """
        self.root = root
        self.split = split
        self.preprocess_params = preprocess_params or {}

        # Ensure the working directory is set to the dataset folder
        os.chdir(self.root)

        # Run preprocessing script if needed
        self._preprocess_data()

        # Load the dataset
        self.data = self._load_data()

    def _preprocess_data(self):
        """Runs preprocess.sh with the given parameters."""
        cmd = "bash preprocess.sh"

        if 'sharding' in self.preprocess_params:
            cmd += f" -s {self.preprocess_params['sharding']}"
        if 'iu' in self.preprocess_params:
            cmd += f" --iu {self.preprocess_params['iu']}"
        if 'sf' in self.preprocess_params:
            cmd += f" --sf {self.preprocess_params['sf']}"
        if 'k' in self.preprocess_params:
            cmd += f" -k {self.preprocess_params['k']}"
        if 't' in self.preprocess_params:
            cmd += f" -t {self.preprocess_params['t']}"
        if 'tf' in self.preprocess_params:
            cmd += f" --tf {self.preprocess_params['tf']}"
        if 'raw' in self.preprocess_params and self.preprocess_params['raw']:
            cmd += f" --raw"
        if 'smplseed' in self.preprocess_params:
            cmd += f" --smplseed {self.preprocess_params['smplseed']}"
        if 'spltseed' in self.preprocess_params:
            cmd += f" --spltseed {self.preprocess_params['spltseed']}"

        print(f"Running command: {cmd}")
        os.system(cmd)
        os.chdir(DIR_DATA)


    def _load_data(self):
        """Loads data from the JSON files into a pandas DataFrame."""
        file_path = os.path.join(self.root, "data/all_data/all_data.json")

        with open(file_path, 'r') as f:
            data = json.load(f)

        # Convert JSON structure to a pandas DataFrame
        records = []
        for user, user_data in data['user_data'].items():
            for x, y in zip(user_data['x'], user_data['y']):
                records.append({
                    'user': user,
                    'input': x,
                    'target': y
                })

        return pd.DataFrame(records)

    def get_dataframe(self):
        """Returns the dataset as a pandas DataFrame."""
        return self.data

    def __len__(self):
        """Returns the number of samples in the dataset."""
        return len(self.data)

    def __getitem__(self, idx):
        """Retrieves a single sample by index."""
        return self.data.iloc[idx]

In [60]:
if not os.path.isdir('./leaf'):
  !git clone https://github.com/maxfra01/leaf.git
os.chdir("/content/")
preprocess_params = {
        'sharding': 'niid',
        'sf': 0.2,
        'k': 0,
        't': 'sample',
        'tf': 0.8,
        'raw': True
    }

dataset = ShakespeareDataset(root="leaf/data/shakespeare/", split="train", preprocess_params=preprocess_params)
print(dataset.get_dataframe().head())


Running command: bash preprocess.sh -s niid --sf 0.2 -k 0 -t sample --tf 0.8 --raw


FileNotFoundError: [Errno 2] No such file or directory: 'leaf/data/shakespeare/data/all_data/all_data.json'

## Shakespeare Model Architecture

In [2]:
class ShakespeareRNN(nn.Module):
    def __init__(self, vocab_size, embed_dim, hidden_size, num_layers):
        super(ShakespeareRNN, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.lstm = nn.LSTM(embed_dim, hidden_size, num_layers, batch_first=True, dropout=0.2)
        self.fc = nn.Linear(hidden_size, vocab_size)

    def forward(self, x, hidden):
        x = self.embedding(x)  # Embed input
        out, hidden = self.lstm(x, hidden)  # Pass through LSTM layers
        out = self.fc(out)  # Fully connected layer for output
        return out, hidden

## Centralized training functions

In [7]:
def train_model(model, train_loader, test_loader, optimizer, scheduler, criterion, epochs, hyperparameters):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    # Carica checkpoint se esiste
    start_epoch = load_checkpoint(model, optimizer, hyperparameters,"Centralized/")

    train_losses, test_losses, test_accuracies = [], [], []

    for epoch in range(start_epoch, epochs):
        model.train()
        epoch_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Step the scheduler
        scheduler.step()

        # Salva checkpoint
        save_checkpoint(model, optimizer, epoch, hyperparameters,"Centralized/")

        # Evaluate on test set
        test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)
        train_losses.append(epoch_loss / len(train_loader))
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {epoch_loss:.4f}, "
              f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    return train_losses, test_losses, test_accuracies

def evaluate_model(model, test_loader, criterion, device):
    model.eval()
    total_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            total_loss += loss.item()
            _, predicted = outputs.max(1)
            correct += (predicted == targets).sum().item()
            total += targets.size(0)

    return total_loss / len(test_loader), correct / total


In [6]:
## Train the centralized model

## Federate Learning classes

In [8]:
def generate_skewed_probabilities(num_clients, gamma):
    """It generates skewed probabilities for clients using a Dirichlet distribution."""
    probabilities = np.random.dirichlet([gamma] * num_clients)
    return probabilities


class Client:

  def __init__(self, model, client_id, data, optimizer_params):
    self.client_id = client_id
    self.data = data
    self.model = model
    self.optimizer_params = optimizer_params

  def train(self, global_weights, epochs, batch_size):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model.to(device)
    self.model.load_state_dict(global_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        self.model.parameters(),
        lr=self.optimizer_params['lr'],
        momentum=self.optimizer_params['momentum'],
        weight_decay=self.optimizer_params['weight_decay']
        )
    trainloader = DataLoader(self.data, batch_size=batch_size, shuffle=True)
    for epoch in range(epochs):
      #print(f"Client {self.client_id}, Epoch {epoch+1}/{epochs}")
      for batch_idx, (inputs, targets) in enumerate(trainloader):
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        outputs = self.model(inputs)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
    return self.model.state_dict()



class Server:
  def __init__(self, model, clients, test_data):
    self.model = model
    self.clients = clients
    self.test_data = test_data
    self.round_losses = []
    self.round_accuracies = []

  def federated_averaging(self, epochs, batch_size, num_rounds, fraction_fit, skewness=None, hyperparameters = None, fedOptimizer=None):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.model.to(device)

    # Carica il checkpoint se esiste
    if skewness is  None:
      start_epoch = load_checkpoint(self.model,optimizer=None,hyperparameters=hyperparameters, subfolder="Federated_Uniform/")
    else:
      start_epoch = load_checkpoint(self.model,optimizer=None,hyperparameters=hyperparameters, subfolder="Federated_Skewed/")


    # Initialize variables for FedOptimizers
    if fedOptimizer in {"FedAdaGrad", "FedYogi", "FedAdam"}:
        optimizer_state = {
            "m": {key: torch.zeros_like(value, dtype=torch.float32) for key, value in self.model.state_dict().items()},
            "v": {key: torch.zeros_like(value, dtype=torch.float32) for key, value in self.model.state_dict().items()},
        }
        beta1 = 0.9  # Momentum parameter for Adam-based optimizers
        beta2 = 0.999  # RMS parameter for Adam-based optimizers
        lr = 0.01  # Learning rate
        eps = 1e-8  # Small constant to prevent division by zero

    for round in range(start_epoch, num_rounds):
        print(f"Round {round + 1}/{num_rounds}")

        if skewness is not None:
            probabilities = generate_skewed_probabilities(len(self.clients), skewness)
            selected_clients = np.random.choice(self.clients, size=max(1, int(fraction_fit * len(self.clients))),
                                                replace=False, p=probabilities)
        else:
            selected_clients = np.random.choice(self.clients, size=max(1, int(fraction_fit * len(self.clients))),
                                                replace=False)

        global_weights = self.model.state_dict()

        # Simulate parallel client training
        client_weights = {}
        for client in selected_clients:
            client_weights[client.client_id] = client.train(global_weights, epochs, batch_size)

        # Aggregate client updates
        total_data_size = sum([len(client.data) for client in selected_clients])
        aggregated_updates = {key: torch.zeros_like(value, dtype=torch.float32) for key, value in global_weights.items()}

        for client in selected_clients:
            scaling_factor = len(client.data) / total_data_size
            for key in aggregated_updates.keys():
                aggregated_updates[key] += scaling_factor * (client_weights[client.client_id][key] - global_weights[key])

        # Apply selected FedOptimizer
        if fedOptimizer == "FedAdaGrad":
            for key in global_weights.keys():
                optimizer_state["v"][key] += aggregated_updates[key] ** 2
                global_weights[key] += lr * aggregated_updates[key] / (torch.sqrt(optimizer_state["v"][key]) + eps)

        elif fedOptimizer == "FedYogi":
            for key in global_weights.keys():
                optimizer_state["v"][key] -= (1 - beta2) * aggregated_updates[key] ** 2 * torch.sign(
                    optimizer_state["v"][key] - aggregated_updates[key] ** 2)
                global_weights[key] += lr * aggregated_updates[key] / (torch.sqrt(optimizer_state["v"][key]) + eps)

        elif fedOptimizer == "FedAdam":
            for key in global_weights.keys():
                optimizer_state["m"][key] = beta1 * optimizer_state["m"][key] + (1 - beta1) * aggregated_updates[key]
                optimizer_state["v"][key] = beta2 * optimizer_state["v"][key] + (1 - beta2) * aggregated_updates[key] ** 2
                m_hat = optimizer_state["m"][key] / (1 - beta1 ** (round + 1))
                v_hat = optimizer_state["v"][key] / (1 - beta2 ** (round + 1))
                global_weights[key] += lr * m_hat / (torch.sqrt(v_hat) + eps)

        else:  # Default to FedAvg
            for key in global_weights.keys():
                global_weights[key] += aggregated_updates[key]

        # Update global model weights
        self.model.load_state_dict(global_weights)

        if skewness is  None:
          save_checkpoint(self.model, None, round , hyperparameters, "Federated_Uniform/")
        else:
          save_checkpoint(self.model, None, round , hyperparameters, "Federated_Skewed/")


        # Evaluate global model
        loss, accuracy = evaluate_model(self.model, DataLoader(self.test_data, batch_size=batch_size, shuffle=True),
                                        nn.CrossEntropyLoss(), device)
        self.round_losses.append(loss)
        self.round_accuracies.append(accuracy)
        print(f"Round {round + 1}/{num_rounds} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

    # Plot results
    plt.figure(figsize=(12, 5))
    plt.subplot(1, 2, 1)
    plt.plot(self.round_losses, label='Test Loss')
    plt.xlabel('Round')
    plt.ylabel('Loss')
    plt.legend()

    plt.subplot(1, 2, 2)
    plt.plot(self.round_accuracies, label='Test Accuracy')
    plt.xlabel('Round')
    plt.ylabel('Accuracy')
    plt.legend()

    plt.show()

## Solver