## Importing libraries

In [None]:
# Standard library imports
import copy
import json
import os
import random
import sys
from collections import OrderedDict

# Third-party library imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.utils.data import Dataset, DataLoader, Subset
import torchvision
import torchvision.transforms as transforms
from torchvision import datasets, models
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# Google Colab specific imports
from google.colab import drive

# Set the working directory.
# DIR_DATA is also passed as the dataset root when the CIFAR100Dataset
# objects are built further down, so downloads land in this directory.
# NOTE(review): '/content/' is presumably the Colab runtime's scratch
# directory -- confirm before running elsewhere.
DIR_DATA = '/content/'
os.chdir(DIR_DATA)


# CIFAR100 Dataset

In [None]:
class CIFAR100Dataset(Dataset):
    """CIFAR-100 held in a DataFrame, optionally partitioned across clients.

    Every sample is stored as one row of ``self.data`` with columns
    ``image`` and ``label``. When the training split is sharded, an extra
    ``client_id`` column records which client owns each row.
    """

    def __init__(self, root, split='train', transform=None, sharding=None, K=10, Nc=2):
        """
        CIFAR-100 Dataset with IID and non-IID sharding.

        Args:
        - root (str): Directory to store the dataset.
        - split (str): 'train' selects the training split; any other value
          selects the test split.
        - transform (callable): Transformations applied to the images.
        - sharding (str): 'iid' or 'niid'. Only applied to the 'train' split;
          a falsy value disables sharding.
        - K (int): Number of clients for the sharding.
        - Nc (int): Number of classes per client (used for non-iid sharding).

        Raises:
        - ValueError: If sharding is requested with an unknown mode, a
          non-positive K / Nc, or no data.
        """
        self.root = root
        self.split = split
        self.transform = transform
        self.sharding = sharding
        self.K = K
        self.Nc = Nc

        # Default transformations if none are provided
        if self.transform is None:
            self.transform = transforms.Compose([
                transforms.Resize((224, 224)),  # Resize CIFAR-100 (32x32) to ResNet's input size (224x224)
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.5071, 0.4865, 0.4409], std=[0.2673, 0.2564, 0.2762]),  # CIFAR-100 normalization
            ])

        dataset = datasets.CIFAR100(
            root=self.root,
            train=(self.split == 'train'),
            download=True
        )

        # Index each sample exactly once: every dataset[i] call builds a new
        # image object, so reading images and labels in two separate passes
        # doubles the loading work.
        samples = [dataset[i] for i in range(len(dataset))]
        self.data = pd.DataFrame({
            "image": [image for image, _ in samples],
            "label": [label for _, label in samples]
        })

        if self.split == 'train' and self.sharding:
            self.data = self._apply_sharding()

    def _apply_sharding(self):
        """Apply IID or non-IID sharding to the training data."""
        if self.sharding == 'iid':
            return self._iid_sharding()
        elif self.sharding == 'niid':
            return self._non_iid_sharding()
        else:
            raise ValueError("Sharding must be 'iid' or 'niid'.")

    def _iid_sharding(self):
        """Split data IID: uniformly distribute samples across K clients.

        Returns a re-indexed DataFrame in which every original row appears
        exactly once, tagged with its ``client_id``.
        """
        if self.K < 1:
            raise ValueError("K must be a positive integer.")

        indices = self.data.index.tolist()
        random.shuffle(indices)

        # Strided slices over the shuffled order give each client an
        # (almost) equal share with no overlap.
        client_indices = [indices[i::self.K] for i in range(self.K)]

        data_split = []
        for client_id, idxs in enumerate(client_indices):
            client_data = self.data.loc[idxs].copy()
            client_data['client_id'] = client_id
            data_split.append(client_data)

        return pd.concat(data_split, ignore_index=True)

    def _non_iid_sharding(self):
        """Split data non-IID: assign Nc classes per client.

        Classes are shuffled and cut into consecutive groups of Nc (the last
        group may be shorter). Client ``i`` draws ``len(data) // K`` rows,
        with replacement, from the rows of its group.

        When there are fewer groups than clients, groups are reused
        cyclically so that all K client ids are present in the result;
        previously the surplus clients were silently dropped. Clients that
        share a group use different sampling seeds so their shards differ.
        """
        if self.K < 1 or self.Nc < 1:
            raise ValueError("K and Nc must be positive integers.")

        unique_classes = self.data['label'].unique().tolist()
        if not unique_classes:
            raise ValueError("Cannot shard an empty dataset.")
        random.shuffle(unique_classes)

        # Divide classes into groups of Nc
        class_groups = [unique_classes[i:i + self.Nc] for i in range(0, len(unique_classes), self.Nc)]
        num_groups = len(class_groups)

        # Ensure approximately equal samples per client
        samples_per_client = len(self.data) // self.K

        data_split = []
        for client_id in range(self.K):
            class_group = class_groups[client_id % num_groups]
            client_data = self.data[self.data['label'].isin(class_group)].copy()
            client_data['client_id'] = client_id

            # The seed stays 42 for the first pass over the groups and only
            # changes for clients that reuse a group.
            client_data = client_data.sample(
                n=samples_per_client,
                replace=True,
                random_state=42 + client_id // num_groups
            )
            data_split.append(client_data)

        return pd.concat(data_split, ignore_index=True)

    def __getitem__(self, index):
        """Return the (image, label) pair stored at positional ``index``."""
        row = self.data.iloc[index]
        image, label = row['image'], row['label']

        if self.transform:
            image = self.transform(image)
        return image, label

    def __len__(self):
        """Return the number of rows currently held in ``self.data``."""
        return len(self.data)


In [None]:
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps shared by both pipelines."""
    return [
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.5071, 0.4867, 0.4408],
            std=[0.2675, 0.2565, 0.2761]
        ),
    ]


# Training pipeline: random 32x32 crop (after 4 pixels of padding) and a
# random horizontal flip, followed by tensor conversion and normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    + _tensor_and_normalize()
)

# Evaluation pipeline: tensor conversion and normalization only.
test_transform = transforms.Compose(_tensor_and_normalize())

# Centralized training of CIFAR100

In [None]:
def train_model(model, train_loader, test_loader, optimizer, scheduler, criterion, epochs):
    """Train ``model`` and evaluate it on ``test_loader`` after every epoch.

    Args:
    - model: Module to optimize; it is moved to CUDA when available, else CPU.
    - train_loader: Iterable of (inputs, targets) batches; must support len().
    - test_loader: Loader handed to ``evaluate_model`` after each epoch.
    - optimizer: Optimizer stepped once per training batch.
    - scheduler: Learning-rate scheduler stepped once per epoch, or None.
    - criterion: Loss function called as ``criterion(outputs, targets)``.
    - epochs (int): Number of passes over ``train_loader``.

    Returns:
    - tuple: (train_losses, test_losses, test_accuracies), three lists with
      one entry per epoch. Train losses are per-batch means.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)

    train_losses, test_losses, test_accuracies = [], [], []

    for epoch in range(epochs):
        # evaluate_model() leaves the model in eval mode, so switch back
        # at the start of every epoch.
        model.train()
        epoch_loss = 0
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()

        # Step the scheduler
        if scheduler is not None:
            scheduler.step()

        # Evaluate on test set
        test_loss, test_accuracy = evaluate_model(model, test_loader, criterion, device)

        # Log the same per-batch mean that is stored in the history; the
        # raw sum grows with the number of batches and is not comparable to
        # the test loss printed next to it.
        avg_train_loss = epoch_loss / len(train_loader)
        train_losses.append(avg_train_loss)
        test_losses.append(test_loss)
        test_accuracies.append(test_accuracy)

        print(f"Epoch {epoch+1}/{epochs}, Train Loss: {avg_train_loss:.4f}, "
              f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")

    return train_losses, test_losses, test_accuracies

def evaluate_model(model, test_loader, criterion, device):
    """Compute the mean per-batch loss and the accuracy of ``model``.

    The model is put in eval mode and no gradients are tracked.

    Args:
    - model: Module called as ``model(inputs)``.
    - test_loader: Iterable of (inputs, targets) batches; must support len().
    - criterion: Loss function called as ``criterion(outputs, targets)``.
    - device: Device the batches are moved to before the forward pass.

    Returns:
    - tuple: (sum of batch losses / number of batches,
      correct predictions / number of targets).
    """
    model.eval()
    running_loss, n_correct, n_seen = 0, 0, 0

    with torch.no_grad():
        for batch_inputs, batch_targets in test_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            logits = model(batch_inputs)
            running_loss += criterion(logits, batch_targets).item()

            # Predicted class = index of the largest value along dim 1.
            predictions = logits.max(1)[1]
            n_correct += predictions.eq(batch_targets).sum().item()
            n_seen += batch_targets.size(0)

    mean_batch_loss = running_loss / len(test_loader)
    accuracy = n_correct / n_seen
    return mean_batch_loss, accuracy


In [None]:
# Hyperparameters of the centralized baseline
BATCH_SIZE = 96
LEARNING_RATE = 0.01
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-3
EPOCHS = 50

# No transform is passed, so both datasets use CIFAR100Dataset's default
# pipeline.
train_dataset_big = CIFAR100Dataset(DIR_DATA, split='train')
test_dataset = CIFAR100Dataset(DIR_DATA, split='test')

# Stratified 80/20 split of the training set into train / validation indices
indexes = range(0, len(train_dataset_big))
splitting = train_test_split(indexes, train_size = 0.8, random_state = 42, stratify = train_dataset_big.data["label"], shuffle = True)
train_indexes = splitting[0]
val_indexes = splitting[1]

train_dataset = Subset(train_dataset_big, train_indexes)
val_dataset = Subset(train_dataset_big, val_indexes)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True, num_workers=2, drop_last=True)
# NOTE(review): val_dataloader is built but not used in this cell; the
# per-epoch evaluation below runs on test_dataloader -- confirm intent.
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

model_cifar = models.resnet18(pretrained=True)

# Freeze the backbone ...
for param in model_cifar.parameters():
    param.requires_grad = False

# ... and replace the classifier with a trainable 100-way head.
model_cifar.fc = nn.Linear(model_cifar.fc.in_features, 100)

for param in model_cifar.fc.parameters():
    param.requires_grad = True

# Only the new head is trainable, so only its parameters are given to the
# optimizer; the frozen backbone parameters never receive gradients.
optimizer_cifar = optim.SGD(model_cifar.fc.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# NOTE(review): T_max=200 is four times EPOCHS, so only the first quarter of
# the cosine half-period is traversed -- confirm this is intended.
scheduler_cifar = CosineAnnealingLR(optimizer_cifar, T_max=200)

criterion = nn.CrossEntropyLoss()


# Use the EPOCHS constant rather than a second hard-coded 50, so that
# changing the constant actually changes the run length.
train_losses_cifar, test_losses_cifar, test_accuracies_cifar = train_model(
    model=model_cifar,
    train_loader=train_dataloader,
    test_loader=test_dataloader,
    optimizer=optimizer_cifar,
    scheduler=scheduler_cifar,
    criterion=criterion,
    epochs=EPOCHS
)

# Test loss (left) and test accuracy (right) per epoch
plt.figure(figsize=(12, 5))

plt.subplot(1, 2, 1)
plt.plot(test_losses_cifar, label='CIFAR-100 Test Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(test_accuracies_cifar, label='CIFAR-100 Test Accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()

## FL Baseline

In [None]:
def generate_skewed_probabilities(num_clients, gamma):
    """Draw one probability vector over clients from a Dirichlet distribution.

    Every client gets the same concentration value ``gamma``. The draw uses
    NumPy's global random state.

    Args:
    - num_clients (int): Length of the returned vector.
    - gamma (float): Concentration parameter repeated for each client.

    Returns:
    - numpy.ndarray: The sampled vector of ``num_clients`` probabilities.
    """
    concentration = [gamma for _ in range(num_clients)]
    return np.random.dirichlet(concentration)


class Client:
    """A federated-learning participant that trains on its own data."""

    def __init__(self, model, client_id, data, optimizer_params):
        """
        Args:
        - model: Module used as the architecture template for local training.
          It may be shared with other clients or the server; ``train`` never
          modifies it.
        - client_id: Identifier used in log messages.
        - data: Dataset of (input, target) pairs fed to a DataLoader.
        - optimizer_params (dict): Must provide 'lr', 'momentum' and
          'weight_decay' for the local SGD optimizer.
        """
        self.client_id = client_id
        self.data = data
        self.model = model
        self.optimizer_params = optimizer_params

    def train(self, global_weights, epochs, batch_size):
        """Run local SGD starting from ``global_weights``.

        Training happens on a private deep copy of ``self.model``. The
        state_dict of a module references its live parameter tensors, so
        training ``self.model`` in place would also change
        ``global_weights`` (when it came from the same module) and every
        state_dict previously returned by clients sharing that module.

        Args:
        - global_weights (dict): State dict loaded before training.
        - epochs (int): Number of local passes over ``self.data``.
        - batch_size (int): Mini-batch size of the local DataLoader.

        Returns:
        - dict: State dict of the locally trained copy, independent of
          ``self.model`` and of ``global_weights``.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        local_model = copy.deepcopy(self.model).to(device)
        local_model.load_state_dict(global_weights)
        # The template may have been left in eval mode by an earlier
        # evaluation; local updates must run in training mode.
        local_model.train()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(
            local_model.parameters(),
            lr=self.optimizer_params['lr'],
            momentum=self.optimizer_params['momentum'],
            weight_decay=self.optimizer_params['weight_decay']
        )
        trainloader = DataLoader(self.data, batch_size=batch_size, shuffle=True)

        for epoch in range(epochs):
            print(f"Client {self.client_id}, Epoch {epoch+1}/{epochs}")
            for inputs, targets in trainloader:
                inputs, targets = inputs.to(device), targets.to(device)
                optimizer.zero_grad()
                outputs = local_model(inputs)
                loss = criterion(outputs, targets)
                loss.backward()
                optimizer.step()

        return local_model.state_dict()




In [None]:
class Server:
    """Runs federated averaging over a pool of clients and tracks test metrics."""

    def __init__(self, model, clients, test_data):
        """
        Args:
        - model: Global module that is updated after every round.
        - clients (list): Objects exposing ``client_id``, ``data`` and
          ``train(global_weights, epochs, batch_size)``.
        - test_data: Dataset used to evaluate the global model each round.
        """
        self.model = model
        self.clients = clients
        self.test_data = test_data
        self.round_losses = []
        self.round_accuracies = []

    @staticmethod
    def _snapshot(state):
        """Return a copy of a state dict that shares no memory with its source."""
        return {key: value.detach().clone() for key, value in state.items()}

    @staticmethod
    def _weighted_average(client_states, client_sizes):
        """Average state dicts, weighting each by its share of the total size.

        The result is accumulated in float32 for every key.
        """
        total_data_size = sum(client_sizes)
        averaged = {
            key: torch.zeros_like(value, dtype=torch.float32)
            for key, value in client_states[0].items()
        }
        for state, size in zip(client_states, client_sizes):
            scaling_factor = size / total_data_size
            for key in averaged:
                averaged[key] += scaling_factor * state[key]
        return averaged

    def _plot_history(self):
        """Plot the recorded per-round test loss and accuracy side by side."""
        plt.figure(figsize=(12,5))
        plt.subplot(1, 2, 1)
        plt.plot(self.round_losses, label='CIFAR-100 Test Loss')
        plt.xlabel('Round')
        plt.ylabel('Loss')
        plt.legend()

        plt.subplot(1, 2, 2)
        plt.plot(self.round_accuracies, label='CIFAR-100 Test Accuracy')
        plt.xlabel('Round')
        plt.ylabel('Accuracy')
        plt.legend()

        plt.show()

    def federated_averaging(self, epochs, batch_size, num_rounds, fraction_fit, skewness = None):
        """Run ``num_rounds`` rounds of federated averaging, then plot the metrics.

        Each round samples ``max(1, int(fraction_fit * len(clients)))``
        clients without replacement, trains them from the current global
        weights, averages their weights in proportion to ``len(client.data)``
        and evaluates the new global model on the test data.

        Args:
        - epochs (int): Local epochs per selected client.
        - batch_size (int): Batch size for local training and evaluation.
        - num_rounds (int): Number of communication rounds.
        - fraction_fit (float): Fraction of clients selected per round.
        - skewness (float): If given, selection probabilities are redrawn
          every round from ``generate_skewed_probabilities``; otherwise
          clients are selected uniformly.
        """
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(device)

        num_selected = max(1, int(fraction_fit * len(self.clients)))
        criterion = nn.CrossEntropyLoss()
        # Built once and unshuffled: the reported loss is a mean over batch
        # losses, so a shuffled loader with a smaller last batch would make
        # it vary between evaluations of identical weights.
        test_loader = DataLoader(self.test_data, batch_size=batch_size, shuffle=False)

        for round_idx in range(num_rounds):
            print(f"Round {round_idx+1}/{num_rounds}")

            probabilities = None
            if skewness is not None:
                probabilities = generate_skewed_probabilities(len(self.clients), skewness)
            # Sample positions rather than the objects themselves so NumPy
            # never has to coerce client instances into an array.
            selected_positions = np.random.choice(
                len(self.clients), size=num_selected, replace=False, p=probabilities
            )
            selected_clients = [self.clients[position] for position in selected_positions]

            # state_dict() returns references to the live parameters. If a
            # client trains the very module the server holds, an unsnapshotted
            # dict would drift with it and later clients would start from the
            # previous client's weights instead of the global ones.
            global_weights = self._snapshot(self.model.state_dict())

            # Simulating parallel clients training
            client_states, client_sizes = [], []
            for client in selected_clients:
                local_state = client.train(global_weights, epochs, batch_size)
                # Copy immediately: the returned dict may alias a module that
                # the next client is about to overwrite.
                client_states.append(self._snapshot(local_state))
                client_sizes.append(len(client.data))

            # Update global model weights
            self.model.load_state_dict(self._weighted_average(client_states, client_sizes))

            # Evaluate global model
            loss, accuracy = evaluate_model(self.model, test_loader, criterion, device)
            self.round_losses.append(loss)
            self.round_accuracies.append(accuracy)
            print(f"Round {round_idx+1}/{num_rounds} - Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")

        self._plot_history()



In [None]:
# Federated-learning hyperparameters
K = 100            # number of clients
LOCAL_EPOCHS = 4   # J
ROUNDS = 2000
C = 0.1            # passed as fraction_fit below
BATCH_SIZE = 16
LR = 0.01
MOMENTUM = 0.9
WEIGHT_DECAY = 1e-3

# Settings each client uses to build its local SGD optimizer
optimizer_params = dict(lr=LR, momentum=MOMENTUM, weight_decay=WEIGHT_DECAY)

# Pretrained ResNet-18 with a fresh 100-way classification head
model_cifar = models.resnet18(pretrained=True)
model_cifar.fc = nn.Linear(model_cifar.fc.in_features, 100)

# Training data sharded IID across the K clients; test data left whole
train_dataset = CIFAR100Dataset(DIR_DATA, split='train', sharding='iid', K=K)
test_dataset = CIFAR100Dataset(DIR_DATA, split='test')

# One Client per shard: each gets the rows whose client_id matches its id
client_column = train_dataset.data["client_id"]
clients = [
    Client(
        model_cifar,
        client_id,
        Subset(train_dataset, train_dataset.data[client_column == client_id].index),
        optimizer_params,
    )
    for client_id in range(K)
]

server_uniform = Server(model_cifar, clients, test_dataset)

# No skewness argument, so clients are selected uniformly each round
server_uniform.federated_averaging(
    epochs=LOCAL_EPOCHS,
    batch_size=BATCH_SIZE,
    num_rounds=ROUNDS,
    fraction_fit=C,
)

#server_skewed = Server(model_cifar, clients, test_dataset)

#server_skewed.federated_averaging(epochs=LOCAL_EPOCHS, batch_size=BATCH_SIZE, num_rounds=ROUNDS, fraction_fit=C, skewness=0.5)