In [None]:
import torch
import numpy as np
import pandas as pd
from torchvision import datasets, transforms
import sklearn
from torch.utils.data import DataLoader, random_split, ConcatDataset, Subset
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import os
from google.colab import drive
import concurrent.futures
import time
import random
import csv

In [None]:
# Mount Google Drive at /content/drive for persistent storage.
# `drive` comes from `google.colab` (imported in the first cell); the metrics CSV
# written by federated_learning lives under this mount point.
drive.mount('/content/drive')

In [None]:
# Experiment configuration.
# NOTE(review): none of these names is referenced by the other cells visible in
# this notebook (the driver cell passes literal values to federated_learning) —
# confirm whether they are still needed.
NUM_NODES = 4
NUM_GLOBAL_EPOCHS = 5
NUM_LOCAL_EPOCHS = 5
train_size, test_size = 0.9, 0.1

# Select the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import pandas as pd

# Seed every RNG the simulation draws from, so that a fresh kernel reproduces
# the same sequence of experiments:
#   * torch  - layer initialisation, random_split and DataLoader shuffling
#   * random - the per-client partition sizes chosen in federated_learning
#              (the stdlib module is imported in the first cell)
SEED = 42
torch.manual_seed(SEED)
random.seed(SEED)

# Define a simple neural network model
class SimpleNN(nn.Module):
    """Fully connected classifier: 784 inputs -> 128 hidden units (ReLU) -> 10 outputs.

    ``forward`` returns the raw output of the second linear layer; no softmax
    is applied.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(28 * 28, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Reshape ``x`` into rows of 784 values and pass it through both layers."""
        flat = x.view(-1, self.fc1.in_features)
        hidden = self.fc1(flat).relu()
        return self.fc2(hidden)

# Client class for federated learning
class Client:
    """One federated participant: a private dataset plus its own SimpleNN instance."""

    def __init__(self, data):
        """Keep ``data`` for local training and create a freshly initialised SimpleNN."""
        self.data = data
        self.model = SimpleNN()

    def train(self, num_epochs=2):
        """Run ``num_epochs`` passes of SGD (lr=0.01) over ``self.data``.

        Batches hold 32 samples and are reshuffled by the DataLoader on every
        pass; the loss is cross-entropy.
        """
        loader = DataLoader(self.data, batch_size=32, shuffle=True)
        sgd = optim.SGD(self.model.parameters(), lr=0.01)
        loss_fn = nn.CrossEntropyLoss()

        self.model.train()
        for _ in range(num_epochs):
            for batch_x, batch_y in loader:
                sgd.zero_grad()
                batch_loss = loss_fn(self.model(batch_x), batch_y)
                batch_loss.backward()
                sgd.step()

    def get_weights(self):
        """Return the local model's ``state_dict``."""
        return self.model.state_dict()

    def set_weights(self, weights):
        """Load ``weights`` (a ``state_dict``) into the local model."""
        self.model.load_state_dict(weights)

    def evaluate(self, test_loader):
        """Score the local model on every batch of ``test_loader``.

        Returns ``(accuracy, precision, recall, f1)``; precision, recall and F1
        are macro-averaged with ``zero_division=0``.
        """
        self.model.eval()
        targets, predictions = [], []
        with torch.no_grad():
            for batch_x, batch_y in test_loader:
                # Index of the largest output per sample is the predicted class.
                top_class = self.model(batch_x).max(dim=1).indices
                targets.extend(batch_y.cpu().numpy())
                predictions.extend(top_class.cpu().numpy())

        macro = dict(average='macro', zero_division=0)
        return (
            accuracy_score(targets, predictions),
            precision_score(targets, predictions, **macro),
            recall_score(targets, predictions, **macro),
            f1_score(targets, predictions, **macro),
        )

# Simulate federated learning
# Default destination of the metrics rows (inside the mounted Google Drive).
_DEFAULT_METRICS_CSV = '/content/drive/My Drive/Swarm_Learning/federated_learning_MNIST.csv'


def _random_partition_sizes(total_size, num_clients):
    """Return ``num_clients`` random sizes, each >= 1, that sum to ``total_size``."""
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")
    if num_clients > total_size:
        raise ValueError(
            f"num_clients ({num_clients}) exceeds the number of samples ({total_size})"
        )

    # Reserve one sample per client, then distribute the rest at random.
    remaining_size = total_size - num_clients
    sizes = [random.randint(0, remaining_size // num_clients) for _ in range(num_clients - 1)]
    # The last client absorbs whatever is left so the sizes sum to total_size.
    sizes.append(remaining_size - sum(sizes))
    # Hand the reserved sample back to every client.
    return [size + 1 for size in sizes]


# Simulate federated learning
def federated_learning(num_clients=5, num_epochs=5, global_rounds=5,
                       csv_path=_DEFAULT_METRICS_CSV):
    """Simulate federated averaging on MNIST and append the final metrics to a CSV.

    The MNIST training set is split into ``num_clients`` randomly sized,
    non-empty parts. In every global round each client loads the current global
    weights and trains locally; the global model then becomes the element-wise,
    unweighted mean of the client weights (client dataset size is not taken
    into account). The global model is evaluated once, after the last round.

    Args:
        num_clients: Number of simulated clients (at least 1).
        num_epochs: Local training epochs per client per global round.
        global_rounds: Number of train-then-average rounds.
        csv_path: File the metrics row is appended to. The row has no header
            and holds, in order: round, accuracy, precision, recall, F1 score.

    Raises:
        ValueError: If ``num_clients`` is below 1 or exceeds the dataset size.
    """
    if num_clients < 1:
        raise ValueError(f"num_clients must be at least 1, got {num_clients}")

    # Make sure the destination exists *before* training, so a missing folder
    # is reported up front instead of after all rounds have been computed.
    output_dir = os.path.dirname(csv_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # Load MNIST dataset
    transform = transforms.Compose([transforms.ToTensor()])
    dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    test_set = datasets.MNIST(root='./data', train=False, download=True, transform=transform)
    test_loader = DataLoader(test_set, batch_size=32, shuffle=False)

    # Split the training data into randomly sized, non-empty client shards
    client_sizes = _random_partition_sizes(len(dataset), num_clients)
    client_datasets = random_split(dataset, client_sizes)

    # Create client instances
    clients = [Client(data) for data in client_datasets]

    # Initialize global model
    global_model = SimpleNN()

    for round_idx in range(global_rounds):
        print(f"\nGlobal Round {round_idx + 1}/{global_rounds}")

        # Local training for each client, starting from the global weights
        for client in clients:
            client.set_weights(global_model.state_dict())
            client.train(num_epochs=num_epochs)

        # Aggregate: fetch each client's state_dict once, then average per key
        client_states = [client.get_weights() for client in clients]
        global_weights = global_model.state_dict()
        for key in global_weights.keys():
            global_weights[key] = torch.mean(
                torch.stack([state[key] for state in client_states]), dim=0
            )
        global_model.load_state_dict(global_weights)

    # Evaluate the global model once, after the final round
    accuracy, precision, recall, f1 = evaluate_global_model(global_model, test_loader)
    metrics_df = pd.DataFrame([{
        'Round': global_rounds,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1 Score': f1
    }])

    # Append the metrics row to the CSV file (no header, no index column)
    metrics_df.to_csv(csv_path, mode='a', header=False, index=False)
    print(f"Metrics saved to '{csv_path}'.")

def evaluate_global_model(model, test_loader):
    """Score ``model`` on every batch of ``test_loader``.

    The model is switched to eval mode and run with gradients disabled; the
    predicted class of a sample is the index of its largest output.

    Returns:
        ``(accuracy, precision, recall, f1)``. Precision, recall and F1 are
        macro-averaged with ``zero_division=0``.
    """
    model.eval()
    targets, predictions = [], []
    with torch.no_grad():
        for batch_x, batch_y in test_loader:
            top_class = model(batch_x).max(dim=1).indices
            targets.extend(batch_y.cpu().numpy())
            predictions.extend(top_class.cpu().numpy())

    # Accuracy takes no averaging options; the other three share the same ones.
    macro_kwargs = {'average': 'macro', 'zero_division': 0}
    scores = [accuracy_score(targets, predictions)]
    for metric in (precision_score, recall_score, f1_score):
        scores.append(metric(targets, predictions, **macro_kwargs))
    return tuple(scores)


In [None]:
# Run the federated learning simulation 30 times. Every call builds a fresh
# global model, trains it, and appends one row of metrics to the CSV on Drive.
for i in range(30):
  federated_learning(num_clients=5, num_epochs=5, global_rounds=5)