<a href="https://colab.research.google.com/github/thegith45/Decentralized-learning/blob/main/HFL_VFL.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Part 1: Setup and Utilities
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
import numpy as np
import random

# Fix random seeds for reproducibility
# The same seed is applied to Python's `random`, NumPy's global RNG and
# PyTorch (CPU and CUDA). The partition helpers in this file draw from
# NumPy's global RNG, so this seeding also fixes the client splits.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)

# Use GPU if available
# `device` is a module-level global read by local_train() and evaluate()
# when they move batches onto the compute device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)


Using device: cuda


In [None]:
# Part 2: Define Model
class SimpleCNN(nn.Module):
    """Compact CNN classifier: conv -> ReLU -> max-pool -> two linear layers.

    The first linear layer expects 10 * 12 * 12 features, which is what a
    single-channel 28x28 image yields (28 -> 24 after the 5x5 convolution,
    24 -> 12 after 2x2 pooling). The network returns 10 raw scores per sample;
    no softmax is applied here.
    """

    def __init__(self):
        super().__init__()
        # Feature extractor: 1 input channel -> 10 feature maps, 5x5 kernel.
        self.conv1 = nn.Conv2d(1, 10, 5)
        self.pool = nn.MaxPool2d(2)
        # Classifier head.
        self.fc1 = nn.Linear(10 * 12 * 12, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Map a batch of images to a (rows, 10) tensor of class scores."""
        features = self.pool(torch.relu(self.conv1(x)))
        # Collapse everything into rows of fc1's input width (10 * 12 * 12).
        flat = features.view(-1, self.fc1.in_features)
        hidden = torch.relu(self.fc1(flat))
        return self.fc2(hidden)


In [None]:
# Part 3: Load Dataset
# The only preprocessing step is ToTensor (image -> tensor); no normalization
# or augmentation is applied.
transform = transforms.Compose([transforms.ToTensor()])

# MNIST train and test splits, stored under ./data (download=True fetches the
# files when they are not already there). Both splits share the same transform.
train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset  = datasets.MNIST(root='./data', train=False, download=True, transform=transform)


100%|██████████| 9.91M/9.91M [00:01<00:00, 5.58MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 130kB/s]
100%|██████████| 1.65M/1.65M [00:01<00:00, 1.18MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 14.6MB/s]


In [None]:
# Part 4.1: IID Partitioning
def iid_partition(dataset, num_clients):
    """
    Deal the dataset's sample indices out to clients uniformly at random (IID).

    Returns a dict mapping each client id in 0..num_clients-1 to a NumPy array
    of sample indices. Every client receives len(dataset) // num_clients
    indices; when the dataset size is not a multiple of num_clients, the
    leftover indices at the end of the shuffle are not assigned to anyone.
    The shuffle uses NumPy's global RNG.
    """
    per_client = len(dataset) // num_clients
    shuffled = np.random.permutation(len(dataset))

    partitions = {}
    for client in range(num_clients):
        start = client * per_client
        partitions[client] = shuffled[start:start + per_client]
    return partitions

# Part 4.2: Non-IID Partitioning

def noniid_partition(dataset, num_clients, num_shards=20):
    """
    Label-skewed (non-IID) split: sort by label, cut into shards, deal shards.

    Sample indices are ordered by their label (``dataset.targets``), cut into
    ``num_shards`` contiguous shards of ``len(dataset) // num_shards`` samples,
    and each client is given ``num_shards // num_clients`` shards picked at
    random (NumPy global RNG). Because shards are contiguous in label order,
    each client sees only a few classes.

    Shards left over when ``num_shards`` is not a multiple of ``num_clients``,
    and samples left over when ``len(dataset)`` is not a multiple of
    ``num_shards``, are not assigned to any client.

    Returns:
        dict mapping client id (0..num_clients-1) to an array of sample indices.

    Raises:
        ValueError: if ``num_clients`` exceeds ``num_shards``; every client
            would otherwise silently receive an empty partition.
    """
    if num_clients > num_shards:
        raise ValueError(
            f"num_clients ({num_clients}) must not exceed num_shards ({num_shards})"
        )

    labels = np.array(dataset.targets)
    # Sample indices ordered by label.
    sorted_indices = np.argsort(labels)

    shards_per_client = num_shards // num_clients
    shard_size = len(dataset) // num_shards

    # Random order in which shards are handed out.
    shard_indices = np.arange(num_shards)
    np.random.shuffle(shard_indices)

    client_dict = {}
    for i in range(num_clients):
        shard_ids = shard_indices[i * shards_per_client:(i + 1) * shards_per_client]
        # Gather this client's shards and join them once, instead of growing
        # the array with a concatenate per shard.
        pieces = [sorted_indices[sid * shard_size:(sid + 1) * shard_size] for sid in shard_ids]
        client_dict[i] = np.concatenate(pieces)

    return client_dict


In [None]:
# ------------------------------
# Part 4.3: HFL Partitioning
# ------------------------------
def hfl_partition(dataset, num_clients):
    """
    Horizontal FL split: each client holds a different set of samples (rows).

    This is a thin wrapper that delegates to iid_partition, so the returned
    client-id -> index-array mapping is exactly what an IID split produces.
    """
    client_dict = iid_partition(dataset, num_clients)
    return client_dict

# ------------------------------
# Part 4.4: VFL Partitioning
# ------------------------------
def vfl_partition(dataset, num_clients, num_columns=28):
    """
    Vertical FL = partition by features (columns).

    Splits the column indices ``0..num_columns-1`` of an image into
    ``num_clients`` contiguous chunks, one per client. When the columns do not
    divide evenly, the first chunks are one column wider than the rest.

    Args:
        dataset: not used by this function; kept so the signature matches the
            other partition helpers.
        num_clients: number of chunks to produce.
        num_columns: image width in pixels. Defaults to 28, which matches the
            28x28 images this split was written for.

    Returns:
        list of ``num_clients`` integer arrays holding the column indices
        assigned to each client.
    """
    feature_splits = np.array_split(np.arange(num_columns), num_clients)

    return feature_splits  # returns feature column ranges for each client


In [None]:
# Part 5: Local Training

def local_train(model, dataset, indices, epochs=1, batch_size=32, lr=0.01):
    """
    Run local SGD for one client and return the resulting weights.

    The incoming model is deep-copied first, so the caller's model is never
    modified. The copy is trained with plain SGD and cross-entropy loss on the
    samples of `dataset` selected by `indices`, shuffled into mini-batches of
    `batch_size`, for `epochs` passes. Batches are moved to the module-level
    `device`; the model is assumed to already live there.

    Returns the trained copy's state_dict.
    """
    local_model = copy.deepcopy(model)
    local_model.train()
    client_loader = DataLoader(Subset(dataset, indices), batch_size=batch_size, shuffle=True)

    sgd = torch.optim.SGD(local_model.parameters(), lr=lr)
    loss_fn = nn.CrossEntropyLoss()

    for _epoch in range(epochs):
        for inputs, labels in client_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            sgd.zero_grad()
            batch_loss = loss_fn(local_model(inputs), labels)
            batch_loss.backward()
            sgd.step()

    return local_model.state_dict()


In [None]:
# Part 6: FedAvg Aggregation

def fedavg(client_weights, client_sizes):
    """
    Weighted average of client weights (FedAvg).

    Each entry of the result is the sum over clients of that client's value
    multiplied by ``client_sizes[i] / sum(client_sizes)``.

    Args:
        client_weights: non-empty sequence of state-dict-like mappings that
            all share the keys of the first one.
        client_sizes: number of samples per client, same length and order as
            ``client_weights``.

    Returns:
        A new mapping of the same type and key order as ``client_weights[0]``
        holding the averaged values. The inputs are not modified.

    Raises:
        ValueError: if there are no clients, if the two sequences differ in
            length, or if the total sample count is not positive.
    """
    if not client_weights:
        raise ValueError("fedavg requires at least one client state dict")
    if len(client_weights) != len(client_sizes):
        raise ValueError(
            f"got {len(client_weights)} client state dicts but {len(client_sizes)} client sizes"
        )
    total_size = sum(client_sizes)
    if total_size <= 0:
        raise ValueError("total client size must be positive")

    # Per-client mixing coefficients; they sum to 1.
    fractions = [size / total_size for size in client_sizes]

    # A shallow copy is enough: it keeps the mapping type and key order, and
    # every value is rebound to a freshly computed one below.
    new_state = copy.copy(client_weights[0])
    for key in new_state:
        new_state[key] = sum(
            weights[key] * fraction
            for weights, fraction in zip(client_weights, fractions)
        )
    return new_state


In [None]:
# Part 7: Evaluation

def evaluate(model, test_loader):
    """
    Return the model's top-1 accuracy on `test_loader`, as a percentage.

    The model is switched to eval mode and gradients are disabled while the
    loader is consumed. Each batch is moved to the module-level `device`, and
    the predicted class is the index of the largest output score.
    """
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch, labels in test_loader:
            batch = batch.to(device)
            labels = labels.to(device)
            scores = model(batch)
            # torch.max over dim 1 returns (values, indices); keep the indices.
            predicted = torch.max(scores, 1)[1]
            hits += (predicted == labels).sum().item()
            seen += labels.size(0)
    return 100.0 * hits / seen


In [None]:
# ------------------------------
# Part 8: Federated Training Loop
# ------------------------------
import copy
import matplotlib.pyplot as plt

def federated_train(dataset, test_dataset, partition_type="iid", num_clients=5, rounds=5, local_epochs=1, batch_size=32, lr=0.01):
    """
    Simulate FedAvg training and report test accuracy after every round.

    The training set is split across `num_clients` according to
    `partition_type` ("iid", "noniid", "hfl" or "vfl"). In each round every
    client trains a copy of the current global SimpleCNN on its own split via
    local_train, the resulting weights are merged by fedavg (weighted by split
    size), and the merged model is scored on `test_dataset`.

    Returns the list of per-round test accuracies (percent). Raises ValueError
    for an unrecognised `partition_type`.
    """
    # Pick the splitter. "vfl" deliberately reuses the IID splitter: a true
    # feature-wise split would need a different training procedure, so it is
    # only simulated here.
    if partition_type in ("iid", "vfl"):
        make_partition = iid_partition
    elif partition_type == "noniid":
        make_partition = noniid_partition
    elif partition_type == "hfl":
        make_partition = hfl_partition
    else:
        raise ValueError("Unknown partition type!")
    client_dict = make_partition(dataset, num_clients)

    # Fresh global model and a fixed-order test loader.
    global_model = SimpleCNN().to(device)
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

    acc_list = []
    for r in range(rounds):
        client_weights = []
        client_sizes = []

        # Every client starts the round from the same global weights.
        for client_id in range(num_clients):
            client_indices = client_dict[client_id]
            trained_state = local_train(
                global_model,
                dataset,
                client_indices,
                epochs=local_epochs,
                batch_size=batch_size,
                lr=lr,
            )
            client_weights.append(trained_state)
            client_sizes.append(len(client_indices))

        # Merge the client models and install the result as the new global model.
        global_model.load_state_dict(fedavg(client_weights, client_sizes))

        acc = evaluate(global_model, test_loader)
        acc_list.append(acc)
        print(f"Round {r+1}/{rounds}, Test Accuracy = {acc:.2f}%")

    return acc_list


In [None]:
# Part 9: Run Experiments

# Shared settings for all four experiments.
rounds = 5
num_clients = 5
local_epochs = 2

# Maps experiment label -> list of per-round test accuracies (percent).
results = {}

# Each federated_train call below builds its own fresh global model. The runs
# share the global RNG state, so they execute with different random draws.

print("\n--- Training with IID ---")
results["IID"] = federated_train(train_dataset, test_dataset, partition_type="iid", num_clients=num_clients, rounds=rounds, local_epochs=local_epochs)

print("\n--- Training with Non-IID ---")
results["Non-IID"] = federated_train(train_dataset, test_dataset, partition_type="noniid", num_clients=num_clients, rounds=rounds, local_epochs=local_epochs)

# NOTE: "hfl" delegates to the IID splitter, so this run differs from the IID
# run only through the random state it starts from.
print("\n--- Training with HFL ---")
results["HFL"] = federated_train(train_dataset, test_dataset, partition_type="hfl", num_clients=num_clients, rounds=rounds, local_epochs=local_epochs)

# NOTE: "vfl" is currently simulated with the IID splitter inside
# federated_train; no feature-wise split is performed in this run.
print("\n--- Training with VFL ---")
results["VFL"] = federated_train(train_dataset, test_dataset, partition_type="vfl", num_clients=num_clients, rounds=rounds, local_epochs=local_epochs)



--- Training with IID ---
Round 1/5, Test Accuracy = 89.14%
Round 2/5, Test Accuracy = 91.44%
Round 3/5, Test Accuracy = 92.56%
Round 4/5, Test Accuracy = 93.64%
Round 5/5, Test Accuracy = 94.36%

--- Training with Non-IID ---
Round 1/5, Test Accuracy = 53.87%
Round 2/5, Test Accuracy = 64.42%
Round 3/5, Test Accuracy = 75.14%
Round 4/5, Test Accuracy = 80.42%
Round 5/5, Test Accuracy = 85.18%

--- Training with HFL ---
Round 1/5, Test Accuracy = 89.61%
Round 2/5, Test Accuracy = 91.63%
Round 3/5, Test Accuracy = 93.26%
Round 4/5, Test Accuracy = 93.93%
Round 5/5, Test Accuracy = 94.78%

--- Training with VFL ---
Round 1/5, Test Accuracy = 89.41%
Round 2/5, Test Accuracy = 91.63%
Round 3/5, Test Accuracy = 92.34%
Round 4/5, Test Accuracy = 93.14%
Round 5/5, Test Accuracy = 94.16%
