In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from sklearn.decomposition import PCA
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score
from skimage.feature import hog
import torch
import socket
import pickle
from collections import defaultdict
import threading
import time


In [6]:
# Hyperparameters
input_dim = 324
output_dim = 10
learning_rate = 0.001
epochs = 1000

# Download MNIST dataset

In [7]:
# 1. Load and Preprocess CIFAR10
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalization to [-1, 1]
])

full_train_data = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_data = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
# 2. Create Train-Validation Split (80-20 Split)
train_size = int(0.8 * len(full_train_data))
val_size = len(full_train_data) - train_size
train_data, val_data = random_split(full_train_data, [train_size, val_size])
# 2. Flatten Images
train_loader = DataLoader(train_data, batch_size=100, shuffle=True)
val_loader = DataLoader(val_data, batch_size=100, shuffle=False)
test_loader = DataLoader(test_data, batch_size=100, shuffle=False)

Files already downloaded and verified
Files already downloaded and verified


# Preprocess MNIST DATASET

In [8]:
# 3. Preprocess and Extract HOG Features
def extract_hog_features(loader):
    hog_features = []
    labels = []
    
    for img_batch, label_batch in loader:
        # Flatten each image to (batch_size, 3, 1024) and average RGB channels to get shape (batch_size, 1024)
        img_batch = img_batch.view(img_batch.size(0), 3, 1024).mean(dim=1)  # Average RGB channels
        
        # Reshape to (32, 32) and extract HOG features
        for img, label in zip(img_batch, label_batch):
            img_reshaped = img.view(32, 32).numpy()  # Reshape to 32x32
            
            # HOG feature extraction
            hog_feat = hog(img_reshaped, pixels_per_cell=(8, 8), cells_per_block=(2, 2), feature_vector=True)
            hog_features.append(hog_feat)
            labels.append(label)
    
    return torch.tensor(hog_features), torch.tensor(labels)

train_images, train_labels = extract_hog_features(train_loader)
val_images, val_labels = extract_hog_features(val_loader)
test_images, test_labels = extract_hog_features(test_loader)

  return torch.tensor(hog_features), torch.tensor(labels)


# Model Architecture and Training Config

In [9]:
# Linear Regression Model using PyTorch
class SoftmaxRegression(nn.Module):
    def __init__(self, input_dim , output_dim):
        super(SoftmaxRegression, self).__init__()
        # Define a linear layer (input_dim -> 1)
        self.linear = nn.Linear(input_dim, output_dim)
        
    def forward(self, X):
        # Forward pass: apply the linear layer
        return self.linear(X)

In [10]:
# Initialize the model, loss function, and optimizer
model = SoftmaxRegression(input_dim , output_dim)
criterion = nn.CrossEntropyLoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Train and Evaluate Model

In [11]:
# 5. Train the Model
def train(model, data, labels, criterion, optimizer):
        model.train()
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
#         print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')
        # Evaluate on training data every epoch
        accuracy, f1 = evaluate(model, data, labels)
        return accuracy , f1 , model.state_dict()

# 6. Evaluation Function (Accuracy and F1-score)
def evaluate(model, data, labels):
    model.eval()
    with torch.no_grad():
        outputs = model(data)
        _, predicted = torch.max(outputs, 1)
        predicted = predicted.numpy()
        labels = labels.numpy()
        
    # Calculate accuracy and F1-score
    accuracy = accuracy_score(labels, predicted) * 100
    f1 = f1_score(labels, predicted, average='weighted')
    return accuracy, f1

# 7. Test Function to Assess Model on Test Set
def test(model, test_data, test_labels):
    accuracy, f1 = evaluate(model, test_data, test_labels)
    print(f'Test Accuracy: {accuracy:.2f}%, Test F1-score: {f1:.2f}')
        
test(model , test_images , test_labels)

Test Accuracy: 10.20%, Test F1-score: 0.04


In [12]:

# Initialize a variable to keep track of the best validation accuracy
best_val_acc = 0.0  # Set to 0 initially
best_model_path = 'best_model.pth'  # File path to save the best model

for epoch in range(epochs):
    # Train the model for one epoch
    train_acc, train_f1 , _ = train(model, train_images, train_labels, criterion, optimizer)
    
    # Evaluate on the validation set
    val_acc, val_f1  = evaluate(model, val_images, val_labels)
    
    # Print training and validation metrics for the current epoch
    print(f'Epoch [{epoch+1}/{epochs}] - Train Accuracy: {train_acc:.2f}%, Train F1-score: {train_f1:.2f}')
    print(f'Epoch [{epoch+1}/{epochs}] - Val Accuracy: {val_acc:.2f}%, Val F1-score: {val_f1:.2f}')
    
    # Check if the current validation accuracy is the best so far
    if val_acc > best_val_acc:
        best_val_acc = val_acc  # Update the best validation accuracy
        torch.save(model.state_dict(), best_model_path)  # Save the model's state_dict
        print(f'New best model saved with Val Accuracy: {best_val_acc:.2f}%')

# Load the best model after training is completed
model.load_state_dict(torch.load(best_model_path))

# Test the best model on the test set
# test(model, X_test_tensor, y_test_tensor)


Epoch [1/1000] - Train Accuracy: 10.30%, Train F1-score: 0.04
Epoch [1/1000] - Val Accuracy: 9.82%, Val F1-score: 0.04
New best model saved with Val Accuracy: 9.82%
Epoch [2/1000] - Train Accuracy: 11.07%, Train F1-score: 0.06
Epoch [2/1000] - Val Accuracy: 10.66%, Val F1-score: 0.05
New best model saved with Val Accuracy: 10.66%
Epoch [3/1000] - Train Accuracy: 12.00%, Train F1-score: 0.08
Epoch [3/1000] - Val Accuracy: 11.73%, Val F1-score: 0.07
New best model saved with Val Accuracy: 11.73%
Epoch [4/1000] - Train Accuracy: 13.04%, Train F1-score: 0.10
Epoch [4/1000] - Val Accuracy: 12.63%, Val F1-score: 0.10
New best model saved with Val Accuracy: 12.63%
Epoch [5/1000] - Train Accuracy: 14.00%, Train F1-score: 0.13
Epoch [5/1000] - Val Accuracy: 13.28%, Val F1-score: 0.12
New best model saved with Val Accuracy: 13.28%
Epoch [6/1000] - Train Accuracy: 15.12%, Train F1-score: 0.14
Epoch [6/1000] - Val Accuracy: 14.80%, Val F1-score: 0.14
New best model saved with Val Accuracy: 14.80%


  model.load_state_dict(torch.load(best_model_path))


<All keys matched successfully>

In [13]:
test(model, test_images, test_labels)

Test Accuracy: 47.99%, Test F1-score: 0.48


# Federate Training

In [37]:
# Class mappings for CIFAR-10
from torch.utils.data import DataLoader, Subset

class_map = {
    0: 'airplane', 1: 'automobile', 2: 'bird', 3: 'cat',
    4: 'deer', 5: 'dog', 6: 'frog', 7: 'horse', 8: 'ship', 9: 'truck'
}
# Filter dataset by selected classes
def filter_dataset_by_class(dataset, class_indices):
    filtered_indices = [i for i, (_, label) in enumerate(dataset) if label in class_indices]
    return Subset(dataset, filtered_indices)

# Cient 1 Model

In [38]:
# Initialize the model, loss function, and optimizer
model1 = SoftmaxRegression(input_dim , output_dim)
criterion1 = nn.CrossEntropyLoss()  # Mean Squared Error Loss
optimizer1 = optim.Adam(model1.parameters(), lr=learning_rate)

In [39]:
#filter dataset
selected_classes = [0 , 1 , 2 , 3 , 4]
client1_train_data = filter_dataset_by_class(full_train_data, selected_classes)
# split the train and val
client1_train_size = int(0.8 * len(client1_train_data))
client1_val_size = len(client1_train_data) - client1_train_size
client1_train_data, client1_val_data = random_split(client1_train_data, [client1_train_size, client1_val_size])
# move to dataloader
client1_train_dataloader = DataLoader(client1_train_data, batch_size=100, shuffle=True)
client1_val_dataloader = DataLoader(client1_val_data, batch_size=100, shuffle=True)


client1_train_images, client1_train_labels = extract_hog_features(client1_train_dataloader)
client1_val_images, client1_val_labels = extract_hog_features(client1_val_dataloader)

# Optionally, print out a summary for verification
print(f"Client 2 Train Dataset: {len(client1_train_data)} samples")
print(f"Client 2 Val Dataset: {len(client1_val_data)} samples")

Client 2 Train Dataset: 20000 samples
Client 2 Val Dataset: 5000 samples


In [40]:

# Initialize a variable to keep track of the best validation accuracy
best_val_acc = 0.0  # Set to 0 initially
best_model_path = 'best_model_1.pth'  # File path to save the best model

for epoch in range(epochs):
    # Train the model for one epoch
    train_acc, train_f1 , _ = train(model1, client1_train_images, client1_train_labels, criterion1, optimizer1)
    
    # Evaluate on the validation set
    val_acc, val_f1  = evaluate(model1, client1_val_images, client1_val_labels)
    
    # Print training and validation metrics for the current epoch
    print(f'Epoch [{epoch+1}/{epochs}] - Train Accuracy: {train_acc:.2f}%, Train F1-score: {train_f1:.2f}')
    print(f'Epoch [{epoch+1}/{epochs}] - Val Accuracy: {val_acc:.2f}%, Val F1-score: {val_f1:.2f}')
    
    # Check if the current validation accuracy is the best so far
    if val_acc > best_val_acc:
        best_val_acc = val_acc  # Update the best validation accuracy
        torch.save(model1.state_dict(), best_model_path)  # Save the model's state_dict
        print(f'New best model saved with Val Accuracy: {best_val_acc:.2f}%')

# Load the best model after training is completed
model1.load_state_dict(torch.load(best_model_path))

# Test the best model on the test set
# test(model, X_test_tensor, y_test_tensor)

Epoch [1/1000] - Train Accuracy: 18.31%, Train F1-score: 0.10
Epoch [1/1000] - Val Accuracy: 17.64%, Val F1-score: 0.10
New best model saved with Val Accuracy: 17.64%
Epoch [2/1000] - Train Accuracy: 20.79%, Train F1-score: 0.11
Epoch [2/1000] - Val Accuracy: 20.28%, Val F1-score: 0.10
New best model saved with Val Accuracy: 20.28%
Epoch [3/1000] - Train Accuracy: 21.00%, Train F1-score: 0.11
Epoch [3/1000] - Val Accuracy: 20.46%, Val F1-score: 0.11
New best model saved with Val Accuracy: 20.46%
Epoch [4/1000] - Train Accuracy: 21.02%, Train F1-score: 0.11
Epoch [4/1000] - Val Accuracy: 20.48%, Val F1-score: 0.11
New best model saved with Val Accuracy: 20.48%
Epoch [5/1000] - Train Accuracy: 21.03%, Train F1-score: 0.11
Epoch [5/1000] - Val Accuracy: 20.50%, Val F1-score: 0.11
New best model saved with Val Accuracy: 20.50%
Epoch [6/1000] - Train Accuracy: 21.03%, Train F1-score: 0.11
Epoch [6/1000] - Val Accuracy: 20.50%, Val F1-score: 0.11
Epoch [7/1000] - Train Accuracy: 21.02%, Trai

  model1.load_state_dict(torch.load(best_model_path))


<All keys matched successfully>

# Client 2 Model

In [41]:
# Initialize the model, loss function, and optimizer
model2 = SoftmaxRegression(input_dim , output_dim)
criterion2 = nn.CrossEntropyLoss()  # Mean Squared Error Loss
optimizer2 = optim.Adam(model2.parameters(), lr=learning_rate)

In [42]:
#  ô tô, chim, mèo, hươu, chó, ếch, ngựa, tàu và xe tải.
#filter dataset
selected_classes = [1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9]
client2_train_data = filter_dataset_by_class(full_train_data, selected_classes)
# split the train and val
client2_train_size = int(0.8 * len(client2_train_data))
client2_val_size = len(client2_train_data) - client2_train_size
client2_train_data, client2_val_data = random_split(client2_train_data, [client2_train_size, client2_val_size])
# move to dataloader
client2_train_dataloader = DataLoader(client2_train_data, batch_size=100, shuffle=True)
client2_val_dataloader = DataLoader(client2_val_data, batch_size=100, shuffle=True)


client2_train_images, client2_train_labels = extract_hog_features(client2_train_dataloader)
client2_val_images, client2_val_labels = extract_hog_features(client2_val_dataloader)

# Optionally, print out a summary for verification
print(f"Client 2 Train Dataset: {len(client2_train_data)} samples")
print(f"Client 2 Val Dataset: {len(client2_val_data)} samples")

Client 2 Train Dataset: 36000 samples
Client 2 Val Dataset: 9000 samples


In [43]:

# Initialize a variable to keep track of the best validation accuracy
best_val_acc = 0.0  # Set to 0 initially
best_model_path = 'best_model_2.pth'  # File path to save the best model

for epoch in range(epochs):
    # Train the model for one epoch
    train_acc, train_f1 , _ = train(model2, client2_train_images, client2_train_labels, criterion2, optimizer2)
    
    # Evaluate on the validation set
    val_acc, val_f1  = evaluate(model2, client2_val_images, client2_val_labels)
    
    # Print training and validation metrics for the current epoch
    print(f'Epoch [{epoch+1}/{epochs}] - Train Accuracy: {train_acc:.2f}%, Train F1-score: {train_f1:.2f}')
    print(f'Epoch [{epoch+1}/{epochs}] - Val Accuracy: {val_acc:.2f}%, Val F1-score: {val_f1:.2f}')
    
    # Check if the current validation accuracy is the best so far
    if val_acc > best_val_acc:
        best_val_acc = val_acc  # Update the best validation accuracy
        torch.save(model1.state_dict(), best_model_path)  # Save the model's state_dict
        print(f'New best model saved with Val Accuracy: {best_val_acc:.2f}%')

# Load the best model after training is completed
model2.load_state_dict(torch.load(best_model_path))

# Test the best model on the test set
# test(model, X_test_tensor, y_test_tensor)

Epoch [1/1000] - Train Accuracy: 13.49%, Train F1-score: 0.07
Epoch [1/1000] - Val Accuracy: 13.27%, Val F1-score: 0.06
New best model saved with Val Accuracy: 13.27%
Epoch [2/1000] - Train Accuracy: 14.83%, Train F1-score: 0.09
Epoch [2/1000] - Val Accuracy: 14.31%, Val F1-score: 0.08
New best model saved with Val Accuracy: 14.31%
Epoch [3/1000] - Train Accuracy: 16.26%, Train F1-score: 0.11
Epoch [3/1000] - Val Accuracy: 15.31%, Val F1-score: 0.10
New best model saved with Val Accuracy: 15.31%
Epoch [4/1000] - Train Accuracy: 17.56%, Train F1-score: 0.13
Epoch [4/1000] - Val Accuracy: 16.50%, Val F1-score: 0.12
New best model saved with Val Accuracy: 16.50%
Epoch [5/1000] - Train Accuracy: 18.34%, Train F1-score: 0.15
Epoch [5/1000] - Val Accuracy: 17.64%, Val F1-score: 0.14
New best model saved with Val Accuracy: 17.64%
Epoch [6/1000] - Train Accuracy: 18.78%, Train F1-score: 0.16
Epoch [6/1000] - Val Accuracy: 18.16%, Val F1-score: 0.15
New best model saved with Val Accuracy: 18.16

  model2.load_state_dict(torch.load(best_model_path))


<All keys matched successfully>

# Client 3 Model

In [44]:
# Initialize the model, loss function, and optimizer
model3 = SoftmaxRegression(input_dim , output_dim)
criterion3 = nn.CrossEntropyLoss()  # Mean Squared Error Loss
optimizer3 = optim.Adam(model3.parameters(), lr=learning_rate)

In [45]:
#  ô tô, chim, mèo, hươu, chó, ếch, ngựa, tàu và xe tải.
#filter dataset
selected_classes = [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9]
client3_train_data = filter_dataset_by_class(full_train_data, selected_classes)
# split the train and val
client3_train_size = int(0.8 * len(client3_train_data))
client3_val_size = len(client3_train_data) - client3_train_size
client3_train_data, client3_val_data = random_split(client3_train_data, [client3_train_size, client3_val_size])
# move to dataloader
client3_train_dataloader = DataLoader(client3_train_data, batch_size=100, shuffle=True)
client3_val_dataloader = DataLoader(client3_val_data, batch_size=100, shuffle=True)


client3_train_images, client3_train_labels = extract_hog_features(client3_train_dataloader)
client3_val_images, client3_val_labels = extract_hog_features(client3_val_dataloader)

# Optionally, print out a summary for verification
print(f"Client 2 Train Dataset: {len(client3_train_data)} samples")
print(f"Client 2 Val Dataset: {len(client3_val_data)} samples")

Client 2 Train Dataset: 40000 samples
Client 2 Val Dataset: 10000 samples


In [46]:

# Initialize a variable to keep track of the best validation accuracy
best_val_acc = 0.0  # Set to 0 initially
best_model_path = 'best_model_3.pth'  # File path to save the best model

for epoch in range(epochs):
    # Train the model for one epoch
    train_acc, train_f1 , _ = train(model3, client3_train_images, client3_train_labels, criterion3, optimizer3)
    
    # Evaluate on the validation set
    val_acc, val_f1  = evaluate(model3, client3_val_images, client3_val_labels)
    
    # Print training and validation metrics for the current epoch
    print(f'Epoch [{epoch+1}/{epochs}] - Train Accuracy: {train_acc:.2f}%, Train F1-score: {train_f1:.2f}')
    print(f'Epoch [{epoch+1}/{epochs}] - Val Accuracy: {val_acc:.2f}%, Val F1-score: {val_f1:.2f}')
    
    # Check if the current validation accuracy is the best so far
    if val_acc > best_val_acc:
        best_val_acc = val_acc  # Update the best validation accuracy
        torch.save(model3.state_dict(), best_model_path)  # Save the model's state_dict
        print(f'New best model saved with Val Accuracy: {best_val_acc:.2f}%')

# Load the best model after training is completed
model3.load_state_dict(torch.load(best_model_path))

Epoch [1/1000] - Train Accuracy: 13.76%, Train F1-score: 0.09
Epoch [1/1000] - Val Accuracy: 13.70%, Val F1-score: 0.09
New best model saved with Val Accuracy: 13.70%
Epoch [2/1000] - Train Accuracy: 14.63%, Train F1-score: 0.10
Epoch [2/1000] - Val Accuracy: 14.68%, Val F1-score: 0.10
New best model saved with Val Accuracy: 14.68%
Epoch [3/1000] - Train Accuracy: 15.46%, Train F1-score: 0.11
Epoch [3/1000] - Val Accuracy: 15.55%, Val F1-score: 0.12
New best model saved with Val Accuracy: 15.55%
Epoch [4/1000] - Train Accuracy: 16.63%, Train F1-score: 0.13
Epoch [4/1000] - Val Accuracy: 16.50%, Val F1-score: 0.14
New best model saved with Val Accuracy: 16.50%
Epoch [5/1000] - Train Accuracy: 17.84%, Train F1-score: 0.15
Epoch [5/1000] - Val Accuracy: 18.06%, Val F1-score: 0.16
New best model saved with Val Accuracy: 18.06%
Epoch [6/1000] - Train Accuracy: 19.50%, Train F1-score: 0.18
Epoch [6/1000] - Val Accuracy: 19.46%, Val F1-score: 0.18
New best model saved with Val Accuracy: 19.46

  model3.load_state_dict(torch.load(best_model_path))


<All keys matched successfully>

# Averaged Model

In [47]:
import copy

# Function to average models
def average_models(models):
    # Initialize a new model to store the averaged weights
    avg_model = copy.deepcopy(models[0])
    with torch.no_grad():
        # Iterate through each parameter in the model
        for param in avg_model.state_dict().keys():
            # Average the parameters from all models
            avg_param = sum(model.state_dict()[param] for model in models) / len(models)
            avg_model.state_dict()[param].copy_(avg_param)
    return avg_model

# List of models to average
models = [model1, model2, model3]

# Average the models
avg_model = average_models(models)


In [48]:
test(avg_model, test_images, test_labels)

Test Accuracy: 29.85%, Test F1-score: 0.20
