In [1]:
import boto3
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image

In [2]:
from PIL import Image
import numpy as np
import io
import os 

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split, Subset, ConcatDataset
from torchvision.transforms import transforms
import numpy as np
from torchvision import models, datasets

In [4]:
from sklearn.model_selection import train_test_split
import torch

In [5]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

### Load Data

In [6]:
class LocalData:
    def __init__ (self, clinic_id, data_range): #data_range will be removed for final code
        self.clinic_id = clinic_id
        self.path = f'../120_dataset/{clinic_id}/'
        self.range = data_range #remove for final code
        
        
    def dataset (self):
        transform = transforms.Compose([
        transforms.Resize((128, 128)),  # Resize images to a fixed size (optional)
        transforms.ToTensor(),          # Convert images to PyTorch tensors
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]) 
        ])

# Load the dataset from the train folder
        train_dataset = datasets.ImageFolder(root=f'{self.path}', transform=transform)
        subset_indices = list(self.range)    #remove for final code
        train_dataset = Subset (train_dataset, subset_indices) #remove for final code

        train_size = int(0.8*len(train_dataset))
        val_size = len(train_dataset)-train_size

        train_subset, val_subset = random_split(train_dataset, [train_size, val_size])
        return train_subset, val_subset
    
    def dataloader(self):  
        train_subset, val_subset = self.dataset()
        train_loader = DataLoader(train_subset, batch_size=32, shuffle=True)
        val_loader = DataLoader(val_subset, batch_size=32, shuffle=False)

        print (f'loading {self.clinic_id}')
        return train_loader, val_loader

### Models

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def resnet18 (weights='DEFAULT'):
    
    resnet18 = models.resnet18(weights = weights).to(device)
    for param in resnet18.parameters():
        param.requires_grad = False
    resnet18.fc = nn.Sequential (
    nn.Linear(in_features = 512, out_features = 256, bias = True),
    nn.Dropout(p = 0.5),
    nn.Linear(in_features = 256, out_features = 1, bias = True),
    nn.Sigmoid()
    )
    
    for param in resnet18.fc.parameters():
        param.requires_grad = True
    
    return resnet18

def vgg16(weights = 'DEFAULT'):
    model = models.vgg16(weights=weights).to(device)

# Freeze the parameters of the base model
    for param in model.features.parameters():
        param.requires_grad = False
    
# Modify the classifier part for binary classification
    model.classifier[6] = nn.Sequential(
        nn.Linear(model.classifier[6].in_features, 512),
        nn.ReLU(),
        nn.Dropout(p=0.5),
        nn.Linear(512, 1),
        nn.Sigmoid()
    )

    return model

    
def vgg19 (weights='DEFAULT'):
    vgg19 = models.vgg19 (weights=weights).to(device)
    
    for param in vgg19.parameters():
        param.requires_grad = False
        
    vgg19.classifier = nn.Sequential (
        nn.Linear(25088, 4096),        
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 4096),       
        nn.ReLU(inplace=True),
        nn.Dropout(p=0.5),
        nn.Linear(4096, 1),
        nn.Sigmoid()          
    )
    for param in vgg19.classifier.parameters():
        param.requires_grad = True
    
    return vgg19

### Training

In [8]:
import torch
import torch.optim as optim
from tqdm import tqdm  # Import tqdm for the progress bar
from local_data import LocalData
from cnn_models import vgg16

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def calculate_accuracy(outputs, labels, threshold=0.5):
    preds = (outputs > threshold).float()
    correct = (preds == labels).float().sum()
    accuracy = correct / labels.size(0)
    return accuracy

def train_local_model(model, train_loader, val_loader, num_epochs=10):
    criterion = torch.nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    for epoch in range(num_epochs):
        # Training phase
        model.train()
        running_loss = 0.0
        running_accuracy = 0.0
        total_train = 0
        threshold = 0.5

        for images, labels in train_loader:
            optimizer.zero_grad()
            images, labels = images.to(device), labels.to(device)
            outputs = model(images).squeeze(1)
            print (outputs)
            print (labels)
            # Calculate loss
            loss = criterion(outputs, labels.float())
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            running_accuracy += calculate_accuracy(outputs, labels, threshold)
            total_train += 1


        # Validation phase
        model.eval()
        val_loss = 0.0
        val_accuracy = 0.0
        total_val = 0

        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images).squeeze(1)
                loss = criterion(outputs, labels.float())
                val_loss += loss.item()
                val_accuracy += calculate_accuracy(outputs, labels, threshold)
                total_val += 1

        avg_val_loss = val_loss / total_val
        avg_val_accuracy = val_accuracy / total_val
        print (f"Epoch {epoch + 1}/{num_epochs}:\ntrain_loss: {running_loss / total_train}, train_accuracy: {running_accuracy / total_train}\nValidation Loss: {avg_val_loss:.4f}, Validation Accuracy: {avg_val_accuracy:.4f}")
    
    return model, avg_val_loss, avg_val_accuracy


### Federated Learning

In [9]:
def federated_averaging (client_weights):
    avg_weights = client_weights[0].copy()
    
    for key in avg_weights.keys():
        for key in avg_weights.keys():
            for i in range (1, len (client_weights)):
                avg_weights[key] += client_weights[i][key]
                
            avg_weights[key] = avg_weights[key] / len (client_weights)
            
    return avg_weights

In [10]:
def federated_learning (global_model, local_model, num_clients, num_rounds, train_loaders, val_loaders):
    global_model = global_model
    global_weights = global_model.state_dict()
    
    for round_num in range (num_rounds):
        print (f'Round {round_num+1}')
        
        client_weights = []
        
        for client_id in range (num_clients):
            print (f'client {client_id+1} training...')
            
            local_model = local_model
            local_model.load_state_dict (global_weights)
            local_model.to(device)
            
            client_train_loader = train_loaders[client_id]
            client_val_loader = val_loaders[client_id]
            
            output_model, _, _ = train_local_model (local_model, client_train_loader, client_val_loader)
            client_updated_weights = output_model.state_dict()
            
            client_weights.append (client_updated_weights)
            
        global_weights = federated_averaging (client_weights)
        
        global_model.load_state_dict (global_weights)
        
    return global_model

In [14]:
num_clients = 4

num_rounds = 3

train_loader_0, val_loader_0 = LocalData('clinic_0', range(8100, 8300)).dataloader()

train_loader_1, val_loader_1 = LocalData('clinic_1', range(8100, 8300)).dataloader()

train_loader_2, val_loader_2 = LocalData('clinic_2', range(8100, 8300)).dataloader()

train_loader_3, val_loader_3 = LocalData('clinic_3', range(8100, 8300)).dataloader()


train_loaders = [train_loader_0, train_loader_1, train_loader_2, train_loader_3]
val_loaders = [val_loader_0, val_loader_1, val_loader_2, val_loader_3]

loading clinic_0
loading clinic_1
loading clinic_2
loading clinic_3


In [15]:
train_loader_clinic5, val_loader_clinic5 = LocalData ('clinic_4', range (7600, 7800)).dataloader()

loading clinic_4


In [16]:
resnet = federated_learning (resnet18(None), resnet18(None), num_clients, num_rounds, train_loaders, val_loaders)

Round 1
client 1 training...
Epoch 1/10:
Train Loss: 0.7119, Train Accuracy: 0.4875
Validation Loss: 0.6993, Validation Accuracy: 0.5000
Epoch 2/10:
Train Loss: 0.6939, Train Accuracy: 0.5437
Validation Loss: 0.6923, Validation Accuracy: 0.5000
Epoch 3/10:
Train Loss: 0.6878, Train Accuracy: 0.5437
Validation Loss: 0.6871, Validation Accuracy: 0.5781
Epoch 4/10:
Train Loss: 0.6890, Train Accuracy: 0.5000
Validation Loss: 0.6862, Validation Accuracy: 0.5156
Epoch 5/10:
Train Loss: 0.6888, Train Accuracy: 0.5375
Validation Loss: 0.6821, Validation Accuracy: 0.6562
Epoch 6/10:
Train Loss: 0.6646, Train Accuracy: 0.5750
Validation Loss: 0.6785, Validation Accuracy: 0.5938
Epoch 7/10:
Train Loss: 0.6750, Train Accuracy: 0.6000
Validation Loss: 0.6760, Validation Accuracy: 0.5781
Early stopping triggered. Restoring the best model...
client 2 training...
Epoch 1/10:
Train Loss: 0.7133, Train Accuracy: 0.5125
Validation Loss: 0.7013, Validation Accuracy: 0.4844
Epoch 2/10:
Train Loss: 0.6988, 

In [17]:
clin_5 = train_local_model (resnet, train_loader_clinic5, val_loader_clinic5)[0]

Epoch 1/10:
Train Loss: 56.2500, Train Accuracy: 0.4375
Validation Loss: 53.1250, Validation Accuracy: 0.4688
Epoch 2/10:
Train Loss: 56.8750, Train Accuracy: 0.4313
Validation Loss: 53.1250, Validation Accuracy: 0.4688
Epoch 3/10:
Train Loss: 55.6250, Train Accuracy: 0.4437
Validation Loss: 53.1250, Validation Accuracy: 0.4688
Early stopping triggered. Restoring the best model...


In [18]:
cl = train_local_model (resnet18(), train_loader_clinic5, val_loader_clinic5)[0]

Epoch 1/10:
Train Loss: 0.6726, Train Accuracy: 0.6500
Validation Loss: 0.7795, Validation Accuracy: 0.5312
Epoch 2/10:
Train Loss: 0.5756, Train Accuracy: 0.7375
Validation Loss: 0.7970, Validation Accuracy: 0.5312
Epoch 3/10:
Train Loss: 0.5455, Train Accuracy: 0.7375
Validation Loss: 0.7619, Validation Accuracy: 0.5312
Early stopping triggered. Restoring the best model...


In [22]:
evaluation(clin_5)

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0.])
tensor([0., 0., 0., 0., 0., 0., 0., 0.])
Accuracy score:  0.45
Precision score:  0.0
Recall score:  0.0
F1 score:  0.0
Confusion Matrix: 
 [[18  0]
 [22  0]]


  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
evaluation (cl)

tensor([0.8843, 0.6846, 0.6354, 0.6401, 0.7873, 0.7866, 0.8195, 0.7555, 0.8272,
        0.7787, 0.7241, 0.6440, 0.7352, 0.7904, 0.7293, 0.7389, 0.5990, 0.6738,
        0.7227, 0.7704, 0.6803, 0.8143, 0.6694, 0.7728, 0.8078, 0.5394, 0.7745,
        0.7110, 0.7018, 0.7591, 0.7099, 0.7741])
tensor([0.7574, 0.8137, 0.7545, 0.7131, 0.6641, 0.8164, 0.7888, 0.7013])
Accuracy score:  0.55
Precision score:  0.55
Recall score:  1.0
F1 score:  0.7097
Confusion Matrix: 
 [[ 0 18]
 [ 0 22]]


### Evaluation

After several rounds of training, the global model's weights are now used as initiallized weights for a fresh client model. Then, we will use this model to make prediction on the 5th clinic's data.

In [74]:
for item in iter(val_loader_clinic5):
    print (item[1])

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
        1, 1, 1, 1, 0, 1, 1, 1])
tensor([0, 1, 1, 1, 1, 1, 1, 1])


In [20]:
# Metrics
def metrics (ground_truths, predictions):
    accuracy = accuracy_score(ground_truths, predictions).round(4)
    precision = precision_score (ground_truths, predictions).round(4)
    recall = recall_score (ground_truths, predictions).round(4)
    f1 = f1_score (ground_truths, predictions).round(4)
    confusion_ma = confusion_matrix (ground_truths, predictions)
    
    print ('Accuracy score: ',accuracy)
    print ('Precision score: ', precision)
    print ('Recall score: ', recall)
    print ('F1 score: ', f1)
    print ('Confusion Matrix: \n', confusion_ma)
    return accuracy, precision, recall, f1
    

In [21]:
#Train client model on clinic4's train_loader
#Make prediction on clinic4's val_loader
def evaluation(client_model):
    client_model.eval()
    predictions = []
    ground_truths = []
    with torch.no_grad():
        for images, labels in val_loader_clinic5:
            images, labels = images.to(device), labels.to(device)
            output = client_model(images)
            print (output.reshape(-1))
            output = output.round()
            predictions.append (output)
            ground_truths.append (labels)
    predictions = np.concatenate (predictions).reshape(-1).astype ('int')
    ground_truths = np.concatenate (ground_truths)
    
    metrics (ground_truths, predictions)

In [37]:
client_models = [resnet18(), vgg16(), vgg19()]
for client_model in client_models:
    print (client_model.__class__.__name__)
    client_model = train_local_model (client_model, train_loader_clinic5, val_loader_clinic5)[0]

    evaluation (client_model)

ResNet
Accuracy score:  0.8
Precision score:  0.7667
Recall score:  0.8214
F1 score:  0.7931
Confusion Matrix: 
 [[25  7]
 [ 5 23]]
VGG
Accuracy score:  0.8167
Precision score:  0.7429
Recall score:  0.9286
F1 score:  0.8254
Confusion Matrix: 
 [[23  9]
 [ 2 26]]
VGG
Accuracy score:  0.8167
Precision score:  0.7742
Recall score:  0.8571
F1 score:  0.8136
Confusion Matrix: 
 [[25  7]
 [ 4 24]]


In [39]:
for client_model in client_models:
    global_model = federated_learning (client_model, num_clients, num_rounds, train_loaders, val_loaders)
    model = train_local_model (global_model, train_loader_clinic5, val_loader_clinic5)[0]
    evaluation (model)

Round 1
client 1 training...
client 2 training...
client 3 training...
client 4 training...
Round 2
client 1 training...
client 2 training...
client 3 training...
client 4 training...
Round 3
client 1 training...
client 2 training...
client 3 training...
client 4 training...
Accuracy score:  0.8667
Precision score:  0.8846
Recall score:  0.8214
F1 score:  0.8519
Confusion Matrix: 
 [[29  3]
 [ 5 23]]
Round 1
client 1 training...
client 2 training...
client 3 training...
client 4 training...
Round 2
client 1 training...
client 2 training...
client 3 training...
client 4 training...
Round 3
client 1 training...
client 2 training...
client 3 training...
client 4 training...


RuntimeError: all elements of input should be between 0 and 1

In [None]:
global_model = federated_learning (client_model, num_clients, num_rounds, train_loaders, val_loaders)
output_model = client_model.load_state_dict (global_model.state_dict())