<a href="https://colab.research.google.com/github/krish-navulla/FL_Breast_Cancer_Classification/blob/main/Thesis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:

import pandas as pd
from PIL import Image



# Creating annotations file

In [2]:
annotations_benign = pd.read_csv("/content/drive/MyDrive/Thesis/BreaKHis_v1/histology_slides/breast/annotations_benign.txt", delimiter='\t')
annotations_malignant = pd.read_csv("/content/drive/MyDrive/Thesis/BreaKHis_v1/histology_slides/breast/annotations_malignant.txt", delimiter='\t')

In [3]:

annotations = pd.concat([pd.DataFrame(annotations_malignant, columns=['name', 'value']), pd.DataFrame(annotations_benign, columns=['name', 'value'])])

In [4]:
annotations = annotations.sample(frac=1).reset_index(drop=True)

# Creating pytorch data loader

In [5]:
import os
import pandas as pd
from torchvision.io import read_image
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision import transforms

class CustomImageDataset():
    def __init__(self, annotations_file, img_dir, transform, target_transform=None):
        self.img_labels = annotations_file
        self.img_dir = img_dir
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.img_labels)

    def __getitem__(self, idx):
        img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0]).replace("\\","/")
        image = Image.open(img_path).convert('RGB')
        label = self.img_labels.iloc[idx, 1]
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(label)
        return image, label

In [21]:
img_dir = "\content\drive\MyDrive\Thesis\BreaKHis_v1"
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust the size as needed
    transforms.ToTensor(),
])

dataset = CustomImageDataset(img_dir = img_dir,annotations_file=annotations, transform = transform)
dataset.img_labels = dataset.img_labels[:-1]
dataloader = DataLoader(dataset, batch_size=64, shuffle=True)

In [22]:
dataset.__len__()

7908

# Test model


In [8]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
from tqdm import tqdm

In [24]:
dataset_size = len(dataset)
client1, client2, client3, client4 = int(0.25 * dataset_size),int(0.25 * dataset_size),int(0.25 * dataset_size), int(0.25 * dataset_size)



client1_dataset, client2_dataset, client3_dataset, client4_dataset = random_split(dataset, [client1, client2, client3, client4])





# Client 1

In [27]:
train_size = int(0.7 * len(client1_dataset))
val_size = int(0.15 * len(client1_dataset))
test_size = len(client1_dataset) - train_size - val_size
num_epochs = 2

train_dataset, val_dataset, test_dataset = random_split(client1_dataset, [train_size, val_size, test_size])

# Create DataLoader instances for each set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [28]:
client_1_model = models.resnet18(pretrained=True)
num_ftrs = client_1_model.fc.in_features
client_1_model.fc = nn.Linear(num_ftrs, 2)  # Assuming you have 2 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
client_1_model = client_1_model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(client_1_model.parameters(), lr=0.001)

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    client_1_model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = client_1_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Train Loss: {epoch_loss}")

    # Evaluation on validation set
    client_1_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = client_1_model(images)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_accuracy = 100 * val_correct / val_total
    print(f"Validation Accuracy: {val_accuracy:.2f}%")

# Test the model on the test set
client_1_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = client_1_model(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy: {test_accuracy:.2f}%")

Epoch 1/2: 100%|██████████| 22/22 [19:50<00:00, 54.10s/it]


Train Loss: 0.5510490213023177
Validation Accuracy: 71.62%


Epoch 2/2: 100%|██████████| 22/22 [00:25<00:00,  1.16s/it]


Train Loss: 0.30845355926134066
Validation Accuracy: 85.47%
Test Accuracy: 87.58%


# Client 2

In [29]:
train_size = int(0.7 * len(client2_dataset))
val_size = int(0.15 * len(client2_dataset))
test_size = len(client2_dataset) - train_size - val_size
num_epochs = 2

train_dataset, val_dataset, test_dataset = random_split(client2_dataset, [train_size, val_size, test_size])

# Create DataLoader instances for each set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [30]:
client_2_model = models.resnet18(pretrained=True)
num_ftrs = client_2_model.fc.in_features
client_2_model.fc = nn.Linear(num_ftrs, 2)  # Assuming you have 2 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
client_2_model = client_2_model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(client_2_model.parameters(), lr=0.001)

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    client_2_model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = client_2_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Train Loss client_2_model: {epoch_loss}")

    # Evaluation on validation set
    client_2_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = client_2_model(images)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_accuracy = 100 * val_correct / val_total
    print(f"Validation Accuracy client_2_model: {val_accuracy:.2f}%")

# Test the model on the test set
client_2_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = client_2_model(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy client_2_model: {test_accuracy:.2f}%")

Epoch 1/2: 100%|██████████| 22/22 [19:54<00:00, 54.30s/it]


Train Loss client_2_model: 0.5302637593951333
Validation Accuracy client_2_model: 68.92%


Epoch 2/2: 100%|██████████| 22/22 [00:25<00:00,  1.17s/it]


Train Loss client_2_model: 0.2728493737295238
Validation Accuracy client_2_model: 88.51%
Test Accuracy client_2_model: 87.58%


# Client 3

In [31]:
train_size = int(0.7 * len(client1_dataset))
val_size = int(0.15 * len(client1_dataset))
test_size = len(client1_dataset) - train_size - val_size
num_epochs = 2

train_dataset, val_dataset, test_dataset = random_split(client1_dataset, [train_size, val_size, test_size])

# Create DataLoader instances for each set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [32]:
client_3_model = models.resnet18(pretrained=True)
num_ftrs = client_3_model.fc.in_features
client_3_model.fc = nn.Linear(num_ftrs, 2)  # Assuming you have 2 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
client_3_model = client_3_model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(client_3_model.parameters(), lr=0.001)

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    client_3_model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = client_3_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Train Loss client_3_model: {epoch_loss}")

    # Evaluation on validation set
    client_3_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = client_3_model(images)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_accuracy = 100 * val_correct / val_total
    print(f"Validation Accuracy client_3_model: {val_accuracy:.2f}%")

# Test the model on the test set
client_3_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = client_3_model(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy client_3_model: {test_accuracy:.2f}%")

Epoch 1/2: 100%|██████████| 22/22 [00:25<00:00,  1.17s/it]


Train Loss client_3_model: 0.47678458569968685
Validation Accuracy client_3_model: 76.69%


Epoch 2/2: 100%|██████████| 22/22 [00:25<00:00,  1.16s/it]


Train Loss client_3_model: 0.243444893425647
Validation Accuracy client_3_model: 83.45%
Test Accuracy client_3_model: 87.25%


# Client 4

In [33]:
train_size = int(0.7 * len(client1_dataset))
val_size = int(0.15 * len(client1_dataset))
test_size = len(client1_dataset) - train_size - val_size
num_epochs = 2

train_dataset, val_dataset, test_dataset = random_split(client1_dataset, [train_size, val_size, test_size])

# Create DataLoader instances for each set
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [34]:
client_4_model = models.resnet18(pretrained=True)
num_ftrs = client_4_model.fc.in_features
client_4_model.fc = nn.Linear(num_ftrs, 2)  # Assuming you have 2 classes
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
client_4_model = client_4_model.to(device)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(client_4_model.parameters(), lr=0.001)

# Training loop
num_epochs = 2
for epoch in range(num_epochs):
    client_4_model.train()
    running_loss = 0.0
    for images, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}"):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = client_4_model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * images.size(0)
    epoch_loss = running_loss / len(train_loader.dataset)
    print(f"Train Loss client_4_model: {epoch_loss}")

    # Evaluation on validation set
    client_4_model.eval()
    val_correct = 0
    val_total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = client_4_model(images)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()
    val_accuracy = 100 * val_correct / val_total
    print(f"Validation Accuracy client_4_model: {val_accuracy:.2f}%")

# Test the model on the test set
client_4_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = client_4_model(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy client_4_model: {test_accuracy:.2f}%")

Epoch 1/2: 100%|██████████| 22/22 [00:25<00:00,  1.16s/it]


Train Loss client_4_model: 0.5318356481739687
Validation Accuracy client_4_model: 75.68%


Epoch 2/2: 100%|██████████| 22/22 [00:25<00:00,  1.16s/it]


Train Loss client_4_model: 0.27034093614126237
Validation Accuracy client_4_model: 88.18%
Test Accuracy client_4_model: 86.24%


# Saving Model Weights

In [36]:
# Get the state_dict of the trained model
client_1_model_weights = client_1_model.state_dict()
client_2_model_weights = client_2_model.state_dict()
client_3_model_weights = client_3_model.state_dict()
client_4_model_weights = client_4_model.state_dict()

# Save the model weights to a file
torch.save(client_1_model_weights, 'client_1_model_weights.pth')
torch.save(client_2_model_weights, 'client_2_model_weights.pth')
torch.save(client_3_model_weights, 'client_3_model_weights.pth')
torch.save(client_4_model_weights, 'client_4_model_weights.pth')



# Weighted Average

In [37]:
weight_client_1 = 0.25
weight_client_2 = 0.25
weight_client_3 = 0.25
weight_client_4 = 0.25

# Perform weighted average of the model weights
weighted_average_weights = {}
for key in client_1_model_weights.keys():
    weighted_average_weights[key] = (
        weight_client_1 * client_1_model_weights[key] +
        weight_client_2 * client_2_model_weights[key] +
        weight_client_3 * client_3_model_weights[key] +
        weight_client_4 * client_4_model_weights[key]
    )

# Save the weighted average model weights to a file
torch.save(weighted_average_weights, 'weighted_average_model_weights.pth')

# Weighted Averaged model

In [40]:
import copy
weighted_model = copy.deepcopy(client_4_model)


In [46]:





device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weighted_model = weighted_model.to(device)

weighted_average_model_weights = torch.load('/content/weighted_average_model_weights.pth')
weighted_model.load_state_dict(weighted_average_model_weights)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(weighted_model.parameters(), lr=0.001)

# Test the model on the test set
weighted_model.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = weighted_model(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy weighted_model: {test_accuracy:.2f}%")

Test Accuracy weighted_model: 67.11%


# Normalized weighted scheme

In [42]:
# Validation accuracies of each client model
validation_accuracies = {
    'client_1_model': 85.47,
    'client_2_model': 88.51,
    'client_3_model': 83.45,
    'client_4_model': 88.18
}

# Normalize validation accuracies
total_accuracy = sum(validation_accuracies.values())
normalized_accuracies = {client: accuracy / total_accuracy for client, accuracy in validation_accuracies.items()}

# Assign weights based on normalized accuracies
weights = {client: normalized_accuracy for client, normalized_accuracy in normalized_accuracies.items()}

print("Weighting Scheme:")
for client, weight in weights.items():
    print(f"{client}: {weight:.2f}")


Weighting Scheme:
client_1_model: 0.25
client_2_model: 0.26
client_3_model: 0.24
client_4_model: 0.26


In [43]:
weight_client_1_normalized_validation = 0.25
weight_client_2_normalized_validation = 0.26
weight_client_3_normalized_validation = 0.24
weight_client_4_normalized_validation = 0.26

weighted_average_weights = {}
for key in client_1_model_weights.keys():
    weighted_average_weights[key] = (
        weight_client_1_normalized_validation * client_1_model_weights[key] +
        weight_client_2_normalized_validation * client_2_model_weights[key] +
        weight_client_3_normalized_validation * client_3_model_weights[key] +
        weight_client_4_normalized_validation * client_4_model_weights[key]
    )

# Save the weighted average model weights to a file
torch.save(weighted_average_weights, 'Normalized_validation_weighted_average_model_weights.pth')

# Weighted Model - Normalized Validation scheme for weights

In [44]:
weighted_model_NV = copy.deepcopy(client_4_model)

In [47]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
weighted_model_NV = weighted_model_NV.to(device)

weighted_average_model_weights = torch.load('/content/Normalized_validation_weighted_average_model_weights.pth')
weighted_model_NV.load_state_dict(weighted_average_model_weights)

# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(weighted_model_NV.parameters(), lr=0.001)

# Test the model on the test set
weighted_model_NV.eval()
test_correct = 0
test_total = 0
with torch.no_grad():
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = weighted_model_NV(images)
        _, predicted = torch.max(outputs, 1)
        test_total += labels.size(0)
        test_correct += (predicted == labels).sum().item()
test_accuracy = 100 * test_correct / test_total
print(f"Test Accuracy weighted_model_NV: {test_accuracy:.2f}%")

Test Accuracy weighted_model_NV: 67.11%


In [48]:
import shutil

In [53]:

source_path = '/content/Normalized_validation_weighted_average_model_weights.pth'
destination_path = '/content/drive/MyDrive/Thesis/'
shutil.move(source_path, destination_path)


'/content/drive/MyDrive/Thesis/Normalized_validation_weighted_average_model_weights.pth'