<a href="https://colab.research.google.com/github/sofia4009/Oral-disease/blob/main/Train_Test_KaggleDS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
"""Author: Sofia
Date: March 26
Subject: Oral Diseases Classification
Description:
Classification of a dataset named ds_A from kaggle:  https://www.kaggle.com/datasets/salmansajid05/oral-diseases
  - without data augmentation
  - normalise all image pixel values to the range [0, 1]
  - resize all images to dimensions 112x112x3 for speeding up the training
  - use stratified k-fold cross validation (with k = 5) to split ds_A
  - use the following deep neural network models for training, testing and comparing their performance:
    - ResNet18, ResNet50, ConvNeXt, EfficientNetB0, Transformers (i.e., ViT)
  - use categorical cross entropy as the loss function and f1_score as evaluation metric

"""

'Author: Sofia\nDate: March 26\nSubject: Oral Diseases Classification\nDescription:\nClassification of a dataset named ds_A from kaggle:  https://www.kaggle.com/datasets/salmansajid05/oral-diseases\n  - without data augmentation\n  - normalise all image pixel values to the range [0, 1]\n  - resize all images to dimensions 112x112x3 for speeding up the training\n  - use stratified k-fold cross validation (with k = 5) to split ds_A\n  - use the following deep neural network models for training, testing and comparing their performance:\n    - ResNet18, ResNet50, ConvNeXt, EfficientNetB0, Transformers (i.e., ViT)\n  - use categorical cross entropy as the loss function and f1_score as evaluation metric\n\n'

# Installation and adding packages

In [None]:
!pip install opendatasets

# Install kaggle API client
!pip install -q kaggle

Collecting opendatasets
  Downloading opendatasets-0.1.22-py3-none-any.whl (15 kB)
Installing collected packages: opendatasets
Successfully installed opendatasets-0.1.22


In [None]:
import pandas as pd
import numpy as np
import os
from PIL import Image
import cv2
import zipfile

from google.colab import drive
drive.mount('/content/drive')

from google.colab import files

Mounted at /content/drive


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score
import matplotlib.pyplot as plt
from pathlib import Path
from openpyxl import load_workbook
from openpyxl.drawing.image import Image as xlImage
from openpyxl import Workbook

# Initializations

In [None]:
# Resize a PIL image and pass its pixels through a [0, 1] scaling round trip
def normalize_and_resize_image(image, target_size):
    """Return ``image`` resized to ``target_size`` as an 8-bit PIL image.

    The resized pixels are divided by 255.0 and then multiplied by 255 again
    before being cast to ``uint8``, so the returned image is on the 0-255
    scale (the [0, 1] values only exist as an intermediate array).

    Args:
        image: PIL image to process.
        target_size: Size handed to ``image.resize``.

    Returns:
        A new PIL image built from the resized ``uint8`` array.
    """
    resized = image.resize(target_size)

    # Floating-point pixel values in the range [0, 1]
    unit_scaled = np.array(resized) / 255.0

    # Back to 8-bit values so the array can be wrapped in a PIL image again
    eight_bit = (unit_scaled * 255).astype(np.uint8)

    return Image.fromarray(eight_bit)

In [None]:
# Size every image is resized to (used by the preprocessing step and by transforms.Resize)
image_size = (112,112)

# Specify the root directory where your images are located
# (the Kaggle archive is extracted into this directory)
main_DS_directory = '/content/oral-diseases'

# Specify the output directory to save the processed images
Preprocessed_DS_directory = '/content/decreased_oral_diseases'

# Specify the output Excel files on the mounted Google Drive.
# NOTE: despite the "_directory" suffix, both names hold paths to .xlsx FILES, not directories.
results_directory = '/content/drive/My Drive/QM/results.xlsx'
Plot_directory = '/content/drive/My Drive/QM/plots.xlsx'

In [None]:
files.upload() #To prompt to upload the kaggle.json

# kaggle API client expects the file to be in ~/.kaggle
# so move it there
!mkdir -p ~/.kaggle
!cp kaggle.json ~/.kaggle/

# we need to set permissions
!chmod 600 /root/.kaggle/kaggle.json

# check your directory before downloading the datasets
!pwd

# download the required dataset from kaggle
!kaggle datasets download -d salmansajid05/oral-diseases

with zipfile.ZipFile('oral-diseases.zip', "r") as z:
    z.extractall(main_DS_directory)

Saving kaggle.json to kaggle.json
/content
Dataset URL: https://www.kaggle.com/datasets/salmansajid05/oral-diseases
License(s): unknown
Downloading oral-diseases.zip to /content
 99% 233M/235M [00:08<00:00, 34.0MB/s]
100% 235M/235M [00:08<00:00, 27.8MB/s]


In [None]:
# Resize a PIL image and pass its pixels through a [0, 1] scaling round trip
def normalize_and_resize_image(image, target_size):
    """Return ``image`` resized to ``target_size`` as an 8-bit PIL image.

    The resized pixels are divided by 255.0 and then multiplied by 255 again
    before being cast to ``uint8``, so the returned image is on the 0-255
    scale (the [0, 1] values only exist as an intermediate array).

    Args:
        image: PIL image to process.
        target_size: Size handed to ``image.resize``.

    Returns:
        A new PIL image built from the resized ``uint8`` array.
    """
    resized = image.resize(target_size)

    # Floating-point pixel values in the range [0, 1]
    unit_scaled = np.array(resized) / 255.0

    # Back to 8-bit values so the array can be wrapped in a PIL image again
    eight_bit = (unit_scaled * 255).astype(np.uint8)

    return Image.fromarray(eight_bit)

# Save the processed images into a new directory that mirrors the sub-directory structure of the source
def process_images_in_directory(directory, Preprocessed_DS_directory, target_size=None):
    """Resize every eligible image under ``directory`` and save a copy.

    The tree below ``directory`` is walked recursively. Files whose name ends
    with a known image extension are converted to RGB, passed through
    ``normalize_and_resize_image`` and written below
    ``Preprocessed_DS_directory`` under the same relative sub-directory and
    the same file name. Any file whose full path contains ``'augmented'`` or
    the YOLO-annotated dataset folder name is skipped.

    Args:
        directory: Root directory of the original dataset.
        Preprocessed_DS_directory: Root directory the processed copies are
            written to; sub-directories are created as needed.
        target_size: Size passed on to ``normalize_and_resize_image``.
            Defaults to the notebook-level ``image_size``.
    """
    if target_size is None:
        target_size = image_size

    image_extensions = ('.png', '.jpg', '.jpeg', '.gif', '.bmp')
    excluded_markers = (
        'augmented',
        'Caries_Gingivitus_ToothDiscoloration_Ulcer-yolo_annotated-Dataset',
    )

    for root, _dirs, filenames in os.walk(directory):
        for filename in filenames:
            # Only files with an image extension are considered
            if not filename.lower().endswith(image_extensions):
                continue

            image_path = os.path.join(root, filename)

            # Skip augmented copies and the YOLO-annotated duplicate of the data
            if any(marker in image_path for marker in excluded_markers):
                continue

            # The context manager closes the underlying file once the pixels
            # have been copied into the RGB image
            with Image.open(image_path) as source_image:
                rgb_image = source_image.convert("RGB")

            processed_image = normalize_and_resize_image(rgb_image, target_size)

            # Recreate the source sub-directory below the output root
            output_subdirectory = os.path.relpath(root, directory)
            output_path = os.path.join(Preprocessed_DS_directory, output_subdirectory)
            os.makedirs(output_path, exist_ok=True)

            # The data is always encoded as JPEG, while the original file name
            # (including its extension) is kept
            processed_image.save(os.path.join(output_path, filename), format='JPEG')

In [None]:
# Call the function to process images in the directory:
# reads from main_DS_directory and writes the processed copies to Preprocessed_DS_directory
process_images_in_directory(main_DS_directory, Preprocessed_DS_directory)

In [None]:
# Count every file (of any type) found below the original dataset root
file_count = 0
for _walk_root, _walk_dirs, _walk_names in os.walk(main_DS_directory):
    file_count += len(_walk_names)
print(f"Number of Original Images: {file_count}")

# Count every file found below the preprocessed dataset root
file_count = 0
for _walk_root, _walk_dirs, _walk_names in os.walk(Preprocessed_DS_directory):
    file_count += len(_walk_names)
print(f"Number of Preprocessed Images: {file_count}")

Number of Original Images: 15373
Number of Preprocessed Images: 5563


# Initializations for training

Define transformations by Composing several transforms together

In [None]:
# Preprocessing applied to each image when it is loaded from disk
transform = transforms.Compose([
    # Resize to the notebook-wide image_size
    transforms.Resize(image_size),
    #Convert a PIL Image or ndarray to tensor
    transforms.ToTensor()
])

In [None]:
# Use ImageFolder to load your dataset; each sub-directory of the
# preprocessed root becomes one class
dataset = torchvision.datasets.ImageFolder(root=Preprocessed_DS_directory, transform=transform)

In [None]:
# Class labels found by ImageFolder, and how many of them there are
classes = dataset.classes
num_classes = len(classes)

In [None]:
# Define k-fold for cross-validation: number of folds passed to StratifiedKFold
# and the length of the per-fold F1 lists used by the training cells
k_folds = 5

In [None]:
# Seed (random_state) is set to initialize the random number generator while splitting the DataSet into k folds.
# The same splitter object is reused by every training cell.
skf = StratifiedKFold(n_splits=k_folds, shuffle=True, random_state=42)

In [None]:
# Define hyperparameters to search over; the training cells try every combination.
# NOTE: the ResNet50 cell reassigns all three lists, so cells run after it see the reduced grid.
learning_rates = [0.001, 0.0001]
batch_sizes = [32, 64, 128]
optimizers = ['Adam', 'SGD', 'RMSprop']

In [None]:
# Number of training epochs per fold; the training cells copy it into num_epochs.
# NOTE: the ResNet18 and ResNet50 cells reassign this to 10.
epochs = 35

In [None]:
# Placeholders for tracking the best result of the search.
# NOTE(review): in the cells visible here only best_f1 is reassigned (reset per fold);
# best_model and best_hyperparameters are never updated — confirm they are still needed.
best_model = None
best_f1 = 0.0
best_hyperparameters = None

In [None]:
# Map each class folder name to the number of entries it contains
class_counts = {}
for class_name in os.listdir(Preprocessed_DS_directory):
    class_directory = os.path.join(Preprocessed_DS_directory, class_name)

    # Plain files sitting next to the class folders are ignored
    if not os.path.isdir(class_directory):
        continue

    class_counts[class_name] = len(os.listdir(class_directory))

# ConvNeXt

In [None]:
"""# ConvNeXt"""

# Result rows for the whole ConvNeXt sweep: one dict per fold plus one
# min/max/average summary dict per hyperparameter combination
results = []
f1_vals = [0] * k_folds
results.append({'Model': 'ConvNeXt'})

# Index of the first entry of `results` that has not been written to Excel yet.
# Only the rows from this index onwards are appended after each combination,
# so rows that are already in the file are not written a second time.
first_unsaved_row = 0

# Per-epoch training and validation curves, accumulated over every fold and combination
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

num_epochs = epochs

# The device does not depend on the hyperparameters, so it is selected once
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

for lr in learning_rates:
    for batch_size in batch_sizes:
        for optimizer_name in optimizers:
            print("Training with optimizer: " + optimizer_name + ", learning rate: " + str(lr) + ", batch size: " + str(batch_size))

            for fold, (train_idx, val_idx) in enumerate(skf.split(dataset.imgs, dataset.targets)):
                # Both loaders read from the same dataset; the samplers restrict
                # them to the indices of the current fold
                train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
                val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

                train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler, pin_memory=True)
                val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=val_sampler, pin_memory=True)

                # Best validation F1 seen in any epoch of this fold
                best_f1 = 0

                # A new network is created for every fold; no pretrained weights are requested.
                # `input_shape` is not a documented convnext_small/ConvNeXt argument
                # (the network is convolutional and takes the size from its input),
                # so it is not passed.
                model = torchvision.models.convnext_small(num_classes=num_classes).to(device)

                # Define loss function
                criterion = nn.CrossEntropyLoss()

                # Define optimizer
                if optimizer_name == 'Adam':
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                elif optimizer_name == 'SGD':
                    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                elif optimizer_name == 'RMSprop':
                    optimizer = optim.RMSprop(model.parameters(), lr=lr)
                else:
                    # Fail loudly instead of reusing the optimizer of a previous fold
                    raise ValueError("Unsupported optimizer: " + str(optimizer_name))

                # Train and evaluate
                for epoch in range(num_epochs):
                    # Training loop
                    model.train()
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    for inputs, labels in train_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        running_loss += loss.item()
                        preds = outputs.argmax(dim=1)
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                    # Loss is averaged over batches, accuracy over samples
                    train_losses.append(running_loss / len(train_loader))
                    train_accuracies.append(correct / total)

                    # Validation loop
                    model.eval()
                    all_preds = []
                    all_labels = []
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)
                            running_loss += loss.item()
                            preds = outputs.argmax(dim=1)
                            total += labels.size(0)
                            correct += (preds == labels).sum().item()
                            all_preds.extend(preds.cpu().numpy())
                            all_labels.extend(labels.cpu().numpy())
                    val_losses.append(running_loss / len(val_loader))
                    val_accuracies.append(correct / total)

                    # Calculate the macro-averaged F1 score on the validation fold
                    f1 = f1_score(all_labels, all_preds, average='macro')
                    print("Fold " + str(fold+1) + ", Epoch " + str(epoch+1) + ", F1 Score: " + str(f1))

                    # Keep the best F1 score of this fold
                    if f1 > best_f1:
                        best_f1 = f1

                f1_vals[fold] = best_f1
                results.append({
                    'model': 'ConvNeXt_small',
                    'Optimizer': optimizer_name,
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'Fold': fold+1,
                    'F1 Score': best_f1
                })

            # Summary over the folds of this hyperparameter combination
            results.append({
                'min f1': min(f1_vals),
                'max f1': max(f1_vals),
                'average f1': sum(f1_vals)/len(f1_vals)
            })

            # Persist after every combination so an interrupted run keeps its
            # finished results. Only the rows added since the last write are appended.
            df = pd.DataFrame(results[first_unsaved_row:])
            first_unsaved_row = len(results)

            # Load an existing Excel file and put its rows in front of the new ones
            if os.path.isfile(results_directory):
                existing_file = pd.read_excel(results_directory)
                df = pd.concat([existing_file, df])

            # Write the DataFrame to the excel file
            df.to_excel(results_directory, index=False)
# -------------- End of Save the results in a excel file ----------------



In [None]:
# ----------------- Save the results in a excel file ---------------------
# NOTE(review): the ConvNeXt training cell already writes `results` to this same
# file after every hyperparameter combination, so running this cell appends the
# rows once more — confirm whether that is intended.
df = pd.DataFrame(results)

# When the results file already exists, its rows are placed in front of the new ones
if os.path.isfile(results_directory):
    existing_file = pd.read_excel(results_directory)
    df = pd.concat([existing_file, df])

# Write the combined table back to the results file
df.to_excel(results_directory, index=False)
# -------------- End of Save the results in a excel file ----------------

# -------------------- Plot the error/epoch plot ------------------------

# Excel file that holds the plots
excel_filename = Plot_directory

# Make sure the folder containing the workbook exists
os.makedirs(os.path.dirname(excel_filename), exist_ok=True)

# Open the existing workbook, or start a new one when the file is missing
wb = load_workbook(excel_filename) if os.path.exists(excel_filename) else Workbook()
ws = wb.active

# Transformer: ViT

In [None]:
# Define the transformations for the ViT section: resize to 224x224 (larger than
# the 112x112 image_size used earlier in the notebook), then convert to a tensor
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to 224x224
    transforms.ToTensor()
])

In [None]:
# Use ImageFolder to load your dataset again, this time with the 224x224 transform.
# NOTE: this rebinds the notebook-level `dataset`, so cells run afterwards see this version.
dataset = torchvision.datasets.ImageFolder(root=Preprocessed_DS_directory, transform=transform)

In [None]:
# Initialize lists to store training and validation losses and accuracies
# (one value is appended per epoch by the training cell)
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

In [None]:
"""#Transformer: ViT"""

from transformers import ViTConfig, ViTForImageClassification, ViTModel

# Define transformations by Composing several transforms together and change
# the image size to 224x224 to be compatible with ViT.
# Resize needs the (224, 224) pair here: a single int such as 224*224 is
# interpreted as the target length of the shorter image edge.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    # Convert a PIL Image or ndarray to tensor
    transforms.ToTensor()
])

# Use ImageFolder to load your dataset
dataset = torchvision.datasets.ImageFolder(root=Preprocessed_DS_directory, transform=transform)

# Define number of classes and list of labels in the dataset
num_classes = len(dataset.classes)
classes = dataset.classes

# Result rows for the whole ViT sweep: one dict per fold plus one
# min/max/average summary dict per hyperparameter combination
results = []
f1_vals = [0] * k_folds
results.append({'Model': 'Transformr: ViT'})

# Per-epoch training and validation curves, accumulated over every fold and combination
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

num_epochs = epochs

# Define the device (GPU if available, otherwise CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Architecture settings of the ViT-base checkpoint. num_labels sizes the
# classification head so the model returns one logit per dataset class.
config = ViTConfig.from_pretrained('google/vit-base-patch16-224-in21k')
config.num_labels = num_classes

for batch_size in batch_sizes:
    for lr in learning_rates:
        for optimizer_name in optimizers:
            print("Training with optimizer: " + optimizer_name + ", learning rate: " + str(lr) + ", batch size: " + str(batch_size))

            for fold, (train_idx, val_idx) in enumerate(skf.split(dataset.imgs, dataset.targets)):
                train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
                val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

                train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
                val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

                # Best validation F1 seen in any epoch of this fold
                best_f1 = 0

                # A bare ViTModel returns hidden states, which CrossEntropyLoss
                # cannot consume; the classification variant adds the linear head
                # and exposes `.logits`. The model is built from the configuration
                # only, i.e. without loading pretrained weights.
                model = ViTForImageClassification(config).to(device)

                # Define loss function
                criterion = nn.CrossEntropyLoss()

                # Define optimizer
                if optimizer_name == 'Adam':
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                elif optimizer_name == 'SGD':
                    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                elif optimizer_name == 'RMSprop':
                    optimizer = optim.RMSprop(model.parameters(), lr=lr)
                else:
                    # Fail loudly instead of reusing the optimizer of a previous fold
                    raise ValueError("Unsupported optimizer: " + str(optimizer_name))

                # Train and evaluate
                for epoch in range(num_epochs):
                    # Training loop
                    model.train()
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    for inputs, labels in train_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        optimizer.zero_grad()
                        outputs = model(pixel_values=inputs).logits
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        running_loss += loss.item()
                        preds = outputs.argmax(dim=1)
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                    # Loss is averaged over batches, accuracy over samples
                    train_losses.append(running_loss / len(train_loader))
                    train_accuracies.append(correct / total)

                    # Validation loop
                    model.eval()
                    all_preds = []
                    all_labels = []
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(pixel_values=inputs).logits
                            loss = criterion(outputs, labels)
                            running_loss += loss.item()
                            preds = outputs.argmax(dim=1)
                            total += labels.size(0)
                            correct += (preds == labels).sum().item()
                            all_preds.extend(preds.cpu().numpy())
                            all_labels.extend(labels.cpu().numpy())
                    val_losses.append(running_loss / len(val_loader))
                    val_accuracies.append(correct / total)

                    # Calculate the macro-averaged F1 score on the validation fold
                    f1 = f1_score(all_labels, all_preds, average='macro')
                    print("Fold " + str(fold+1) + ", Epoch " + str(epoch+1) + ", F1 Score: " + str(f1))

                    # Keep the best F1 score of this fold
                    if f1 > best_f1:
                        best_f1 = f1

                f1_vals[fold] = best_f1
                results.append({
                    'Optimizer': optimizer_name,
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'Fold': fold+1,
                    'F1 Score': best_f1
                })

            # Summary over the folds of this hyperparameter combination
            results.append({
                'min f1': min(f1_vals),
                'max f1': max(f1_vals),
                'average f1': sum(f1_vals)/len(f1_vals)
            })

# The workbook is not modified during training, so it is opened once after the
# sweep instead of once per hyperparameter combination.
excel_filename = Plot_directory

# Create the directory if it doesn't exist
os.makedirs(os.path.dirname(excel_filename), exist_ok=True)

# Check if the Excel file exists
if os.path.exists(excel_filename):
    # Load existing Excel file
    wb = load_workbook(excel_filename)
    ws = wb.active
else:
    # Create a new Excel workbook
    wb = Workbook()
    ws = wb.active


Training with optimizer: Adam, learning rate: 0.001, batch size: 32


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/502 [00:00<?, ?B/s]

In [None]:
        # Save the results as a DataFrame to be saved in an Excel file
        df = pd.DataFrame(results)

        # Load an existing Excel file
        if os.path.isfile(results_directory):
            existing_file = pd.read_excel(results_directory)
            # Append the new DataFrame to the existing file
            df = pd.concat([existing_file, df])

        # Write the DataFrame to the excel file in the directory
        df.to_excel(results_directory, index=False)
# -------------------- Plot the error/epoch plot ------------------------

# Define the filename for the Excel file
excel_filename = Plot_directory

# Create the directory if it doesn't exist
os.makedirs(os.path.dirname(excel_filename), exist_ok=True)

# Check if the Excel file exists
if os.path.exists(excel_filename):
    # Load existing Excel file
    wb = load_workbook(excel_filename)
    ws = wb.active
else:
    # Create a new Excel workbook
    wb = Workbook()
    ws = wb.active

# ResNet18

In [None]:

"""# ResNet18"""

from torchvision.models import resnet18

# Rebuild the dataset at the notebook-wide image_size so this cell does not
# silently reuse whatever `transform`/`dataset` an earlier cell (for example
# the 224x224 ViT cell) left behind.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.ToTensor()
])
dataset = torchvision.datasets.ImageFolder(root=Preprocessed_DS_directory, transform=transform)

# Result rows for the whole ResNet18 sweep: one dict per fold plus one
# min/max/average summary dict per hyperparameter combination
results = []
f1_vals = [0] * k_folds
results.append({'Model': 'ResNet18'})

# Per-epoch training and validation curves, accumulated over every fold and combination
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

# NOTE: this rebinds the notebook-level `epochs`; cells run afterwards see 10
epochs = 10
num_epochs = epochs

# Define the device (GPU if available, otherwise CPU) once, and report
# GPU availability once instead of once per fold
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(torch.cuda.is_available())

for lr in learning_rates:
    for batch_size in batch_sizes:
        for optimizer_name in optimizers:
            print(f"Training with optimizer: {optimizer_name}, learning rate: {lr}, batch size: {batch_size}")

            for fold, (train_idx, val_idx) in enumerate(skf.split(dataset.imgs, dataset.targets)):
                train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
                val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

                train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
                val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

                # Best validation F1 seen in any epoch of this fold
                best_f1 = 0

                # Start every fold from the default pretrained weights and replace
                # the final layer so it outputs one score per dataset class
                model = resnet18(weights='ResNet18_Weights.DEFAULT')
                num_ftrs = model.fc.in_features
                model.fc = nn.Linear(num_ftrs, num_classes)
                model = model.to(device)

                # Define loss function
                criterion = nn.CrossEntropyLoss()

                # Define optimizer
                if optimizer_name == 'Adam':
                    optimizer = optim.Adam(model.parameters(), lr=lr)
                elif optimizer_name == 'SGD':
                    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)
                elif optimizer_name == 'RMSprop':
                    optimizer = optim.RMSprop(model.parameters(), lr=lr)
                else:
                    # Fail loudly instead of reusing the optimizer of a previous fold
                    raise ValueError("Unsupported optimizer: " + str(optimizer_name))

                # Train and evaluate
                for epoch in range(num_epochs):
                    # Training loop
                    model.train()
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    for inputs, labels in train_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        running_loss += loss.item()
                        preds = outputs.argmax(dim=1)
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                    # Loss is averaged over batches, accuracy over samples
                    train_losses.append(running_loss / len(train_loader))
                    train_accuracies.append(correct / total)

                    # Validation loop
                    model.eval()
                    all_preds = []
                    all_labels = []
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)
                            running_loss += loss.item()
                            preds = outputs.argmax(dim=1)
                            total += labels.size(0)
                            correct += (preds == labels).sum().item()
                            all_preds.extend(preds.cpu().numpy())
                            all_labels.extend(labels.cpu().numpy())
                    val_losses.append(running_loss / len(val_loader))
                    val_accuracies.append(correct / total)

                    # Calculate the macro-averaged F1 score on the validation fold
                    f1 = f1_score(all_labels, all_preds, average='macro')
                    print(f"Fold {fold+1}, Epoch {epoch+1}, F1 Score: {f1}")

                    # Keep the best F1 score of this fold
                    if f1 > best_f1:
                        best_f1 = f1

                f1_vals[fold] = best_f1
                results.append({
                    'Optimizer': optimizer_name,
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'Fold': fold+1,
                    'F1 Score': best_f1
                })

            # Summary over the folds of this hyperparameter combination
            results.append({
                'min f1': min(f1_vals),
                'max f1': max(f1_vals),
                'average f1': sum(f1_vals)/len(f1_vals)
            })



Training with optimizer: Adam, learning rate: 0.001, batch size: 32
True
Fold 1, Epoch 1, F1 Score: 0.5804181163491933
Fold 1, Epoch 2, F1 Score: 0.6015906261063725
Fold 1, Epoch 3, F1 Score: 0.6147059343740789
Fold 1, Epoch 4, F1 Score: 0.7092898957068207
Fold 1, Epoch 5, F1 Score: 0.6944780524017036
Fold 1, Epoch 6, F1 Score: 0.729028123100823
Fold 1, Epoch 7, F1 Score: 0.7589068166981642
Fold 1, Epoch 8, F1 Score: 0.7522300886659014
Fold 1, Epoch 9, F1 Score: 0.5546282480526946
Fold 1, Epoch 10, F1 Score: 0.7395837087596105
True
Fold 2, Epoch 1, F1 Score: 0.6053821511114921
Fold 2, Epoch 2, F1 Score: 0.6033835530360835
Fold 2, Epoch 3, F1 Score: 0.6527047430971503
Fold 2, Epoch 4, F1 Score: 0.5273281879846695
Fold 2, Epoch 5, F1 Score: 0.2857838214934032
Fold 2, Epoch 6, F1 Score: 0.7256350259204031
Fold 2, Epoch 7, F1 Score: 0.7331283014560951
Fold 2, Epoch 8, F1 Score: 0.695354276735011
Fold 2, Epoch 9, F1 Score: 0.7079495992434349
Fold 2, Epoch 10, F1 Score: 0.7588148790059726
Tr

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Fold 1, Epoch 1, F1 Score: 0.494345880415841
Fold 1, Epoch 2, F1 Score: 0.698405654964858
Fold 1, Epoch 3, F1 Score: 0.5421223958490567
Fold 1, Epoch 4, F1 Score: 0.7069827099071949
Fold 1, Epoch 5, F1 Score: 0.7169790732800796
Fold 1, Epoch 6, F1 Score: 0.6740104284706474
Fold 1, Epoch 7, F1 Score: 0.6637087430639167
Fold 1, Epoch 8, F1 Score: 0.6608814278292904
Fold 1, Epoch 9, F1 Score: 0.7391552778162082
Fold 1, Epoch 10, F1 Score: 0.7313812797104972
True
Fold 2, Epoch 1, F1 Score: 0.6319143555917446
Fold 2, Epoch 2, F1 Score: 0.7015796247829228
Fold 2, Epoch 3, F1 Score: 0.6172155702927782
Fold 2, Epoch 4, F1 Score: 0.6628841676270466
Fold 2, Epoch 5, F1 Score: 0.655875952147994
Fold 2, Epoch 6, F1 Score: 0.6777118375431667
Fold 2, Epoch 7, F1 Score: 0.7748763641292403
Fold 2, Epoch 8, F1 Score: 0.7404980509462312
Fold 2, Epoch 9, F1 Score: 0.7117888432524332
Fold 2, Epoch 10, F1 Score: 0.7397700017750294
True
Fold 3, Epoch 1, F1 Score: 0.5968791074634109
Fold 3, Epoch 2, F1 Score

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Fold 4, Epoch 1, F1 Score: 0.5699933564919466
Fold 4, Epoch 2, F1 Score: 0.6641864524437252
Fold 4, Epoch 3, F1 Score: 0.6291192703696653
Fold 4, Epoch 4, F1 Score: 0.65682721326365
Fold 4, Epoch 5, F1 Score: 0.692203334783735
Fold 4, Epoch 6, F1 Score: 0.5292004606021847
Fold 4, Epoch 7, F1 Score: 0.6855584508983235
Fold 4, Epoch 8, F1 Score: 0.7255828321779766
Fold 4, Epoch 9, F1 Score: 0.7460886723846244
Fold 4, Epoch 10, F1 Score: 0.759604039741058
True
Fold 5, Epoch 1, F1 Score: 0.5883320192663312
Fold 5, Epoch 2, F1 Score: 0.5193335433594939
Fold 5, Epoch 3, F1 Score: 0.6520712174258672
Fold 5, Epoch 4, F1 Score: 0.6644157524655928
Fold 5, Epoch 5, F1 Score: 0.6149888608073636
Fold 5, Epoch 6, F1 Score: 0.6560386017724181
Fold 5, Epoch 7, F1 Score: 0.7002186089583159
Fold 5, Epoch 8, F1 Score: 0.7264774021822031
Fold 5, Epoch 9, F1 Score: 0.7354978399848452
Fold 5, Epoch 10, F1 Score: 0.6784720341778385
Training with optimizer: SGD, learning rate: 0.001, batch size: 64
True
Fold 

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Fold 1, Epoch 1, F1 Score: 0.6668641444456109
Fold 1, Epoch 2, F1 Score: 0.5937283077344528
Fold 1, Epoch 3, F1 Score: 0.6574287845245431
Fold 1, Epoch 4, F1 Score: 0.727634664650509
Fold 1, Epoch 5, F1 Score: 0.716149715230809
Fold 1, Epoch 6, F1 Score: 0.7100861179751989
Fold 1, Epoch 7, F1 Score: 0.7356056007875732
Fold 1, Epoch 8, F1 Score: 0.7456811192163167
Fold 1, Epoch 9, F1 Score: 0.5965359070369819
Fold 1, Epoch 10, F1 Score: 0.6979353061766919
True
Fold 2, Epoch 1, F1 Score: 0.6648344671832523
Fold 2, Epoch 2, F1 Score: 0.6756689793080161
Fold 2, Epoch 3, F1 Score: 0.6084026922892463
Fold 2, Epoch 4, F1 Score: 0.6552560272050398
Fold 2, Epoch 5, F1 Score: 0.7626010839223295
Fold 2, Epoch 6, F1 Score: 0.7309639421683684
Fold 2, Epoch 7, F1 Score: 0.7085274764194497
Fold 2, Epoch 8, F1 Score: 0.6866166359860606
Fold 2, Epoch 9, F1 Score: 0.7367980395005637
Fold 2, Epoch 10, F1 Score: 0.7257112879066638
True
Fold 3, Epoch 1, F1 Score: 0.6337785473091181
Fold 3, Epoch 2, F1 Scor

  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Fold 4, Epoch 1, F1 Score: 0.6389538506746694
Fold 4, Epoch 2, F1 Score: 0.5853502133953152
Fold 4, Epoch 3, F1 Score: 0.7110700933120305
Fold 4, Epoch 4, F1 Score: 0.7098214193473026
Fold 4, Epoch 5, F1 Score: 0.7011396832824971
Fold 4, Epoch 6, F1 Score: 0.6337288587932889
Fold 4, Epoch 7, F1 Score: 0.7169870256124425
Fold 4, Epoch 8, F1 Score: 0.7352709146717267
Fold 4, Epoch 9, F1 Score: 0.7800296514010704
Fold 4, Epoch 10, F1 Score: 0.6532785427032013
True
Fold 5, Epoch 1, F1 Score: 0.6430881364078912
Fold 5, Epoch 2, F1 Score: 0.7458367735552018
Fold 5, Epoch 3, F1 Score: 0.7172541858225129
Fold 5, Epoch 4, F1 Score: 0.714685519376724
Fold 5, Epoch 5, F1 Score: 0.6924959355656793
Fold 5, Epoch 6, F1 Score: 0.7043948980044274
Fold 5, Epoch 7, F1 Score: 0.6182839287750016
Fold 5, Epoch 8, F1 Score: 0.7302518732092068
Fold 5, Epoch 9, F1 Score: 0.707701776593956
Fold 5, Epoch 10, F1 Score: 0.7346124394698057
Training with optimizer: SGD, learning rate: 0.001, batch size: 128
True
Fo

In [None]:
# ----------------- Save the results in a excel file ---------------------
# Turn the collected result rows into a table
df = pd.DataFrame(results)

# When the results file already exists, its rows are placed in front of the new ones
if os.path.isfile(results_directory):
    existing_file = pd.read_excel(results_directory)
    df = pd.concat([existing_file, df])

# Write the combined table back to the results file
df.to_excel(results_directory, index=False)
# -------------- End of Save the results in a excel file ----------------

# -------------------- Plot the error/epoch plot ------------------------

# Excel file that holds the plots
excel_filename = Plot_directory

# Make sure the folder containing the workbook exists
os.makedirs(os.path.dirname(excel_filename), exist_ok=True)

# Open the existing workbook, or start a new one when the file is missing
wb = load_workbook(excel_filename) if os.path.exists(excel_filename) else Workbook()
ws = wb.active

# ResNet50

In [None]:

"""#ResNet50"""

from torchvision.models import resnet50

# Grid search for ResNet50: every learning rate x batch size x optimizer
# combination is trained on each fold produced by `skf`, starting from the
# pretrained ResNet50 weights, and the best validation macro-F1 reached in any
# epoch is recorded for that fold.

# Rows destined for the results spreadsheet; the first row labels the model
results = []
f1_vals = [0] * k_folds
results.append({'Model': 'ResNet50'})

# Per-epoch training/validation losses and accuracies, appended in run order
# (all optimizers and folds end up in the same flat lists)
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

epochs = 10
# Define hyperparameters to search over
learning_rates = [0.0001]
batch_sizes = [32]
optimizers = ['Adam', 'SGD', 'RMSprop']
num_epochs = epochs

# Constructor for every supported optimizer name. Using a lookup table instead
# of an if/elif chain without `else` means an unsupported name can no longer
# fall through and silently reuse an `optimizer` object left over from an
# earlier fold or cell (which would be bound to a different model).
optimizer_builders = {
    'Adam': lambda params, learning_rate: optim.Adam(params, lr=learning_rate),
    'SGD': lambda params, learning_rate: optim.SGD(params, lr=learning_rate, momentum=0.9),
    'RMSprop': lambda params, learning_rate: optim.RMSprop(params, lr=learning_rate),
}

# Fail before any training starts rather than part-way through the search
unsupported_optimizers = [name for name in optimizers if name not in optimizer_builders]
if unsupported_optimizers:
    raise ValueError(f"Unsupported optimizer name(s): {unsupported_optimizers}")

# Loop-invariant objects: the device (GPU if available, otherwise CPU) and the
# loss function do not depend on the fold or the hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

for lr in learning_rates:
    for batch_size in batch_sizes:
        for optimizer_name in optimizers:
            print(f"Training with optimizer: {optimizer_name}, learning rate: {lr}, batch size: {batch_size}")
            for fold, (train_idx, val_idx) in enumerate(skf.split(dataset.imgs, dataset.targets)):
                # Both loaders read from the same dataset; the samplers restrict
                # each one to the indices of its side of the split
                train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
                val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

                train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
                val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

                best_f1 = 0

                # Fresh pretrained network for every fold, with the final layer
                # replaced by one that outputs `num_classes` logits
                model = resnet50(weights='ResNet50_Weights.DEFAULT')
                num_ftrs = model.fc.in_features
                model.fc = nn.Linear(num_ftrs, num_classes)
                model = model.to(device)  # Move the model to the device

                # Optimizer bound to this fold's model
                optimizer = optimizer_builders[optimizer_name](model.parameters(), lr)

                # Train and evaluate
                for epoch in range(num_epochs):
                    # Training loop
                    model.train()
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    for inputs, labels in train_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        running_loss += loss.item()
                        _, preds = torch.max(outputs, 1)
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                    # Epoch loss is the average of the per-batch loss values
                    epoch_train_loss = running_loss / len(train_loader)
                    epoch_train_accuracy = correct / total
                    train_losses.append(epoch_train_loss)
                    train_accuracies.append(epoch_train_accuracy)

                    # Validation loop (no gradients needed)
                    model.eval()
                    all_preds = []
                    all_labels = []
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)
                            running_loss += loss.item()
                            _, preds = torch.max(outputs, 1)
                            total += labels.size(0)
                            correct += (preds == labels).sum().item()
                            # Keep predictions and labels on the CPU for f1_score
                            all_preds.extend(preds.cpu().numpy())
                            all_labels.extend(labels.cpu().numpy())
                    epoch_val_loss = running_loss / len(val_loader)
                    epoch_val_accuracy = correct / total
                    val_losses.append(epoch_val_loss)
                    val_accuracies.append(epoch_val_accuracy)

                    # Macro-averaged F1 over the whole validation fold
                    f1 = f1_score(all_labels, all_preds, average='macro')
                    print(f"Fold {fold+1}, Epoch {epoch+1}, F1 Score: {f1}")

                    # Keep the best F1 seen so far in this fold
                    if f1 > best_f1:
                        best_f1 = f1

                # One result row per fold
                f1_vals[fold] = best_f1
                results.append({
                    'Optimizer': optimizer_name,
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'Fold': fold+1,
                    'F1 Score': best_f1
                })
            # One summary row per hyperparameter combination, over its folds
            results.append({
                'min f1': min(f1_vals),
                'max f1': max(f1_vals),
                'average f1': sum(f1_vals)/len(f1_vals)
            })

Training with optimizer: Adam, learning rate: 0.0001, batch size: 32


Downloading: "https://download.pytorch.org/models/resnet50-11ad3fa6.pth" to /root/.cache/torch/hub/checkpoints/resnet50-11ad3fa6.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 172MB/s]
  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Fold 1, Epoch 1, F1 Score: 0.5690166973267317
Fold 1, Epoch 2, F1 Score: 0.7611061146603341
Fold 1, Epoch 3, F1 Score: 0.7805996053864265
Fold 1, Epoch 4, F1 Score: 0.7930787792076676
Fold 1, Epoch 5, F1 Score: 0.8178857888308451
Fold 1, Epoch 6, F1 Score: 0.8099341231472881
Fold 1, Epoch 7, F1 Score: 0.8094702416521652
Fold 1, Epoch 8, F1 Score: 0.8095437629483605
Fold 1, Epoch 9, F1 Score: 0.8116510494370592
Fold 1, Epoch 10, F1 Score: 0.8313789312159102
Fold 2, Epoch 1, F1 Score: 0.5640797767739741
Fold 2, Epoch 2, F1 Score: 0.7432720669094849
Fold 2, Epoch 3, F1 Score: 0.7709733944769952
Fold 2, Epoch 4, F1 Score: 0.7731964138501396
Fold 2, Epoch 5, F1 Score: 0.775479575343636
Fold 2, Epoch 6, F1 Score: 0.7894467927457135
Fold 2, Epoch 7, F1 Score: 0.7955602100610318
Fold 2, Epoch 8, F1 Score: 0.7832828511879283
Fold 2, Epoch 9, F1 Score: 0.7964430553988725
Fold 2, Epoch 10, F1 Score: 0.8034643998069059
Fold 3, Epoch 1, F1 Score: 0.5386825841736553
Fold 3, Epoch 2, F1 Score: 0.7507

In [None]:
# ----------------- Save the results in an Excel file --------------------
# Turn the result rows collected by the training cell into a DataFrame
df = pd.DataFrame(results)

# If a results workbook already exists, put the new rows after its contents
if os.path.isfile(results_directory):
    existing_file = pd.read_excel(results_directory)
    df = pd.concat([existing_file, df])

# Make sure the destination folder exists before writing. The guard skips
# paths without a folder component, because os.makedirs('') raises
# FileNotFoundError.
results_parent = os.path.dirname(results_directory)
if results_parent:
    os.makedirs(results_parent, exist_ok=True)

# Write the combined rows back to the workbook (the row index is not stored)
df.to_excel(results_directory, index=False)
# -------------- End of Save the results in an Excel file ----------------

# -------------------- Plot the error/epoch plot ------------------------

# Path of the Excel workbook that holds the plot data
excel_filename = Plot_directory

# Create the workbook's folder if needed (same empty-dirname guard as above)
plot_parent = os.path.dirname(excel_filename)
if plot_parent:
    os.makedirs(plot_parent, exist_ok=True)

# Reuse the existing workbook when there is one, otherwise start a new one;
# in both cases `ws` is the workbook's active worksheet.
# NOTE(review): nothing visible in this cell writes to or saves `wb` --
# confirm that a later cell does.
if os.path.exists(excel_filename):
    wb = load_workbook(excel_filename)
else:
    wb = Workbook()
ws = wb.active

"\n# Append data to the DataFrame (train_losses, val_losses, train_accuracies, val_accuracies are assumed to be lists)\ndf['Train Loss'] = train_losses\ndf['Val Loss'] = val_losses\ndf['Train Accuracy'] = train_accuracies\ndf['Val Accuracy'] = val_accuracies\nnum_epochs = train_losses\n"

# EfficientNetB0

In [None]:
"""# EfficientNetB0"""

! pip install --upgrade efficientnet-pytorch

from efficientnet_pytorch import EfficientNet

# Grid search for EfficientNetB0: every learning rate x batch size x optimizer
# combination is trained on each fold produced by `skf`, starting from the
# pretrained efficientnet-b0 weights, and the best validation macro-F1 reached
# in any epoch is recorded for that fold.
# `epochs`, `learning_rates`, `batch_sizes` and `optimizers` are read from the
# notebook namespace; they are not defined in this cell.

# Rows destined for the results spreadsheet; the first row labels the model
results = []
f1_vals = [0] * k_folds
results.append({'Model': 'EfficientNetB0'})

# Per-epoch training/validation losses and accuracies, appended in run order
# (all optimizers and folds end up in the same flat lists)
train_losses = []
val_losses = []
train_accuracies = []
val_accuracies = []

num_epochs = epochs

# Constructor for every supported optimizer name. Using a lookup table instead
# of an if/elif chain without `else` means an unsupported name can no longer
# fall through and silently reuse an `optimizer` object left over from an
# earlier fold or cell (which would be bound to a different model).
optimizer_builders = {
    'Adam': lambda params, learning_rate: optim.Adam(params, lr=learning_rate),
    'SGD': lambda params, learning_rate: optim.SGD(params, lr=learning_rate, momentum=0.9),
    'RMSprop': lambda params, learning_rate: optim.RMSprop(params, lr=learning_rate),
}

# Fail before any training starts rather than part-way through the search
unsupported_optimizers = [name for name in optimizers if name not in optimizer_builders]
if unsupported_optimizers:
    raise ValueError(f"Unsupported optimizer name(s): {unsupported_optimizers}")

# Loop-invariant objects: the device (GPU if available, otherwise CPU) and the
# loss function do not depend on the fold or the hyperparameters
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = nn.CrossEntropyLoss()

for lr in learning_rates:
    for batch_size in batch_sizes:
        for optimizer_name in optimizers:
            print(f"Training with optimizer: {optimizer_name}, learning rate: {lr}, batch size: {batch_size}")
            for fold, (train_idx, val_idx) in enumerate(skf.split(dataset.imgs, dataset.targets)):
                # Both loaders read from the same dataset; the samplers restrict
                # each one to the indices of its side of the split
                train_sampler = torch.utils.data.SubsetRandomSampler(train_idx)
                val_sampler = torch.utils.data.SubsetRandomSampler(val_idx)

                train_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=train_sampler)
                val_loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, sampler=val_sampler)

                best_f1 = 0

                # Fresh pretrained network for every fold.
                # `num_classes` must be passed by keyword: the second positional
                # parameter of `from_pretrained` is not `num_classes` (it is
                # `weights_path` in efficientnet-pytorch 0.7.x), so the positional
                # form leaves the 1000-class ImageNet head in place.
                model = EfficientNet.from_pretrained('efficientnet-b0', num_classes=num_classes).to(device)

                # Optimizer bound to this fold's model
                optimizer = optimizer_builders[optimizer_name](model.parameters(), lr)

                # Train and evaluate
                for epoch in range(num_epochs):
                    # Training loop
                    model.train()
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    for inputs, labels in train_loader:
                        inputs, labels = inputs.to(device), labels.to(device)
                        optimizer.zero_grad()
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        loss.backward()
                        optimizer.step()
                        running_loss += loss.item()
                        _, preds = torch.max(outputs, 1)
                        total += labels.size(0)
                        correct += (preds == labels).sum().item()
                    # Epoch loss is the average of the per-batch loss values
                    epoch_train_loss = running_loss / len(train_loader)
                    epoch_train_accuracy = correct / total
                    train_losses.append(epoch_train_loss)
                    train_accuracies.append(epoch_train_accuracy)

                    # Validation loop (no gradients needed)
                    model.eval()
                    all_preds = []
                    all_labels = []
                    running_loss = 0.0
                    correct = 0
                    total = 0
                    with torch.no_grad():
                        for inputs, labels in val_loader:
                            inputs, labels = inputs.to(device), labels.to(device)
                            outputs = model(inputs)
                            loss = criterion(outputs, labels)
                            running_loss += loss.item()
                            _, preds = torch.max(outputs, 1)
                            total += labels.size(0)
                            correct += (preds == labels).sum().item()
                            # Keep predictions and labels on the CPU for f1_score
                            all_preds.extend(preds.cpu().numpy())
                            all_labels.extend(labels.cpu().numpy())
                    epoch_val_loss = running_loss / len(val_loader)
                    epoch_val_accuracy = correct / total
                    val_losses.append(epoch_val_loss)
                    val_accuracies.append(epoch_val_accuracy)

                    # Macro-averaged F1 over the whole validation fold
                    f1 = f1_score(all_labels, all_preds, average='macro')
                    print(f"Fold {fold+1}, Epoch {epoch+1}, F1 Score: {f1}")

                    # Keep the best F1 seen so far in this fold
                    if f1 > best_f1:
                        best_f1 = f1

                # One result row per fold
                f1_vals[fold] = best_f1
                results.append({
                    'Optimizer': optimizer_name,
                    'batch_size': batch_size,
                    'learning_rate': lr,
                    'Fold': fold+1,
                    'F1 Score': best_f1
                })
            # One summary row per hyperparameter combination, over its folds
            results.append({
                'min f1': min(f1_vals),
                'max f1': max(f1_vals),
                'average f1': sum(f1_vals)/len(f1_vals)
            })



Collecting efficientnet-pytorch
  Downloading efficientnet_pytorch-0.7.1.tar.gz (21 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch->efficientnet-pytorch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch->efficientnet-pytorch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch->efficientnet-pytorch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b0-355c32eb.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b0-355c32eb.pth
100%|██████████| 20.4M/20.4M [00:00<00:00, 466MB/s]


Loaded pretrained weights for efficientnet-b0
Fold 1, Epoch 1, F1 Score: 0.5365134832149322
Fold 1, Epoch 2, F1 Score: 0.641612705377988


KeyboardInterrupt: 

In [None]:

# ----------------- Save the results in an Excel file --------------------
# Turn the result rows collected by the training cell into a DataFrame
df = pd.DataFrame(results)

# If a results workbook already exists, put the new rows after its contents
if os.path.isfile(results_directory):
    existing_file = pd.read_excel(results_directory)
    df = pd.concat([existing_file, df])

# Make sure the destination folder exists before writing. The guard skips
# paths without a folder component, because os.makedirs('') raises
# FileNotFoundError.
results_parent = os.path.dirname(results_directory)
if results_parent:
    os.makedirs(results_parent, exist_ok=True)

# Write the combined rows back to the workbook (the row index is not stored)
df.to_excel(results_directory, index=False)
# -------------- End of Save the results in an Excel file ----------------

# -------------------- Plot the error/epoch plot ------------------------

# Path of the Excel workbook that holds the plot data
excel_filename = Plot_directory

# Create the workbook's folder if needed (same empty-dirname guard as above)
plot_parent = os.path.dirname(excel_filename)
if plot_parent:
    os.makedirs(plot_parent, exist_ok=True)

# Reuse the existing workbook when there is one, otherwise start a new one;
# in both cases `ws` is the workbook's active worksheet.
# NOTE(review): nothing visible in this cell writes to or saves `wb` --
# confirm that a later cell does.
if os.path.exists(excel_filename):
    wb = load_workbook(excel_filename)
else:
    wb = Workbook()
ws = wb.active