In [1]:
from helper_functions import *
from classes import *
from fold_functions import *
from torch.utils.data import DataLoader
from torchvision import transforms
from timeit import default_timer as timer
import datetime
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch import nn
import os
import zipfile
from pathlib import Path
from tqdm.auto import tqdm
import requests
import seaborn as sns
from sklearn.metrics import confusion_matrix
import shutil


In [2]:
# Define the hyperparameters
BATCH_SIZE = 32 # Batch size for the dataloaders
IN_CHANNELS = 3 # Number of input channels
HIDDEN_UNITS = 16  # Number of hidden units in the fully connected layer
NUM_CLASSES = 4 # Number of classes in the dataset
SIZE = 224 # Size of the images
LEARNING_RATE = 0.001 # Learning rate for the optimizer
EPOCHS = 5 # Number of epochs to train the model
GAMMA = 0.1 # Multiplicative factor of learning rate decay
STEP_SIZE = 5 # Step size for the learning rate scheduler
WEIGHT_DECAY = 0.025 # Weight decay for the optimizer
SEED = 42 # Seed for reproducibility
EVAL_EPOCHS = 10  # Number of epochs to evaluate the model on the test set
RANDOM_ROTATION = 10  # Random rotation for the images
K_FOLDS = 10  # Number of folds for the cross-validation
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'

# Classes in the dataset

classes = {
    'no_tumor': 0,
    'meningioma_tumor': 1,
    'pituitary_tumor': 2,
    'glioma_tumor': 3
}

In [3]:
transform = transforms.Compose([
    transforms.Resize((SIZE, SIZE)),
    transforms.RandomRotation(RANDOM_ROTATION),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) if IN_CHANNELS == 3 else transforms.Normalize(mean=[0.5], std=[0.5])
])


train_dataset = CustomDataset('data/Training', transform=transform)
test_dataset = CustomDataset('data/Testing', transform=transform)

In [4]:
# Create the combined dataset
combine_dir = combine_and_rename_images('data', classes)

# Walk through the directory and create the dataset
walk_through_dir(combine_dir)

Images combined and renamed successfully.
There are 0 directories and 3264 images in 'Combined'.


In [5]:
# Create the dataset
train_dir, test_dir = split_to_train_test(combine_dir, 0.9)

walk_through_dir('Dataset')

Images split into training and testing sets successfully.
There are 2 directories and 0 images in 'Dataset'.
There are 0 directories and 324 images in 'Dataset\Testing'.
There are 0 directories and 2940 images in 'Dataset\Training'.


In [6]:
shutil.rmtree(combine_dir)

In [7]:
# Create the dataset
train_dataset = CustomFoldDataset(train_dir, classes, transform=transform)

# Create the test dataset
test_dataset = CustomFoldDataset(test_dir, classes, transform=transform)

In [8]:
from sklearn.model_selection import StratifiedKFold

# Define the number of splits
n_splits = K_FOLDS
# Define the stratified k-fold
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=SEED)

train_data, train_labels = train_dataset.get_data_targets()

test_data, test_labels = test_dataset.get_data_targets()

# List to numpy array
train_labels = np.array(train_labels).astype(int)
test_labels = np.array(test_labels).astype(int)

test_dataloader = DataLoader(FoldDataset(test_data, test_labels), batch_size=BATCH_SIZE, shuffle=True)


In [9]:
def extract_elements(data_list, indices):
    """
    Extract elements from a list based on the given indices.

    Parameters:
        data_list (list): The list of data.
        indices (numpy.ndarray): The indices to extract.

    Returns:
        numpy.ndarray: An array containing the elements corresponding to the indices.
    """
    data_array = np.array(data_list)
    result_array = data_array[indices]
    return result_array

In [10]:
fold_results = []
fold_models = []
fold_eval_results = []

start = timer()

for i, (train_idx, val_idx) in enumerate(skf.split(train_data, train_labels)):
    print(f'Fold {i+1}')

    set_seeds(SEED)
    # Create the model
    model = MRI_classification_CNN(IN_CHANNELS, HIDDEN_UNITS, NUM_CLASSES, SIZE).to(DEVICE)
    # Create the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
    # Create the scheduler
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=STEP_SIZE, gamma=GAMMA)
    # Create the loss function
    criterion = nn.CrossEntropyLoss()
    # Create the training and validation dataloaders
    train_fold_data = extract_elements(train_data, train_idx)
    train_fold_labels = extract_elements(train_labels, train_idx).astype(int)
    val_fold_data = extract_elements(train_data, val_idx)
    val_fold_labels = extract_elements(train_labels, val_idx).astype(int)
    
    train_dataloader_fold = DataLoader(FoldDataset(train_fold_data, train_fold_labels), batch_size=BATCH_SIZE, shuffle=True)
    val_dataloader_fold = DataLoader(FoldDataset(val_fold_data, val_fold_labels), batch_size=BATCH_SIZE, shuffle=True)

    # Train the model
    results = train_fold(model, 
                        train_dataloader_fold,
                        val_dataloader_fold,
                        criterion,
                        optimizer,
                        EPOCHS,
                        DEVICE,
                        scheduler)
    
    # Evaluate the model
    eval_results = evaluate(model, 
                            test_dataloader,
                            criterion, 
                            DEVICE)
    
    fold_results.append(results)
    fold_models.append(model)



end = timer()

toltal_time = print_train_time(start, end)

Fold 1


[Epoch 1/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 1/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 2/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 2/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 3/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 3/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 4/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 4/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 5/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 5/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 1/1] Testing:   0%|          | 0/11 [00:00<?, ?it/s]

Evaluation results
Test Loss: 1.1036 | Test Acc: 73.58%
Fold 2


[Epoch 1/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 1/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 2/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 2/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 3/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 3/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 4/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 4/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 5/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 5/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 1/1] Testing:   0%|          | 0/11 [00:00<?, ?it/s]

Evaluation results
Test Loss: 0.8610 | Test Acc: 78.12%
Fold 3


[Epoch 1/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 1/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 2/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

[Epoch 2/5] Testing:   0%|          | 0/10 [00:00<?, ?it/s]

[Epoch 3/5] Training:   0%|          | 0/83 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [11]:
shutil.rmtree('Dataset')