In [None]:
from google.colab import drive
drive.mount('/content/drive')
%env PYTHONPATH=/content/drive/MyDrive/BrainTumorDiagnosisUsingMRI:$PYTHONPATH

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
env: PYTHONPATH=/content/drive/MyDrive/BrainTumorDiagnosisUsingMRI:$PYTHONPATH


In [None]:
%cd /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src

/content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src


In [None]:
import os
import json
from pathlib import Path

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, ReduceLROnPlateau

from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score, f1_score, roc_auc_score, precision_recall_fscore_support,
    confusion_matrix, roc_curve
)

from torch.amp import autocast, GradScaler

from dataloader.loader import get_data_loaders
from models.hybrid_model import BrainTumorModel as HybridModel

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
if device.type == 'cuda':
    torch.backends.cudnn.benchmark = True
print('Using device:', device)

batch_size = 16
num_epochs = 30
learning_rate = 1e-4
weight_decay = 1e-4
scheduler_choice = 'plateau'
patience = 5

data_dir = '../dataset'
checkpoint_path = str(Path('./checkpoints/best_model_gpu.pth').resolve())
logs_dir = Path('./logs')
logs_dir.mkdir(parents=True, exist_ok=True)
Path('./checkpoints').mkdir(parents=True, exist_ok=True)
log_json_path = str((logs_dir / 'training_log_gpu.json').resolve())

Using device: cuda


In [None]:
train_loader, val_loader, test_loader, class_names = get_data_loaders(data_dir, batch_size=batch_size)
num_classes = len(class_names)
print('Classes:', class_names)
print('Train/Val/Test batches:', len(train_loader), len(val_loader), len(test_loader))

Classes: ['glioma', 'meningioma', 'notumor', 'pituitary']
Train/Val/Test batches: 1229 66 67


In [None]:
model = HybridModel(num_classes=num_classes)
model = model.to(device)

total_params = sum(p.numel() for p in model.parameters())
print(f'Total parameters: {total_params:,}')

Total parameters: 31,635,332


In [None]:
scaler = GradScaler(enabled=(device.type == 'cuda'))

def _batch_accuracy(outputs, targets):
    preds = outputs.argmax(dim=1)
    correct = (preds == targets).sum().item()
    return correct / targets.size(0)

def train_one_epoch(model, dataloader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    total_samples = 0

    pbar = tqdm(dataloader, desc='Train', leave=False)
    for inputs, labels in pbar:
        if device.type == 'cuda':
            inputs = inputs.pin_memory().to(device, non_blocking=True)
            labels = labels.to(device, non_blocking=True)
        else:
            inputs = inputs.to(device)
            labels = labels.to(device)

        optimizer.zero_grad(set_to_none=True)
        with autocast(device_type=device.type):
            outputs = model(inputs)
            loss = criterion(outputs, labels)

        if device.type == 'cuda':
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        batch_acc = _batch_accuracy(outputs.detach(), labels)
        running_loss += loss.item() * inputs.size(0)
        running_corrects += batch_acc * inputs.size(0)
        total_samples += inputs.size(0)

        pbar.set_postfix({'loss': f'{loss.item():.4f}', 'acc': f'{batch_acc:.4f}'})

    epoch_loss = running_loss / total_samples
    epoch_acc = running_corrects / total_samples
    return epoch_loss, epoch_acc

def validate(model, dataloader, criterion):
    model.eval()
    running_loss = 0.0
    running_corrects = 0
    total_samples = 0

    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Val', leave=False)
        for inputs, labels in pbar:
            if device.type == 'cuda':
                inputs = inputs.pin_memory().to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
            else:
                inputs = inputs.to(device)
                labels = labels.to(device)

            with autocast(device_type=device.type):
                outputs = model(inputs)
                loss = criterion(outputs, labels)

            batch_acc = _batch_accuracy(outputs, labels)
            running_loss += loss.item() * inputs.size(0)
            running_corrects += batch_acc * inputs.size(0)
            total_samples += inputs.size(0)

            pbar.set_postfix({'loss': f'{loss.item():.4f}', 'acc': f'{batch_acc:.4f}'})

    epoch_loss = running_loss / total_samples
    epoch_acc = running_corrects / total_samples
    return epoch_loss, epoch_acc

In [7]:
criterion = nn.CrossEntropyLoss()
optimizer = AdamW(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

if scheduler_choice == 'cosine':
    scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
elif scheduler_choice == 'plateau':
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)
else:
    raise ValueError("scheduler_choice must be 'cosine' or 'plateau'")

best_val_acc = -np.inf
epochs_no_improve = 0
history = {
    'train_loss': [], 'train_acc': [],
    'val_loss': [], 'val_acc': []
}

for epoch in range(1, num_epochs + 1):
    print(f'\nEpoch {epoch}/{num_epochs}')
    train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, criterion)
    val_loss, val_acc = validate(model, val_loader, criterion)

    if isinstance(scheduler, ReduceLROnPlateau):
        scheduler.step(val_loss)
    else:
        scheduler.step()

    history['train_loss'].append(train_loss)
    history['train_acc'].append(train_acc)
    history['val_loss'].append(val_loss)
    history['val_acc'].append(val_acc)

    print(f'Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.4f}')
    print(f'Val   Loss: {val_loss:.4f} | Val   Acc: {val_acc:.4f}')

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        epochs_no_improve = 0
        torch.save(model.state_dict(), checkpoint_path)
        print(f'Best model updated and saved to: {checkpoint_path}')
    else:
        epochs_no_improve += 1
        print(f'No improvement for {epochs_no_improve} epoch(s).')

    if epochs_no_improve >= patience:
        print('Early stopping triggered.')
        break

with open(log_json_path, 'w') as f:
    json.dump({
        'history': history,
        'config': {
            'batch_size': batch_size,
            'num_epochs': num_epochs,
            'learning_rate': learning_rate,
            'weight_decay': weight_decay,
            'scheduler_choice': scheduler_choice,
            'patience': patience
        }
    }, f, indent=2)
print(f'Training log saved to: {log_json_path}')


Epoch 1/30




Train Loss: 0.6414 | Train Acc: 0.7465
Val   Loss: 0.5084 | Val   Acc: 0.7899
Best model updated and saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/checkpoints/best_model_gpu.pth

Epoch 2/30




Train Loss: 0.2342 | Train Acc: 0.9179
Val   Loss: 0.4346 | Val   Acc: 0.8470
Best model updated and saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/checkpoints/best_model_gpu.pth

Epoch 3/30




Train Loss: 0.1442 | Train Acc: 0.9494
Val   Loss: 0.2442 | Val   Acc: 0.9068
Best model updated and saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/checkpoints/best_model_gpu.pth

Epoch 4/30




Train Loss: 0.0979 | Train Acc: 0.9660
Val   Loss: 0.3656 | Val   Acc: 0.8688
No improvement for 1 epoch(s).

Epoch 5/30




Train Loss: 0.0682 | Train Acc: 0.9761
Val   Loss: 0.3553 | Val   Acc: 0.9021
No improvement for 2 epoch(s).

Epoch 6/30




Train Loss: 0.0625 | Train Acc: 0.9777
Val   Loss: 0.4721 | Val   Acc: 0.8783
No improvement for 3 epoch(s).

Epoch 7/30




Train Loss: 0.0197 | Train Acc: 0.9933
Val   Loss: 0.3714 | Val   Acc: 0.9259
Best model updated and saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/checkpoints/best_model_gpu.pth

Epoch 8/30




Train Loss: 0.0110 | Train Acc: 0.9963
Val   Loss: 0.5016 | Val   Acc: 0.8916
No improvement for 1 epoch(s).

Epoch 9/30




Train Loss: 0.0136 | Train Acc: 0.9952
Val   Loss: 0.2336 | Val   Acc: 0.9477
Best model updated and saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/checkpoints/best_model_gpu.pth

Epoch 10/30




Train Loss: 0.0151 | Train Acc: 0.9952
Val   Loss: 0.4745 | Val   Acc: 0.9087
No improvement for 1 epoch(s).

Epoch 11/30




Train Loss: 0.0136 | Train Acc: 0.9957
Val   Loss: 0.2510 | Val   Acc: 0.9392
No improvement for 2 epoch(s).

Epoch 12/30




Train Loss: 0.0081 | Train Acc: 0.9977
Val   Loss: 0.2985 | Val   Acc: 0.9335
No improvement for 3 epoch(s).

Epoch 13/30




Train Loss: 0.0043 | Train Acc: 0.9986
Val   Loss: 0.3625 | Val   Acc: 0.9306
No improvement for 4 epoch(s).

Epoch 14/30


                                                                             

Train Loss: 0.0029 | Train Acc: 0.9987
Val   Loss: 0.4081 | Val   Acc: 0.9344
No improvement for 5 epoch(s).
Early stopping triggered.
Training log saved to: /content/drive/MyDrive/BrainTumorDiagnosisUsingMRI/src/logs/training_log_gpu.json




In [8]:
def evaluate(model, dataloader):
    model.eval()
    all_probs = []
    all_preds = []
    all_targets = []

    with torch.no_grad():
        pbar = tqdm(dataloader, desc='Test', leave=False)
        for inputs, labels in pbar:
            if device.type == 'cuda':
                inputs = inputs.pin_memory().to(device, non_blocking=True)
                labels = labels.to(device, non_blocking=True)
            else:
                inputs = inputs.to(device)
                labels = labels.to(device)

            with autocast(device_type=device.type):
                outputs = model(inputs)
                probs = torch.softmax(outputs, dim=1)
                preds = outputs.argmax(dim=1)

            all_probs.append(probs.cpu().numpy())
            all_preds.append(preds.cpu().numpy())
            all_targets.append(labels.cpu().numpy())

    probs_np = np.concatenate(all_probs, axis=0)
    preds_np = np.concatenate(all_preds, axis=0)
    targets_np = np.concatenate(all_targets, axis=0)

    acc = accuracy_score(targets_np, preds_np)
    f1_macro = f1_score(targets_np, preds_np, average='macro')

    try:
        from sklearn.preprocessing import label_binarize
        targets_oh = label_binarize(targets_np, classes=np.arange(len(class_names)))
        auc_macro = roc_auc_score(targets_oh, probs_np, average='macro', multi_class='ovr')
    except Exception as e:
        print('AUC computation fallback due to:', e)
        auc_macro = float('nan')

    precision, recall, f1_per_class, _ = precision_recall_fscore_support(
        targets_np, preds_np, labels=np.arange(len(class_names)), average=None
)

    cm = confusion_matrix(targets_np, preds_np, labels=np.arange(len(class_names)))

    # Confusion Matrix plot
    plt.figure(figsize=(6, 5))
    im = plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar(im, fraction=0.046, pad=0.04)
    tick_marks = np.arange(len(class_names))
    plt.xticks(tick_marks, class_names, rotation=45, ha='right')
    plt.yticks(tick_marks, class_names)
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            plt.text(j, i, format(cm[i, j], 'd'),
                     ha='center', va='center',
                     color='white' if cm[i, j] > thresh else 'black')
    plt.tight_layout()
    plt.xlabel('Predicted label')
    plt.ylabel('True label')
    plt.savefig(str(logs_dir / 'confusion_matrix.png'), bbox_inches='tight')
    plt.close()

    # ROC curves per class
    try:
        from sklearn.preprocessing import label_binarize
        targets_oh = label_binarize(targets_np, classes=np.arange(len(class_names)))
        plt.figure(figsize=(7, 6))
        for i in range(len(class_names)):
            fpr, tpr, _ = roc_curve(targets_oh[:, i], probs_np[:, i])
            auc_i = roc_auc_score(targets_oh[:, i], probs_np[:, i])
            plt.plot(fpr, tpr, label=f'{class_names[i]} (AUC={auc_i:.3f})')
        plt.plot([0, 1], [0, 1], 'k--', label='Chance')
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC Curves (One-vs-Rest)')
        plt.legend(loc='lower right')
        plt.tight_layout()
        plt.savefig(str(logs_dir / 'roc_curves.png'), bbox_inches='tight')
        plt.close()
    except Exception as e:
        print('ROC curve generation skipped due to:', e)

    metrics = {
        'accuracy': float(acc),
        'f1_macro': float(f1_macro),
        'auc_macro': float(auc_macro) if not isinstance(auc_macro, float) or not np.isnan(auc_macro) else auc_macro,
        'precision_per_class': {cls: float(precision[i]) for i, cls in enumerate(class_names)},
        'recall_per_class': {cls: float(recall[i]) for i, cls in enumerate(class_names)},
        'f1_per_class': {cls: float(f1_per_class[i]) for i, cls in enumerate(class_names)},
        'confusion_matrix': cm.tolist()
    }
    return metrics

In [9]:
# Load best model weights before evaluation
if os.path.exists(checkpoint_path):
    model.load_state_dict(torch.load(checkpoint_path, map_location=device))
    print('Loaded best model from checkpoint for evaluation.')
else:
    print('Best checkpoint not found; evaluating current model.')

test_metrics = evaluate(model, test_loader)
print('Test Metrics:', json.dumps(test_metrics, indent=2))

Loaded best model from checkpoint for evaluation.




Test Metrics: {
  "accuracy": 0.9735099337748344,
  "f1_macro": 0.9719525182732398,
  "auc_macro": 0.9956582873198215,
  "precision_per_class": {
    "glioma": 0.9782608695652174,
    "meningioma": 0.9377431906614786,
    "notumor": 0.9933333333333333,
    "pituitary": 0.9814814814814815
  },
  "recall_per_class": {
    "glioma": 0.9221311475409836,
    "meningioma": 0.9717741935483871,
    "notumor": 0.9933333333333333,
    "pituitary": 1.0
  },
  "f1_per_class": {
    "glioma": 0.9493670886075949,
    "meningioma": 0.9544554455445544,
    "notumor": 0.9933333333333333,
    "pituitary": 0.9906542056074766
  },
  "confusion_matrix": [
    [
      225,
      16,
      0,
      3
    ],
    [
      3,
      241,
      2,
      2
    ],
    [
      2,
      0,
      298,
      0
    ],
    [
      0,
      0,
      0,
      265
    ]
  ]
}


In [11]:
# Visualization: Loss and Accuracy Curves
plt.figure(figsize=(6,4))
plt.plot(history['train_loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Val Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Loss Curve')
plt.legend()
plt.tight_layout()
plt.savefig(str(logs_dir / 'loss_curve.png'), bbox_inches='tight')
plt.close()

plt.figure(figsize=(6,4))
plt.plot(history['train_acc'], label='Train Acc')
plt.plot(history['val_acc'], label='Val Acc')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Accuracy Curve')
plt.legend()
plt.tight_layout()
plt.savefig(str(logs_dir / 'accuracy_curve.png'), bbox_inches='tight')
plt.close()

print('Saved plots to:', logs_dir)

Saved plots to: logs
