<a href="https://colab.research.google.com/github/sajidcsecu/radioGenomic/blob/main/UnetinGPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# (1) Install Required Packages

In [1]:
!pip install SimpleITK
!pip install pydicom===2.4.3
!pip install pydicom-seg
!pip install numpy==1.23.5

Collecting SimpleITK
  Downloading SimpleITK-2.4.1-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Downloading SimpleITK-2.4.1-cp311-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (52.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.3/52.3 MB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.4.1
Collecting pydicom===2.4.3
  Downloading pydicom-2.4.3-py3-none-any.whl.metadata (7.8 kB)
Downloading pydicom-2.4.3-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m25.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pydicom
Successfully installed pydicom-2.4.3
Collecting pydicom-seg
  Downloading pydicom_seg-0.4.1-py3-none-any.whl.metadata (5.2 kB)
Collecting jsonschema<4.0.0,>=3.2.0 (from pydicom-seg)
  Downloading jsonschema-3.2.0-py2.py3-none-any.whl.metadata (7.8 kB)
Collecting numpy

# (2) Import Required Libraries

In [1]:
import os
import random
import time
import csv
import numpy as np
import torch
import torch.nn as nn
import torch.optim.lr_scheduler as lr_scheduler
from torch.cuda.amp import GradScaler, autocast
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import (
    jaccard_score,
    f1_score,
    recall_score,
    precision_score,
    accuracy_score,
)
from tqdm import tqdm

import cv2
from typing import List
import torch.multiprocessing as mp
import h5py
from google.colab import drive

# (3) Mount Google Drive

In [2]:
# Mount Google Drive at /content/drive so the paths below resolve.
drive.mount('/content/drive')

# Define dataset directory
# The directory is created when missing (exist_ok=True makes re-running this
# cell harmless) and its path is echoed for confirmation.
DATASET_DIR = "/content/drive/MyDrive/PhDwork/datasets"
os.makedirs(DATASET_DIR, exist_ok=True)
print(f" Dataset Directory: {DATASET_DIR}")

Mounted at /content/drive
 Dataset Directory: /content/drive/MyDrive/PhDwork/datasets


# (4) Data Preparation

In [3]:
class HDF5Dataset(Dataset):
    """Map-style dataset reading image/mask pairs from one HDF5 file.

    The file must expose two datasets named "images" and "masks" that are
    indexed along their first axis.

    Args:
        hdf5_path: Path of the HDF5 file.
        transform: Optional callable invoked as ``transform(img, mask)`` and
            expected to return an ``(img, mask)`` pair.
    """

    def __init__(self, hdf5_path, transform=None):
        self.hdf5_path = hdf5_path
        self.transform = transform

        # Only the sample count is read here; the file is closed again right away.
        with h5py.File(self.hdf5_path, "r") as handle:
            image_store = handle["images"]
            self.num_samples = image_store.shape[0]

    def __len__(self):
        """Return the number of entries along the first axis of "images"."""
        return self.num_samples

    def __getitem__(self, idx):
        """Return ``(img, mask)`` tensors for entry ``idx``.

        The image is converted to float32 and the mask to uint8; both get a
        new leading axis (``unsqueeze(0)``). Tensors are created on the CPU.
        """
        # The file is reopened for every access, so no handle is stored on the
        # instance. NOTE(review): presumably chosen to stay safe with DataLoader
        # workers -- confirm; it costs one file open per sample.
        with h5py.File(self.hdf5_path, "r") as handle:
            raw_image = handle["images"][idx]
            raw_mask = handle["masks"][idx]

        image_tensor = torch.tensor(raw_image, dtype=torch.float32).unsqueeze(0)
        mask_tensor = torch.tensor(raw_mask, dtype=torch.uint8).unsqueeze(0)

        if self.transform:
            image_tensor, mask_tensor = self.transform(image_tensor, mask_tensor)

        return image_tensor, mask_tensor


if __name__ == "__main__":
    # Smoke test of the data pipeline: open the three HDF5 splits, fetch one
    # sample from each, then pull one batch through a DataLoader per split.

    # Work from the project folder on Drive and confirm the switch.
    target_dir = "/content/drive/MyDrive/PhDwork"
    os.chdir(target_dir)
    print("Current Directory:", os.getcwd())

    # Reported only; samples and batches below stay on the CPU.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(f"Using device: {device}")

    # Dataset folder (created when missing) and the per-split file paths.
    DATASET_DIR = "/content/drive/MyDrive/PhDwork/datasets"
    os.makedirs(DATASET_DIR, exist_ok=True)

    train_hdf5_path = os.path.join(DATASET_DIR, "train_dataset.hdf5")
    valid_hdf5_path = os.path.join(DATASET_DIR, "valid_dataset.hdf5")
    test_hdf5_path = os.path.join(DATASET_DIR, "test_dataset.hdf5")

    train_dataset = HDF5Dataset(train_hdf5_path)
    valid_dataset = HDF5Dataset(valid_hdf5_path)
    test_dataset = HDF5Dataset(test_hdf5_path)

    # First image/mask pair of every split.
    train_img, train_mask = train_dataset[0]
    valid_img, valid_mask = valid_dataset[0]
    test_img, test_mask = test_dataset[0]

    # Shapes and device of those single samples.
    for image_label, mask_label, sample_img, sample_mask in (
        ("Train Image:", "Train Mask:", train_img, train_mask),
        ("Valid Image:", "Valid Mask:", valid_img, valid_mask),
        ("Test Image:", "Test Mask:", test_img, test_mask),
    ):
        print(image_label, sample_img.shape, mask_label, sample_mask.shape, "Device:", sample_img.device)

    print("Single image/mask retrieval successful!")

    # Batches of 8; only the training split is shuffled.
    train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=8, shuffle=False)
    test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=False)

    # Fetch exactly one batch per split and report its shape and device.
    for banner, loader in (
        ("✅ Train Batch Loaded:", train_dataloader),
        ("✅ Validation Batch Loaded:", valid_dataloader),
        ("✅ Test Batch Loaded:", test_dataloader),
    ):
        for batch in loader:
            imgs, masks = batch
            print(banner, imgs.shape, masks.shape)
            print(f"Image device: {imgs.device}, Mask device: {masks.device}")
            break

    print("Data loading successful!")

Current Directory: /content/drive/MyDrive/PhDwork
Using device: cuda
Train Image: torch.Size([1, 512, 512]) Train Mask: torch.Size([1, 512, 512]) Device: cpu
Valid Image: torch.Size([1, 512, 512]) Valid Mask: torch.Size([1, 512, 512]) Device: cpu
Test Image: torch.Size([1, 512, 512]) Test Mask: torch.Size([1, 512, 512]) Device: cpu
Single image/mask retrieval successful!
✅ Train Batch Loaded: torch.Size([8, 1, 512, 512]) torch.Size([8, 1, 512, 512])
Image device: cpu, Mask device: cpu
✅ Validation Batch Loaded: torch.Size([8, 1, 512, 512]) torch.Size([8, 1, 512, 512])
Image device: cpu, Mask device: cpu
✅ Test Batch Loaded: torch.Size([8, 1, 512, 512]) torch.Size([8, 1, 512, 512])
Image device: cpu, Mask device: cpu
Data loading successful!


# (5) U-Net Model

In [4]:
class DoubleConv(nn.Module):
    """Two consecutive Conv2d(3x3, padding=1) -> BatchNorm2d -> LeakyReLU stages.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolution stages.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        # Layers are instantiated in forward order so the Sequential indices
        # (conv.0 ... conv.5) and the order of weight initialisation are the
        # same as listing the six layers by hand.
        stages = []
        for stage_in in (in_channels, out_channels):
            stages.append(nn.Conv2d(stage_in, out_channels, kernel_size=3, padding=1))
            stages.append(nn.BatchNorm2d(out_channels))
            stages.append(nn.LeakyReLU(inplace=True))
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Run ``x`` through both convolution stages."""
        features = self.conv(x)
        return features

class UpSample(nn.Module):
    """Learned upsampling: a transposed convolution with kernel size 2 and stride 2.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels of the upsampled feature map.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.up = nn.ConvTranspose2d(
            in_channels,
            out_channels,
            kernel_size=2,
            stride=2,
        )

    def forward(self, x):
        """Apply the transposed convolution to ``x``."""
        upsampled = self.up(x)
        return upsampled

class EncoderBlock(nn.Module):
    """Encoder stage: DoubleConv, then 2x2 max-pooling followed by dropout.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution stage.
        dropout: Dropout probability applied to the pooled output.
    """

    def __init__(self, in_channels, out_channels, dropout=0.1):
        super().__init__()
        self.conv = DoubleConv(in_channels, out_channels)
        self.pool = nn.MaxPool2d(2)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return ``(skip, down)``.

        ``skip`` is the un-pooled convolution output (no dropout applied);
        ``down`` is the pooled output after dropout.
        """
        skip = self.conv(x)
        down = self.dropout(self.pool(skip))
        return skip, down

class DecoderBlock(nn.Module):
    """Decoder stage: upsample, concatenate the encoder skip, DoubleConv, dropout.

    Args:
        in_channels: Channels of the incoming (coarser) feature map.
        out_channels: Channels after upsampling; also the channel count the
            skip connection is expected to have, since the convolution stage
            is built for ``out_channels * 2`` inputs.
        dropout: Dropout probability applied to the block output.
    """

    def __init__(self, in_channels, out_channels, dropout=0.1):
        super().__init__()
        self.up = UpSample(in_channels, out_channels)
        self.conv = DoubleConv(out_channels * 2, out_channels)
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, x, skip):
        """Fuse the upsampled ``x`` with ``skip`` and return the refined map.

        The output has the spatial size of ``skip``.
        """
        x = self.up(x)

        # An odd-sized map pooled with MaxPool2d(2) and upsampled by 2 ends up
        # one pixel smaller than the matching skip, which would make torch.cat
        # fail. Zero-pad (or crop, for negative differences) to the skip size.
        # This is a no-op when the sizes already agree.
        diff_h = skip.size(-2) - x.size(-2)
        diff_w = skip.size(-1) - x.size(-1)
        if diff_h != 0 or diff_w != 0:
            x = nn.functional.pad(
                x,
                [diff_w // 2, diff_w - diff_w // 2, diff_h // 2, diff_h - diff_h // 2],
            )

        x = torch.cat([x, skip], dim=1)
        x = self.conv(x)
        return self.dropout(x)

class UNet(nn.Module):
    """U-Net with four encoder stages, a bottleneck and four decoder stages.

    Channel widths are 64 -> 128 -> 256 -> 512 in the encoder, 1024 in the
    bottleneck, mirrored in the decoder. A final 1x1 convolution maps the 64
    decoder channels to ``out_channels``; no activation is applied to it.

    Args:
        in_channels: Channels of the input image.
        out_channels: Channels of the output map.
        dropout: Dropout probability shared by every encoder stage, the
            bottleneck and every decoder stage.
    """

    def __init__(self, in_channels, out_channels, dropout=0.5):
        super().__init__()
        # Contracting path.
        self.e1 = EncoderBlock(in_channels, 64, dropout=dropout)
        self.e2 = EncoderBlock(64, 128, dropout=dropout)
        self.e3 = EncoderBlock(128, 256, dropout=dropout)
        self.e4 = EncoderBlock(256, 512, dropout=dropout)

        # Bottleneck.
        self.b = DoubleConv(512, 1024)
        self.dropout_bottleneck = nn.Dropout(p=dropout)

        # Expanding path.
        self.d1 = DecoderBlock(1024, 512, dropout=dropout)
        self.d2 = DecoderBlock(512, 256, dropout=dropout)
        self.d3 = DecoderBlock(256, 128, dropout=dropout)
        self.d4 = DecoderBlock(128, 64, dropout=dropout)

        # 1x1 projection to the requested number of output channels.
        self.outputs = nn.Conv2d(64, out_channels, kernel_size=1)

    def forward(self, x):
        """Return the output of the final 1x1 convolution for input ``x``."""
        # Encoder: remember every stage's un-pooled output for the skip links.
        skips = []
        features = x
        for encoder in (self.e1, self.e2, self.e3, self.e4):
            skip, features = encoder(features)
            skips.append(skip)

        features = self.dropout_bottleneck(self.b(features))

        # Decoder: consume the skips deepest-first (s4, s3, s2, s1).
        for decoder, skip in zip((self.d1, self.d2, self.d3, self.d4), reversed(skips)):
            features = decoder(features, skip)

        return self.outputs(features)

if __name__ == "__main__":
    # Smoke test: push one random 1x1x512x512 tensor through a fresh UNet and
    # print the output shape, the selected device and CUDA availability.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    input_image = torch.randn((1, 1, 512, 512), dtype=torch.float32)
    model = UNet(1, 1).to(device)
    input_image = input_image.to(device)
    out = model(input_image)
    for report in (out.shape, device, torch.cuda.is_available()):
        print(report)

torch.Size([1, 1, 512, 512])
cuda
True


# (6) Loss Functions

In [5]:
class DiceLoss(nn.Module):
    """Soft Dice loss computed on logits.

    The logits are passed through a sigmoid, everything is flattened, and a
    single Dice score is computed over all elements of the batch.

    Args:
        smooth: Added to numerator and denominator.
        epsilon: Extra term added to the denominator only.
    """

    def __init__(self, smooth=1e-6, epsilon=1e-8):
        super().__init__()
        self.smooth = smooth
        self.epsilon = epsilon

    def forward(self, preds, targets):
        """Return ``1 - dice`` as a scalar tensor.

        Args:
            preds: Raw logits.
            targets: Ground-truth tensor with as many elements as ``preds``.
        """
        # reshape (not view) so non-contiguous inputs such as transposed or
        # sliced tensors are flattened instead of raising a RuntimeError.
        probs = torch.sigmoid(preds).reshape(-1)
        targets = targets.reshape(-1)

        intersection = (probs * targets).sum()
        denominator = probs.sum() + targets.sum() + self.smooth + self.epsilon
        dice_score = (2. * intersection + self.smooth) / denominator
        return 1 - dice_score

class DiceBCELoss(nn.Module):
    """Sum of binary cross-entropy (on logits) and soft Dice loss.

    Both terms are computed over all elements of the batch after flattening.

    Args:
        smooth: Added to numerator and denominator of the Dice term.
        epsilon: Extra term added to the Dice denominator only.
    """

    def __init__(self, smooth=1e-6, epsilon=1e-8):
        super().__init__()
        self.smooth = smooth
        self.epsilon = epsilon
        self.bce = nn.BCEWithLogitsLoss()

    def forward(self, preds, targets):
        """Return ``bce + (1 - dice)`` as a scalar tensor.

        Args:
            preds: Raw logits; the sigmoid is applied internally.
            targets: Ground-truth tensor with as many elements as ``preds``.
                Integer and bool targets are converted to float32.
        """
        # reshape (not view) so non-contiguous inputs are flattened instead of
        # raising a RuntimeError.
        preds = preds.reshape(-1)
        targets = targets.reshape(-1)
        # BCEWithLogitsLoss needs floating-point targets; float targets are
        # left untouched so existing callers see unchanged numerics.
        if not targets.is_floating_point():
            targets = targets.float()

        # The sigmoid is evaluated once and shared by both Dice sums.
        probs = torch.sigmoid(preds)
        intersection = (probs * targets).sum()
        denominator = probs.sum() + targets.sum() + self.smooth + self.epsilon
        dice_loss = 1 - (2. * intersection + self.smooth) / denominator

        bce_loss = self.bce(preds, targets)
        return bce_loss + dice_loss

# (7) Testing

In [6]:
class UnetTest:
    """Evaluates a segmentation model on a test loader and saves visual results."""

    def calculate_metrics(self, y_true: torch.Tensor, y_pred: torch.Tensor, threshold: float = 0.5) -> List[float]:
        """Compute pixel-wise binary metrics for one batch.

        Args:
            y_true: Ground-truth mask; every non-zero value counts as foreground.
            y_pred: Predicted probabilities (sigmoid output).
            threshold: Probabilities strictly above this value count as foreground.

        Returns:
            ``[jaccard, f1, recall, precision, accuracy]`` over all pixels of
            the batch.
        """
        y_true = y_true.detach().cpu().numpy().astype(bool).reshape(-1)
        # Probabilities must be thresholded: a plain bool cast would mark every
        # non-zero probability -- practically every pixel -- as foreground.
        y_pred = (y_pred.detach().cpu().numpy() > threshold).reshape(-1)

        score_jaccard = jaccard_score(y_true, y_pred, zero_division=1)
        score_f1 = f1_score(y_true, y_pred, zero_division=1)
        score_recall = recall_score(y_true, y_pred, zero_division=1)
        score_precision = precision_score(y_true, y_pred, zero_division=1)
        score_acc = accuracy_score(y_true, y_pred)

        return [score_jaccard, score_f1, score_recall, score_precision, score_acc]

    def save_result(self, test_result_path: str, image: torch.Tensor, org_mask: torch.Tensor, predicted_mask: torch.Tensor, sample_id: int) -> None:
        """Write ``image | ground truth | prediction`` side by side as one PNG.

        The file is named ``sample_<sample_id>.png`` inside ``test_result_path``
        (created when missing). Both masks are binarised at 0.5 and the panels
        are separated by 10-pixel grey bars.
        """
        os.makedirs(test_result_path, exist_ok=True)

        predicted_mask = (predicted_mask.detach().cpu().numpy().squeeze() > 0.5).astype(np.uint8) * 255
        org_mask = (org_mask.detach().cpu().numpy().squeeze() > 0.5).astype(np.uint8) * 255
        # NOTE(review): assumes the image is scaled to [0, 1] -- confirm against
        # the preprocessing. Clipping keeps out-of-range values from wrapping
        # around in the uint8 cast.
        image = np.clip(image.detach().cpu().numpy().squeeze() * 255, 0, 255).astype(np.uint8)

        h, w = image.shape
        line = np.ones((h, 10), dtype=np.uint8) * 128
        cat_images = np.concatenate([image, line, org_mask, line, predicted_mask], axis=1)

        file_name = os.path.join(test_result_path, f"sample_{sample_id}.png")
        success = cv2.imwrite(file_name, cat_images)

        if success:
            print(f"✅ Saved: {file_name}")
        else:
            print(f"❌ Failed to save image: {file_name}")

    def test(self, model: torch.nn.Module, test_loader: torch.utils.data.DataLoader, test_result_path: str, device: torch.device) -> None:
        """Run inference over ``test_loader``, save every sample, print metrics.

        Metrics are computed per batch and averaged weighted by batch size, so
        a short final batch does not count as much as a full one. FPS is the
        number of images divided by the time spent in the forward pass plus
        sigmoid.
        """
        print(torch.cuda.is_available())
        print(device)

        # CUDA kernels run asynchronously, so the timer must be bracketed by
        # synchronize() calls to measure the actual inference time.
        sync_cuda = torch.cuda.is_available() and torch.device(device).type == "cuda"

        metrics_sum = np.zeros(5)
        inference_seconds = 0.0
        total_images = 0
        model.eval()

        with torch.no_grad():
            for x, y in test_loader:
                x = x.to(device, dtype=torch.float32)
                y = y.to(device, dtype=torch.float32)
                batch_size = x.size(0)

                if sync_cuda:
                    torch.cuda.synchronize()
                start_time = time.perf_counter()
                y_pred = torch.sigmoid(model(x))
                if sync_cuda:
                    torch.cuda.synchronize()
                inference_seconds += time.perf_counter() - start_time

                score = self.calculate_metrics(y, y_pred)
                metrics_sum += batch_size * np.array(score)

                # A running counter keeps ids unique even when the last batch
                # is smaller than the others; deriving the id from
                # batch_index * batch_size would overwrite earlier files.
                for idx in range(batch_size):
                    self.save_result(test_result_path, x[idx], y[idx], y_pred[idx], total_images + idx)
                total_images += batch_size

        print(f"Total Images in Test Set: {total_images}")
        if total_images == 0:
            # Nothing to average; avoid dividing by zero.
            return

        avg_metrics = metrics_sum / total_images
        print(f"Jaccard: {avg_metrics[0]:.4f} - F1: {avg_metrics[1]:.4f} - Recall: {avg_metrics[2]:.4f} - Precision: {avg_metrics[3]:.4f} - Acc: {avg_metrics[4]:.4f}")

        fps = total_images / inference_seconds if inference_seconds > 0 else float("inf")
        print("FPS:", fps)

# (8) Training

In [7]:
class EarlyStopping:
    """Signals when the validation loss has stopped improving.

    Args:
        patience: Number of consecutive non-improving calls after which the
            instance reports that training should stop.
        min_delta: Margin by which a loss must undercut the best loss so far
            to count as an improvement.
    """

    def __init__(self, patience=10, min_delta=0.001):
        self.patience = patience
        self.min_delta = min_delta
        self.best_loss = float('inf')
        self.counter = 0

    def __call__(self, val_loss):
        """Record ``val_loss``; return True once patience is exhausted."""
        improved = val_loss < self.best_loss - self.min_delta
        if improved:
            # New best: remember it and restart the patience window.
            self.best_loss, self.counter = val_loss, 0
        else:
            self.counter += 1

        if self.counter < self.patience:
            return False

        print(f"⛔ Early stopping triggered after {self.patience} epochs without improvement!")
        return True

class UnetTrain:
    """Training driver for the U-Net: seeding, epoch loops, checkpointing."""

    def seeding(self, seed):
        """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN."""
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed(seed)
        torch.backends.cudnn.deterministic = True

    def epoch_time(self, start_time, end_time):
        """Split the elapsed seconds into whole ``(minutes, seconds)``."""
        elapsed_time = end_time - start_time
        elapsed_mins = int(elapsed_time / 60)
        elapsed_secs = int(elapsed_time - (elapsed_mins * 60))
        return elapsed_mins, elapsed_secs

    def _amp_enabled(self, device):
        """Mixed precision is used only when training actually runs on CUDA."""
        return torch.cuda.is_available() and torch.device(device).type == "cuda"

    def train(self, model, loader, optimizer, loss_fn, device, scaler=None):
        """Run one training epoch and return the mean batch loss.

        Args:
            model: Network to optimise.
            loader: Iterable of ``(x, y)`` batches that supports ``len()``.
            optimizer: Optimiser stepped once per batch.
            loss_fn: Callable ``loss_fn(y_pred, y)`` returning a scalar tensor.
            device: Device the batches are moved to.
            scaler: Optional ``GradScaler`` to reuse across epochs. When
                omitted, a fresh one is created for this call.
        """
        use_amp = self._amp_enabled(device)
        if scaler is None:
            # NOTE: torch.cuda.amp is deprecated in recent PyTorch releases; it
            # is kept because it is what this file imports.
            scaler = GradScaler(enabled=use_amp)

        epoch_loss = 0.0
        model.train()
        for x, y in loader:
            x = x.to(device, dtype=torch.float32)
            y = y.to(device, dtype=torch.float32)
            optimizer.zero_grad()
            with autocast(enabled=use_amp):
                y_pred = model(x)
                loss = loss_fn(y_pred, y)

            # With a disabled scaler these calls reduce to a plain
            # backward() / optimizer.step().
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
            epoch_loss += loss.item()

        epoch_loss = epoch_loss / len(loader)
        return epoch_loss

    def evaluate(self, model, loader, loss_fn, device):
        """Return the mean batch loss over ``loader`` without updating weights."""
        epoch_loss = 0.0
        model.eval()
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device, dtype=torch.float32)
                y = y.to(device, dtype=torch.float32)
                y_pred = model(x)
                loss = loss_fn(y_pred, y)
                epoch_loss += loss.item()

        return epoch_loss / len(loader)

    def execute(self, num_epochs, lr, train_loader, valid_loader, model_file, loss_result_path, device):
        """Train a fresh U-Net with checkpointing and early stopping.

        The checkpoint at ``model_file`` is overwritten whenever the validation
        loss improves. After training, the loss history is written to
        ``loss_result_path``.
        """
        print(f"Using device: {device} (CUDA available: {torch.cuda.is_available()})")

        model = UNet(in_channels=1, out_channels=1, dropout=0.3).to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr, weight_decay=1e-5)
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=10, T_mult=2)
        loss_fn = DiceBCELoss()
        # One scaler for the whole run, so the loss scale it adapts is not
        # reset at the start of every epoch.
        scaler = GradScaler(enabled=self._amp_enabled(device))

        early_stopping = EarlyStopping(patience=10, min_delta=0.001)
        best_valid_loss = float("inf")
        results = {"train_loss": [], "valid_loss": []}

        for epoch in tqdm(range(num_epochs)):
            start_time = time.time()

            train_loss = self.train(model, train_loader, optimizer, loss_fn, device, scaler=scaler)
            valid_loss = self.evaluate(model, valid_loader, loss_fn, device)
            # Advance the schedule once per epoch; without this call the
            # learning rate would stay at its initial value for the whole run.
            scheduler.step()

            if valid_loss < best_valid_loss:
                print(f"✅ Valid loss improved from {best_valid_loss:.4f} to {valid_loss:.4f}. Saving checkpoint.")
                best_valid_loss = valid_loss
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'scheduler_state_dict': scheduler.state_dict(),
                    'loss': best_valid_loss,
                }, model_file)

            end_time = time.time()
            epoch_mins, epoch_secs = self.epoch_time(start_time, end_time)

            results["train_loss"].append(train_loss)
            results["valid_loss"].append(valid_loss)
            print(f"Epoch {epoch+1}: Time: {epoch_mins}m {epoch_secs}s, Train Loss: {train_loss:.3f}, Val Loss: {valid_loss:.3f}")

            if early_stopping(valid_loss):
                print("🛑 Stopping training early due to no improvement.")
                break

        # One row per series: the key, then the whole loss list in one cell.
        # NOTE(review): the list is written as its Python repr; the format is
        # kept as-is for existing consumers.
        with open(loss_result_path, "w", newline="") as file:
            writer = csv.writer(file)
            for key, value in results.items():
                writer.writerow([key, value])


In [8]:
def main(
    target_dir="/content/drive/MyDrive/PhDwork",
    dataset_dir=None,
    batch_size=8,
    num_epochs=1,
    lr=1e-4,
    seed=42,
    num_workers=0,
):
    """Train the U-Net, reload the best checkpoint and evaluate it on the test set.

    Calling ``main()`` without arguments uses the notebook's original settings.

    Args:
        target_dir: Working directory; outputs go to
            ``./Segmentation/files`` relative to it.
        dataset_dir: Folder holding ``train_dataset.hdf5``,
            ``valid_dataset.hdf5`` and ``test_dataset.hdf5``. Defaults to
            ``<target_dir>/datasets``.
        batch_size: Batch size for all three loaders.
        num_epochs: Maximum number of training epochs.
        lr: Initial learning rate.
        seed: Seed passed to ``UnetTrain.seeding``.
        num_workers: DataLoader worker processes.
    """
    os.chdir(target_dir)
    print("Current Directory:", os.getcwd())

    ut = UnetTrain()
    ut.seeding(seed)

    # Output locations: loss history, best checkpoint, rendered test samples.
    output_dir = os.path.join(".", "Segmentation", "files")
    os.makedirs(output_dir, exist_ok=True)
    loss_result_file = os.path.join(output_dir, "results.csv")
    model_file = os.path.join(output_dir, "checkpoint.pth")
    test_result_path = os.path.join(output_dir, "results")

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(torch.cuda.is_available())

    if dataset_dir is None:
        dataset_dir = os.path.join(target_dir, "datasets")
    os.makedirs(dataset_dir, exist_ok=True)
    train_hdf5_path = os.path.join(dataset_dir, "train_dataset.hdf5")
    valid_hdf5_path = os.path.join(dataset_dir, "valid_dataset.hdf5")
    test_hdf5_path = os.path.join(dataset_dir, "test_dataset.hdf5")

    print("Loading Training Data...")
    train_dataset = HDF5Dataset(train_hdf5_path)
    print("Loading Validation Data...")
    valid_dataset = HDF5Dataset(valid_hdf5_path)
    print("Loading Test Data...")
    test_dataset = HDF5Dataset(test_hdf5_path)

    # Pinned host memory only helps host-to-GPU copies, so it is requested
    # only when a GPU is in use.
    pin_memory = device == "cuda"
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers, pin_memory=pin_memory)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers, pin_memory=pin_memory)

    print(f"Train: {len(train_dataset)}, Valid: {len(valid_dataset)}, Test: {len(test_dataset)}")

    ut.execute(num_epochs, lr, train_loader, valid_loader, model_file, loss_result_file, device)

    # Evaluate the weights of the best checkpoint rather than those of the
    # last epoch.
    model = UNet(1, 1).to(device)
    checkpoint = torch.load(model_file, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    utest = UnetTest()
    utest.test(model, test_loader, test_result_path, device)

if __name__ == "__main__":
    # The start method can be fixed only once per process. In a notebook the
    # kernel outlives this cell, so an unguarded set_start_method('spawn')
    # raises "context has already been set" when the cell is run again.
    if mp.get_start_method(allow_none=True) != "spawn":
        mp.set_start_method("spawn", force=True)
    main()

Current Directory: /content/drive/MyDrive/PhDwork
True
Loading Training Data...
Loading Validation Data...
Loading Test Data...
Train: 100, Valid: 134, Test: 107


  scaler = GradScaler()


True
cuda


  with autocast():


True
cuda
✅ Valid loss improved from inf to 1.7299. Saving checkpoint.


100%|██████████| 1/1 [00:46<00:00, 46.03s/it]

Epoch 1: Time: 0m 46s, Train Loss: 1.672, Val Loss: 1.730





True
cuda
✅ Saved: ./Segmentation/files/results/sample_0.png
✅ Saved: ./Segmentation/files/results/sample_1.png
✅ Saved: ./Segmentation/files/results/sample_2.png
✅ Saved: ./Segmentation/files/results/sample_3.png
✅ Saved: ./Segmentation/files/results/sample_4.png
✅ Saved: ./Segmentation/files/results/sample_5.png
✅ Saved: ./Segmentation/files/results/sample_6.png
✅ Saved: ./Segmentation/files/results/sample_7.png
✅ Saved: ./Segmentation/files/results/sample_8.png
✅ Saved: ./Segmentation/files/results/sample_9.png
✅ Saved: ./Segmentation/files/results/sample_10.png
✅ Saved: ./Segmentation/files/results/sample_11.png
✅ Saved: ./Segmentation/files/results/sample_12.png
✅ Saved: ./Segmentation/files/results/sample_13.png
✅ Saved: ./Segmentation/files/results/sample_14.png
✅ Saved: ./Segmentation/files/results/sample_15.png
✅ Saved: ./Segmentation/files/results/sample_16.png
✅ Saved: ./Segmentation/files/results/sample_17.png
✅ Saved: ./Segmentation/files/results/sample_18.png
✅ Saved: ./S