# Supervised Lesion Detector

This notebook trains and evaluates supervised lesion detectors on the DeepLesion dataset, using the following image-detection architectures with a ResNet-50 backbone:
- YOLOv5 (stable, but YOLOv8 is newer),
- Faster R-CNN (torchvision.models.detection or Detectron2),
- DETR (Facebook DETR),
- Improved DETR (DINO-DETR or Deformable DETR, DINO has better performance but Deformable is faster),
- RetinaNet (FPN backbone + anchor-based).

## Assumptions:
- Use 2D slice inputs (optionally use the neighbouring ones too),
- Resize all images to 512x512,
- Use COCO-style Dataset class.
- Use DeepLesion to train a general lesion localizer, and other datasets such as LiTS (Liver Tumor Segmentation) or CHAOS (CT liver dataset) to train more specialized localizers.

## 📚 Thesis Value Summary
### Contribution and Value:
- Comparison of CNN vs Transformer detectors on DeepLesion -> ✅ Fills a gap in literature
- Evaluation of improved DETRs (DINO/Deformable) -> ✅ Modern insight
- General vs specialized lesion detection -> ✅ Strong clinical relevance
- Analysis of training time, robustness, failure modes -> ✅ Engineering depth


# Import all packages

In [None]:
import os
import json
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from typing import Any
from PIL import Image
import shutil
import random
from pathlib import Path
import yaml

# Image utility methods

In [None]:
def load_metadata(path: str) -> pd.DataFrame:
    """
    Reads the CSV file located at `path` and
    hands it back as a pandas DataFrame.
    """

    metadata = pd.read_csv(path)
    return metadata

def normalize(img: NDArray[np.uint16], per_image_norm: bool) -> NDArray[np.float32]:
    """
    Normalizes the input image to floating point values.

    The image is first cast to float32. When `per_image_norm` is False
    it is divided by 65535 (the maximum of a uint16). Otherwise it is
    min-max scaled with the minimum and maximum of this very image.

    A constant image (maximum equal to minimum) is returned as all zeros
    instead of dividing by zero, and an empty image is returned unchanged.
    """

    img = img.astype(np.float32)
    if not per_image_norm:
        return img / 65535.0
    if img.size == 0:
        # np.min / np.max raise on empty arrays; there is nothing to scale.
        return img

    img_min = np.min(img)
    img_max = np.max(img)
    value_range = img_max - img_min
    if value_range == 0:
        # Every pixel has the same value: avoid 0 / 0, which would yield NaN.
        return np.zeros_like(img)
    return (img - img_min) / value_range

def convert_to_hu(img: NDArray[np.uint16], norm: bool, hu_min=-1024, hu_max=3071):
    """
    Maps the pixel data of a uint16 CT image to Hounsfield Units (HU).

    The stored values are shifted down by 32768 and clipped to the
    [hu_min, hu_max] window. When `norm` is set, the window is
    additionally rescaled to the [0, 1] range.
    """

    shifted = img.astype(np.int32) - 32768
    windowed = np.clip(shifted, hu_min, hu_max).astype(np.float32)
    if not norm:
        return windowed

    rescaled = (windowed - hu_min) / (hu_max - hu_min)
    return np.clip(rescaled, 0.0, 1.0)

def load_image(
    path: str,
    hu_scale: bool = True,
    norm: bool = True,
    per_image_norm: bool = True,
    hu_min: int = -160,
    hu_max: int = 240,
):
    """
    Loads an image given its path
    and returns it as a numpy array.

    Parameters:
        path: location of the image file.
        hu_scale: when True, the pixels are converted with `convert_to_hu`
            using the [hu_min, hu_max] window.
        norm: forwarded to `convert_to_hu` when `hu_scale` is True;
            otherwise it decides whether `normalize` is applied.
        per_image_norm: forwarded to `normalize`; it has no effect
            when `hu_scale` is True.
        hu_min, hu_max: HU window bounds used when `hu_scale` is True.
            The defaults keep the previously hard-coded -160..240 window.

    When both `hu_scale` and `norm` are False the raw pixel array is returned.
    """

    # The context manager guarantees the underlying file handle is released
    # as soon as the pixel data has been copied into the numpy array.
    with Image.open(path) as img:
        img_array = np.array(img)

    if hu_scale:
        return convert_to_hu(img_array, norm, hu_min, hu_max)
    if norm:
        return normalize(img_array, per_image_norm)
    return img_array

def save_image(img_array: NDArray[Any], path: str):
    """
    Writes the given image array to `path`.

    Arrays that are not already uint16 are cast to uint16 first
    (a plain cast, the values are not rescaled).
    """

    is_uint16 = img_array.dtype == np.uint16
    pixels = img_array if is_uint16 else img_array.astype(np.uint16)
    Image.fromarray(pixels).save(path)

def show_image(img: NDArray[np.float32], title="Example Image", cmap="gray"):
    """
    Displays the image in a 5x5 inch figure without axes,
    using the given colour map and, unless it is None, the given title.
    """

    show_title = title is not None

    plt.figure(figsize=(5, 5))
    plt.imshow(img, cmap=cmap)
    plt.axis("off")
    if show_title:
        plt.title(title)
    plt.show()


# Convert DeepLesion metadata to COCO format

In [None]:
# Build a COCO-style annotation file (images / annotations / categories)
# from the DeepLesion metadata CSV and write it to disk as JSON.
deeplesion_metadata = load_metadata("../data/deeplesion_metadata.csv")
deeplesion_image_path = "../data/deeplesion/key_slices/"
deeplesion_coco_json_path = "../data/deeplesion_coco.json"

images = []
annotations = []
categories = [
    {"id": 1, "name": "bone"},
    {"id": 2, "name": "abdomen"},
    {"id": 3, "name": "mediastinum"},
    {"id": 4, "name": "liver"},
    {"id": 5, "name": "lung"},
    {"id": 6, "name": "kidney"},
    {"id": 7, "name": "soft tissue"},
    {"id": 8, "name": "pelvis"}
]

image_counter = 1
annotation_id = 1
image_id_map = {}   # file name -> COCO image id, so each image is registered only once

for _idx, row in deeplesion_metadata.iterrows():
    # Skip rows with Train_Val_Test == 1 so that only the Val + Test rows remain
    if row["Train_Val_Test"] == 1:
        continue

    file_name = row["File_name"]
    # Cast to a built-in int so that json.dump never receives a NumPy scalar
    lesion_type = int(row["Coarse_lesion_type"])
    bbox_str = row["Bounding_boxes"]
    size_str = row["Image_size"]

    # Extract ground truth bounding box' coordinates (x1, y1, x2, y2), rounded to integers
    x1, y1, x2, y2 = [round(float(val)) for val in bbox_str.split(",")]

    # Extract sizes
    width, height = [int(val) for val in size_str.split(",")]

    # Register image (several rows may refer to the same file)
    if file_name not in image_id_map:
        image_id_map[file_name] = image_counter
        images.append({
            "id": image_counter,
            "file_name": file_name,
            "width": width,
            "height": height
        })
        image_counter += 1
    image_id = image_id_map[file_name]

    # COCO boxes are stored as [x, y, width, height]
    bbox = [x1, y1, x2 - x1, y2 - y1]
    area = bbox[2] * bbox[3]

    annotations.append({
        "id": annotation_id,
        "image_id": image_id,
        # "category_id" is the key defined by the COCO annotation format;
        # "category" is kept as well so existing readers of this file keep working.
        "category_id": lesion_type,
        "category": lesion_type,
        "bbox": bbox,
        "area": area,
        "iscrowd": 0    # Normal object (not crowd of indistinct objects, that can't be cleanly separated)
    })

    annotation_id += 1

# Save to JSON
deeplesion_coco_format = {
    "images": images,
    "annotations": annotations,
    "categories": categories
}

with open(deeplesion_coco_json_path, "w") as f:
    json.dump(deeplesion_coco_format, f, indent=2)


# Convert DeepLesion metadata to text files required by YOLOv5

In [None]:
# Write one YOLO label file per image: each line is
# "<class> <x_center> <y_center> <width> <height>" with coordinates
# normalized by the image size.
deeplesion_metadata = load_metadata("../data/deeplesion_metadata.csv")

# Source directories
image_dir = Path("../data/deeplesion/key_slices/")
label_dir = Path("labels_unsorted/")

# Start from an empty label directory: the label files below are opened in
# append mode (one line per metadata row), so leftovers from an interrupted
# earlier run would otherwise end up as duplicated boxes.
if label_dir.is_dir():
    shutil.rmtree(label_dir)
label_dir.mkdir(parents=True, exist_ok=True)

for _idx, row in deeplesion_metadata.iterrows():
    # Skip rows with Train_Val_Test == 1 so that only the Val + Test rows remain
    if row["Train_Val_Test"] == 1:
        continue

    file_name = row["File_name"]
    image_path = image_dir / file_name
    # Same "<stem>.txt" naming that is used later to pair images with labels
    label_path = label_dir / (Path(file_name).stem + ".txt")

    if not image_path.exists():
        continue

    lesion_type = row["Coarse_lesion_type"] - 1 # YOLOv5 requires class IDS starting at 0
    bbox_str = row["Bounding_boxes"]
    size_str = row["Image_size"]

    # Extract ground truth bounding box' coordinates (x1, y1, x2, y2), rounded to integers
    x1, y1, x2, y2 = [round(float(val)) for val in bbox_str.split(",")]

    # Extract sizes
    width, height = [int(val) for val in size_str.split(",")]

    bbox_width = x2 - x1
    bbox_height = y2 - y1
    x_center = x1 + bbox_width / 2
    y_center = y1 + bbox_height / 2

    # Normalize
    x_center /= width
    y_center /= height
    bbox_width /= width
    bbox_height /= height

    # Append: an image with several metadata rows gets one line per row
    with open(label_path, 'a') as f:
        f.write(f"{lesion_type} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")

# -----------------------------------------------------------------------------

# Create target structure (any previous output is discarded first)
target_dir = Path("deeplesion_yolo")
if target_dir.is_dir():
    shutil.rmtree(target_dir)

splits = ["train", "val", "test"]
for split in splits:
    (target_dir / "images" / split).mkdir(parents=True, exist_ok=True)
    (target_dir / "labels" / split).mkdir(parents=True, exist_ok=True)

# Collect all images that have a label file.
# Sorting first and shuffling with a dedicated, seeded generator makes the
# split reproducible: glob order depends on the filesystem, and the global
# random state depends on whatever ran before this cell.
split_seed = 42
annotated_images = sorted(
    img for img in image_dir.glob("*.png") if (label_dir / (img.stem + ".txt")).exists()
)
random.Random(split_seed).shuffle(annotated_images)

# Split into train, val and test sets (70% / 15% / remainder)
# NOTE(review): the split is done per image file; if several key slices belong
# to the same patient they can land in different subsets - confirm whether a
# patient-level split is required.
n_total = len(annotated_images)
n_train = int(0.7 * n_total)
n_val = int(0.15 * n_total)

train_images = annotated_images[:n_train]
val_images = annotated_images[n_train:n_train + n_val]
test_images = annotated_images[n_train + n_val:]

splits_map = {
    "train": train_images,
    "val": val_images,
    "test": test_images
}

# TODO - PERFORM NORMALIZATION ACCORDING TO HU SCALE HERE
# Copy image files together with their label files.
# The loop variable is deliberately not called `images`, so it does not
# overwrite the COCO `images` list built earlier in the notebook.
for split, split_images in splits_map.items():
    for image_path in split_images:
        label_path = label_dir / (image_path.stem + ".txt")
        shutil.copy(image_path, target_dir / "images" / split / image_path.name)
        shutil.copy(label_path, target_dir / "labels" / split / label_path.name)

print(f"Split done! Total = {n_total}")

# Describe the dataset for YOLOv5: root path, per-split image directories,
# number of classes and the class names (index = class ID in the label files)
dataset_root = os.path.abspath(str(target_dir))
class_names = [
    "bone",
    "abdomen",
    "mediastinum",
    "liver",
    "lung",
    "kidney",
    "soft_tissue",
    "pelvis",
]
deeplesion_yaml = {
    "path": dataset_root,
    "train": os.path.join(dataset_root, "images/train"),
    "val": os.path.join(dataset_root, "images/val"),
    "test": os.path.join(dataset_root, "images/test"),
    "nc": 8,
    "names": class_names,
}

yaml_path = target_dir / "deeplesion.yaml"
with yaml_path.open("w") as f:
    yaml.dump(deeplesion_yaml, f)

# The unsorted labels have been copied into the split folders; drop the temporary directory
if label_dir.is_dir():
    shutil.rmtree(label_dir)


# Train and evaluate YOLOv5 model

In [None]:
# Clone the YOLOv5 repository and install its Python requirements
# (pip is run from inside the cloned directory, then the working directory is restored)
!git clone https://github.com/ultralytics/yolov5
%cd yolov5
!pip install -r requirements.txt
%cd ..

In [None]:
# Train the YOLOv5 model on the DeepLesion dataset described by deeplesion_yolo/deeplesion.yaml:
# 512-pixel image size, batch size 8, 100 epochs, starting from the yolov5s.pt weights.
# The run is named "deeplesion_yolov5".
!python yolov5/train.py --img 512 --batch 8 --epochs 100 --data deeplesion_yolo/deeplesion.yaml --weights yolov5s.pt --name deeplesion_yolov5

In [None]:
# Evaluate the best checkpoint of the "deeplesion_yolov5" training run at 512-pixel image size
# NOTE(review): no --task flag is passed, so val.py uses its default split (presumably "val");
# pass "--task test" to score the held-out test split - confirm against the YOLOv5 documentation.
!python yolov5/val.py --data deeplesion_yolo/deeplesion.yaml --weights yolov5/runs/train/deeplesion_yolov5/weights/best.pt --img 512