# Supervised Lesion Detector

This notebook is dedicated to training and evaluating supervised lesion detectors on the DeepLesion dataset, using the following image detection architectures with a ResNet-50 backbone:
- YOLOv5 (stable, but YOLOv8 is newer),
- Faster R-CNN (torchvision.models.detection or Detectron2),
- DETR (Facebook DETR),
- Improved DETR (DINO-DETR or Deformable DETR, DINO has better performance but Deformable is faster),
- RetinaNet (FPN backbone + anchor-based).

## Assumptions:
- Use 2D slice inputs (optionally use the neighbouring ones too),
- Resize all images to 512x512,
- Use a COCO-style Dataset class,
- Use DeepLesion for training a general lesion localizer, and other datasets such as LiTS (Liver Tumor Segmentation) or CHAOS (CT liver dataset) for more specialized localizers.

## 📚 Thesis Value Summary
### Contribution and Value:
- Comparison of CNN vs Transformer detectors on DeepLesion -> ✅ Fills a gap in literature
- Evaluation of improved DETRs (DINO/Deformable) -> ✅ Modern insight
- General vs specialized lesion detection -> ✅ Strong clinical relevance
- Analysis of training time, robustness, failure modes -> ✅ Engineering depth


# Image utility methods

In [4]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from numpy.typing import NDArray
from typing import Any
from PIL import Image

def load_metadata(path: str) -> pd.DataFrame:
    """
    Reads the CSV metadata file located at `path`
    and hands it back as a pandas DataFrame.
    """

    metadata_frame = pd.read_csv(path)
    return metadata_frame

def normalize(img: NDArray[np.uint16], per_image_norm: bool):
    """
    Normalizes the input image to the [0, 1] range
    and returns it as a float32 array.

    With `per_image_norm` disabled the pixels are divided by 65535
    (the largest uint16 value). Otherwise the image's own minimum and
    maximum are mapped to 0 and 1 respectively. A constant image
    (maximum equal to minimum) has no range to stretch, so an all-zero
    image is returned instead of dividing by zero and producing NaNs.
    """

    img = img.astype(np.float32)
    if not per_image_norm:
        return img / 65535.0
    lowest = np.min(img)
    highest = np.max(img)
    value_range = highest - lowest
    if value_range == 0:
        # Every pixel has the same value: avoid 0 / 0.
        return np.zeros_like(img)
    return (img - lowest) / value_range

def convert_to_hu(img: NDArray[np.uint16], norm: bool, hu_min=-1024, hu_max=3071):
    """
    Maps the raw uint16 pixels of a CT image to Hounsfield Units (HU)
    by subtracting the 32768 offset, then clips the result to the
    [hu_min, hu_max] window. When `norm` is set, the window is
    additionally rescaled to the [0, 1] range.
    """

    # Widen to int32 first so the subtraction cannot wrap around in uint16.
    shifted = img.astype(np.int32) - 32768
    windowed = np.clip(shifted, hu_min, hu_max).astype(np.float32)
    if not norm:
        return windowed
    rescaled = (windowed - hu_min) / (hu_max - hu_min)
    return np.clip(rescaled, 0.0, 1.0)

def load_image(path: str, hu_scale: bool = True, norm: bool = True, per_image_norm: bool = True,
               hu_min: int = -160, hu_max: int = 240):
    """
    Loads an image given its path
    and returns it as a numpy array.

    - `hu_scale=True`: the pixels are converted with `convert_to_hu`
      using the [hu_min, hu_max] window (rescaled to [0, 1] when `norm`
      is set); `per_image_norm` is ignored in this mode.
    - `hu_scale=False`, `norm=True`: the pixels are passed to
      `normalize` together with `per_image_norm`.
    - otherwise the raw pixel array is returned unchanged.
    """

    # Read the pixel data while the file is open; the context manager then
    # closes the underlying file handle instead of leaving it to the GC.
    with Image.open(path) as img:
        img_array = np.array(img)
    if hu_scale:
        return convert_to_hu(img_array, norm, hu_min, hu_max)
    if norm:
        return normalize(img_array, per_image_norm)
    return img_array

def save_image(img_array: NDArray[Any], path: str):
    """
    Writes the given image to `path`, casting the
    pixel data to uint16 first when it has another dtype.
    """

    # copy=False hands back the very same array when it is already uint16.
    as_uint16 = img_array.astype(np.uint16, copy=False)
    Image.fromarray(as_uint16).save(path)

def show_image(img: NDArray[np.float32], title="Example Image", cmap="gray"):
    """
    Displays the image in a 5x5 inch figure with hidden axes,
    using the given colour map and, unless it is None, the given title.
    """

    figure_size = (5, 5)
    plt.figure(figsize=figure_size)
    plt.imshow(img, cmap=cmap)
    has_title = title is not None
    if has_title:
        plt.title(title)
    plt.axis("off")
    plt.show()


# Convert DeepLesion metadata to COCO format

In [8]:
import os
import json
import pandas as pd

deeplesion_metadata = load_metadata("../data/deeplesion_metadata.csv")

images = []
annotations = []
# Category ids are referenced by each annotation's "category_id", which is
# taken directly from the metadata's "Coarse_lesion_type" column.
categories = [
    {"id": 1, "name": "bone"},
    {"id": 2, "name": "abdomen"},
    {"id": 3, "name": "mediastinum"},
    {"id": 4, "name": "liver"},
    {"id": 5, "name": "lung"},
    {"id": 6, "name": "kidney"},
    {"id": 7, "name": "soft tissue"},
    {"id": 8, "name": "pelvis"}
]

# Next free ids for the "images" and "annotations" records
image_id = 1
annotation_id = 1

# Maps a file name to the id of its single entry in `images`, so that a
# slice listed on several metadata rows (one row per lesion) is registered
# once and all of its annotations point at the same image.
image_ids_by_file = {}

for _, row in deeplesion_metadata.iterrows():
    # Skip rows flagged with Train_Val_Test == 1; only the Val + Test rows
    # are exported (presumably the only ones with a labelled coarse lesion
    # type - verify against the dataset description).
    if row["Train_Val_Test"] == 1:
        continue

    file_name = row["File_name"]
    lesion_type = row["Coarse_lesion_type"]
    bbox_str = row["Bounding_boxes"]
    size_str = row["Image_size"]

    # Register the image the first time its file name is seen
    current_image_id = image_ids_by_file.get(file_name)
    if current_image_id is None:
        # "Image_size" holds two comma-separated integers
        width, height = (int(val) for val in size_str.split(","))
        current_image_id = image_id
        image_ids_by_file[file_name] = current_image_id
        images.append({
            "id": current_image_id,
            "file_name": file_name,
            "width": width,
            "height": height
        })
        image_id += 1

    # Extract the ground truth bounding box's corner coordinates (x1, y1, x2, y2)
    x1, y1, x2, y2 = (round(float(val)) for val in bbox_str.split(","))

    # COCO boxes are stored as [x, y, width, height]
    bbox = [x1, y1, x2 - x1, y2 - y1]
    area = bbox[2] * bbox[3]

    annotations.append({
        "id": annotation_id,
        "image_id": current_image_id,
        # COCO tooling looks the category up under "category_id"
        "category_id": int(lesion_type),
        "bbox": bbox,
        "area": area,
        "iscrowd": 0    # Normal object (not crowd of indistinct objects, that can't be cleanly separated)
    })
    annotation_id += 1

# Save to JSON
deeplesion_coco_format = {
    "images": images,
    "annotations": annotations,
    "categories": categories
}

with open("../data/deeplesion_coco.json", "w", encoding="utf-8") as f:
    json.dump(deeplesion_coco_format, f, indent=2)



# Train YOLOv5

In [None]:
# Convert COCO JSON to YOLOv5 .txt format
# TODO