## Define Input Data

In [1]:
import os

# Define the folder containing .spm files
folder_path = r"C:\Users\cobia\OneDrive - University of Cambridge\Python\afm_data\all_images_03_08_2025"

spm_files = [f for f in os.listdir(folder_path) if f.endswith('.spm')]

## Modules and Preprocessing Functions

In [2]:
import cv2
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from detectron2.structures import BoxMode
from detectron2.data import DatasetCatalog, MetadataCatalog

def generate_circular_mask(center_x, center_y, radius, shape=(512, 512)):
    Y, X = np.ogrid[:shape[0], :shape[1]]
    dist_from_center = np.sqrt((X - center_x) ** 2 + (Y - center_y) ** 2)
    return dist_from_center <= radius

def load_afm_dataset(folder_path, spm_files):
    dataset = []
    category_to_id = {"small": 0, "large": 1}

    for idx, filename in enumerate(spm_files):
        afm_file = os.path.join(folder_path, f"{filename}_corrected.csv")
        label_file = os.path.join(folder_path, f"{filename}_categorised.csv")

        afm_data = pd.read_csv(afm_file).values
        label_data = pd.read_csv(label_file)

        # Create image
        image = (afm_data - np.min(afm_data)) / (np.max(afm_data) - np.min(afm_data)) * 255
        image = image.astype(np.uint8)
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
        img_path = os.path.join(folder_path, f"{filename}.png")
        cv2.imwrite(img_path, image)

        record = {
            "file_name": img_path,
            "image_id": idx,
            "height": 512,
            "width": 512,
            "annotations": []
        }

        for _, row in label_data.iterrows():
            if row['category'] == 'combination':
                continue

            x, y, area, cat = row['x'], row['y'], row['size'], row['category']
            radius = np.sqrt(area / np.pi)

            mask = generate_circular_mask(x, y, radius)
            contours, _ = cv2.findContours(mask.astype(np.uint8), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if len(contours) == 0:
                continue

            segmentation = contours[0].flatten().tolist()
            bbox = cv2.boundingRect(contours[0])

            record["annotations"].append({
                "bbox": list(bbox),
                "bbox_mode": BoxMode.XYWH_ABS,
                "segmentation": [segmentation],
                "category_id": category_to_id[cat],
                "iscrowd": 0
            })

        dataset.append(record)
    return dataset


## Register Dataset

In [3]:
from detectron2.data import DatasetCatalog, MetadataCatalog

def register_afm_dataset(folder_path, spm_files):
    DatasetCatalog.register("afm_spots", lambda: load_afm_dataset(folder_path, spm_files))
    MetadataCatalog.get("afm_spots").set(thing_classes=["small", "large"])


## Train with Detectron2 (Mask R-CNN)

In [4]:
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.utils.visualizer import Visualizer

def train_model(output_dir="./output"):
    cfg = get_cfg()
    cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))

    cfg.DATASETS.TRAIN = ("afm_spots",)
    cfg.DATASETS.TEST = ()
    cfg.DATALOADER.NUM_WORKERS = 2
    cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")
    cfg.SOLVER.IMS_PER_BATCH = 2
    cfg.SOLVER.BASE_LR = 0.00025
    cfg.SOLVER.MAX_ITER = 1000
    cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
    cfg.MODEL.ROI_HEADS.NUM_CLASSES = 2
    cfg.OUTPUT_DIR = output_dir

    os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
    trainer = DefaultTrainer(cfg)
    trainer.resume_or_load(resume=False)
    trainer.train()
    return cfg


## Visualize and Evaluate

In [5]:
from detectron2.engine import DefaultPredictor
from sklearn.metrics import classification_report
from detectron2.utils.visualizer import ColorMode

def evaluate_model(cfg, dataset_dicts):
    predictor = DefaultPredictor(cfg)
    metadata = MetadataCatalog.get("afm_spots")

    y_true, y_pred = [], []

    for d in dataset_dicts:
        im = cv2.imread(d["file_name"])
        outputs = predictor(im)
        pred_classes = outputs["instances"].pred_classes.cpu().numpy()

        gt_classes = [ann["category_id"] for ann in d["annotations"]]
        y_true.extend(gt_classes)
        y_pred.extend(pred_classes[:len(gt_classes)])

        v = Visualizer(im[:, :, ::-1],
                       metadata=metadata,
                       scale=1.0,
                       instance_mode=ColorMode.IMAGE_BW)
        out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
        plt.figure(figsize=(8, 8))
        plt.imshow(out.get_image())
        plt.axis("off")
        plt.show()

    print(classification_report(y_true, y_pred, target_names=["small", "large"]))


## Full Execution

In [6]:
register_afm_dataset(folder_path, spm_files)
train_cfg = train_model()
evaluate_model(train_cfg, load_afm_dataset(folder_path, spm_files))


AssertionError: Torch not compiled with CUDA enabled