# Object Detection Pipeline - Model Experimentation

This notebook demonstrates the complete pipeline for training different YOLO models on aerial imagery.

In [None]:
from pathlib import Path
from dl4cv_oda import (
    clean_osm_data,
    clip_labels_to_tiles,
    convert_to_yolo_format,
    create_train_val_split,
    create_yolo_config,
    train_model,
)

## Configuration

In [None]:
# Directory layout: every location used by the notebook hangs off DATA_DIR.
DATA_DIR = Path("../data")
RAW_DIR = DATA_DIR.joinpath("raw")
CHIPS_DIR = DATA_DIR.joinpath("chips")
LABELS_DIR = DATA_DIR.joinpath("labels")
YOLO_DIR = DATA_DIR.joinpath("yolo")

# GeoJSON files kept next to the raw download.
OSM_FILE = RAW_DIR.joinpath("kolovai-trees.geojson")
CLEANED_FILE = RAW_DIR.joinpath("cleaned.geojson")

# GeoJSON files kept directly under DATA_DIR.
TREES_BOX_FILE = DATA_DIR.joinpath("trees_box.geojson")
TILES_FILE = DATA_DIR.joinpath("tiles.geojson")

## Step 1: Clean OSM Data

In [None]:
# Skip the cleaning step only when every file it is given as an output target
# is already on disk. Checking CLEANED_FILE alone would skip the step even if
# TREES_BOX_FILE (read by the later clipping and conversion cells) were missing.
# NOTE(review): assumes clean_osm_data writes both CLEANED_FILE and
# TREES_BOX_FILE — confirm against its implementation.
cleaning_outputs = (CLEANED_FILE, TREES_BOX_FILE)
if all(path.exists() for path in cleaning_outputs):
    print("Cleaned data already exists")
else:
    count = clean_osm_data(str(OSM_FILE), str(CLEANED_FILE), str(TREES_BOX_FILE))
    print(f"Processed {count} trees")

## Step 2: Clip Labels to Tiles

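Assumes the tiles have already been downloaded and that `tiles.geojson` exists in the data directory. If that file is missing, the cell below only prints a reminder and skips clipping.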

In [None]:
# Handle the missing-tiles case first: without the tile index, clipping is skipped.
if not TILES_FILE.exists():
    print("Tiles file not found. Run tiling step first.")
else:
    # clip_labels_to_tiles is called with plain string paths.
    boxes_path = str(TREES_BOX_FILE)
    tiles_path = str(TILES_FILE)
    labels_path = str(LABELS_DIR)
    stats = clip_labels_to_tiles(boxes_path, tiles_path, labels_path)
    print(f"Processed {stats['processed']} tiles, {stats['total_trees']} trees")

## Step 3: Convert to YOLO Format

In [None]:
# Positional path arguments, in the order convert_to_yolo_format receives them.
conversion_paths = (
    str(TREES_BOX_FILE),
    str(CHIPS_DIR),
    str(LABELS_DIR),
    str(YOLO_DIR),
)

# "Coconut" is passed as the target species; the returned mapping is reused
# when the YOLO config is created.
class_mapping = convert_to_yolo_format(*conversion_paths, target_species="Coconut")

print("Class mapping:", class_mapping)

## Step 4: Create Train/Val Split

In [None]:
# Split with train_ratio=0.8; the call returns a (train, val) pair of counts.
split_counts = create_train_val_split(
    str(LABELS_DIR),
    str(CHIPS_DIR),
    str(YOLO_DIR),
    train_ratio=0.8,
)
train_count, val_count = split_counts

print(f"Train: {train_count} | Val: {val_count}")

## Step 5: Create YOLO Config

In [None]:
# Create the YOLO config from the class mapping; config_file is what every
# training cell passes to train_model.
yolo_root = str(YOLO_DIR)
config_file = create_yolo_config(yolo_root, class_mapping)
print(f"Config saved to: {config_file}")

## Experiment 1: YOLOv8n (nano)

In [None]:
# Settings for the nano run, gathered in one place so they are easy to compare
# with the other experiments.
nano_settings = {
    "epochs": 100,
    "imgsz": 256,
    "batch": 16,
    "name": "coconut_yolov8n",
    "model_name": "yolov8n.pt",
}
results_n = train_model(str(config_file), **nano_settings)

## Experiment 2: YOLOv8s (small)

In [None]:
# Settings for the small run, gathered in one place so they are easy to compare
# with the other experiments.
small_settings = {
    "epochs": 100,
    "imgsz": 256,
    "batch": 16,
    "name": "coconut_yolov8s",
    "model_name": "yolov8s.pt",
}
results_s = train_model(str(config_file), **small_settings)

## Experiment 3: YOLOv8m (medium)

In [None]:
# Settings for the medium run. batch is 8 here, versus 16 in the nano and small
# experiments — presumably to fit the larger model in memory; confirm.
medium_settings = {
    "epochs": 100,
    "imgsz": 256,
    "batch": 8,
    "name": "coconut_yolov8m",
    "model_name": "yolov8m.pt",
}
results_m = train_model(str(config_file), **medium_settings)

## Compare Results

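Training results and metrics are written to the `runs/detect/` directory (relative to the notebook's working directory). The cell below lists every run found there and prints the mAP50 and mAP50-95 values from the last row of each run's `results.csv`.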

In [None]:
# Imports are kept in this cell so it can be run on its own.
import pandas as pd
from pathlib import Path

# Display label -> column name looked up in each run's results.csv.
METRIC_COLUMNS = {
    "mAP50": "metrics/mAP50(B)",
    "mAP50-95": "metrics/mAP50-95(B)",
}


def read_final_metrics(results_csv):
    """Return the metrics from the last row of a training log.

    Args:
        results_csv: Path to a run's ``results.csv`` file.

    Returns:
        A dict keyed by the labels in ``METRIC_COLUMNS``. A metric whose column
        is not present in the file maps to the string ``"N/A"``. Returns
        ``None`` when the file is empty or contains only a header row.
    """
    try:
        history = pd.read_csv(results_csv)
    except pd.errors.EmptyDataError:
        # A zero-byte file has no header to parse; treat it as "no metrics".
        return None
    if history.empty:
        return None

    # Older Ultralytics releases wrote space-padded header names. Without
    # stripping them, the exact-name lookups below miss and report "N/A".
    history = history.rename(columns=str.strip)
    final_epoch = history.iloc[-1]
    return {
        label: final_epoch.get(column, "N/A")
        for label, column in METRIC_COLUMNS.items()
    }


runs_dir = Path("runs/detect")
if runs_dir.exists():
    experiments = sorted(d.name for d in runs_dir.iterdir() if d.is_dir())
    print("Available experiments:")
    for exp in experiments:
        print(f"  - {exp}")
        results_csv = runs_dir / exp / "results.csv"
        if not results_csv.exists():
            continue
        metrics = read_final_metrics(results_csv)
        if metrics is None:
            continue
        for label, value in metrics.items():
            print(f"    {label}: {value}")
else:
    # Say so explicitly instead of finishing with no output at all.
    print(f"No training runs found in {runs_dir}")

## Custom Experiment

Run your own experiment with custom parameters:

In [None]:
# Tunable settings for the custom run — edit these values before executing.
MODEL = "yolov8n.pt"
EPOCHS = 50
BATCH = 16
IMG_SIZE = 256
EXPERIMENT_NAME = "custom_experiment"

# Map the settings above onto train_model's keyword arguments.
custom_settings = {
    "epochs": EPOCHS,
    "imgsz": IMG_SIZE,
    "batch": BATCH,
    "name": EXPERIMENT_NAME,
    "model_name": MODEL,
}
results_custom = train_model(str(config_file), **custom_settings)