## Setup

In [1]:
!pip install -q -U -r requirements.txt

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [2]:
import os
from pathlib import Path
from getpass import getpass
from datasets import load_dataset

import pandas as pd
import shutil
from tqdm.auto import tqdm
import ast
import pybboxes as pbx

import ultralytics
from ultralytics import YOLO

import wandb

import random

# Environment settings for wandb; WANDB_PROJECT is also read back by the
# prediction callback via os.getenv.
os.environ.update(
    {
        "WANDB_PROJECT": "object-detection-yolov8",
        "WANDB_NOTEBOOK_NAME": "yolov8_bloodcells_finetuning.ipynb",
    }
)

In [3]:
# `ultralytics.yolo.utils` is the legacy package layout; current releases expose the
# same helpers under `ultralytics.utils`. Try the current location first and fall
# back to the legacy path so the notebook imports cleanly on either layout.
try:
    from ultralytics.utils.torch_utils import get_flops, get_num_params
except ImportError:
    from ultralytics.yolo.utils.torch_utils import get_flops, get_num_params

# try:
#     import wandb

#     assert hasattr(wandb, '__version__')
# except (ImportError, AssertionError):
#     wandb = None


def on_pretrain_routine_start(trainer):
    """Start a W&B run for this training job unless a run is already active.

    The run uses ``trainer.args.project`` as project (falling back to
    "YOLOv8"), ``trainer.args.name`` as run name, and the trainer arguments
    as its config.
    """
    if wandb.run:
        return  # a run is already active; keep using it

    run_settings = {
        "project": trainer.args.project or "YOLOv8",
        "name": trainer.args.name,
        "save_code": True,
        "config": dict(trainer.args),
    }
    wandb.init(**run_settings)


def on_fit_epoch_end(trainer):
    """Log ``trainer.metrics`` for the finished epoch to the active W&B run.

    After the first epoch (``trainer.epoch == 0``) a second record is logged
    with the parameter count, GFLOPs and the summed validator speed values.
    Both records use step ``trainer.epoch + 1``.
    """
    step = trainer.epoch + 1
    wandb.run.log(trainer.metrics, step=step)

    if trainer.epoch != 0:
        return

    wandb.run.log(
        {
            "model/parameters": get_num_params(trainer.model),
            "model/GFLOPs": round(get_flops(trainer.model), 3),
            "model/speed(ms)": round(sum(trainer.validator.speed.values()), 3),
        },
        step=step,
    )


def on_train_epoch_end(trainer):
    """Log training losses, learning rates and saved images after each training epoch.

    Three records are written to the active W&B run at step ``trainer.epoch + 1``:
    the labelled training loss items, ``trainer.lr``, and a list of every
    ``*.jpg`` currently present in ``trainer.save_dir`` (captioned with the
    file stem). The image list is logged even when it is empty.
    """
    step = trainer.epoch + 1
    wandb.run.log(trainer.label_loss_items(trainer.tloss, prefix="train"), step=step)
    wandb.run.log(trainer.lr, step=step)

    # Sorted so the images are logged in a stable order; glob order is arbitrary.
    image_paths = sorted(Path(trainer.save_dir).glob("*.jpg"))
    images = [wandb.Image(str(img_path), caption=img_path.stem) for img_path in image_paths]
    wandb.run.log({f"train set | {trainer.args.name} | train": images}, step=step)


def on_train_end(trainer):
    """Upload the best checkpoint as a W&B model artifact, if that file exists.

    The artifact is named after the active run id. Nothing is logged when
    ``trainer.best`` does not exist. The W&B run is not finished here.
    """
    artifact_name = f"run_{wandb.run.id}_model"
    model_artifact = wandb.Artifact(type="model", name=artifact_name)

    if not trainer.best.exists():
        return

    model_artifact.add_file(trainer.best)
    wandb.run.log_artifact(model_artifact)


def on_predict_start(predictor):
    """Start a W&B run with job type "prediction" unless a run is already active.

    The project comes from the ``WANDB_PROJECT`` environment variable; the run
    is tagged with "prediction" and ``predictor.args.name`` and stores the
    predictor arguments as its config.
    """
    if wandb.run:
        return  # a run is already active; keep using it

    run_settings = {
        "project": os.getenv("WANDB_PROJECT"),
        "job_type": "prediction",
        "tags": ["prediction", predictor.args.name],
        "save_code": True,
        "config": dict(predictor.args),
    }
    wandb.init(**run_settings)

def on_predict_end(predictor):
    """Log the saved prediction images to the active W&B run, then finish the run.

    The log key is built from the name of the directory that contains
    ``predictor.data_path`` and from ``predictor.args.name``. Every ``*.jpg``
    found in ``predictor.save_dir`` is logged, captioned with its file stem.

    This callback only reads the saved images and talks to W&B; it does not
    load or re-save any model checkpoint.
    """
    split = Path(predictor.data_path).parent.stem

    # Sorted so the images are logged in a stable order; glob order is arbitrary.
    image_paths = sorted(Path(predictor.save_dir).glob("*.jpg"))
    images = [wandb.Image(str(img_path), caption=img_path.stem) for img_path in image_paths]

    wandb.run.log({f"{split} set | {predictor.args.name} | pred": images})
    wandb.finish()



In [4]:
# Ultralytics event name -> W&B handler; each pair is registered on a model
# through `add_callback`. Left empty when `wandb` is falsy.
callbacks = (
    dict(
        on_pretrain_routine_start=on_pretrain_routine_start,
        on_train_epoch_end=on_train_epoch_end,
        on_fit_epoch_end=on_fit_epoch_end,
        on_train_end=on_train_end,
        on_predict_start=on_predict_start,
        on_predict_end=on_predict_end,
    )
    if wandb
    else {}
)

In [73]:
# Load the "full" configuration of the blood-cell detection dataset with
# `datasets.load_dataset`; the result is iterated per split further down.
dataset = load_dataset("keremberke/blood-cell-object-detection", "full")

In [74]:
# Root folder for the exported images/labels; created up front if missing.
dataset_dir = "/workspace/object-detection-balloons/datasets/bloodcells"
Path(dataset_dir).mkdir(parents=True, exist_ok=True)

In [76]:
# rm -rf /workspace/object-detection-balloons/datasets/bloodcells

In [77]:
# Export every split to the YOLO directory layout under `dataset_dir`:
#   images/<split>/<image_id>.jpg   and   labels/<split>/<image_id>.txt
# Each label file is written once per image in "w" mode, so re-running this cell
# replaces the labels instead of appending duplicate boxes to existing files.
for split in dataset:
    images_dir = Path(dataset_dir, "images", split)
    labels_dir = Path(dataset_dir, "labels", split)
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for row in dataset[split]:
        img_path = images_dir / f"{row['image_id']}.jpg"
        row["image"].save(img_path)

        # One "<category> <yolo bbox values>" line per annotated object.
        label_lines = []
        for bbox, category in zip(row["objects"]["bbox"], row["objects"]["category"]):
            bbox_yolo = pbx.convert_bbox(bbox, from_type="coco", to_type="yolo", image_size=(row["width"], row["height"]))
            bbox_yolo = " ".join(str(bb) for bb in bbox_yolo)
            label_lines.append(f"{category} {bbox_yolo}\n")

        file_path = labels_dir / f"{row['image_id']}.txt"
        with open(file_path, "w") as f:
            f.writelines(label_lines)

In [7]:
# Print the Ultralytics environment summary (library, Python and torch versions,
# device, CPU/RAM/disk), as shown in the cell output.
ultralytics.checks()

Ultralytics YOLOv8.0.170 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
Setup complete ✅ (32 CPUs, 94.3 GB RAM, 1.3/20.0 GB disk)


In [8]:
def _list_dir_paths(base_path):
    """Return the joined path of every entry directly inside ``base_path``, in listing order."""
    return [os.path.join(base_path, fname) for fname in os.listdir(base_path)]


# Image paths of the exported validation split.
val_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/validation"
val_img_paths = _list_dir_paths(val_img_base_path)
print(val_img_paths[:3])

# Image paths of the exported test split.
test_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/test"
test_img_paths = _list_dir_paths(test_img_base_path)
print(test_img_paths[:3])

['/workspace/object-detection-balloons/datasets/bloodcells/images/validation/51.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/14.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/48.jpg']
['/workspace/object-detection-balloons/datasets/bloodcells/images/test/10.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/32.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/34.jpg']


In [None]:
# NOTE(review): this cell repeats the dataset export that already appears earlier in
# the notebook. Label files are written in "w" mode, once per image, so running the
# export a second time rewrites the labels instead of appending duplicate boxes.
for split in dataset:
    images_dir = Path(dataset_dir, "images", split)
    labels_dir = Path(dataset_dir, "labels", split)
    images_dir.mkdir(parents=True, exist_ok=True)
    labels_dir.mkdir(parents=True, exist_ok=True)

    for row in dataset[split]:
        img_path = images_dir / f"{row['image_id']}.jpg"
        row["image"].save(img_path)

        # One "<category> <yolo bbox values>" line per annotated object.
        label_lines = []
        for bbox, category in zip(row["objects"]["bbox"], row["objects"]["category"]):
            bbox_yolo = pbx.convert_bbox(bbox, from_type="coco", to_type="yolo", image_size=(row["width"], row["height"]))
            bbox_yolo = " ".join(str(bb) for bb in bbox_yolo)
            label_lines.append(f"{category} {bbox_yolo}\n")

        file_path = labels_dir / f"{row['image_id']}.txt"
        with open(file_path, "w") as f:
            f.writelines(label_lines)

## Log dataset images to Weights & Biases

In [38]:
# Log every split of the dataset to W&B as images with ground-truth box overlays,
# one `<split>_set` record per split, then close the run.
run = wandb.init(job_type="dataset")

class_id_to_label = {0: "platelets", 1: "rbc", 2: "wbc"}
corner_keys = ("minX", "minY", "maxX", "maxY")

images = []
for split in tqdm(dataset):
    for row in dataset[split]:
        image_size = (row["width"], row["height"])
        class_ids = row["objects"]["category"]

        box_data = []
        for bbox, class_id in zip(row["objects"]["bbox"], class_ids):
            # Boxes are converted from COCO to VOC corner values and logged in the "pixel" domain.
            corners = pbx.convert_bbox(bbox, from_type="coco", to_type="voc", image_size=image_size)
            box_data.append(
                {
                    "position": dict(zip(corner_keys, corners)),
                    "class_id": class_id,
                    "box_caption": class_id_to_label[class_id],
                    "domain": "pixel",
                }
            )

        overlay = {"ground_truth": {"box_data": box_data}}
        images.append(wandb.Image(row["image"], caption=str(row["image_id"]), boxes=overlay))

    run.log({f"{split}_set": images})
    images = []  # start the next split with an empty batch

run.finish()

  0%|          | 0/3 [00:00<?, ?it/s]



wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.


## Baseline

In [9]:
# Pretrained weights used for the baseline predictions and as the starting
# point for fine-tuning (both cells call `YOLO(checkpoint)`).
checkpoint = 'yolov8n.pt'

In [105]:
# Baseline: run the pretrained checkpoint on the validation images as-is.
yolo_base = YOLO(checkpoint)

# Attach each W&B handler to the model under its event name.
for event_name, handler in callbacks.items():
    yolo_base.add_callback(event_name, handler)

preds = yolo_base.predict(
    val_img_paths,
    save=True,
    project="preds",
    name="baseline",
)




0: 640x640 1 toothbrush, 1: 640x640 (no detections), 2: 640x640 1 person, 3: 640x640 1 teddy bear, 4: 640x640 1 person, 5: 640x640 (no detections), 6: 640x640 2 persons, 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 1 person, 11: 640x640 1 person, 12: 640x640 2 persons, 13: 640x640 (no detections), 14: 640x640 (no detections), 15: 640x640 1 person, 16: 640x640 (no detections), 17: 640x640 (no detections), 18: 640x640 1 person, 1 sports ball, 19: 640x640 1 person, 20: 640x640 1 toothbrush, 21: 640x640 1 person, 22: 640x640 (no detections), 23: 640x640 1 toothbrush, 24: 640x640 1 person, 1 toothbrush, 25: 640x640 1 donut, 26: 640x640 2 persons, 27: 640x640 (no detections), 28: 640x640 (no detections), 29: 640x640 (no detections), 30: 640x640 2 sports balls, 1 toothbrush, 31: 640x640 1 donut, 32: 640x640 1 sports ball, 33: 640x640 1 person, 1 sports ball, 34: 640x640 (no detections), 35: 640x640 (no detections), 36: 640x640 (no detections)

wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.


## Finetuning

In [10]:
# Dataset description file passed to `train` in the next cell.
dataset_yaml_path = "/workspace/object-detection-balloons/bloodcells.yaml"

# Start from the pretrained checkpoint and attach each W&B handler under its event name.
yolo_finetuned = YOLO(checkpoint)
for event_name, handler in callbacks.items():
    yolo_finetuned.add_callback(event_name, handler)

In [13]:
# Fine-tune on the dataset described by `dataset_yaml_path` for 2 epochs with batch size 32.
# NOTE(review): the stored output of this cell ends in KeyboardInterrupt, so the
# predictions in the evaluation section presumably come from an earlier training
# run — re-run this cell before relying on them.
results = yolo_finetuned.train(data=dataset_yaml_path, epochs=2, batch=32)  # returns the training results

Ultralytics YOLOv8.0.170 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/workspace/object-detection-balloons/bloodcells.yaml, epochs=2, patience=50, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, b

Model summary: 225 layers, 3011433 parameters, 3011417 gradients

Transferred 355/355 items from pretrained weights


Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...


KeyboardInterrupt: 

In [21]:
# Close the W&B run here: the training callbacks in this notebook do not finish it.
wandb.finish()

## Evaluation

### Validation Eval

In [9]:
# Delete predictions left over from a previous run before predicting again.
# `shutil.rmtree(..., ignore_errors=True)` is the pure-Python equivalent of `rm -rf`:
# it does not depend on a shell and is silent when the folder does not exist.
shutil.rmtree("/workspace/object-detection-balloons/preds/val", ignore_errors=True)
preds = yolo_finetuned.predict(val_img_paths, save=True, project="preds", name="val")


0: 640x640 30 rbcs, 1 wbc, 1: 640x640 1 platelets, 15 rbcs, 1 wbc, 2: 640x640 2 plateletss, 29 rbcs, 1 wbc, 3: 640x640 1 platelets, 24 rbcs, 1 wbc, 4: 640x640 2 plateletss, 21 rbcs, 1 wbc, 5: 640x640 1 platelets, 29 rbcs, 1 wbc, 6: 640x640 15 rbcs, 1 wbc, 7: 640x640 2 plateletss, 14 rbcs, 1 wbc, 8: 640x640 17 rbcs, 1 wbc, 9: 640x640 3 plateletss, 23 rbcs, 1 wbc, 10: 640x640 1 platelets, 17 rbcs, 2 wbcs, 11: 640x640 1 platelets, 16 rbcs, 1 wbc, 12: 640x640 18 rbcs, 1 wbc, 13: 640x640 1 platelets, 20 rbcs, 1 wbc, 14: 640x640 5 plateletss, 23 rbcs, 1 wbc, 15: 640x640 1 platelets, 16 rbcs, 1 wbc, 16: 640x640 1 platelets, 18 rbcs, 1 wbc, 17: 640x640 1 platelets, 27 rbcs, 1 wbc, 18: 640x640 1 platelets, 27 rbcs, 1 wbc, 19: 640x640 1 platelets, 26 rbcs, 1 wbc, 20: 640x640 22 rbcs, 1 wbc, 21: 640x640 2 plateletss, 20 rbcs, 2 wbcs, 22: 640x640 23 rbcs, 1 wbc, 23: 640x640 23 rbcs, 1 wbc, 24: 640x640 1 platelets, 20 rbcs, 1 wbc, 25: 640x640 22 rbcs, 1 wbc, 26: 640x640 3 plateletss, 25 rbcs, 1 wb

### Test Eval

In [10]:
# Delete predictions left over from a previous run before predicting again.
# `shutil.rmtree(..., ignore_errors=True)` is the pure-Python equivalent of `rm -rf`:
# it does not depend on a shell and is silent when the folder does not exist.
shutil.rmtree("/workspace/object-detection-balloons/preds/test", ignore_errors=True)
preds = yolo_finetuned.predict(test_img_paths, save=True, project="preds", name="test")


0: 640x640 1 platelets, 25 rbcs, 1 wbc, 1: 640x640 3 plateletss, 24 rbcs, 2 wbcs, 2: 640x640 1 platelets, 22 rbcs, 1 wbc, 3: 640x640 4 plateletss, 16 rbcs, 1 wbc, 4: 640x640 2 plateletss, 25 rbcs, 1 wbc, 5: 640x640 2 plateletss, 27 rbcs, 1 wbc, 6: 640x640 1 platelets, 17 rbcs, 1 wbc, 7: 640x640 20 rbcs, 1 wbc, 8: 640x640 1 platelets, 23 rbcs, 1 wbc, 9: 640x640 3 plateletss, 16 rbcs, 2 wbcs, 10: 640x640 2 plateletss, 22 rbcs, 1 wbc, 11: 640x640 1 platelets, 16 rbcs, 1 wbc, 12: 640x640 1 platelets, 27 rbcs, 1 wbc, 13: 640x640 20 rbcs, 1 wbc, 14: 640x640 2 plateletss, 25 rbcs, 1 wbc, 15: 640x640 14 rbcs, 1 wbc, 16: 640x640 22 rbcs, 1 wbc, 17: 640x640 18 rbcs, 1 wbc, 18: 640x640 2 plateletss, 18 rbcs, 1 wbc, 19: 640x640 2 plateletss, 29 rbcs, 2 wbcs, 20: 640x640 1 platelets, 19 rbcs, 1 wbc, 21: 640x640 2 plateletss, 30 rbcs, 1 wbc, 22: 640x640 3 plateletss, 18 rbcs, 1 wbc, 23: 640x640 18 rbcs, 1 wbc, 24: 640x640 21 rbcs, 1 wbc, 25: 640x640 2 plateletss, 22 rbcs, 1 wbc, 26: 640x640 1 plate