## Setup

In [1]:
!pip install -q -U -r requirements.txt

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [1]:
import os
from pathlib import Path
from getpass import getpass
from datasets import load_dataset

import pandas as pd
import shutil
from tqdm.auto import tqdm
import ast
import pybboxes as pbx

import ultralytics
from ultralytics import YOLO
from ultralytics.utils.torch_utils import get_flops, get_num_params

import wandb


os.environ["WANDB_PROJECT"] = "object-detection-yolov8"
os.environ["WANDB_NOTEBOOK_NAME"] = "yolov8_bloodcells_finetuning.ipynb"

In [13]:
def on_pretrain_routine_start(trainer):
    wandb.init(
        project=os.getenv("WANDB_PROJECT"), 
        # project=trainer.args.project or "YOLOv8",
        job_type="training",
        name=trainer.args.name, 
        save_code=False,
        config=dict(trainer.args)
    )
    # ) if not wandb.run else wandb.run


def on_fit_epoch_end(trainer):
    wandb.run.log(trainer.metrics, step=trainer.epoch + 1)

    if trainer.epoch == 0:
        model_info = {
            "model/parameters": get_num_params(trainer.model),
            "model/GFLOPs": round(get_flops(trainer.model), 3),
            "model/speed(ms)": round(sum(trainer.validator.speed.values()), 3)
        }
        wandb.run.log(model_info, step=trainer.epoch + 1)

 
def on_train_epoch_end(trainer):
    wandb.run.log(trainer.label_loss_items(trainer.tloss, prefix="train"), step=trainer.epoch + 1)
    wandb.run.log(trainer.lr, step=trainer.epoch + 1)

    if trainer.epoch == 0:
        print(f"[INFO] Logging atraining artifacts for Epoch {trainer.epoch + 1}...")
        # wandb.run.log({f.stem: wandb.Image(str(f)) for f in trainer.save_dir.glob('train_batch*.jpg')}, step=trainer.epoch + 1)
        images = [wandb.Image(str(img_path), caption=img_path.stem) for img_path in Path(trainer.save_dir).glob("*.jpg")]
        wandb.run.log({f"training artifacts": images}, step=trainer.epoch + 1)


def on_train_end(trainer):
    art = wandb.Artifact(type="model", name=f"run_{wandb.run.id}_model")
    if trainer.best.exists():
        art.add_file(trainer.best)
        wandb.run.log_artifact(art)
    


def teardown(trainer):
    wandb.finish()

def on_predict_start(predictor):
    wandb.init(
        project=os.getenv("WANDB_PROJECT"), 
        # name=predictor.args.name,
        job_type="prediction",
        tags=["prediction", predictor.args.name],
        save_code=True,
        config=dict(predictor.args)
    )
    # ) if not wandb.run else wandb.run

def on_predict_end(predictor):
    split = Path(predictor.data_path).parent.stem
    images = [wandb.Image(str(img_path), caption=img_path.stem) for img_path in Path(predictor.save_dir).glob("*.jpg")]
    wandb.run.log({f"{split} set | {predictor.args.name} | pred": images})
    wandb.finish()


callbacks = {
    "on_pretrain_routine_start": on_pretrain_routine_start,
    "teardown": teardown,
    "on_train_epoch_end": on_train_epoch_end,
    "on_fit_epoch_end": on_fit_epoch_end,
    "on_train_end": on_train_end,
    "on_predict_start": on_predict_start,
    "on_predict_end": on_predict_end,
} if wandb else {}

In [3]:
dataset = load_dataset("keremberke/blood-cell-object-detection", "full")

In [4]:
!rm -rf /workspace/object-detection-balloons/datasets/bloodcells
dataset_dir = "/workspace/object-detection-balloons/datasets/bloodcells"
os.makedirs(dataset_dir, exist_ok=True)

In [5]:
for split in dataset:
    for row in dataset[split]:
        img_path = Path(os.path.join(dataset_dir, "images", split, str(row["image_id"])) + ".jpg")
        os.makedirs(img_path.parent, exist_ok=True)
        row["image"].save(img_path)
        for bbox, category in zip(row["objects"]["bbox"], row["objects"]["category"]):
            bbox_yolo = pbx.convert_bbox(bbox, from_type="coco", to_type="yolo", image_size=(row["width"], row["height"]))
            bbox_yolo = " ".join([str(bb) for bb in bbox_yolo])
            file_path = Path(os.path.join(dataset_dir, "labels", split, str(row["image_id"])) + ".txt")
            os.makedirs(file_path.parent, exist_ok=True)
            with open(file_path, "a") as f:
                f.write(f"{category} {bbox_yolo}\n")

In [6]:
# ultralytics.checks()

In [7]:
train_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/train"
train_img_paths = [os.path.join(train_img_base_path, fname) for fname in os.listdir(train_img_base_path)]
print(train_img_paths[:3])

val_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/validation"
val_img_paths = [os.path.join(val_img_base_path, fname) for fname in os.listdir(val_img_base_path)]
print(val_img_paths[:3])

test_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/test"
test_img_paths = [os.path.join(test_img_base_path, fname) for fname in os.listdir(test_img_base_path)]
print(test_img_paths[:3])

['/workspace/object-detection-balloons/datasets/bloodcells/images/train/3.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/train/129.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/train/153.jpg']
['/workspace/object-detection-balloons/datasets/bloodcells/images/validation/51.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/14.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/48.jpg']
['/workspace/object-detection-balloons/datasets/bloodcells/images/test/10.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/32.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/34.jpg']


## Log images on WANDB

In [8]:
run = wandb.init(job_type="dataset")

class_id_to_label = {
    0: "platelets",
    1: "rbc",
    2: "wbc",
}

images = []
for split in tqdm(dataset):
    for row in dataset[split]:
        positions = [
            dict(zip(("minX", "minY", "maxX", "maxY"), pbx.convert_bbox(bbox, from_type="coco", to_type="voc", image_size=(row["width"], row["height"]))))
            for bbox in row["objects"]["bbox"]
        ]

        class_ids = row["objects"]["category"]
        box_captions = [class_id_to_label[id] for id in class_ids]

        box_data = [dict(zip(("position", "class_id", "box_caption", "domain"), x)) for x in zip(positions, class_ids, box_captions, ["pixel"] * len(class_ids))]

        images.append(wandb.Image(row["image"], caption=str(row["image_id"]), boxes={"ground_truth": {"box_data": box_data}}))

    # run.finish()
    # break
    run.log({f"{split}_set": images})
    images = []

run.finish()

[34m[1mwandb[0m: Currently logged in as: [33mmatt24[0m. Use [1m`wandb login --relogin`[0m to force relogin


  0%|          | 0/3 [00:00<?, ?it/s]



VBox(children=(Label(value='27.942 MB of 32.702 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.8544…

## Baseline

In [9]:
checkpoint = 'yolov8n.pt'

In [10]:
yolo_base = YOLO(checkpoint)

for cb_name, fb_fn in callbacks.items():
    yolo_base.add_callback(cb_name, fb_fn)

preds = yolo_base.predict(val_img_paths, save=True, project="preds", name="baseline")




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011112656858232286, max=1.0…

0: 640x640 1 toothbrush, 1: 640x640 (no detections), 2: 640x640 1 person, 3: 640x640 1 teddy bear, 4: 640x640 1 person, 5: 640x640 (no detections), 6: 640x640 2 persons, 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 1 person, 11: 640x640 1 person, 12: 640x640 2 persons, 13: 640x640 (no detections), 14: 640x640 (no detections), 15: 640x640 1 person, 16: 640x640 (no detections), 17: 640x640 (no detections), 18: 640x640 1 person, 1 sports ball, 19: 640x640 1 person, 20: 640x640 1 toothbrush, 21: 640x640 1 person, 22: 640x640 (no detections), 23: 640x640 1 toothbrush, 24: 640x640 1 person, 1 toothbrush, 25: 640x640 1 donut, 26: 640x640 2 persons, 27: 640x640 (no detections), 28: 640x640 (no detections), 29: 640x640 (no detections), 30: 640x640 2 sports balls, 1 toothbrush, 31: 640x640 1 donut, 32: 640x640 1 sports ball, 33: 640x640 1 person, 1 sports ball, 34: 640x640 (no detections), 35: 640x640 (no detections), 36: 640x640 (no detections)

## Finetuning

In [14]:
yolo_finetuned = YOLO(checkpoint)  # load a pretrained model (recommended for training)
for cb_name, cb_fn in callbacks.items():
    yolo_finetuned.add_callback(cb_name, cb_fn)

dataset_yaml_path = "/workspace/object-detection-balloons/bloodcells.yaml"

In [15]:
# Use the model
results = yolo_finetuned.train(data=dataset_yaml_path, epochs=2, batch=16)  # train the model

Ultralytics YOLOv8.0.187 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/workspace/object-detection-balloons/bloodcells.yaml, epochs=2, patience=50, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, b

Freezing layer 'model.22.dfl.conv.weight'
[34m[1mAMP: [0mrunning Automatic Mixed Precision (AMP) checks with YOLOv8n...
[34m[1mAMP: [0mchecks passed ✅
[34m[1mtrain: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/train.cache... 255 images, 0 backgrounds, 0 corrupt: 100%|██████████| 255/255 [00:00<?, ?it/s]
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/validation.cache... 73 images, 0 backgrounds, 0 corrupt: 100%|██████████| 73/73 [00:00<?, ?it/s]
Plotting labels to runs/detect/train7/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.001429, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 8 dataloader workers
Logging results to [1mruns/detect/train7[0m
Sta

[INFO] Logging atraining artifacts for Epoch 1...


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.06it/s]
                   all         73        967     0.0438       0.63      0.186     0.0951

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size
        2/2      2.43G      1.311      2.002      1.385        358        640: 100%|██████████| 16/16 [00:01<00:00, 11.41it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:00<00:00,  7.13it/s]
                   all         73        967     0.0495      0.628      0.501      0.337

2 epochs completed in 0.002 hours.
Optimizer stripped from runs/detect/train7/weights/last.pt, 6.2MB
Optimizer stripped from runs/detect/train7/weights/best.pt, 6.2MB

Validating runs/detect/train7/weights/best.pt...
Ultralytics YOLOv8.0.187 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
Model summary (fused): 168 

0,1
lr/pg0,▁█
lr/pg1,▁█
lr/pg2,▁█
metrics/mAP50(B),▁█
metrics/mAP50-95(B),▁█
metrics/precision(B),▁█
metrics/recall(B),█▁
model/GFLOPs,▁
model/parameters,▁
model/speed(ms),▁

0,1
lr/pg0,0.00022
lr/pg1,0.00022
lr/pg2,0.00022
metrics/mAP50(B),0.50209
metrics/mAP50-95(B),0.33903
metrics/precision(B),0.04939
metrics/recall(B),0.62832
model/GFLOPs,0.0
model/parameters,3011433.0
model/speed(ms),3.275


## Evaluation

### Validation Eval

In [14]:
!rm -rf /workspace/object-detection-balloons/preds/val
preds = yolo_finetuned.predict(val_img_paths, save=True, project="preds", name="val") 




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.01111417164405187, max=1.0)…

0: 640x640 15 rbcs, 1 wbc, 1: 640x640 13 rbcs, 1 wbc, 2: 640x640 2 plateletss, 16 rbcs, 1 wbc, 3: 640x640 14 rbcs, 1 wbc, 4: 640x640 2 plateletss, 12 rbcs, 1 wbc, 5: 640x640 21 rbcs, 1 wbc, 6: 640x640 11 rbcs, 1 wbc, 7: 640x640 1 platelets, 11 rbcs, 1 wbc, 8: 640x640 11 rbcs, 1 wbc, 9: 640x640 3 plateletss, 18 rbcs, 1 wbc, 10: 640x640 12 rbcs, 2 wbcs, 11: 640x640 13 rbcs, 1 wbc, 12: 640x640 16 rbcs, 1 wbc, 13: 640x640 1 platelets, 9 rbcs, 1 wbc, 14: 640x640 4 plateletss, 17 rbcs, 1 wbc, 15: 640x640 1 platelets, 13 rbcs, 1 wbc, 16: 640x640 1 platelets, 13 rbcs, 1 wbc, 17: 640x640 1 platelets, 21 rbcs, 1 wbc, 18: 640x640 1 platelets, 20 rbcs, 1 wbc, 19: 640x640 1 platelets, 23 rbcs, 1 wbc, 20: 640x640 17 rbcs, 1 wbc, 21: 640x640 19 rbcs, 1 wbc, 22: 640x640 18 rbcs, 1 wbc, 23: 640x640 24 rbcs, 1 wbc, 24: 640x640 1 platelets, 15 rbcs, 1 wbc, 25: 640x640 18 rbcs, 1 wbc, 26: 640x640 3 plateletss, 16 rbcs, 1 wbc, 27: 640x640 14 rbcs, 1 wbc, 28: 640x640 20 rbcs, 1 wbc, 29: 640x640 16 rbcs, 1 w

In [16]:
metrics = yolo_finetuned.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="val")

Ultralytics YOLOv8.0.187 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/validation.cache... 73 images, 0 backgrounds, 0 corrupt: 100%|██████████| 73/73 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 5/5 [00:02<00:00,  2.20it/s]
                   all         73        967      0.828      0.901      0.928      0.651
             platelets         73         76      0.878      0.758      0.913      0.505
                   rbc         73        819       0.66      0.946      0.882      0.631
                   wbc         73         72      0.945          1      0.987      0.819
Speed: 1.4ms preprocess, 1.8ms inference, 0.0ms loss, 2.1ms postprocess per image
Results saved to [1mruns/detect/val15[0m


### Test Eval

In [None]:
!rm -rf /workspace/object-detection-balloons/preds/test
preds = yolo_finetuned.predict(test_img_paths, save=True, project="preds", name="test") 




VBox(children=(Label(value='Waiting for wandb.init()...\r'), FloatProgress(value=0.011114031987057791, max=1.0…

0: 640x640 1 platelets, 22 rbcs, 1 wbc, 1: 640x640 2 plateletss, 15 rbcs, 2 wbcs, 2: 640x640 14 rbcs, 1 wbc, 3: 640x640 3 plateletss, 14 rbcs, 1 wbc, 4: 640x640 17 rbcs, 1 wbc, 5: 640x640 1 platelets, 18 rbcs, 1 wbc, 6: 640x640 1 platelets, 14 rbcs, 1 wbc, 7: 640x640 12 rbcs, 1 wbc, 8: 640x640 1 platelets, 11 rbcs, 1 wbc, 9: 640x640 2 plateletss, 8 rbcs, 1 wbc, 10: 640x640 1 platelets, 13 rbcs, 1 wbc, 11: 640x640 1 platelets, 13 rbcs, 1 wbc, 12: 640x640 1 platelets, 17 rbcs, 1 wbc, 13: 640x640 19 rbcs, 1 wbc, 14: 640x640 14 rbcs, 2 wbcs, 15: 640x640 14 rbcs, 1 wbc, 16: 640x640 16 rbcs, 1 wbc, 17: 640x640 14 rbcs, 1 wbc, 18: 640x640 2 plateletss, 16 rbcs, 1 wbc, 19: 640x640 19 rbcs, 1 wbc, 20: 640x640 1 platelets, 15 rbcs, 1 wbc, 21: 640x640 17 rbcs, 1 wbc, 22: 640x640 2 plateletss, 19 rbcs, 1 wbc, 23: 640x640 17 rbcs, 1 wbc, 24: 640x640 17 rbcs, 1 wbc, 25: 640x640 22 rbcs, 1 wbc, 26: 640x640 14 rbcs, 1 wbc, 27: 640x640 2 plateletss, 18 rbcs, 1 wbc, 28: 640x640 19 rbcs, 1 wbc, 29: 640x6

wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.
wandb: Network error (SSLError), entering retry loop.


In [17]:
metrics = yolo_finetuned.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="test")

Ultralytics YOLOv8.0.187 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/test... 36 images, 0 backgrounds, 0 corrupt: 100%|██████████| 36/36 [00:00<00:00, 795.56it/s]
[34m[1mval: [0mNew cache created: /workspace/object-detection-balloons/datasets/bloodcells/labels/test.cache
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 3/3 [00:01<00:00,  2.03it/s]
                   all         36        471      0.819      0.859      0.902      0.613
             platelets         36         36      0.835      0.694      0.866      0.437
                   rbc         36        398      0.693      0.909      0.868      0.621
                   wbc         36         37      0.929      0.973      0.973      0.783
Speed: 2.1ms preprocess, 3.3ms inference, 0.0ms loss, 2.6ms postprocess per image
Results saved to [1mruns/dete