## Setup

In [1]:
!pip install -q -U -r requirements.txt

[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.1.2[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython -m pip install --upgrade pip[0m


In [3]:
import os
from pathlib import Path
from datasets import load_dataset

import pandas as pd
from tqdm.auto import tqdm
import pybboxes as pbx

import ultralytics
from ultralytics import YOLO

In [5]:
dataset = load_dataset("keremberke/blood-cell-object-detection", "full")

In [6]:
!rm -rf /workspace/object-detection-balloons/datasets/bloodcells
dataset_dir = "/workspace/object-detection-balloons/datasets/bloodcells"
os.makedirs(dataset_dir, exist_ok=True)

In [7]:
for split in dataset:
    for row in dataset[split]:
        img_path = Path(os.path.join(dataset_dir, "images", split, str(row["image_id"])) + ".jpg")
        os.makedirs(img_path.parent, exist_ok=True)
        row["image"].save(img_path)
        for bbox, category in zip(row["objects"]["bbox"], row["objects"]["category"]):
            bbox_yolo = pbx.convert_bbox(bbox, from_type="coco", to_type="yolo", image_size=(row["width"], row["height"]))
            bbox_yolo = " ".join([str(bb) for bb in bbox_yolo])
            file_path = Path(os.path.join(dataset_dir, "labels", split, str(row["image_id"])) + ".txt")
            os.makedirs(file_path.parent, exist_ok=True)
            with open(file_path, "a") as f:
                f.write(f"{category} {bbox_yolo}\n")

In [8]:
ultralytics.checks()

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
Setup complete âœ… (32 CPUs, 94.3 GB RAM, 1.3/20.0 GB disk)


In [9]:
train_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/train"
train_img_paths = [os.path.join(train_img_base_path, fname) for fname in os.listdir(train_img_base_path)]
print(train_img_paths[:3])

val_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/validation"
val_img_paths = [os.path.join(val_img_base_path, fname) for fname in os.listdir(val_img_base_path)]
print(val_img_paths[:3])

test_img_base_path = "/workspace/object-detection-balloons/datasets/bloodcells/images/test"
test_img_paths = [os.path.join(test_img_base_path, fname) for fname in os.listdir(test_img_base_path)]
print(test_img_paths[:3])

['/workspace/object-detection-balloons/datasets/bloodcells/images/train/3.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/train/129.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/train/153.jpg']
['/workspace/object-detection-balloons/datasets/bloodcells/images/validation/51.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/14.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/48.jpg']
['/workspace/object-detection-balloons/datasets/bloodcells/images/test/10.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/32.jpg', '/workspace/object-detection-balloons/datasets/bloodcells/images/test/34.jpg']


## Baseline

In [10]:
checkpoint = 'yolov8n.pt'

In [11]:
yolo_base = YOLO(checkpoint)

preds = yolo_base.predict(val_img_paths, save=True, project="preds", name="baseline")


0: 640x640 1 toothbrush, 1: 640x640 (no detections), 2: 640x640 1 person, 3: 640x640 1 teddy bear, 4: 640x640 1 person, 5: 640x640 (no detections), 6: 640x640 2 persons, 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 1 person, 11: 640x640 1 person, 12: 640x640 2 persons, 13: 640x640 (no detections), 14: 640x640 (no detections), 15: 640x640 1 person, 16: 640x640 (no detections), 17: 640x640 (no detections), 18: 640x640 1 person, 1 sports ball, 19: 640x640 1 person, 20: 640x640 1 toothbrush, 21: 640x640 1 person, 22: 640x640 (no detections), 23: 640x640 1 toothbrush, 24: 640x640 1 person, 1 toothbrush, 25: 640x640 1 donut, 26: 640x640 2 persons, 27: 640x640 (no detections), 28: 640x640 (no detections), 29: 640x640 (no detections), 30: 640x640 2 sports balls, 1 toothbrush, 31: 640x640 1 donut, 32: 640x640 1 sports ball, 33: 640x640 1 person, 1 sports ball, 34: 640x640 (no detections), 35: 640x640 (no detections), 36: 640x640 (no detections

In [14]:
metrics = yolo_base.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="val")

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/validation... 73 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 73/73 [00:00<00:00, 887.25it/s]
[34m[1mval: [0mNew cache created: /workspace/object-detection-balloons/datasets/bloodcells/labels/validation.cache
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 5/5 [00:01<00:00,  2.71it/s]
                   all         73        967   0.000121    0.00439   6.62e-05   3.97e-05
                person         73         76   0.000362     0.0132   0.000199   0.000119
               bicycle         73        819          0          0          0          0
                   car         73         72          0          0          0          0
Speed: 1.6ms preprocess, 2.3ms inference, 0.0ms loss, 1.4

In [15]:
metrics = yolo_base.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="test")

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/test... 36 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 36/36 [00:00<00:00, 944.56it/s]
[34m[1mval: [0mNew cache created: /workspace/object-detection-balloons/datasets/bloodcells/labels/test.cache
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [00:01<00:00,  2.55it/s]
                   all         36        471   0.000248    0.00926   0.000136   8.15e-05
                person         36         36   0.000745     0.0278   0.000408   0.000245
               bicycle         36        398          0          0          0          0
                   car         36         37          0          0          0          0
Speed: 2.4ms preprocess, 3.8ms inference, 0.0ms loss, 3.1ms postproce

## Finetuning

In [17]:
yolo_finetuned = YOLO(checkpoint)  # load a pretrained model (recommended for training)

dataset_yaml_path = "/workspace/object-detection-balloons/bloodcells.yaml"

In [18]:
# Use the model
results = yolo_finetuned.train(data=dataset_yaml_path, epochs=50, batch=32)  # train the model

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/workspace/object-detection-balloons/bloodcells.yaml, epochs=50, patience=50, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fals

 15                  -1  1     37248  ultralytics.nn.modules.block.C2f             [192, 64, 1]                  
 16                  -1  1     36992  ultralytics.nn.modules.conv.Conv             [64, 64, 3, 2]                
 17            [-1, 12]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 18                  -1  1    123648  ultralytics.nn.modules.block.C2f             [192, 128, 1]                 
 19                  -1  1    147712  ultralytics.nn.modules.conv.Conv             [128, 128, 3, 2]              
 20             [-1, 9]  1         0  ultralytics.nn.modules.conv.Concat           [1]                           
 21                  -1  1    493056  ultralytics.nn.modules.block.C2f             [384, 256, 1]                 
 22        [15, 18, 21]  1    751897  ultralytics.nn.modules.head.Detect           [3, [64, 128, 256]]           
Model summary: 225 layers, 3011433 parameters, 3011417 gradients

Transferred 319/355 it

## Evaluation

### Validation Eval

In [19]:
!rm -rf /workspace/object-detection-balloons/preds/val
preds = yolo_finetuned.predict(val_img_paths, save=True, project="preds", name="val") 


0: 640x640 24 rbcs, 1 wbc, 1: 640x640 1 platelets, 10 rbcs, 1 wbc, 2: 640x640 2 plateletss, 24 rbcs, 1 wbc, 3: 640x640 1 platelets, 21 rbcs, 1 wbc, 4: 640x640 2 plateletss, 14 rbcs, 1 wbc, 5: 640x640 1 platelets, 22 rbcs, 1 wbc, 6: 640x640 15 rbcs, 1 wbc, 7: 640x640 2 plateletss, 12 rbcs, 1 wbc, 8: 640x640 15 rbcs, 1 wbc, 9: 640x640 3 plateletss, 17 rbcs, 1 wbc, 10: 640x640 1 platelets, 11 rbcs, 2 wbcs, 11: 640x640 1 platelets, 13 rbcs, 1 wbc, 12: 640x640 3 plateletss, 16 rbcs, 1 wbc, 13: 640x640 1 platelets, 13 rbcs, 1 wbc, 14: 640x640 4 plateletss, 13 rbcs, 1 wbc, 15: 640x640 3 plateletss, 13 rbcs, 1 wbc, 16: 640x640 1 platelets, 14 rbcs, 1 wbc, 17: 640x640 1 platelets, 22 rbcs, 1 wbc, 18: 640x640 1 platelets, 22 rbcs, 1 wbc, 19: 640x640 1 platelets, 22 rbcs, 1 wbc, 20: 640x640 1 platelets, 19 rbcs, 1 wbc, 21: 640x640 2 plateletss, 20 rbcs, 1 wbc, 22: 640x640 16 rbcs, 1 wbc, 23: 640x640 20 rbcs, 1 wbc, 24: 640x640 1 platelets, 18 rbcs, 1 wbc, 25: 640x640 18 rbcs, 1 wbc, 26: 640x640 

In [20]:
metrics = yolo_finetuned.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="val")

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/validation.cache... 73 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 73/73 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 3/3 [00:02<00:00,  1.29it/s]
                   all         73        967      0.852      0.907      0.927      0.656
             platelets         73         76       0.79       0.89      0.916      0.532
                   rbc         73        819      0.797      0.832      0.881      0.635
                   wbc         73         72      0.969          1      0.984      0.803
Speed: 2.0ms preprocess, 4.6ms inference, 0.0ms loss, 3.8ms postprocess per image
Results saved to [1mruns/detect/val9[0m


### Test Eval

In [37]:
!rm -rf /workspace/object-detection-balloons/preds/test
preds = yolo_finetuned.predict(test_img_paths, save=True, project="preds", name="test") 


0: 640x640 2 plateletss, 21 rbcs, 1 wbc, 1: 640x640 3 plateletss, 16 rbcs, 2 wbcs, 2: 640x640 1 platelets, 16 rbcs, 1 wbc, 3: 640x640 3 plateletss, 13 rbcs, 1 wbc, 4: 640x640 2 plateletss, 20 rbcs, 1 wbc, 5: 640x640 1 platelets, 21 rbcs, 1 wbc, 6: 640x640 1 platelets, 18 rbcs, 1 wbc, 7: 640x640 15 rbcs, 1 wbc, 8: 640x640 1 platelets, 17 rbcs, 1 wbc, 9: 640x640 3 plateletss, 13 rbcs, 2 wbcs, 10: 640x640 2 plateletss, 17 rbcs, 1 wbc, 11: 640x640 1 platelets, 18 rbcs, 1 wbc, 12: 640x640 1 platelets, 20 rbcs, 1 wbc, 13: 640x640 20 rbcs, 1 wbc, 14: 640x640 2 plateletss, 20 rbcs, 1 wbc, 15: 640x640 16 rbcs, 1 wbc, 16: 640x640 18 rbcs, 1 wbc, 17: 640x640 17 rbcs, 1 wbc, 18: 640x640 2 plateletss, 18 rbcs, 1 wbc, 19: 640x640 3 plateletss, 21 rbcs, 3 wbcs, 20: 640x640 1 platelets, 17 rbcs, 1 wbc, 21: 640x640 2 plateletss, 24 rbcs, 1 wbc, 22: 640x640 4 plateletss, 19 rbcs, 1 wbc, 23: 640x640 18 rbcs, 1 wbc, 24: 640x640 17 rbcs, 1 wbc, 25: 640x640 2 plateletss, 24 rbcs, 1 wbc, 26: 640x640 1 plate

In [38]:
metrics = yolo_finetuned.val(data="/workspace/object-detection-balloons/bloodcells.yaml", split="test")

Ultralytics YOLOv8.0.187 ðŸš€ Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (Tesla V100-FHHL-16GB, 16151MiB)
[34m[1mval: [0mScanning /workspace/object-detection-balloons/datasets/bloodcells/labels/test.cache... 36 images, 0 backgrounds, 0 corrupt: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 36/36 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00,  2.06it/s]
                   all         36        471      0.863      0.863      0.902       0.63
             platelets         36         36       0.82      0.833       0.87      0.477
                   rbc         36        398      0.803      0.781      0.866      0.626
                   wbc         36         37      0.965      0.973      0.969      0.788
Speed: 2.0ms preprocess, 1.7ms inference, 0.0ms loss, 2.3ms postprocess per image
Results saved to [1mruns/detect/val11[0m
