In [2]:
!pip install -r requirements.txt

Collecting ultralytics (from -r requirements.txt (line 1))
  Downloading ultralytics-8.0.166-py3-none-any.whl (612 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m612.3/612.3 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting kaggle (from -r requirements.txt (line 2))
  Downloading kaggle-1.5.16.tar.gz (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.6/83.6 kB[0m [31m13.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
[?25hCollecting pybboxes (from -r requirements.txt (line 3))
  Downloading pybboxes-0.1.6-py3-none-any.whl (24 kB)
Collecting datasets (from -r requirements.txt (line 4))
  Downloading datasets-2.14.4-py3-none-any.whl (519 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m519.3/519.3 kB[0m [31m23.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting matplotlib>=3.2.2 (from ultralytics->-r requirements.txt (line 1))
  Downloading matplotlib-3.7.2-c

In [1]:
import os
from pathlib import Path
from getpass import getpass
from datasets import load_dataset

import pandas as pd
import shutil
from tqdm.auto import tqdm
import ast
import pybboxes as pbx

import ultralytics
from ultralytics import YOLO

import random

In [2]:
dataset = load_dataset("keremberke/blood-cell-object-detection", "full")

In [3]:
categories = {
    "id": []
    "split": [],
    "category": [],
}

for split in dataset:
    for row in dataset[split]:


{'image_id': 3,
 'image': <PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=416x416>,
 'width': 416,
 'height': 416,
 'objects': {'id': [28,
   29,
   30,
   31,
   32,
   33,
   34,
   35,
   36,
   37,
   38,
   39,
   40,
   41,
   42,
   43,
   44,
   45,
   46],
  'area': [20234,
   5232,
   6292,
   3990,
   3926,
   3926,
   5103,
   5575,
   4956,
   4956,
   2875,
   3504,
   7134,
   3944,
   6499,
   5664,
   6867,
   5400,
   6350],
  'bbox': [[44.0, 273.0, 141.5, 143.0],
   [225.0, 313.0, 65.0, 80.5],
   [34.0, 155.0, 60.5, 104.0],
   [291.0, 346.0, 57.0, 70.0],
   [300.0, 114.0, 56.5, 69.5],
   [295.0, 256.0, 56.5, 69.5],
   [271.0, 245.0, 59.0, 86.5],
   [181.0, 296.0, 59.0, 94.5],
   [355.0, 54.0, 59.0, 84.0],
   [316.0, 79.0, 59.0, 84.0],
   [244.0, 148.0, 40.5, 71.0],
   [214.0, 153.0, 43.0, 81.5],
   [189.0, 51.0, 75.5, 94.5],
   [195.0, 1.0, 68.0, 58.0],
   [225.0, 23.0, 67.0, 97.0],
   [87.0, 1.0, 69.5, 81.5],
   [1.0, 33.0, 63.0, 109.0],
   [107.0, 139.0, 60.

In [4]:
dataset_dir = "/workspace/object-detection-balloons/datasets/bloodcells"
os.makedirs(dataset_dir, exist_ok=True)

In [5]:
# rm -rf /workspace/object-detection-balloons/datasets/bloodcells

In [12]:
for split in dataset:
    for row in dataset[split]:
        img_path = Path(os.path.join(dataset_dir, "images", split, str(row["image_id"])) + ".jpg")
        os.makedirs(img_path.parent, exist_ok=True)
        row["image"].save(img_path)
        for bbox, category in zip(row["objects"]["bbox"], row["objects"]["category"]):
            bbox_yolo = pbx.convert_bbox(bbox, from_type="coco", to_type="yolo", image_size=(row["width"], row["height"]))
            bbox_yolo = " ".join([str(bb) for bb in bbox_yolo])
            file_path = Path(os.path.join(dataset_dir, "labels", split, str(row["image_id"])) + ".txt")
            os.makedirs(file_path.parent, exist_ok=True)
            with open(file_path, "a") as f:
                f.write(f"{category} {bbox_yolo}\n")

In [7]:
ultralytics.checks()

Ultralytics YOLOv8.0.166 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24268MiB)
Setup complete ✅ (48 CPUs, 188.7 GB RAM, 1.2/20.0 GB disk)


## Baseline

In [8]:
img_path_base = "/workspace/object-detection-balloons/datasets/bloodcells/images/validation"
val_img_paths = [os.path.join(img_path_base, fname) for fname in os.listdir(img_path_base)]
val_img_paths[:3]

['/workspace/object-detection-balloons/datasets/bloodcells/images/validation/51.jpg',
 '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/14.jpg',
 '/workspace/object-detection-balloons/datasets/bloodcells/images/validation/48.jpg']

In [9]:
yolo_base = YOLO('yolov8n.pt')
preds = yolo_base.predict(val_img_paths, save=True, project="preds", name="baseline") 

Downloading https://github.com/ultralytics/assets/releases/download/v0.0.0/yolov8n.pt to 'yolov8n.pt'...
100%|██████████| 6.23M/6.23M [00:00<00:00, 104MB/s]

0: 640x640 1 toothbrush, 1: 640x640 (no detections), 2: 640x640 1 person, 3: 640x640 1 teddy bear, 4: 640x640 1 person, 5: 640x640 (no detections), 6: 640x640 2 persons, 7: 640x640 (no detections), 8: 640x640 (no detections), 9: 640x640 (no detections), 10: 640x640 1 person, 11: 640x640 1 person, 12: 640x640 2 persons, 13: 640x640 (no detections), 14: 640x640 (no detections), 15: 640x640 1 person, 16: 640x640 (no detections), 17: 640x640 (no detections), 18: 640x640 1 person, 1 sports ball, 19: 640x640 1 person, 20: 640x640 1 toothbrush, 21: 640x640 1 person, 22: 640x640 (no detections), 23: 640x640 1 toothbrush, 24: 640x640 1 person, 1 toothbrush, 25: 640x640 1 donut, 26: 640x640 2 persons, 27: 640x640 (no detections), 28: 640x640 (no detections), 29: 640x640 (no detections), 30: 640x640 2 sports balls, 1 toothbrush, 31: 640x640 

## Finetuning

In [10]:
yolo_finetuned = YOLO('yolov8n.pt')  # load a pretrained model (recommended for training)
dataset_yaml_path = "/workspace/object-detection-balloons/bloodcells.yaml"

# Use the model
results = yolo_finetuned.train(data=dataset_yaml_path, epochs=100, batch=32)  # train the model

Ultralytics YOLOv8.0.166 🚀 Python-3.10.6 torch-2.0.1+cu118 CUDA:0 (NVIDIA GeForce RTX 3090, 24268MiB)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov8n.pt, data=/workspace/object-detection-balloons/bloodcells.yaml, epochs=100, patience=50, batch=32, imgsz=640, save=True, save_period=-1, cache=False, device=None, workers=8, project=None, name=None, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, show=False, save_txt=False, save_conf=False, save_crop=False, show_labels=True, show_conf=True, vid_stride=1, stream_buffer=False, line_width=None, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=Fal

In [11]:
!rm -rf /workspace/object-detection-balloons/preds/finetuned
preds = yolo_finetuned.predict(val_img_paths, save=True, project="preds", name="finetuned") 


0: 640x640 30 rbcs, 1 wbc, 1: 640x640 1 platelets, 23 rbcs, 1 wbc, 2: 640x640 2 plateletss, 30 rbcs, 1 wbc, 3: 640x640 1 platelets, 30 rbcs, 1 wbc, 4: 640x640 2 plateletss, 22 rbcs, 1 wbc, 5: 640x640 1 platelets, 38 rbcs, 1 wbc, 6: 640x640 19 rbcs, 1 wbc, 7: 640x640 2 plateletss, 16 rbcs, 1 wbc, 8: 640x640 16 rbcs, 1 wbc, 9: 640x640 2 plateletss, 22 rbcs, 1 wbc, 10: 640x640 2 plateletss, 17 rbcs, 2 wbcs, 11: 640x640 18 rbcs, 1 wbc, 12: 640x640 2 plateletss, 22 rbcs, 1 wbc, 13: 640x640 1 platelets, 19 rbcs, 1 wbc, 14: 640x640 4 plateletss, 22 rbcs, 1 wbc, 15: 640x640 2 plateletss, 19 rbcs, 1 wbc, 16: 640x640 1 platelets, 15 rbcs, 1 wbc, 17: 640x640 1 platelets, 26 rbcs, 1 wbc, 18: 640x640 1 platelets, 25 rbcs, 1 wbc, 19: 640x640 1 platelets, 27 rbcs, 1 wbc, 20: 640x640 26 rbcs, 1 wbc, 21: 640x640 2 plateletss, 21 rbcs, 1 wbc, 22: 640x640 23 rbcs, 1 wbc, 23: 640x640 26 rbcs, 1 wbc, 24: 640x640 1 platelets, 22 rbcs, 1 wbc, 25: 640x640 29 rbcs, 1 wbc, 26: 640x640 3 plateletss, 26 rbcs, 1 