In [15]:
import os
import shutil
import datasets
import supervision as sv

from tqdm import tqdm


In [2]:
fake_ds = sv.DetectionDataset.from_yolo("datasets/scc_cell_detection_fake/train/images", "datasets/scc_cell_detection_fake/train/labels", "datasets/scc_cell_detection_fake/data.yaml")
print(len(fake_ds))

2500


In [12]:
real_ds = datasets.load_dataset("mario-dg/brightfield-microscopy-scc-filtered", cache_dir=".cache/")
print(real_ds)

DatasetDict({
    train: Dataset({
        features: ['image', 'label', 'width', 'height', 'objects', 'well_edge'],
        num_rows: 20558
    })
    validation: Dataset({
        features: ['image', 'label', 'width', 'height', 'objects', 'well_edge'],
        num_rows: 2527
    })
    test: Dataset({
        features: ['image', 'label', 'width', 'height', 'objects', 'well_edge'],
        num_rows: 1200
    })
})


In [39]:
splits = real_ds.keys()
shutil.rmtree("datasets/scc_cell_detection_real", ignore_errors=True)
for split in splits:
    ds_dir = f"datasets/scc_cell_detection_real/{split}"
    images_dir = f"{ds_dir}/images"
    labels_dir = f"{ds_dir}/labels"
    os.makedirs(ds_dir, exist_ok=True)
    os.makedirs(images_dir, exist_ok=True)
    os.makedirs(labels_dir, exist_ok=True)
    for index, item in tqdm(enumerate(real_ds[split]), desc=split, total=len(real_ds[split])):
        image_file = f"{ds_dir}/images/real_cell_image_{index:04d}.png"
        item["image"].save(image_file)
        with open(f"{ds_dir}/labels/real_cell_image_{index:04d}.txt", 'w') as f:
            for det_index, det in enumerate(item['objects']['bbox']):
                f.write(f"{item['objects']['categories'][det_index]} {det[0]} {det[1]} {det[2]} {det[3]}\n")
        if split == "train" and index == 4999:
            break

Images:  24%|██▍       | 5000/20558 [05:48<18:03, 14.36it/s]
Images: 100%|██████████| 2527/2527 [02:56<00:00, 14.32it/s]
Images: 100%|██████████| 1200/1200 [01:23<00:00, 14.35it/s]


In [4]:
real_train_ds = sv.DetectionDataset.from_yolo("datasets/scc_cell_detection_real/train/images", "datasets/scc_cell_detection_real/train/labels", "datasets/scc_cell_detection_real/data.yaml")
real_val_ds = sv.DetectionDataset.from_yolo("datasets/scc_cell_detection_real/validation/images", "datasets/scc_cell_detection_real/validation/labels", "datasets/scc_cell_detection_real/data.yaml")
real_test_ds = sv.DetectionDataset.from_yolo("datasets/scc_cell_detection_real/test/images", "datasets/scc_cell_detection_real/test/labels", "datasets/scc_cell_detection_real/data.yaml")
print(len(real_train_ds))
print(len(real_val_ds))
print(len(real_test_ds))

5000
2527
1200


In [5]:
REAL_10 = int(5000 * 0.9)
REAL_30 = int(5000 * 0.7)
REAL_50 = int(5000 * 0.5)
FAKE_10 = 5000 - REAL_10
FAKE_30 = 5000 - REAL_30
FAKE_50 = 5000 - REAL_50

print(f"{REAL_10=} {FAKE_10=}")
print(f"{REAL_30=} {FAKE_30=}")
print(f"{REAL_50=} {FAKE_50=}")

REAL_10=4500 FAKE_10=500
REAL_30=3500 FAKE_30=1500
REAL_50=2500 FAKE_50=2500


In [13]:
print(sv.DetectionDataset(
            classes=fake_ds.classes,
            images={name: fake_ds.images[name] for name in list(fake_ds.images.keys())[:500]},
            annotations={name: fake_ds.annotations[name] for name in list(fake_ds.images.keys())[:500]},
        ))

DetectionDataset(classes=['cell'], images={'datasets\\scc_cell_detection_fake\\train\\images\\sample_00001_38_png.rf.cd93f83a56e890806bb58cd7b9035cd7.jpg': array([[[157, 160, 158],
        [157, 160, 158],
        [156, 159, 157],
        ...,
        [136, 139, 137],
        [146, 149, 147],
        [158, 161, 159]],

       [[152, 155, 153],
        [152, 155, 153],
        [153, 156, 154],
        ...,
        [139, 142, 140],
        [143, 146, 144],
        [148, 151, 149]],

       [[148, 151, 149],
        [148, 151, 149],
        [149, 152, 150],
        ...,
        [143, 146, 144],
        [140, 143, 141],
        [138, 141, 139]],

       ...,

       [[  8,  11,   9],
        [  8,  11,   9],
        [  8,  11,   9],
        ...,
        [  7,  10,   8],
        [  7,  10,   8],
        [  7,  10,   8]],

       [[  8,  11,   9],
        [  8,  11,   9],
        [  8,  11,   9],
        ...,
        [  7,  10,   8],
        [  7,  10,   8],
        [  7,  10,   8]],

      

In [14]:
scc_cell_detection_10 = sv.DetectionDataset.merge([sv.DetectionDataset(
                                                    classes=fake_ds.classes,
                                                    images={name: fake_ds.images[name] for name in list(fake_ds.images.keys())[:FAKE_10]},
                                                    annotations={name: fake_ds.annotations[name] for name in list(fake_ds.images.keys())[:FAKE_10]},
                                                ), 
                                                    sv.DetectionDataset(
                                                    classes=real_train_ds.classes,
                                                    images={name: real_train_ds.images[name] for name in list(real_train_ds.images.keys())[:REAL_10]},
                                                    annotations={name: real_train_ds.annotations[name] for name in list(real_train_ds.images.keys())[:REAL_10]},
                                                )])
scc_cell_detection_30 = sv.DetectionDataset.merge([sv.DetectionDataset(
                                                    classes=fake_ds.classes,
                                                    images={name: fake_ds.images[name] for name in list(fake_ds.images.keys())[:FAKE_30]},
                                                    annotations={name: fake_ds.annotations[name] for name in list(fake_ds.images.keys())[:FAKE_30]},
                                                    ), 
                                                    sv.DetectionDataset(
                                                    classes=real_train_ds.classes,
                                                    images={name: real_train_ds.images[name] for name in list(real_train_ds.images.keys())[:REAL_30]},
                                                    annotations={name: real_train_ds.annotations[name] for name in list(real_train_ds.images.keys())[:REAL_30]},
                                                )])
scc_cell_detection_50 = sv.DetectionDataset.merge([sv.DetectionDataset(
                                                    classes=fake_ds.classes,
                                                    images={name: fake_ds.images[name] for name in list(fake_ds.images.keys())[:FAKE_50]},
                                                    annotations={name: fake_ds.annotations[name] for name in list(fake_ds.images.keys())[:FAKE_50]},
                                                    ), 
                                                    sv.DetectionDataset(
                                                    classes=real_train_ds.classes,
                                                    images={name: real_train_ds.images[name] for name in list(real_train_ds.images.keys())[:REAL_50]},
                                                    annotations={name: real_train_ds.annotations[name] for name in list(real_train_ds.images.keys())[:REAL_50]},
                                                )])

scc_cell_detection_10.as_yolo("datasets/scc_cell_detection_10/train/images", "datasets/scc_cell_detection_10/train/labels", "datasets/scc_cell_detection_10/data.yaml")
scc_cell_detection_30.as_yolo("datasets/scc_cell_detection_30/train/images", "datasets/scc_cell_detection_30/train/labels", "datasets/scc_cell_detection_30/data.yaml")
scc_cell_detection_50.as_yolo("datasets/scc_cell_detection_50/train/images", "datasets/scc_cell_detection_50/train/labels", "datasets/scc_cell_detection_50/data.yaml")

In [5]:
from ultralytics import YOLO
from ultralytics.utils.callbacks.wb import callbacks as wandb_callbacks
import wandb


run = wandb.init(
    project="Thesis-Research-Detection", 
    name="yolov8m",
    group="scc_cell_detection_test", 
    job_type="det_train", 
    save_code=True, 
    config={
        "model": "yolov8m",
        "dataset": "scc_cell_detection_test",
        "epochs": 20,
        "patience": 10,
        "batch": 8,
        "imgsz": 512,
        "device": "cuda:0",
    })

model = YOLO("models/base/yolov8s.pt")
for cb_event, cb in wandb_callbacks.items():
    model.add_callback(cb_event, cb)

model.train(
    data="datasets/scc_cell_detection_test/data.yaml",
    epochs=20,
    patience=1,
    batch=8,
    imgsz=512,
    device="cuda:0",
    save_period=5,
    project="scc_cell_detection_test",
)

test_metrics = model.val(
                    data="datasets/scc_cell_detection_test/data.yaml",
                    split="test", 
                    conf=0.6, 
                    batch=8, 
                    imgsz=512, 
                    device="cuda:0",
                    save_json=True,
                    save_hybrid=True,
                    plots=True,
                    )

print(f"{test_metrics=}")
run.log({"test": test_metrics})

Ultralytics YOLOv8.2.22  Python-3.10.12 torch-2.1.2+cu121 CUDA:0 (NVIDIA GeForce RTX 2080 SUPER, 8192MiB)
[34m[1mengine\trainer: [0mtask=detect, mode=train, model=models/base/yolov8s.pt, data=datasets/scc_cell_detection_test/data.yaml, epochs=20, time=None, patience=1, batch=8, imgsz=512, save=True, save_period=5, cache=False, device=cuda:0, workers=8, project=scc_cell_detection_test, name=train9, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hybrid=False, conf=None, iou=0.7, max_det=300, half=False, dnn=False, plots=True, source=None, vid_stride=1, stream_buffer=False, visualize=False, augment=False, agnostic_nms=False, classes=None, retina_masks=False, embed=None, show=False, save_frames=False, save_txt=Fa

[34m[1mtrain: [0mScanning U:\03_Master\HAW\thesis-research\datasets\scc_cell_detection_test\train\labels.cache... 2500 images, 0 backgrounds, 0 corrupt: 100%|██████████| 2500/2500 [00:00<?, ?it/s]
[34m[1mval: [0mScanning U:\03_Master\HAW\thesis-research\datasets\scc_cell_detection_test\validation\labels.cache... 927 images, 0 backgrounds, 0 corrupt: 100%|██████████| 927/927 [00:00<?, ?it/s]


Plotting labels to scc_cell_detection_test\train9\labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m AdamW(lr=0.002, momentum=0.9) with parameter groups 57 weight(decay=0.0), 64 weight(decay=0.0005), 63 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added 
Image sizes 512 train, 512 val
Using 8 dataloader workers
Logging results to [1mscc_cell_detection_test\train9[0m
Starting training for 20 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       1/20         2G      1.561      1.199      1.193          5        512: 100%|██████████| 313/313 [00:32<00:00,  9.49it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 58/58 [00:06<00:00,  8.90it/s]

                   all        927       1114      0.953      0.868       0.89      0.586






      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


       2/20       1.9G      1.438     0.7211      1.208          1        512: 100%|██████████| 313/313 [00:30<00:00, 10.43it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 58/58 [00:06<00:00,  8.89it/s]

                   all        927       1114      0.948      0.835      0.879      0.539
[34m[1mEarlyStopping: [0mTraining stopped early as no improvement observed in last 1 epochs. Best results observed at epoch 1, best model saved as best.pt.
To update EarlyStopping(patience=1) pass a new patience value, i.e. `patience=300` or use `patience=0` to disable EarlyStopping.






2 epochs completed in 0.046 hours.
Optimizer stripped from scc_cell_detection_test\train9\weights\last.pt, 22.5MB
Optimizer stripped from scc_cell_detection_test\train9\weights\best.pt, 22.5MB

Validating scc_cell_detection_test\train9\weights\best.pt...
Ultralytics YOLOv8.2.22  Python-3.10.12 torch-2.1.2+cu121 CUDA:0 (NVIDIA GeForce RTX 2080 SUPER, 8192MiB)
Model summary (fused): 168 layers, 11125971 parameters, 0 gradients, 28.4 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 58/58 [00:06<00:00,  8.81it/s]


                   all        927       1114       0.95      0.866      0.888      0.586
Speed: 0.1ms preprocess, 2.0ms inference, 0.0ms loss, 1.0ms postprocess per image
Results saved to [1mscc_cell_detection_test\train9[0m


VBox(children=(Label(value='25.239 MB of 25.239 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
lr/pg0,▁█
lr/pg1,▁█
lr/pg2,▁█
metrics/mAP50(B),█▁
metrics/mAP50-95(B),█▁
metrics/precision(B),█▁
metrics/recall(B),█▁
model/GFLOPs,▁
model/parameters,▁
model/speed_PyTorch(ms),▁

0,1
lr/pg0,0.00127
lr/pg1,0.00127
lr/pg2,0.00127
metrics/mAP50(B),0.88843
metrics/mAP50-95(B),0.58601
metrics/precision(B),0.95047
metrics/recall(B),0.86625
model/GFLOPs,28.647
model/parameters,11135987.0
model/speed_PyTorch(ms),1.866


Ultralytics YOLOv8.2.22  Python-3.10.12 torch-2.1.2+cu121 CUDA:0 (NVIDIA GeForce RTX 2080 SUPER, 8192MiB)
Model summary (fused): 168 layers, 11125971 parameters, 0 gradients, 28.4 GFLOPs


[34m[1mval: [0mScanning U:\03_Master\HAW\thesis-research\datasets\scc_cell_detection_test\test\labels.cache... 600 images, 0 backgrounds, 0 corrupt: 100%|██████████| 600/600 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 75/75 [00:06<00:00, 11.19it/s]


                   all        600        725          1          1      0.995      0.995
Speed: 0.6ms preprocess, 3.9ms inference, 0.0ms loss, 2.3ms postprocess per image
Saving scc_cell_detection_test\train92\predictions.json...
Results saved to [1mscc_cell_detection_test\train92[0m
test_metrics=ultralytics.utils.metrics.DetMetrics object with attributes:

ap_class_index: array([0])
box: ultralytics.utils.metrics.Metric object
confusion_matrix: <ultralytics.utils.metrics.ConfusionMatrix object at 0x00000256C06FDA20>
curves: ['Precision-Recall(B)', 'F1-Confidence(B)', 'Precision-Confidence(B)', 'Recall-Confidence(B)']
curves_results: [[array([          0,    0.001001,    0.002002,    0.003003,    0.004004,    0.005005,    0.006006,    0.007007,    0.008008,    0.009009,     0.01001,    0.011011,    0.012012,    0.013013,    0.014014,    0.015015,    0.016016,    0.017017,    0.018018,    0.019019,     0.02002,    0.021021,    0.022022,    0.023023,
          0.024024,    0.025025,   

UsageError: Run (4d90gi55) is finished. The call to `log` will be ignored. Please make sure that you are using an active run.