In [40]:
!git clone https://github.com/rkuo2000/yolov5
%cd yolov5

fatal: destination path 'yolov5' already exists and is not an empty directory.
/content/yolov5/yolov5


In [41]:
# for storing labels (.txt) converted from annotation (.xml)
!mkdir -p Dataset/BCCD/Labels 

In [42]:
import os
import numpy as np
from pathlib import Path
from xml.dom.minidom import parse
from shutil import copyfile

In [43]:
FILE_ROOT = "/content/BCCD_Dataset-master/"

IMAGE_SET_ROOT = FILE_ROOT + "BCCD/ImageSets/Main"  
IMAGE_PATH = FILE_ROOT + "BCCD/JPEGImages"  
ANNOTATIONS_PATH = FILE_ROOT + "BCCD/Annotations"

DATA_ROOT = "Dataset/"
LABELS_ROOT = DATA_ROOT + "BCCD/Labels"

DEST_IMAGES_PATH = "images"
DEST_LABELS_PATH = "labels" 

In [44]:
classes = ['Platelets', 'RBC', 'WBC']

In [45]:
def cord_converter(size, box):
    """
    convert xml annotation to darknet format coordinates
    :param size： [w,h]
    :param box: anchor box coordinates [upper-left x,uppler-left y,lower-right x, lower-right y]
    :return: converted [x,y,w,h]
    """
    x1 = int(box[0])
    y1 = int(box[1])
    x2 = int(box[2])
    y2 = int(box[3])

    dw = np.float32(1. / int(size[0]))
    dh = np.float32(1. / int(size[1]))

    w = x2 - x1
    h = y2 - y1
    x = x1 + (w / 2)
    y = y1 + (h / 2)

    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return [x, y, w, h]

def save_file(img_jpg_file_name, size, img_box):
    save_file_name = LABELS_ROOT + '/' + img_jpg_file_name + '.txt'
    print(save_file_name)
    file_path = open(save_file_name, "a+")
    for box in img_box:

        cls_num = classes.index(box[0]) # find class_id

        new_box = cord_converter(size, box[1:]) # convert box coord into YOLO x,y,w,h

        file_path.write(f"{cls_num} {new_box[0]} {new_box[1]} {new_box[2]} {new_box[3]}\n")

    file_path.flush()
    file_path.close()
    
def get_xml_data(file_path, img_xml_file):
    img_path = file_path + '/' + img_xml_file + '.xml'
    print(img_path)

    dom = parse(img_path)
    root = dom.documentElement
    img_name = root.getElementsByTagName("filename")[0].childNodes[0].data
    img_size = root.getElementsByTagName("size")[0]
    objects = root.getElementsByTagName("object")
    img_w = img_size.getElementsByTagName("width")[0].childNodes[0].data
    img_h = img_size.getElementsByTagName("height")[0].childNodes[0].data
    img_c = img_size.getElementsByTagName("depth")[0].childNodes[0].data
    # print("img_name:", img_name)
    # print("image_info:(w,h,c)", img_w, img_h, img_c)
    img_box = []
    for box in objects:
        cls_name = box.getElementsByTagName("name")[0].childNodes[0].data
        x1 = int(box.getElementsByTagName("xmin")[0].childNodes[0].data)
        y1 = int(box.getElementsByTagName("ymin")[0].childNodes[0].data)
        x2 = int(box.getElementsByTagName("xmax")[0].childNodes[0].data)
        y2 = int(box.getElementsByTagName("ymax")[0].childNodes[0].data)
        # print("box:(c,xmin,ymin,xmax,ymax)", cls_name, x1, y1, x2, y2)
        img_jpg_file_name = img_xml_file + '.jpg'
        img_box.append([cls_name, x1, y1, x2, y2])
    # print(img_box)

    # test_dataset_box_feature(img_jpg_file_name, img_box)
    save_file(img_xml_file, [img_w, img_h], img_box)

In [46]:
files = os.listdir(ANNOTATIONS_PATH)
for file in files:
    print("file name: ", file)
    file_xml = file.split(".")
    get_xml_data(ANNOTATIONS_PATH, file_xml[0])

file name:  BloodImage_00388.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00388.xml
Dataset/BCCD/Labels/BloodImage_00388.txt
file name:  BloodImage_00222.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00222.xml
Dataset/BCCD/Labels/BloodImage_00222.txt
file name:  BloodImage_00357.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00357.xml
Dataset/BCCD/Labels/BloodImage_00357.txt
file name:  BloodImage_00271.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00271.xml
Dataset/BCCD/Labels/BloodImage_00271.txt
file name:  BloodImage_00241.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00241.xml
Dataset/BCCD/Labels/BloodImage_00241.txt
file name:  BloodImage_00236.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00236.xml
Dataset/BCCD/Labels/BloodImage_00236.txt
file name:  BloodImage_00371.xml
/content/BCCD_Dataset-master/BCCD/Annotations/BloodImage_00371.xml
Dataset/BCCD/Labels/BloodImage_00371.txt
file name:  B

In [47]:
def copy_data(img_set_source, img_labels_root, imgs_source, type):
    file_name = img_set_source + '/' + type + ".txt"
    file = open(file_name)

    root_file = Path(DATA_ROOT + DEST_IMAGES_PATH + '/' + type)
    if not root_file.exists():
        print(f"Path {root_file} is not exit")
        os.makedirs(root_file)

    root_file = Path(DATA_ROOT + DEST_LABELS_PATH + '/' + type)
    if not root_file.exists():
        print(f"Path {root_file} is not exit")
        os.makedirs(root_file)

    for line in file.readlines():
        print(line)
        img_name = line.strip('\n')
        img_sor_file = imgs_source + '/' + img_name + '.jpg'          
        label_sor_file = img_labels_root + '/' + img_name + '.txt'

        # print(img_sor_file)
        # print(label_sor_file)
        # im = Image.open(rf"{img_sor_file}")
        # im.show()

        # Copy image
        DICT_DIR = DATA_ROOT + DEST_IMAGES_PATH + '/' + type
        img_dict_file = DICT_DIR + '/' + img_name + '.jpg'

        copyfile(img_sor_file, img_dict_file)

        # Copy label
        DICT_DIR = DATA_ROOT + DEST_LABELS_PATH + '/' + type
        img_dict_file = DICT_DIR + '/' + img_name + '.txt'
        copyfile(label_sor_file, img_dict_file)

In [48]:
copy_data(IMAGE_SET_ROOT, LABELS_ROOT, IMAGE_PATH, "train")
copy_data(IMAGE_SET_ROOT, LABELS_ROOT, IMAGE_PATH, "val")
copy_data(IMAGE_SET_ROOT, LABELS_ROOT, IMAGE_PATH, "test")

BloodImage_00001

BloodImage_00003

BloodImage_00004

BloodImage_00005

BloodImage_00006

BloodImage_00008

BloodImage_00009

BloodImage_00010

BloodImage_00012

BloodImage_00013

BloodImage_00020

BloodImage_00022

BloodImage_00023

BloodImage_00024

BloodImage_00026

BloodImage_00032

BloodImage_00034

BloodImage_00036

BloodImage_00038

BloodImage_00039

BloodImage_00040

BloodImage_00042

BloodImage_00043

BloodImage_00044

BloodImage_00045

BloodImage_00046

BloodImage_00047

BloodImage_00048

BloodImage_00049

BloodImage_00050

BloodImage_00052

BloodImage_00054

BloodImage_00056

BloodImage_00059

BloodImage_00069

BloodImage_00070

BloodImage_00071

BloodImage_00076

BloodImage_00078

BloodImage_00079

BloodImage_00081

BloodImage_00082

BloodImage_00083

BloodImage_00086

BloodImage_00087

BloodImage_00090

BloodImage_00091

BloodImage_00092

BloodImage_00094

BloodImage_00095

BloodImage_00097

BloodImage_00100

BloodImage_00101

BloodImage_00106

BloodImage_00107

BloodImage

In [49]:
print(len(os.listdir('Dataset/images/train')))
print(len(os.listdir('Dataset/images/val')))
print(len(os.listdir('Dataset/images/test')))

205
87
72


In [50]:
!echo "train: Dataset/images/train\n" > data/bccd.yaml
!echo "val:   Dataset/images/val\n" >> data/bccd.yaml
!echo "nc : 3\n" >> data/bccd.yaml
!echo "names: ['Platelets', 'RBC', 'WBC']\n" >> data/bccd.yaml

!cat data/bccd.yaml

train: Dataset/images/train\n
val:   Dataset/images/val\n
nc : 3\n
names: ['Platelets', 'RBC', 'WBC']\n


In [51]:
!python train.py --img 640 --batch 8 --epochs 300 --data data/bccd.yaml --cfg models/yolov5s.yaml

[34m[1mtrain: [0mweights=yolov5s.pt, cfg=models/yolov5s.yaml, data=data/bccd.yaml, hyp=data/hyps/hyp.scratch-low.yaml, epochs=300, batch_size=8, imgsz=640, rect=False, resume=False, nosave=False, noval=False, noautoanchor=False, noplots=False, evolve=None, bucket=, cache=None, image_weights=False, device=, multi_scale=False, single_cls=False, optimizer=SGD, sync_bn=False, workers=8, project=runs/train, name=exp, exist_ok=False, quad=False, cos_lr=False, label_smoothing=0.0, patience=100, freeze=[0], save_period=-1, seed=0, local_rank=-1, entity=None, upload_dataset=False, bbox_interval=-1, artifact_alias=latest
[34m[1mgithub: [0mup to date with https://github.com/ultralytics/yolov5 ✅
YOLOv5 🚀 v7.0-80-gc3c8e71 Python-3.8.10 torch-1.13.1+cu116 CUDA:0 (Tesla T4, 15110MiB)

[34m[1mhyperparameters: [0mlr0=0.01, lrf=0.01, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anc