# Introduction

This notebook presents **YOLOv2** applied to the [Blood Cell Images](https://www.kaggle.com/paultimothymooney/blood-cells) dataset.

**Resources**

* [BCCD Dataset](https://github.com/Shenggan/BCCD_Dataset) - alternative dataset source with links to original authors

# Imports

In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt

import xml.etree.ElementTree as ET

Limit TensorFlow GPU memory usage

In [2]:
import tensorflow as tf
# Open and immediately close a throwaway session with allow_growth=True so
# that TensorFlow is initialised without taking all GPU memory.
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(gpu_options=gpu_options)
with tf.Session(config=config):
    pass

# Configuration

In [3]:
# Root directory of the dataset; the 'JPEGImages' and 'Annotations'
# sub-directories are resolved relative to it.
# NOTE(review): machine-specific absolute path - adjust when running elsewhere.
dataset_location = '/home/marcin/Datasets/blood-cells/dataset-master/'

In [4]:
# Sub-directories of the dataset root holding the images and the
# per-image XML annotations, in that order.
images_location, annotations_location = (
    os.path.join(dataset_location, subdir)
    for subdir in ('JPEGImages', 'Annotations')
)

In [5]:
# List the annotation files in a stable (sorted) order.  Only '*.xml' entries
# are kept so that stray files in the directory (editor backups, OS metadata
# such as '.DS_Store') never reach the XML parser further down.
filename_list_xml = sorted(
    entry for entry in os.listdir(annotations_location)
    if entry.lower().endswith('.xml')
)
display(filename_list_xml[:3])

['BloodImage_00000.xml', 'BloodImage_00001.xml', 'BloodImage_00002.xml']

In [17]:
class ImageWrapper:
    """Metadata of one annotated image plus the objects labelled in it.

    Attributes:
        filepath: path to the image file.
        width, height, depth: image dimensions as given by the caller.
        objects: list that callers append per-object annotations to;
            starts empty.
    """

    def __init__(self, filepath, width, height, depth):
        self.filepath = filepath
        self.width = width
        self.height = height
        self.depth = depth
        self.objects = []

    def __str__(self):
        # The instance stores `filepath`; there is no `filename` attribute,
        # so the path is what identifies the image in the printout.
        return f'{self.filepath}\nw:{self.width} h:{self.height} d:{self.depth}'

In [18]:
class ObjectWrapper:
    """One labelled object: a class name and its bounding-box corners."""

    def __init__(self, name, xmin, ymin, xmax, ymax):
        self.name = name
        # Corners are stored pairwise: (xmin, ymin) and (xmax, ymax).
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

    def __str__(self):
        """Return the name followed by xmin, ymin, xmax, ymax, space-separated."""
        return '{} {} {} {} {}'.format(
            self.name, self.xmin, self.ymin, self.xmax, self.ymax)

In [19]:
# Parse every annotation XML into an ImageWrapper carrying one ObjectWrapper
# per <object> element found in the file.
annot_list = []

for filename_xml in filename_list_xml:       # 'BloodImage_00000.xml'
    filepath_xml = os.path.join(             # '/../../BloodImage_00000.xml'
        annotations_location, filename_xml)
    tree = ET.parse(filepath_xml)            # xml.etree.ElementTree.ElementTree

    filename = tree.findtext('./filename')   # 'BloodImage_00000'
    w = tree.findtext('./size/width')        # '640'
    h = tree.findtext('./size/height')       # '480'
    d = tree.findtext('./size/depth')        # '3'

    filepath_jpg = os.path.join(             # '/../../BloodImage_00000.jpg'
        images_location, filename + '.jpg')

    # Check the image that belongs to *this* annotation.  The path must come
    # from the current iteration, not from a variable bound by another cell.
    assert os.path.isfile(filepath_jpg), f'missing image: {filepath_jpg}'

    iw = ImageWrapper(filepath=filepath_jpg, width=int(w),
                      height=int(h), depth=int(d))

    # findall() yields xml.etree.ElementTree.Element instances.
    for obj_el in tree.findall('./object'):
        name = obj_el.findtext('./name')         # 'RBC'
        xmin = obj_el.findtext('./bndbox/xmin')  # '233'
        ymin = obj_el.findtext('./bndbox/ymin')  # '368'
        xmax = obj_el.findtext('./bndbox/xmax')  # '338'
        ymax = obj_el.findtext('./bndbox/ymax')  # '452'

        ow = ObjectWrapper(name=name, xmin=int(xmin), ymin=int(ymin),
                           xmax=int(xmax), ymax=int(ymax))

        iw.objects.append(ow)

    annot_list.append(iw)

In [15]:
# Preview: print the first annotated image (if any) followed by its objects.
for annot in annot_list[:1]:
    print(annot)
    for obj in annot.objects:
        print('  ', obj)

/home/marcin/Datasets/blood-cells/dataset-master/JPEGImages/BloodImage_00000.jpg
w:640 h:480 d:3
   RBC 216 359 316 464
   RBC 77 326 177 431
   RBC 540 353 640 458
   RBC 405 350 513 457
   RBC 160 72 245 177
   RBC 5 335 90 440
   RBC 540 39 640 149
   RBC 383 1 504 113
   RBC 9 82 108 168
   RBC 68 212 165 346
   RBC 171 181 264 282


In [None]:

'''THIS IS NOT FULLY PORTED YET, JUST COPIED FROM CatKeypoints'''

class BloodSequence(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of (images, keypoints) arrays.

    NOTE(review): batches are loaded through ``load_image_keypoints`` using a
    ``<file>.cat`` companion path; that helper is not defined in the visible
    part of this notebook, so its exact contract should be confirmed.

    Args:
        files_list: list, tuple or ndarray of file paths.
        target_size: positive int forwarded to ``load_image_keypoints``.
        batch_size: positive int, number of files per batch.
        preprocess_images_function: optional callable applied to the stacked
            image array of each batch.
        preprocess_keypts_function: optional callable applied to the stacked
            keypoint array of each batch.
        shuffle: when True, the file order is reshuffled in ``on_epoch_end``.
    """

    def __init__(self, files_list, target_size, batch_size,
                 preprocess_images_function=None,
                 preprocess_keypts_function=None,
                 shuffle=False):
        # Argument checks, in the same order as the parameters.
        assert isinstance(files_list, (list, tuple, np.ndarray))
        for positive_int in (target_size, batch_size):
            assert isinstance(positive_int, int) and positive_int > 0
        for hook in (preprocess_images_function, preprocess_keypts_function):
            assert hook is None or callable(hook)
        assert isinstance(shuffle, bool)

        # Stored as an ndarray so a batch can be selected by an index array.
        self.files_list = np.array(files_list)
        self.target_size = target_size
        self.batch_size = batch_size
        self.preprocess_images_function = preprocess_images_function
        self.preprocess_keypts_function = preprocess_keypts_function
        self.shuffle = shuffle
        # Builds the initial index order (shuffled when requested).
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches; the last one may be partial."""
        n_files = len(self.files_list)
        n_batches = np.ceil(n_files / self.batch_size)
        return int(n_batches)

    def __getitem__(self, idx):
        """Load batch number ``idx`` and return ``(images_arr, keypoints_arr)``."""
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_paths = self.files_list[self.indices[start:stop]]

        samples = []
        for path in batch_paths:
            # Only the 3rd and 4th values returned by the loader are used.
            _, _, image, keypoints = load_image_keypoints(
                path, path+'.cat', target_size=self.target_size)
            samples.append((np.array(image), keypoints))

        images_arr = np.array([image for image, _ in samples])
        keypoints_arr = np.array([keypoints for _, keypoints in samples])

        image_hook = self.preprocess_images_function
        if image_hook is not None:
            images_arr = image_hook(images_arr)
        keypts_hook = self.preprocess_keypts_function
        if keypts_hook is not None:
            keypoints_arr = keypts_hook(keypoints_arr)

        return images_arr, keypoints_arr

    def on_epoch_end(self):
        """Reset the index order over all files, shuffling it if enabled."""
        order = np.arange(len(self.files_list))
        if self.shuffle:
            np.random.shuffle(order)
        self.indices = order