In [77]:
#David Alba
#Parsing functions in top cell, example usage below

In [73]:
import glob
import pandas as pd
import numpy as np
import xml.etree.ElementTree as ET

#get all data from folder
def get_raw_data_from_annotation(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'cell_type', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    for i in range(len(xml_df['filename'])):
        if '.jpg' not in xml_df.filename[i]:
            xml_df.filename[i] = xml_df.filename[i] + '.jpg'
    return xml_df

#get file names of images
def get_file_names_from_folder(path):
    xml_list = []
    previous_name = ""
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text)
            if previous_name != value:
                xml_list.append(value)
                previous_name = value
    return xml_list

# gets all data for one file
def get_raw_data_from_file(filename):
    xml_list = []
    tree = ET.parse(filename)
    root = tree.getroot()
    for member in root.findall('object'):
        value = (root.find('filename').text,
                int(root.find('size')[0].text),
                int(root.find('size')[1].text),
                member[0].text,
                int(member[4][0].text),
                int(member[4][1].text),
                int(member[4][2].text),
                int(member[4][3].text)
                )
        xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'cell_type', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    for i in range(len(xml_df['filename'])):
        if '.jpg' not in xml_df.filename[i]:
            xml_df.filename[i] = xml_df.filename[i] + '.jpg'
    return xml_df

#gets filename, labels, and bounding boxes
def get_parse(filepath):
    df = get_raw_data_from_file(filepath)
    cell_num = df.shape[0]
    filename = df['filename'][0]
    labels = []
    boxes = []
    for i in range(cell_num): 
        l = []
        b = []
        l.append(df['cell_type'][i])
        b.append(df['xmin'][i])
        b.append(df['ymin'][i])
        b.append(df['xmax'][i])
        b.append(df['ymax'][i])                
        labels.append(l)
        boxes.append(b)
        
    return filename, labels, boxes

def generator(batch_size, num_classes, annotation_folder):
    pattern_shape = [52, 26, 13]
    anchor_shape = [3, 3]
    while True:
        input = []
        ytrue = [np.zeros((batch_size, pattern_shape[l], pattern_shape[l], 3, 5 + num_classes))
                 for l in range(anchor_shape[0])]

        for i in range(batch_size):
            filename, labels, boxes = get_parse(annotation_folder[i])

        yield input, ytrue

    pass

In [57]:
#Modify data path for your system
datapath = "./BCCD_Dataset-master/BCCD/Annotations/"

In [61]:
#get all data from folder as Dataframe
df = get_raw_data_from_annotation(datapath)
print(df.shape)
df.head()

(4888, 8)


Unnamed: 0,filename,width,height,cell_type,xmin,ymin,xmax,ymax
0,BloodImage_00000.jpg,640,480,WBC,260,177,491,376
1,BloodImage_00000.jpg,640,480,RBC,78,336,184,435
2,BloodImage_00000.jpg,640,480,RBC,63,237,169,336
3,BloodImage_00000.jpg,640,480,RBC,214,362,320,461
4,BloodImage_00000.jpg,640,480,RBC,414,352,506,445


In [65]:
#get all data from one file 
xml_name = 'BloodImage_00001.xml'
df = get_raw_data_from_file(datapath + xml_name)
print(df.shape)
df.head()

(19, 8)


Unnamed: 0,filename,width,height,cell_type,xmin,ymin,xmax,ymax
0,BloodImage_00001.jpg,640,480,WBC,68,315,286,480
1,BloodImage_00001.jpg,640,480,RBC,346,361,446,454
2,BloodImage_00001.jpg,640,480,RBC,53,179,146,299
3,BloodImage_00001.jpg,640,480,RBC,449,400,536,480
4,BloodImage_00001.jpg,640,480,RBC,461,132,548,212


In [67]:
#gets all files in a folder
folder = glob.glob(datapath + '/*.xml')
folder

['./BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00000.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00001.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00002.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00003.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00004.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00005.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00006.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00007.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00008.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00009.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00010.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00011.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00012.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00013.xml',
 './BCCD_Dataset-master/BCCD/Annotations\\BloodImage_00014.xml',
 './BCCD_Dataset-master/B

In [48]:
namelist = get_file_names_from_folder(datapath)
namelist

['BloodImage_00000.jpg',
 'BloodImage_00001.jpg',
 'BloodImage_00002.jpg',
 'BloodImage_00003.jpg',
 'BloodImage_00004.jpg',
 'BloodImage_00005.jpg',
 'BloodImage_00006.jpg',
 'BloodImage_00007.jpg',
 'BloodImage_00008.jpg',
 'BloodImage_00009.jpg',
 'BloodImage_00010.jpg',
 'BloodImage_00011.jpg',
 'BloodImage_00012.jpg',
 'BloodImage_00013.jpg',
 'BloodImage_00014.jpg',
 'BloodImage_00015.jpg',
 'BloodImage_00016.jpg',
 'BloodImage_00017.jpg',
 'BloodImage_00018.jpg',
 'BloodImage_00019.jpg',
 'BloodImage_00020.jpg',
 'BloodImage_00021.jpg',
 'BloodImage_00022.jpg',
 'BloodImage_00023.jpg',
 'BloodImage_00024.jpg',
 'BloodImage_00026.jpg',
 'BloodImage_00028.jpg',
 'BloodImage_00029.jpg',
 'BloodImage_00030.jpg',
 'BloodImage_00031.jpg',
 'BloodImage_00032.jpg',
 'BloodImage_00033.jpg',
 'BloodImage_00034.jpg',
 'BloodImage_00035.jpg',
 'BloodImage_00036.jpg',
 'BloodImage_00037.jpg',
 'BloodImage_00038.jpg',
 'BloodImage_00039.jpg',
 'BloodImage_00040.jpg',
 'BloodImage_00041.jpg',


In [69]:
# Once folder is defined, each element can be passed into get_parse()
filename, labels, boxes = get_parse(folder[0])
filename

'BloodImage_00000.jpg'

In [70]:
labels

[['WBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC'],
 ['RBC']]

In [71]:
boxes

[[260, 177, 491, 376],
 [78, 336, 184, 435],
 [63, 237, 169, 336],
 [214, 362, 320, 461],
 [414, 352, 506, 445],
 [555, 356, 640, 455],
 [469, 412, 567, 480],
 [1, 333, 87, 437],
 [4, 406, 95, 480],
 [155, 74, 247, 174],
 [11, 84, 104, 162],
 [534, 39, 639, 139],
 [547, 195, 640, 295],
 [388, 11, 481, 111],
 [171, 175, 264, 275],
 [260, 1, 374, 83],
 [229, 91, 343, 174],
 [69, 144, 184, 235],
 [482, 131, 594, 230],
 [368, 89, 464, 176]]

In [72]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.layers import BatchNormalization, \
    LeakyReLU, Conv2D, ZeroPadding2D, Add, Concatenate, UpSampling2D
from tensorflow.keras.regularizers import l2


def conv(*args, **kwargs):
    new_kwargs = {'kernel_regularizer': l2(5e-4),
                  'padding': 'valid' if kwargs.get('strides') == (2, 2) else 'same'}
    new_kwargs.update(kwargs)

    return Conv2D(*args, **new_kwargs)


def CBL(x, *args, **kwargs):
    new_kwargs = {'use_bias': False}
    new_kwargs.update(kwargs)
    x = conv(*args, **new_kwargs)(x)
    x = BatchNormalization()(x)
    x = LeakyReLU(alpha=0.1)(x)

    return x


def PCBL(x, num_filters):
    x = ZeroPadding2D(((1, 0), (1, 0)))(x)
    x = CBL(x, num_filters, (3, 3), strides=(2, 2))

    return x


def CBLR(x, num_filters):
    y = CBL(x, num_filters, (1, 1))
    y = CBL(y, num_filters * 2, (3, 3))
    x = Add()([x, y])

    return x


def CBL5(x, num_filters):
    x = CBL(x, num_filters, (1, 1))
    x = CBL(x, num_filters * 2, (3, 3))
    x = CBL(x, num_filters, (1, 1))
    x = CBL(x, num_filters * 2, (3, 3))
    x = CBL(x, num_filters, (1, 1))

    return x


def CBLC(x, num_filters, out_filters):
    x = CBL(x, num_filters * 2, (3, 3))
    x = conv(out_filters, (1, 1))(x)

    return x


def CBLU(x, num_filters):
    x = CBL(x, num_filters, (1, 1))
    x = UpSampling2D(2)(x)

    return x

def body(inputs, num_anchors, num_classes):
    out = []
    x = CBL(inputs, 32, (3, 3))
    n = [1, 2, 8, 8, 4]
    for i in range(5):
        x = PCBL(x, 2 ** (6 + i))
        for _ in range(n[i]):
            x = CBLR(x, 2 ** (5 + i))

        if i in [2, 3, 4]:
            out.append(x)

    x1 = CBL5(out[2], 512)
    y1 = CBLC(x1, 512, num_anchors * (num_classes + 5))

    x = CBLU(x1, 256)
    x = Concatenate()([x, out[1]])

    x2 = CBL5(x, 256)
    y2 = CBLC(x2, 256, num_anchors * (num_classes + 5))

    x = CBLU(x2, 128)
    x = Concatenate()([x, out[0]])

    x3 = CBL5(x, 128)
    y3 = CBLC(x3, 128, num_anchors * (num_classes + 5))

    return [y1, y2, y3]


In [76]:
num_anchors = 3
num_classes = 5  # three types of cell

model_input = Input(shape=(416, 416, 3))
model_output = body(model_input, num_anchors, num_classes)
model = Model(model_input, model_output)

# model.summary()
# print('the num of layers ' + str(len(model.layers)))

batch_size = 100
num_classes = 3
annotation_folder = "./BCCD_Dataset-master/BCCD/Annotations/"
model.compile(optimizer=Adam(lr=1e-3), loss="MeanSquaredError")
model.fit_generator(generator(batch_size, num_classes, annotation_folder))

PermissionError: [Errno 13] Permission denied: '.'