In [1]:
# Reference: Analytics Vidhya article on implementing Faster R-CNN in Python for object detection
# https://www.analyticsvidhya.com/blog/2018/11/implementation-faster-r-cnn-python-object-detection/?utm_source=blog&utm_medium=a-step-by-step-introduction-to-the-basic-object-detection-algorithms-part-1


In [2]:
import os
from os import listdir
import xml.etree.ElementTree as ET
import pandas as pd
import glob

In [3]:
# function to extract bounding boxes from an annotation file

def extract_boxes(filename):
    """Parse one XML annotation file into a table of bounding boxes.

    Parameters
    ----------
    filename : str or file object
        Annotation file to read; handed directly to ``ET.parse``.

    Returns
    -------
    pandas.DataFrame
        One row per ``<object>`` element, with integer-labelled columns in
        this order: image name, class name, xmin, xmax, ymin, ymax,
        image width, image height. A file without any ``<object>`` element
        produces an empty frame.

    Raises
    ------
    AttributeError
        If a required element (``size/width``, ``size/height``, ``name`` or
        one of the ``bndbox`` corners) is missing.
    ValueError
        If a size or coordinate value is not an integer literal.
    """
    root = ET.parse(filename).getroot()

    # Look the image name up by tag so the result does not depend on the
    # order of the root's children; keep the historical positional lookup
    # (second child) only as a fallback for files without a <filename> tag.
    name_node = root.find('filename')
    i_name = name_node.text if name_node is not None else root[1].text

    # Image dimensions are shared by every box in the file.
    width = int(root.find('.//size/width').text)
    height = int(root.find('.//size/height').text)

    boxes = []
    for box in root.findall('.//object'):
        c_name = box.find('name').text

        xmin = int(box.find('.//bndbox/xmin').text)
        ymin = int(box.find('.//bndbox/ymin').text)
        xmax = int(box.find('.//bndbox/xmax').text)
        ymax = int(box.find('.//bndbox/ymax').text)

        # Column order is xmin, xmax, ymin, ymax (not the corner order);
        # downstream cells label the columns accordingly.
        boxes.append([i_name, c_name, xmin, xmax, ymin, ymax, width, height])

    return pd.DataFrame(boxes)

In [4]:
# Parse every annotation file and gather all bounding boxes in one dataframe.

ANNOTATIONS_GLOB = '/home/scar3crow/Downloads/BCCD_Dataset-master/BCCD/Annotations/*.xml'

files = glob.glob(ANNOTATIONS_GLOB)

col_names = ['image_names', 'cell_type', 'xmin', 'xmax', 'ymin', 'ymax', 'width', 'height']

# Fail with a clear message instead of a "Length mismatch" error further down
# when the pattern matches nothing (wrong path, dataset not downloaded, ...).
if not files:
    raise FileNotFoundError('No annotation files match ' + ANNOTATIONS_GLOB)

# Parse each file into its own frame, then concatenate once: growing a frame
# with pd.concat inside the loop re-copies all accumulated rows every iteration.
per_file_frames = [extract_boxes(xml_path) for xml_path in files]

# ignore_index=True renumbers the rows 0..n-1 across all files.
b_frame = pd.concat(per_file_frames, ignore_index=True)

# extract_boxes returns integer-labelled columns; attach the readable names here.
b_frame.columns = col_names

In [5]:
# Quick overview of the parsed annotations: how many distinct images there are
# and how many annotated objects fall in each cell type.
unique_image_count = b_frame['image_names'].nunique()
print('Number of unique images = ', unique_image_count)

objects_per_cell_type = b_frame['cell_type'].value_counts()
print('Number of classes in diff. categories = ', objects_per_cell_type)

# Last rows of the full annotation table.
b_frame.tail(3)

Number of unique images =  364
Number of classes in diff. categories =  RBC          4155
WBC           372
Platelets     361
Name: cell_type, dtype: int64


Unnamed: 0,image_names,cell_type,xmin,xmax,ymin,ymax,width,height
4885,BloodImage_00248.jpg,WBC,149,354,233,423,640,480
4886,BloodImage_00147.jpg,Platelets,200,238,249,294,640,480
4887,BloodImage_00147.jpg,WBC,339,514,42,207,640,480


In [6]:
# Keep only the annotation rows whose image file is present in the train_images folder.

train_image_list = listdir('/home/scar3crow/Dropbox/WorkStation-Subrata/python/keras-frcnn/train_images')

# Boolean mask over b_frame: True where the row's image is one of the training files.
is_train_image = b_frame['image_names'].isin(train_image_list)

# Renumber the surviving rows 0..n-1.
train_df = b_frame[is_train_image].reset_index(drop=True)

train_df.tail(3)

Unnamed: 0,image_names,cell_type,xmin,xmax,ymin,ymax,width,height
3404,BloodImage_00248.jpg,WBC,149,354,233,423,640,480
3405,BloodImage_00147.jpg,Platelets,200,238,249,294,640,480
3406,BloodImage_00147.jpg,WBC,339,514,42,207,640,480


In [7]:
# Compare the number of files in the folder with the number of distinct images
# that actually have annotation rows in train_df.
train_file_count = len(train_image_list)
annotated_image_count = train_df['image_names'].nunique()

print('Number of images in train_images file = ', train_file_count)
print('Number of unique images in train dataframe = ', annotated_image_count)

Number of images in train_images file =  254
Number of unique images in train dataframe =  254


In [8]:
# Start the annotation table: a single 'format' column, initially holding just
# the image file name of every training box (same row index as train_df).
data = pd.DataFrame({'format': train_df['image_names']})

data.tail(3)

Unnamed: 0,format
3404,BloodImage_00248.jpg
3405,BloodImage_00147.jpg
3406,BloodImage_00147.jpg


In [9]:
pwd

'/home/scar3crow/Dropbox/WorkStation-Subrata/python'

In [10]:
cd keras-frcnn

/home/scar3crow/Dropbox/WorkStation-Subrata/python/keras-frcnn


In [11]:
# Build one annotation line per bounding box in the layout
#     <image path>,<xmin>,<ymin>,<xmax>,<ymax>,<class name>
# and write the lines to annotate_new.txt in the current working directory.

TRAIN_IMAGES_DIR = '/home/scar3crow/Dropbox/WorkStation-Subrata/python/keras-frcnn/train_images/'

# Assemble every line in one vectorised expression, always starting from train_df:
#  * no chained assignment (data['format'][i] = ...), which raises
#    SettingWithCopyWarning and does not update the frame under pandas Copy-on-Write;
#  * re-running the cell produces the same result instead of prefixing the
#    directory a second time.
annotation_lines = (
    TRAIN_IMAGES_DIR + train_df['image_names']
    + ',' + train_df['xmin'].astype(str)
    + ',' + train_df['ymin'].astype(str)
    + ',' + train_df['xmax'].astype(str)
    + ',' + train_df['ymax'].astype(str)
    + ',' + train_df['cell_type']
)

data = pd.DataFrame({'format': annotation_lines})

# One line per box, no header row and no index column.
data.to_csv('annotate_new.txt', header=False, index=False, sep=' ')


In [None]:
# Launch training with the train_frcnn.py script in the current directory, using
# annotate_new.txt as input.
# Presumably -o selects the annotation parser ("simple") and -p the annotation
# file path; verify against train_frcnn.py.
# NOTE(review): the recorded log reports "Could not load pretrained model weights"
# for resnet50_weights_tf_dim_ordering_tf_kernels.h5, so this run appears to have
# trained without those weights; confirm the file is available before re-running.
# The recorded log shows roughly 2400-2500 s elapsed per epoch, out of 200 epochs.
!python3 train_frcnn.py -o simple -p annotate_new.txt

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Parsing annotation files
Training images per class:
{'Platelets': 264, 'RBC': 2882, 'WBC': 261, 'bg': 0}
Num classes (including bg) = 4
Config has been written to config.pickle, and can be loaded when testing to ensure correct results
Num train samples 214
Num val samples 40
loading weights from resnet50_weights_tf_dim_ordering_tf_kernels.h5
Could not load pretrained model weights. Weights can be found in the keras application folder 		https://github.com/fchollet/keras/tree/master/keras/applications
Starting training
Epoch 1/200
2020-05-08 21:27:35.605698: I tensorflow/core/platform/cpu_feature_guard.cc:141] Your CPU supports instructions that this TensorFl

Mean number of bounding boxes from RPN overlapping ground truth boxes: 30.11
Classifier accuracy for bounding boxes from RPN: 0.8171875
Loss RPN classifier: 2.1103865864827607
Loss RPN regression: 0.10419854074716568
Loss Detector classifier: 0.4161063253507018
Loss Detector regression: 0.2513520134985447
Elapsed time: 2531.979500055313
Total loss decreased from 3.763545685857534 to 2.8820434660791725, saving weights
Epoch 9/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 28.01
Classifier accuracy for bounding boxes from RPN: 0.811875
Loss RPN classifier: 2.0636238936606968
Loss RPN regression: 0.12138618141412735
Loss Detector classifier: 0.4239970819652081
Loss Detector regression: 0.27092184767127037
Elapsed time: 2523.2708010673523
Total loss decreased from 2.8820434660791725 to 2.8799290047113026, saving weights
Epoch 10/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 31.19
Classifier accuracy for bounding boxes from RPN: 0.82


Mean number of bounding boxes from RPN overlapping ground truth boxes: 58.08
Classifier accuracy for bounding boxes from RPN: 0.81875
Loss RPN classifier: 1.227372633210689
Loss RPN regression: 0.10704187363386154
Loss Detector classifier: 0.3941270187497139
Loss Detector regression: 0.20527045298367738
Elapsed time: 2401.3843603134155
Total loss decreased from 2.09711310326099 to 1.9338119785779417, saving weights
Epoch 20/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 54.23
Classifier accuracy for bounding boxes from RPN: 0.815625
Loss RPN classifier: 1.420070722538851
Loss RPN regression: 0.11247278325259685
Loss Detector classifier: 0.40128643199801445
Loss Detector regression: 0.2053396685421467
Elapsed time: 2390.8946104049683
Epoch 21/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 54.8
Classifier accuracy for bounding boxes from RPN: 0.8059375
Loss RPN classifier: 1.1177865564302936
Loss RPN regression: 0.10277656706050038


Mean number of bounding boxes from RPN overlapping ground truth boxes: 57.71
Classifier accuracy for bounding boxes from RPN: 0.83
Loss RPN classifier: 0.9769971531948886
Loss RPN regression: 0.09050333252176643
Loss Detector classifier: 0.39242436081171034
Loss Detector regression: 0.17922499880194664
Elapsed time: 2383.281487226486
Total loss decreased from 1.7154683384431337 to 1.6391498453303122, saving weights
Epoch 31/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 57.57
Classifier accuracy for bounding boxes from RPN: 0.8259375
Loss RPN classifier: 1.044028130500028
Loss RPN regression: 0.09544778941199183
Loss Detector classifier: 0.3738933852314949
Loss Detector regression: 0.17463862534612418
Elapsed time: 2377.325515270233
Epoch 32/200
Mean number of bounding boxes from RPN overlapping ground truth boxes: 58.61
Classifier accuracy for bounding boxes from RPN: 0.8440625
Loss RPN classifier: 0.9741901048453141
Loss RPN regression: 0.08424406485632062