#### Dataset
dog_dataset/images and annotations

Parse Annotation File(xml):  
Python's `xml.etree.ElementTree` API loads and parses an XML file;  
its `find()` and `findall()` methods take XPath expressions to extract the data needed:
- size: shape of the photo
- object: bounding box of the objects

In [1]:
from xml.etree import ElementTree

# function to extract bounding boxes from an annotation file
def extract_boxes(filename):
    """Read one annotation XML file and return its boxes and image size.

    Args:
        filename: the annotation XML source handed to ElementTree.parse.

    Returns:
        A (boxes, width, height) tuple. ``boxes`` is a list holding one
        ``[xmin, ymin, xmax, ymax]`` list of ints per <bndbox> element, in
        document order; ``width`` and ``height`` are ints read from <size>.
    """
    root = ElementTree.parse(filename).getroot()

    # one coordinate list per <bndbox>, corners always in this tag order
    corner_tags = ('xmin', 'ymin', 'xmax', 'ymax')
    boxes = [
        [int(bndbox.find(tag).text) for tag in corner_tags]
        for bndbox in root.findall('.//bndbox')
    ]

    # image dimensions live under the <size> element
    size = {dim: int(root.find('.//size/' + dim).text) for dim in ('width', 'height')}
    return boxes, size['width'], size['height']

In [2]:
# Smoke-test extract_boxes() on a single annotation file.
# annDir is the folder holding the XML annotations (note the trailing slash:
# the file name is appended by plain string concatenation).
annDir = './dataset/dog/annotations/xmls/'
# boxes: list of [xmin, ymin, xmax, ymax]; w, h: image width and height
(boxes, w, h) = extract_boxes(annDir+'dog_001.xml')

In [3]:
# show the parsed boxes followed by the image width and height
print(boxes, w, h)

[[151, 121, 577, 346]] 640 480


#### Dataset Object
    mask-rcnn datasets are managed by mrcnn.utils.Dataset object
    define a new class from mrcnn.utils.Dataset class and methods:
    
    load_dataset(self, dataset_dir, is_train)  
        define the classes:
        self.add_class('dataset', class_id, 'class_name')
        register each image in the dataset's "image info" list:
        self.add_image('dataset', image_id, path, annotation)
        
    load_mask(image_id)
        image_id is the integer index of the image in self.image_info (the records added by add_image())
        self.image_info[image_id]['annotation'] gives the annotation file path
        parse the annotation file via extract_boxes()
        return an array of masks, one channel per object (1 inside the object's box, 0 elsewhere), plus the class ids
        
    image_reference(image_id)
        return the path for a given image_id
        self.image_info[image_id]['path']

In [4]:
from os import listdir
from xml.etree import ElementTree
from numpy import zeros
from numpy import asarray
from mrcnn.utils import Dataset

class DogDataset(Dataset):
    """Single-class ('dog') dataset for Mask R-CNN.

    Images are read from ``<dataset_dir>/images/`` and are expected to be
    named like ``dog_001.jpg``; the matching XML annotation is looked up under
    ``<dataset_dir>/annotations/xmls/`` using the same file stem.
    """

    # load dataset definitions and images
    def load_dataset(self, dataset_dir, is_train=True, train_max_id=40):
        """Register the 'dog' class and the images belonging to one split.

        Args:
            dataset_dir: dataset root containing 'images/' and
                'annotations/xmls/'.
            is_train: when true, keep images whose number is
                <= train_max_id; otherwise keep the remaining images.
            train_max_id: highest image number that belongs to the training
                split (defaults to 40, i.e. images 1-40 train, the rest test).
        """
        # define the class
        self.add_class("dataset", 1, "dog")
        # data locations
        img_dir = dataset_dir + '/images/'
        ann_dir = dataset_dir + '/annotations/xmls/'
        # listdir() returns entries in arbitrary order; sort so the internal
        # image ids are the same on every run and every machine
        for filename in sorted(listdir(img_dir)):
            # dog_001.jpg -> '001': drop the 4-char prefix and the extension
            image_id = filename[4:-4]
            try:
                image_number = int(image_id)
            except ValueError:
                # not a numbered image (e.g. a hidden or system file
                # sitting in the folder): skip it instead of crashing
                continue

            # keep only the files that belong to the requested split
            if bool(is_train) != (image_number <= train_max_id):
                continue

            img_path = img_dir + filename
            ann_path = ann_dir + filename[:-4] + '.xml'
            # add image to dataset
            self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path)

    # load the masks for an image
    def load_mask(self, image_id):
        """Build box-shaped masks for one image from its annotation file.

        Args:
            image_id: index into ``self.image_info``.

        Returns:
            (masks, class_ids): ``masks`` is a uint8 array of shape
            [height, width, number_of_boxes] holding 1 inside each bounding
            box and 0 elsewhere; ``class_ids`` is an int32 array with the
            'dog' class index repeated once per box.
        """
        # the image info dictionary stores the annotation file path
        info = self.image_info[image_id]
        boxes, w, h = extract_boxes(info['annotation'])

        # create masks (each box is drawn on its own channel)
        masks = zeros([h, w, len(boxes)], dtype='uint8')
        for channel, (xmin, ymin, xmax, ymax) in enumerate(boxes):
            masks[ymin:ymax, xmin:xmax, channel] = 1

        # every annotated object is a dog
        class_ids = [self.class_names.index('dog') for _ in boxes]
        return masks, asarray(class_ids, dtype='int32')

    def image_reference(self, image_id):
        """Return the image file path stored for ``image_id``."""
        info = self.image_info[image_id]
        return info['path']

#### Split Train and Test

In [5]:
dataset_dir = './dataset/dog'


def _prepared_split(is_train):
    """Create a DogDataset for one split and finalize it with prepare()."""
    split = DogDataset()
    split.load_dataset(dataset_dir, is_train=is_train)
    split.prepare()
    return split


# training split first, then the held-out split used for validation
train = _prepared_split(True)
test = _prepared_split(False)

#### Test on one image
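load_image(image_id): load the pixel array of an image  
load_mask(image_id): load the masks (and class ids) for the image  
the image and mask arrays share the same height and width but differ in channel count (3 color channels vs. one channel per object)  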
mrcnn.utils.extract_bboxes(mask): extract bounding box via mask
mrcnn.visualize.display_instances(image,bbox,mask,class_id,class_names)

In [6]:
# List the record stored for every training image
# (id, source, image path and annotation path).
for image_id in train.image_ids:
    info = train.image_info[image_id]
    print(info)

{'id': '001', 'source': 'dataset', 'path': './dataset/dog/images/dog_001.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_001.xml'}
{'id': '002', 'source': 'dataset', 'path': './dataset/dog/images/dog_002.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_002.xml'}
{'id': '003', 'source': 'dataset', 'path': './dataset/dog/images/dog_003.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_003.xml'}
{'id': '004', 'source': 'dataset', 'path': './dataset/dog/images/dog_004.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_004.xml'}
{'id': '005', 'source': 'dataset', 'path': './dataset/dog/images/dog_005.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_005.xml'}
{'id': '006', 'source': 'dataset', 'path': './dataset/dog/images/dog_006.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_006.xml'}
{'id': '007', 'source': 'dataset', 'path': './dataset/dog/images/dog_007.jpg', 'annotation': './dataset/dog/annotations/xmls/dog_007.xml'}
{'id': '008', 'source': 'da

In [7]:
from mrcnn.visualize import display_instances
from mrcnn.utils import extract_bboxes

# Visual sanity check on one training image (internal id 0).
image_id = 0
# pixel array of the image
image = train.load_image(image_id)
print(image.shape)
# masks built from the annotation boxes (one channel per box) and their class ids
mask, class_ids = train.load_mask(image_id)
print(mask.shape)
# recover bounding boxes from the masks, then draw image + boxes + masks
bbox = extract_bboxes(mask)
display_instances(image, bbox, mask, class_ids, train.class_names)

(480, 640, 3)
(480, 640, 1)


<Figure size 1600x1600 with 1 Axes>

#### Training

In [8]:
from mrcnn.config import Config
from mrcnn.model import MaskRCNN

class DogConfig(Config):
    """Mask R-CNN configuration used to train on the dog dataset.

    Only the settings listed here override the library defaults; the full
    resolved configuration is printed by ``config.display()``.
    """

    # identifier of this configuration (it also shows up in the checkpoint path)
    NAME = "dog_cfg"

    # number of classes -- presumably background + dog; confirm against mrcnn docs
    NUM_CLASSES = 1 + 1

    # hardware / batching: one GPU processing 10 images at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 10

    # image resizing limits
    IMAGE_MIN_DIM = 0
    IMAGE_MAX_DIM = 640

    # optimisation schedule
    STEPS_PER_EPOCH = 10
    LEARNING_RATE = 0.01
    TRAIN_ROIS_PER_IMAGE = 2


# instantiate the configuration and print every resolved setting
config = DogConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     10
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 10
IMAGE_MAX_DIM                  640
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  0
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [640 640   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.01
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES 

Using TensorFlow backend.


In [None]:
# Build the model in training mode; model_dir is set to the dataset folder,
# so the run's checkpoints are written under ./dataset/dog/.
model = MaskRCNN(mode='training', model_dir=dataset_dir, config=config)
# NOTE(review): loading the COCO weights is commented out below, so this run
# presumably starts from un-pretrained weights while only the 'heads' layers
# are trained -- confirm that this is intended.
#model.load_weights('./Mask_RCNN/mask_rcnn_coco.h5', by_name=True, exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", "mrcnn_bbox", "mrcnn_mask"])
# need keras v2.2.5 here
# train for 5 epochs, using the test split as the validation dataset
model.train(train, test, learning_rate=config.LEARNING_RATE, epochs=5, layers='heads')







Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
box_ind is deprecated, use box_indices instead

Starting at epoch 0. LR=0.01

Checkpoint Path: ./dataset/dog/dog_cfg20191128T1447/mask_rcnn_dog_cfg_{epoch:04d}.h5
Selecting layers to train
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_shared        (Conv2D)
    rpn_class_raw          (Conv2D)
    rpn_bbox_pred          (Conv2D)
mrcnn_mask_conv1       (TimeDistributed)
mrcnn_mask_bn1         (TimeDistributed)
mrcnn_mask_conv2       (TimeDistributed)
mrcnn_mask_bn2         (TimeDistributed)
mrcnn_class_conv1      (TimeDistributed)
mrcnn_class_bn1        (TimeDistributed)
mrcnn_mask_conv3       (TimeDistributed)
mrcnn_mask

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "
















Epoch 1/5


