# Mask R-CNN - Training Model

<i> Sébastien Ohleyer </i>

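Training notebook: loads the aerial image dataset, initializes Mask R-CNN from existing weights, and fine-tunes it in stages to detect buildings.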

Python 3

In [1]:
import os
import sys
import itertools
import math
import logging
import json
import re
import random
from collections import OrderedDict
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
import matplotlib.lines as lines
from matplotlib.patches import Polygon

import utils
import visualize
from visualize import display_images
import model as modellib
from model import log

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Working directory of the kernel at the time this cell is executed.
ROOT_DIR = os.getcwd()

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


## Configuration

In [9]:
# Project-specific configuration object for the aerial dataset.
import aerial
config = aerial.AerialConfig()

# Paths for each environment this notebook has been run in.
# Only one group (Local / Floydhub / AWS) should be uncommented at a time.

# Local
#AERIAL_DIR = "/Users/sebastienohleyer/Documents/ENS MVA/Object recognition/AerialImageDataset/"  # TODO: enter value here
#COCO_MODEL_PATH = "~/Document/ENS MVA/Object recognition/Mask_RCNN-coco/coco_weigths/mask_rcnn_coco.h5"
#MODEL_DIR = "../trained_model/"

# Floydhub
#AERIAL_DIR = "/"  # TODO: enter value here
#COCO_MODEL_PATH = "/coco_weights/mask_rcnn_aerial_0035.h5"
#MODEL_DIR = "/output/trained_model/"
# run : floyd run --data sohleyer/datasets/aerialimagedataset_train/1:/train --data sohleyer/datasets/coco_weights/1:/coco_weights --env tensorflow-1.3 --mode jupyter

# AWS
# AERIAL_DIR: dataset root passed to load_aerial() in the dataset cells.
AERIAL_DIR = "/home/ubuntu/aerialimagedataset"  # TODO: enter value here
# NOTE: despite its name, COCO_MODEL_PATH points at an aerial checkpoint here
# (see the file name). The weight-loading cell uses this same path for both
# the "coco" and the "last" initialization modes.
COCO_MODEL_PATH = "/home/ubuntu/mask_rcnn/trained_model/11_mask_rcnn_aerial_0010.h5"
# MODEL_DIR: passed as model_dir when the MaskRCNN model is created.
MODEL_DIR = "/home/ubuntu/mask_rcnn/output/"
# NOTE(review): the command below is identical to the Floydhub one above and
# looks like a copy-paste leftover in the AWS section — confirm and remove.
# run : floyd run --data sohleyer/datasets/aerialimagedataset_train/1:/train --data sohleyer/datasets/coco_weights/1:/coco_weights --env tensorflow-1.3 --mode jupyter

# Dataset selection, forwarded unchanged to load_aerial() for both splits.
TOWN_LIST = ["austin", "chicago", "kitsap", "tyrol-w", "vienna"]
# presumably None means "no per-town image limit" — TODO confirm in aerial.py
IMAGE_PER_TOWN = None
# presumably identifies which sub-images of each tile are used — TODO confirm
SUBIMAGE_LIST = [(2,3), (3,2)]

# Print the resulting configuration values.
config.display()


Configurations:
BACKBONE_SHAPES                [[256 256]
 [128 128]
 [ 64  64]
 [ 32  32]
 [ 16  16]]
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     2
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.85
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
IMAGES_PER_GPU                 2
IMAGE_MAX_DIM                  1024
IMAGE_MIN_DIM                  800
IMAGE_PADDING                  True
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9]
MINI_MASK_SHAPE                (128, 128)
NAME                           aerial
NUM_CLASSES                    2
POOL_SIZE                      7
POST_NMS_ROIS_INFERENCE        1000
POST_NMS_ROIS_TRAINING 

In [10]:
# Build the training split of the aerial dataset.
dataset_train = aerial.AerialDataset()
dataset_train.load_aerial(
    dataset_dir=AERIAL_DIR,
    subset="train",
    town_list=TOWN_LIST,
    image_per_town=IMAGE_PER_TOWN,
    subimage_list=SUBIMAGE_LIST
)
dataset_train.prepare()

# Quick summary of what was loaded.
print("Image Count: {}".format(len(dataset_train.image_ids)))
print("Class Info: {}".format(dataset_train.class_info))

Image Count: 360
Class Info: [{'source': '', 'id': 0, 'name': 'BG'}, {'source': 'aerial', 'id': 1, 'name': 'building'}]


In [11]:
# Debug helper (disabled): uncomment to list the image name of every entry in the training set.
#[dataset_train.image_info[i]["image_name"] for i in dataset_train.image_ids]

In [12]:
# Build the validation split of the aerial dataset.
dataset_val = aerial.AerialDataset()
dataset_val.load_aerial(
    dataset_dir=AERIAL_DIR,
    subset="val",
    town_list=TOWN_LIST,
    image_per_town=IMAGE_PER_TOWN,
    subimage_list=SUBIMAGE_LIST
)
dataset_val.prepare()

# Quick summary of what was loaded.
print("Image Count: {}".format(len(dataset_val.image_ids)))
print("Class Info: {}".format(dataset_val.class_info))

Image Count: 310
Class Info: [{'source': '', 'id': 0, 'name': 'BG'}, {'source': 'aerial', 'id': 1, 'name': 'building'}]


In [13]:
# Debug helper (disabled): uncomment to list the image name of every entry in the validation set.
#[dataset_val.image_info[i]["image_name"] for i in dataset_val.image_ids]

## Create model

In [14]:
# Instantiate Mask R-CNN in training mode; MODEL_DIR is handed over as model_dir.
model = modellib.MaskRCNN(
    mode="training",
    config=config,
    model_dir=MODEL_DIR
)

In [15]:
# Which weights to start with?
# Accepted values: "imagenet", "coco", or "last". Any other value raises a
# ValueError instead of silently leaving the model without loaded weights.
init_with = "last"  # imagenet, coco, or last

if init_with == "imagenet":
    model.load_weights(model.get_imagenet_weights(), by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes
    # See README for instructions to download the COCO weights
    model.load_weights(COCO_MODEL_PATH, by_name=True,
                       exclude=["mrcnn_class_logits", "mrcnn_bbox_fc", 
                                "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # NOTE: this branch reuses COCO_MODEL_PATH, so that variable must point
    # at the checkpoint to resume from (no head layers are excluded here).
    model.load_weights(COCO_MODEL_PATH, by_name=True)
else:
    # Without this guard a typo in init_with would skip every branch and
    # training would start with no weights loaded at all.
    raise ValueError(
        "Unknown init_with value {!r}; expected 'imagenet', 'coco' or 'last'".format(init_with)
    )

In [16]:
# Echo the weights path used by the loading cell above, as a sanity check.
COCO_MODEL_PATH

'/home/ubuntu/mask_rcnn/trained_model/11_mask_rcnn_aerial_0010.h5'

## Training

In [10]:
# Stage 1 : Train the head branches
# Passing layers="heads" freezes all layers except the head
# layers. You can also pass a regular expression to select
# which layers to train by name pattern.
# NOTE(review): the call below is commented out, so this stage is skipped;
# presumably because the weights are resumed from an already-trained
# checkpoint (init_with == "last") — confirm before re-enabling.
#model.train(dataset_train, dataset_val, learning_rate=config.LEARNING_RATE, epochs=10, layers='heads')

In [11]:
# Stage 2: fine-tune the layers from ResNet stage 4 upwards,
# using a quarter of the configured base learning rate.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE / 4,
    epochs=10,
    layers="4+"
)


Starting at epoch 0. LR=0.00025

Checkpoint Path: /home/ubuntu/mask_rcnn/output/aerial20180116T1815/mask_rcnn_aerial_{epoch:04d}.h5
Selecting layers to train
res4a_branch2a         (Conv2D)
bn4a_branch2a          (BatchNorm)
res4a_branch2b         (Conv2D)
bn4a_branch2b          (BatchNorm)
res4a_branch2c         (Conv2D)
res4a_branch1          (Conv2D)
bn4a_branch2c          (BatchNorm)
bn4a_branch1           (BatchNorm)
res4b_branch2a         (Conv2D)
bn4b_branch2a          (BatchNorm)
res4b_branch2b         (Conv2D)
bn4b_branch2b          (BatchNorm)
res4b_branch2c         (Conv2D)
bn4b_branch2c          (BatchNorm)
res4c_branch2a         (Conv2D)
bn4c_branch2a          (BatchNorm)
res4c_branch2b         (Conv2D)
bn4c_branch2b          (BatchNorm)
res4c_branch2c         (Conv2D)
bn4c_branch2c          (BatchNorm)
res4d_branch2a         (Conv2D)
bn4d_branch2a          (BatchNorm)
res4d_branch2b         (Conv2D)
bn4d_branch2b          (BatchNorm)
res4d_branch2c         (Conv2D)
bn4d_

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


bn4w_branch2c          (BatchNorm)
res5a_branch2a         (Conv2D)
bn5a_branch2a          (BatchNorm)
res5a_branch2b         (Conv2D)
bn5a_branch2b          (BatchNorm)
res5a_branch2c         (Conv2D)
res5a_branch1          (Conv2D)
bn5a_branch2c          (BatchNorm)
bn5a_branch1           (BatchNorm)
res5b_branch2a         (Conv2D)
bn5b_branch2a          (BatchNorm)
res5b_branch2b         (Conv2D)
bn5b_branch2b          (BatchNorm)
res5b_branch2c         (Conv2D)
bn5b_branch2c          (BatchNorm)
res5c_branch2a         (Conv2D)
bn5c_branch2a          (BatchNorm)
res5c_branch2b         (Conv2D)
bn5c_branch2b          (BatchNorm)
res5c_branch2c         (Conv2D)
bn5c_branch2c          (BatchNorm)
fpn_c5p5               (Conv2D)
fpn_c4p4               (Conv2D)
fpn_c3p3               (Conv2D)
fpn_c2p2               (Conv2D)
fpn_p5                 (Conv2D)
fpn_p2                 (Conv2D)
fpn_p3                 (Conv2D)
fpn_p4                 (Conv2D)
In model:  rpn_model
    rpn_conv_share



 34/180 [====>.........................] - ETA: 10:13 - loss: 1.6438 - rpn_class_loss: 0.1298 - rpn_bbox_loss: 0.5937 - mrcnn_class_loss: 0.2988 - mrcnn_bbox_loss: 0.2585 - mrcnn_mask_loss: 0.3629



Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [19]:
# Stage 3: fine-tune every layer.
# layers="all" makes the whole network trainable; a regular expression
# matching layer names can be passed instead to train only a subset.
# The recorded log for this cell starts at epoch 10 and counts up to 20,
# i.e. epochs is the final epoch number rather than a count of extra epochs.
model.train(
    dataset_train,
    dataset_val,
    learning_rate=config.LEARNING_RATE / 20,
    epochs=20,
    layers="all"
)


Starting at epoch 10. LR=5e-05

Checkpoint Path: /home/ubuntu/mask_rcnn/output/aerial20180118T1557/mask_rcnn_aerial_{epoch:04d}.h5
Selecting layers to train
conv1                  (Conv2D)
bn_conv1               (BatchNorm)
res2a_branch2a         (Conv2D)
bn2a_branch2a          (BatchNorm)
res2a_branch2b         (Conv2D)
bn2a_branch2b          (BatchNorm)
res2a_branch2c         (Conv2D)
res2a_branch1          (Conv2D)
bn2a_branch2c          (BatchNorm)
bn2a_branch1           (BatchNorm)
res2b_branch2a         (Conv2D)
bn2b_branch2a          (BatchNorm)
res2b_branch2b         (Conv2D)
bn2b_branch2b          (BatchNorm)
res2b_branch2c         (Conv2D)
bn2b_branch2c          (BatchNorm)
res2c_branch2a         (Conv2D)
bn2c_branch2a          (BatchNorm)
res2c_branch2b         (Conv2D)
bn2c_branch2b          (BatchNorm)
res2c_branch2c         (Conv2D)
bn2c_branch2c          (BatchNorm)
res3a_branch2a         (Conv2D)
bn3a_branch2a          (BatchNorm)
res3a_branch2b         (Conv2D)
bn3a_b

  if issubdtype(ts, int):
  elif issubdtype(type(size), float):
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


Epoch 11/20
  3/180 [..............................] - ETA: 20:15 - loss: 1.7864 - rpn_class_loss: 0.0947 - rpn_bbox_loss: 0.9788 - mrcnn_class_loss: 0.2094 - mrcnn_bbox_loss: 0.1641 - mrcnn_mask_loss: 0.3394



 17/180 [=>............................] - ETA: 13:53 - loss: 1.4600 - rpn_class_loss: 0.1018 - rpn_bbox_loss: 0.5479 - mrcnn_class_loss: 0.3048 - mrcnn_bbox_loss: 0.1827 - mrcnn_mask_loss: 0.3228



Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
