## Mask R-CNN - Inspect Trained Model

Code and visualizations to test, debug, and evaluate the Mask R-CNN model.

In [1]:
"""
Based on the work of Waleed Abdulla (Matterport)
Modified by github.com/GustavZ
"""

import os
import sys
import numpy as np
import tensorflow as tf

# Root directory of the project
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN
sys.path.append(ROOT_DIR)  # To find local version of the library

# Model  Directory 
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

## Configurations

In [2]:
# MS COCO Dataset
import coco
config = coco.CocoConfig()
COCO_DIR = os.path.join(ROOT_DIR,"data/coco")

Using TensorFlow backend.


In [3]:
# Override the training configurations with a few
# changes for inferencing.
class InferenceConfig(config.__class__):
    # Run detection on one image at a time
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       mobilenetv1
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  512
IMAGE_META_SIZE                14
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [512 512   3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'mrcnn_mask_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'rpn_class_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9]


## Notebook Preferences

In [4]:
# Device to load the neural network on.
# Useful if you're training a model on the same 
# machine, in which case use CPU and leave the
# GPU for training.
#DEVICE = "/cpu:0"
DEVICE = "/gpu:0"

# Inspect the model in training or inference modes
# values: 'inference' or 'training'
# TODO: code for 'training' test mode not ready yet
TEST_MODE = "inference"
#TEST_MODE = "training"

In [5]:
def get_ax(rows=1, cols=1, size=16):
    """Return a Matplotlib Axes array to be used in
    all visualizations in the notebook. Provide a
    central point to control graph sizes.
    
    Adjust the size attribute to control how big to render images
    """
    _, ax = plt.subplots(rows, cols, figsize=(size*cols, size*rows))
    return ax

## Load Model

In [8]:
# Local path to trained h5 weights file
MODEL_NAME = 'mask_rcnn_512_cocoperson_0396' # TODO: enter value here
H5_MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME+".h5") # TODO: enter value here

In [9]:
import mmrcnn.model as modellib

# Create model in inference mode
with tf.device(DEVICE):
    model = modellib.MaskRCNN(mode=TEST_MODE, model_dir=MODEL_DIR,config=config)

# Set path to model weights
weights_path = H5_MODEL_PATH
#weights_path = model.find_last()[1]

# Load weights
print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)

('Loading weights ', '/home/gustav/workspace/Mobile_Mask_RCNN/logs/mask_rcnn_512_cocoperson_0396.h5')


## Prepare and Export Model

In [10]:
# name of the pb file we want to output
MODEL_NAME = 'mask_rcnn_512_cocoperson_0396' # TODO: enter value here

# Chose whether to quantize the graph
QUANTIZE = True

In [11]:
from tensorflow.tools.graph_transforms import TransformGraph
from tensorflow.python.framework import graph_util
from tensorflow.python.framework import graph_io
from keras import backend as K

# Get keras model and save
model_keras= model.keras_model
# All new operations will be in test mode from now on.
K.set_learning_phase(0)

# Create output layer with customized names
num_output = 7
pred_node_names = ["detections", "mrcnn_class", "mrcnn_bbox", "mrcnn_mask", "rois", "rpn_class", "rpn_bbox"]
pred_node_names = ["output_" + name for name in pred_node_names]
pred = [tf.identity(model_keras.outputs[i], name = pred_node_names[i])for i in range(num_output)]

# Get the object detection graph
sess = K.get_session()
if QUANTIZE:
    # Transformations
    transforms = ["quantize_weights", "quantize_nodes"]
    transformed_graph_def = TransformGraph(sess.graph.as_graph_def(), [], pred_node_names, transforms)
    constant_graph = graph_util.convert_variables_to_constants(sess, transformed_graph_def, pred_node_names)
    PB_MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME+'.pb') 
else:
    constant_graph = graph_util.convert_variables_to_constants(sess, sess.graph.as_graph_def(), pred_node_names)
    PB_MODEL_PATH = os.path.join(MODEL_DIR, MODEL_NAME+'_quantized'+'.pb') 

INFO:tensorflow:Froze 143 variables.


INFO:tensorflow:Froze 143 variables.


Converted 143 variables to const ops.


In [12]:
# Write Output pb File
graph_io.write_graph(constant_graph, "/", PB_MODEL_PATH, as_text=False)

# Output Info
print('{} ops in the frozen graph.'.format(len(constant_graph.node)))
print('saved the freezed graph (ready for inference) at: ', PB_MODEL_PATH)

2377 ops in the frozen graph.
('saved the freezed graph (ready for inference) at: ', '/home/gustav/workspace/Mobile_Mask_RCNN/logs/mask_rcnn_512_cocoperson_0396.pb')


## Inference Model
Now, we can load the model from the pb file and then use it to infere

In [27]:
def mold_inputs(images):
        """Takes a list of images and modifies them to the format expected
        as an input to the neural network.
        images: List of image matricies [height,width,depth]. Images can have
            different sizes.

        Returns 3 Numpy matricies:
        molded_images: [N, h, w, 3]. Images resized and normalized.
        image_metas: [N, length of meta data]. Details about each image.
        windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the
            original image (padding excluded).
        """
        molded_images = []
        image_metas = []
        windows = []
        for image in images:
            # Resize image to fit the model expected size
            # TODO: move resizing to mold_image()
            molded_image, window, scale, padding = utils.resize_image(
                image,
                max_dim=config.IMAGE_MAX_DIM)
            print(image.shape)
            print('Image resized at: ', molded_image.shape)
            print(window)
            print(scale)
            """Takes RGB images with 0-255 values and subtraces
                   the mean pixel and converts it to float. Expects image
                   colors in RGB order."""
            molded_image = mold_image(molded_image, config)
            print('Image molded')
            #print(a)
            """Takes attributes of an image and puts them in one 1D array."""
            image_meta = compose_image_meta(
                0, image.shape, window,
                np.zeros([config.NUM_CLASSES], dtype=np.int32))
            print('Meta of image prepared')
            # Append
            molded_images.append(molded_image)
            windows.append(window)
            image_metas.append(image_meta)
        # Pack into arrays
        molded_images = np.stack(molded_images)
        image_metas = np.stack(image_metas)
        windows = np.stack(windows)
        return molded_images, image_metas, windows

def mold_image(images, config):
    return images.astype(np.float32) - config.MEAN_PIXEL

def compose_image_meta(image_id, image_shape, window, active_class_ids):
    """Takes attributes of an image and puts them in one 1D array.

    image_id: An int ID of the image. Useful for debugging.
    image_shape: [height, width, channels]
    window: (y1, x1, y2, x2) in pixels. The area of the image where the real
            image is (excluding the padding)
    active_class_ids: List of class_ids available in the dataset from which
        the image came. Useful if training on images from multiple datasets
        where not all classes are present in all datasets.
    """
    meta = np.array(
        [image_id] +            # size=1
        list(image_shape) +     # size=3
        list(window) +          # size=4 (y1, x1, y2, x2) in image cooredinates
        list(active_class_ids)  # size=num_classes
    )
    return meta

def unmold_detections(detections, mrcnn_mask, image_shape, window):
    """Reformats the detections of one image from the format of the neural
    network output to a format suitable for use in the rest of the
    application.

    detections: [N, (y1, x1, y2, x2, class_id, score)]
    mrcnn_mask: [N, height, width, num_classes]
    image_shape: [height, width, depth] Original size of the image before resizing
    window: [y1, x1, y2, x2] Box in the image where the real image is excluding the padding.

        Returns:
        boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
        class_ids: [N] Integer class IDs for each bounding box
        scores: [N] Float probability scores of the class_id
        masks: [height, width, num_instances] Instance masks
        """
    # How many detections do we have?
    # Detections array is padded with zeros. Find the first class_id == 0.
    zero_ix = np.where(detections[:, 4] == 0)[0]
    N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]
    print('Number of detections: ',N)
    print('Window: ',window)
    # Extract boxes, class_ids, scores, and class-specific masks
    boxes = detections[:N, :4]
    print('boxes',boxes.shape,' ',boxes)
    class_ids = detections[:N, 4].astype(np.int32)
    print('Class_ids: ',class_ids.shape,' ',class_ids)
    scores = detections[:N, 5]
    print('Scores: ',scores.shape,' ',scores)
    masks = mrcnn_mask[np.arange(N), :, :, class_ids]
    print('Masks: ',masks.shape)# masks)
    # Compute scale and shift to translate coordinates to image domain.
    print(image_shape[0])
    print(window[2] - window[0])
    h_scale = image_shape[0] / (window[2] - window[0])
    print('h_scale: ',h_scale)
    w_scale = image_shape[1] / (window[3] - window[1])
    print('w_scale: ',w_scale)
    scale = min(h_scale, w_scale)
    shift = window[:2]  # y, x
    print('shift: ',shift)
    scales = np.array([scale, scale, scale, scale])
    print('scales: ',scales)
    shifts = np.array([shift[0], shift[1], shift[0], shift[1]])
    print('shifts: ',shifts)
    # Translate bounding boxes to image domain
    boxes = np.multiply(boxes - shifts, scales).astype(np.int32)
    print('boxes: ',boxes.shape,' ',boxes)
    # Filter out detections with zero area. Often only happens in early
    # stages of training when the network weights are still a bit random.
    exclude_ix = np.where(
        (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
    if exclude_ix.shape[0] > 0:
        boxes = np.delete(boxes, exclude_ix, axis=0)
        class_ids = np.delete(class_ids, exclude_ix, axis=0)
        scores = np.delete(scores, exclude_ix, axis=0)
        masks = np.delete(masks, exclude_ix, axis=0)
        N = class_ids.shape[0]

    # Resize masks to original image size and set boundary threshold.
    full_masks = []
    for i in range(N):
        # Convert neural network mask to full size mask
        full_mask = utils.unmold_mask(masks[i], boxes[i], image_shape)
        full_masks.append(full_mask)
    full_masks = np.stack(full_masks, axis=-1)\
        if full_masks else np.empty((0,) + masks.shape[1:3])

    return boxes, class_ids, scores, full_masks

In [28]:
import cv2
from mmrcnn import utils
from mmrcnn import visualize

with tf.gfile.FastGFile(PB_MODEL_PATH, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        _ = tf.import_graph_def(graph_def, name='')
print('Graph loaded.')

testImage =os.path.join(ROOT_DIR,'ski.jpg') #image of the size defined in the config
image = cv2.cvtColor(cv2.imread(testImage), cv2.COLOR_BGR2RGB)

sess = tf.InteractiveSession()
molded_images, image_metas, windows = mold_inputs(image)
print(molded_images.shape)
print('Images meta: ',image_metas)
img_ph = sess.graph.get_tensor_by_name('input_image:0')
print(img_ph)
img_meta_ph = sess.graph.get_tensor_by_name('input_image_meta:0')
print(img_meta_ph)
detectionsT = sess.graph.get_tensor_by_name('output_detections:0')
print('Found ',detectionsT)
mrcnn_classT = sess.graph.get_tensor_by_name('output_mrcnn_class:0')
print('Found ',mrcnn_classT)
mrcnn_bboxT = sess.graph.get_tensor_by_name('output_mrcnn_bbox:0')
print('Found ', mrcnn_bboxT)
mrcnn_maskT = sess.graph.get_tensor_by_name('output_mrcnn_mask:0')
print('Found ', mrcnn_maskT)
roisT = sess.graph.get_tensor_by_name('output_rois:0')
print('Found ', roisT)
        
detections = sess.run(detectionsT, feed_dict={img_ph: molded_images, img_meta_ph: image_metas})
#print('Detections: ',detections[0].shape, detections[0])
mrcnn_class = sess.run(mrcnn_classT, feed_dict={img_ph: molded_images, img_meta_ph: image_metas})
#print('Classes: ',mrcnn_class[0].shape, mrcnn_class[0])
mrcnn_bbox = sess.run(mrcnn_bboxT, feed_dict={img_ph: molded_images, img_meta_ph: image_metas})
#print('BBoxes: ',mrcnn_bbox[0].shape, mrcnn_bbox[0])
mrcnn_mask = sess.run(mrcnn_maskT, feed_dict={img_ph: molded_images, img_meta_ph: image_metas})
#print('Masks: ',mrcnn_mask[0].shape )#, outputs1[0])
rois = sess.run(roisT, feed_dict={img_ph: molded_images, img_meta_ph: image_metas})
#print('Rois: ',rois[0].shape, rois[0])

results = []
for i, image in enumerate(images):
    print('Calculating results for image#',i)
    final_rois, final_class_ids, final_scores, final_masks =\
    unmold_detections(detections[i], mrcnn_mask[i],
                                    image.shape, windows[i])
    results.append({
        "rois": final_rois,
        "class_ids": final_class_ids,
        "scores": final_scores,
        "masks": final_masks,
    })
r = results[0]
#print(r)
print (r['scores'][0])
print (r['class_ids'][0])
print (r['rois'][0])
print (r['masks'][0].shape)

class_names = ["BG","Person"]
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], class_names, r['scores'], ax=get_ax())
print('Done')

Graph loaded.


ValueError: Unable to create correctly shaped tuple from [(0, 0), (255, 255), (0, 0)]