# Mask R-CNN - Train on NewShapes Dataset

### Notes from implementation

This notebook shows how to train Mask R-CNN on your own dataset. To keep things simple we use a synthetic dataset of shapes (squares, triangles, and circles) which enables fast training. You'd still need a GPU, though, because the network backbone is a Resnet101, which would be too slow to train on a CPU. On a GPU, you can start to get okay-ish results in a few minutes, and good results in less than an hour.

The code of the *Shapes* dataset is included below. It generates images on the fly, so it doesn't require downloading any data. And it can generate images of any size, so we pick a small image size to train faster. 


In [3]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:95% !important; }</style>"))

%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import sys
import random
import math
import re
import  gc
import time
import numpy as np
import cv2
import matplotlib
import matplotlib.pyplot as plt
import tensorflow as tf
import keras
import pprint
import keras.backend as KB
sys.path.append('../')

import mrcnn.model      as modellib
import mrcnn.visualize  as visualize
# import mrcnn.shapes     as shapes

import mrcnn.new_shapes as new_shapes
from mrcnn.model       import log
from mrcnn.dataset     import Dataset 

from mrcnn.utils       import stack_tensors, stack_tensors_3d
from mrcnn.datagen     import data_generator, load_image_gt
from mrcnn.callbacks   import get_layer_output_1,get_layer_output_2
from mrcnn.visualize   import plot_gaussian
# from mrcnn.pc_layer    import PCTensor
# from mrcnn.pc_layer   import PCNLayer

# Root directory of the project
ROOT_DIR = os.getcwd()
# Folder that holds pre-trained weights and training logs.
# Written as a raw string because '\M' is not a valid escape sequence (newer
# Python versions warn about it); the resulting value is unchanged.
# Set the MRCNN_MODEL_PATH environment variable to use another machine's
# layout without editing the notebook.
MODEL_PATH = os.environ.get('MRCNN_MODEL_PATH', r'E:\Models')
# Directory to save logs and trained model
MODEL_DIR = os.path.join(MODEL_PATH, "mrcnn_logs")
# Path to COCO trained weights
COCO_MODEL_PATH   = os.path.join(MODEL_PATH, "mask_rcnn_coco.h5")
# Path to the ResNet50 (no-top) backbone weights
RESNET_MODEL_PATH = os.path.join(MODEL_PATH, "resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5")

print("Tensorflow Version: {}   Keras Version : {} ".format(tf.__version__,keras.__version__))
pp = pprint.PrettyPrinter(indent=2, width=100)
np.set_printoptions(linewidth=100,precision=4)


# Build configuration object -----------------------------------------------
config = new_shapes.NewShapesConfig()
config.BATCH_SIZE      = 5                  # Images per training batch (# GPUs * images/GPU)
config.IMAGES_PER_GPU  = 5                  # Must match BATCH_SIZE
config.STEPS_PER_EPOCH = 4
# The FCN input uses the same (height, width) as the image
config.FCN_INPUT_SHAPE = config.IMAGE_SHAPE[0:2]

# Build shape dataset        -----------------------------------------------
# Training dataset: 3000 synthetic images
dataset_train = new_shapes.NewShapesDataset()
dataset_train.load_shapes(3000, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_train.prepare()

# Validation dataset: 500 synthetic images
dataset_val = new_shapes.NewShapesDataset()
dataset_val.load_shapes(500, config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1])
dataset_val.prepare()

# config.LAST_EPOCH_RAN  = 5686  # <---- if we want to continue training from a previously run

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Tensorflow Version: 1.6.0   Keras Version : 2.1.4 
 Initialize config object - super
(56, 56)
 Shapes Per Image:  7
 Add image --->  0
    ===> Image Id : ( 21 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (153, 85, 212), (34, 24, 23, 7)),
        ('cloud', (104, 110, 87), (66, 27, 29, 5)),
        ('building', (125, 42, 25), (31, 50, 11, 13)),
        ('person', (174, 137, 188), (63, 81, 3, 18)),
        ('building', (215, 61, 229), (61, 85, 20, 30)),
        ('person', (80, 240, 65), (49, 92, 4, 21))]
    ------ shapes after removal of totally hidden shapes ------
[       ('cloud', (153, 85, 212), (34, 24, 23, 7)),
        ('cloud', (104, 110, 87), (66, 27, 29, 5)),
        ('building', (125, 42, 25), (31, 50, 11, 13)),
        ('building', (215, 61, 229), (61, 85, 20, 30)),
        ('person', (80, 240, 65), (49, 92, 4, 21))]
    Number of shapes now is :  5
 

 Add image --->  425
    ===> Image Id : ( 427 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (13, 230, 171), (14, 11, 5, 5)),
        ('cloud', (248, 212, 105), (114, 29, 33, 8)),
        ('tree', (139, 3, 148), (70, 54, 10, 10)),
        ('building', (106, 94, 62), (62, 60, 14, 18)),
        ('person', (62, 76, 152), (75, 64, 3, 15)),
        ('person', (202, 74, 140), (102, 101, 4, 23))]
    ------ shapes after removal of totally hidden shapes ------
[       ('sun', (13, 230, 171), (14, 11, 5, 5)),
        ('cloud', (248, 212, 105), (114, 29, 33, 8)),
        ('building', (106, 94, 62), (62, 60, 14, 18)),
        ('person', (62, 76, 152), (75, 64, 3, 15)),
        ('person', (202, 74, 140), (102, 101, 4, 23))]
    Number of shapes now is :  5
 Add image --->  450
 Add image --->  475
    ===> Image Id : ( 484 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (126, 76, 20), (45, 21, 17, 5)),
        ('building', (24

 Add image --->  675
 Add image --->  700
    ===> Image Id : ( 710 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (238, 58, 163), (119, 23, 9, 9)),
        ('cloud', (134, 253, 175), (79, 26, 27, 5)),
        ('cloud', (238, 101, 207), (25, 28, 31, 6)),
        ('tree', (253, 255, 147), (60, 57, 11, 11)),
        ('building', (98, 76, 97), (55, 62, 14, 19)),
        ('car', (137, 156, 52), (51, 95, 25, 12))]
    ------ shapes after removal of totally hidden shapes ------
[       ('sun', (238, 58, 163), (119, 23, 9, 9)),
        ('cloud', (134, 253, 175), (79, 26, 27, 5)),
        ('cloud', (238, 101, 207), (25, 28, 31, 6)),
        ('building', (98, 76, 97), (55, 62, 14, 19)),
        ('car', (137, 156, 52), (51, 95, 25, 12))]
    Number of shapes now is :  5
 Add image --->  725
    ===> Image Id : ( 734 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (114, 43, 166), (85, 11, 5, 5)),
        ('tree', (62, 215, 7), (

    ===> Image Id : ( 1059 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('building', (173, 109, 158), (24, 43, 10, 10)),
        ('building', (46, 160, 131), (28, 52, 12, 14)),
        ('tree', (107, 198, 171), (85, 65, 13, 13)),
        ('building', (166, 19, 58), (91, 78, 18, 26)),
        ('building', (61, 226, 146), (98, 81, 19, 28))]
    ------ shapes after removal of totally hidden shapes ------
[       ('building', (173, 109, 158), (24, 43, 10, 10)),
        ('building', (46, 160, 131), (28, 52, 12, 14)),
        ('building', (166, 19, 58), (91, 78, 18, 26)),
        ('building', (61, 226, 146), (98, 81, 19, 28))]
    Number of shapes now is :  4
 Add image --->  1075
 Add image --->  1100
 Add image --->  1125
 Add image --->  1150
    ===> Image Id : ( 1166 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (80, 26, 206), (83, 22, 19, 6)),
        ('sun', (100, 237, 46), (57, 25, 10, 10)),
        ('building', (66, 

    ===> Image Id : ( 1514 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (154, 43, 63), (82, 22, 9, 9)),
        ('tree', (14, 144, 29), (103, 56, 11, 11)),
        ('building', (184, 10, 90), (99, 56, 13, 16)),
        ('person', (155, 54, 12), (29, 64, 3, 15))]
    ------ shapes after removal of totally hidden shapes ------
[       ('sun', (154, 43, 63), (82, 22, 9, 9)),
        ('building', (184, 10, 90), (99, 56, 13, 16)),
        ('person', (155, 54, 12), (29, 64, 3, 15))]
    Number of shapes now is :  3
 Add image --->  1525
 Add image --->  1550
    ===> Image Id : ( 1555 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (251, 2, 106), (103, 28, 31, 7)),
        ('tree', (117, 254, 206), (83, 62, 12, 12)),
        ('building', (168, 81, 191), (84, 72, 16, 23)),
        ('car', (205, 252, 205), (88, 107, 30, 15))]
    ------ shapes after removal of totally hidden shapes ------
[       ('cloud', (251, 2, 106), 

 Add image --->  1950
 Add image --->  1975
 Add image --->  2000
    ===> Image Id : ( 2000 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (232, 12, 146), (87, 26, 27, 5)),
        ('cloud', (73, 191, 29), (88, 29, 33, 11)),
        ('person', (122, 19, 38), (67, 81, 3, 18)),
        ('person', (112, 115, 233), (33, 97, 4, 22))]
    ------ shapes after removal of totally hidden shapes ------
[       ('cloud', (73, 191, 29), (88, 29, 33, 11)),
        ('person', (122, 19, 38), (67, 81, 3, 18)),
        ('person', (112, 115, 233), (33, 97, 4, 22))]
    Number of shapes now is :  3
 Add image --->  2025
    ===> Image Id : ( 2026 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('cloud', (219, 206, 128), (108, 23, 21, 7)),
        ('cloud', (199, 9, 19), (69, 23, 21, 5)),
        ('tree', (131, 78, 173), (33, 60, 12, 12)),
        ('building', (86, 184, 99), (36, 61, 14, 18)),
        ('person', (215, 163, 238), (102, 76, 3, 1

    ===> Image Id : ( 2397 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (22, 207, 133), (30, 18, 7, 7)),
        ('cloud', (205, 210, 184), (15, 30, 35, 8)),
        ('cloud', (0, 126, 82), (25, 31, 37, 12)),
        ('tree', (143, 63, 230), (99, 58, 11, 11)),
        ('car', (1, 170, 16), (91, 75, 18, 9)),
        ('car', (6, 151, 198), (26, 100, 27, 13))]
    ------ shapes after removal of totally hidden shapes ------
[       ('sun', (22, 207, 133), (30, 18, 7, 7)),
        ('cloud', (0, 126, 82), (25, 31, 37, 12)),
        ('tree', (143, 63, 230), (99, 58, 11, 11)),
        ('car', (1, 170, 16), (91, 75, 18, 9)),
        ('car', (6, 151, 198), (26, 100, 27, 13))]
    Number of shapes now is :  5
 Add image --->  2400
 Add image --->  2425
 Add image --->  2450
 Add image --->  2475
 Add image --->  2500
    ===> Image Id : ( 2524 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (57, 189, 49), (113, 7, 4, 4)),
    

        ('cloud', (54, 44, 137), (74, 28, 31, 7)),
        ('cloud', (245, 132, 44), (78, 28, 31, 7)),
        ('building', (175, 178, 158), (72, 81, 19, 28)),
        ('person', (11, 36, 9), (98, 88, 4, 20)),
        ('person', (103, 69, 21), (68, 90, 4, 21))]
    Number of shapes now is :  6
 Add image --->  75
 Add image --->  100
 Add image --->  125
    ===> Image Id : ( 125 )   ---- Zero Mask Encountered 
    ------ Original Shapes ------
[       ('sun', (232, 228, 88), (70, 11, 5, 5)),
        ('tree', (193, 156, 177), (59, 44, 8, 8)),
        ('building', (36, 29, 16), (71, 45, 10, 11)),
        ('building', (154, 78, 43), (50, 54, 12, 15)),
        ('person', (74, 250, 1), (96, 74, 3, 17)),
        ('person', (151, 157, 165), (60, 77, 3, 18)),
        ('building', (109, 104, 78), (63, 85, 20, 30))]
    ------ shapes after removal of totally hidden shapes ------
[       ('sun', (232, 228, 88), (70, 11, 5, 5)),
        ('tree', (193, 156, 177), (59, 44, 8, 8)),
        ('buildin

In [None]:
# Show the settings of the config object built in the setup cell.
config.display() 

In [None]:
# Load and display random samples.
# np.random.choice draws with replacement by default and no seed is set in
# this notebook, so the four ids differ between runs and may repeat.
image_ids = np.random.choice(dataset_train.image_ids, 4)
for image_id in image_ids:
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    # limit=6 is forwarded to display_top_masks (presumably caps the number of
    # masks shown per image -- confirm in mrcnn.visualize).
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names, limit=6)

In [4]:
# Drop objects left over from a previous run so the model can be rebuilt in a
# fresh Keras/TensorFlow session.
# Each name is removed on its own: a single `del a, b, c` stops at the first
# undefined name, which would leave the remaining objects alive and skip the
# garbage collection. Popping from globals() ignores names that do not exist,
# so no exception has to be swallowed.
for _stale_name in ('model', 'train_generator', 'val_generator', 'mm'):
    globals().pop(_stale_name, None)
del _stale_name
gc.collect()
KB.clear_session()

# Load and display random samples
# image_ids = np.random.choice(dataset_train.image_ids, 3)
# for image_id in [3]:
#     image = dataset_train.load_image(image_id)
#     mask, class_ids = dataset_train.load_mask(image_id)
#     visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)

## Create Model

In [5]:
# Build the Mask R-CNN wrapper in training mode; logs and checkpoints are
# written under MODEL_DIR (see the "set_log_dir" lines in the cell output).
model = modellib.MaskRCNN(mode="training", config=config, model_dir=MODEL_DIR)

    set_log_dir: Checkpoint path set to : E:\Models\mrcnn_logs\shapes20180510T1452\mask_rcnn_shapes_{epoch:04d}.h5
    set_log_dir: self.epoch set to 0 

>>> Resnet Graph 
     Input_image shape : (?, 128, 128, 3)
     After ZeroPadding2D  : (?, 134, 134, 3) (?, 134, 134, 3)
     After Conv2D padding : (?, 64, 64, 64) (?, 64, 64, 64)
     After BatchNorm      : (?, 64, 64, 64) (?, 64, 64, 64)
     After MaxPooling2D   : (?, 32, 32, 64) (?, 32, 32, 64)

>>> Feature Pyramid Network (FPN) Graph 
     FPN P2 shape : (None, 32, 32, 256)
     FPN P3 shape : (None, 16, 16, 256)
     FPN P4 shape : (None, 8, 8, 256)
     FPN P5 shape : (None, 4, 4, 256)
     FPN P6 shape : (None, 2, 2, 256)

>>> RPN Layer 
     Input_feature_map shape : (?, ?, ?, 256)
     anchors_per_location    : 3
     depth                   : 256
     Input_feature_map shape : (?, ?, ?, 256)
     anchors_per_location    : 3
     anchor_stride           : 1

>>> RPN Outputs  <class 'list'>
      rpn_class_logits/concat:0
 


>>> mrcnn_bbox_loss_graph 
    target_class_ids  size : (5, ?)
    pred_bbox size         : (?, 32, 7, 4)
    target_bbox size       : (5, ?, ?)

>>> mrcnn_bbox_loss_graph 
    target_class_ids  size : (?, 1)
    pred_bbox size         : (?, 32, 7, 4)
    target_bbox size       : (?, 32, 4)

>>> mrcnn_mask_loss_graph 
    target_class_ids shape : (5, ?)
    target_masks     shape : (5, ?, ?, ?)
    pred_masks       shape : (?, 32, 28, 28, 7)
    target_class_ids shape : (?,)
    target_shape       shape : (4,)
    target_masks     shape : (?, ?, ?)
    pred_shape       shape : (5,)
    pred_masks       shape : (?, ?, ?, ?)
     y_true shape: (?, ?, ?)
     y_pred shape: (?, ?, ?)
     final loss shape: (1, 1) <class 'tensorflow.python.framework.ops.Tensor'> False

>>> mrcnn_mask_loss_graph 
    target_class_ids shape : (?, 1)
    target_masks     shape : (?, 32, 28, 28)
    pred_masks       shape : (?, 32, 28, 28, 7)
    target_class_ids shape : (?,)
    target_shape       shape : (4,

In [6]:
# Echo the paths configured in the setup cell, plus the most recent checkpoint.
print('MODEL_PATH        : ', MODEL_PATH)
print('COCO_MODEL_PATH   : ', COCO_MODEL_PATH)
print('RESNET_MODEL_PATH : ', RESNET_MODEL_PATH)
print('MODEL_DIR         : ', MODEL_DIR)
# find_last() returns a (log directory, checkpoint file) pair, as the captured
# output of this cell shows.
print('Last Saved Model  : ', model.find_last())

MODEL_PATH        :  E:\Models
COCO_MODEL_PATH   :  E:\Models\mask_rcnn_coco.h5
RESNET_MODEL_PATH :  E:\Models\resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
MODEL_DIR         :  E:\Models\mrcnn_logs
Last Saved Model  :  ('E:\\Models\\mrcnn_logs\\shapes20180509T1928', 'E:\\Models\\mrcnn_logs\\shapes20180509T1928\\mask_rcnn_shapes_2500.h5')


###  Print some model information 

In [None]:
# Show the configuration attached to the model instance.
model.config.display()

In [None]:
# List the output tensors of the underlying Keras model.
print('\n Outputs: ') 
pp.pprint(model.keras_model.outputs)
# Optional extra diagnostics (disabled):
# print('\n Losses (model.metrics_names): ') 
# pp.pprint(model.get_deduped_metrics_names())
# model.keras_model.summary(line_length = 150) 

In [None]:
# model.compile_only(learning_rate=config.LEARNING_RATE, layers='all')
# sys.setrecursionlimit(5000)
# tst = model.keras_model.to_json()
# save_model(MODEL_DIR, 'my_saved_model')
#model.keras_model.summary(line_length = 120) 
# model.compile_only(learning_rate=config.LEARNING_RATE, layers='heads')
# KB.set_learning_phase(1)

### Load weights file

In [None]:
# Which weights to start with?
init_with = "coco"  # imagenet, coco, or last

if init_with == "imagenet":
    # Backbone weights from the local ResNet file.
    # Alternative: model.load_weights(model.get_imagenet_weights(), by_name=True)
    loc = model.load_weights(RESNET_MODEL_PATH, by_name=True)
elif init_with == "coco":
    # Load weights trained on MS COCO, but skip layers that
    # are different due to the different number of classes.
    # See README for instructions to download the COCO weights.
    loc = model.load_weights(COCO_MODEL_PATH, by_name=True,
                             exclude=["mrcnn_class_logits", "mrcnn_bbox_fc",
                                      "mrcnn_bbox", "mrcnn_mask"])
elif init_with == "last":
    # Load the last model you trained and continue training.
    # find_last() returns (log directory, checkpoint path); use the checkpoint.
    loc = model.load_weights(model.find_last()[1], by_name=True)
else:
    # Without this branch a typo in init_with would load nothing and the cell
    # would still report success.
    raise ValueError(
        "init_with must be 'imagenet', 'coco' or 'last', got {!r}".format(init_with))
print('Load weights complete')

## Training

Train in two stages:
1. Only the heads. Here we're freezing all the backbone layers and training only the randomly initialized layers (i.e. the ones that we didn't use pre-trained weights from MS COCO). To train only the head layers, pass `layers='heads'` to the `train()` function.

    - #### Or now we can pass a list of layers we want to train in layers !
2. Fine-tune all layers. For this simple example it's not necessary, but we're including it to show the process. Simply pass `layers="all"` to train all layers.

## Training head using  Keras.model.fit_generator()

In [None]:
# Train the head branches.
# Instead of layers="heads", this cell passes a list of layer-name patterns
# (see the markdown note above about passing a list of layers) and the list
# of loss names to train with.
# Settings dated Wed 09-05-2018.
train_layers = ['mrcnn', 'fpn','rpn']
loss_names   = [  "rpn_class_loss", "rpn_bbox_loss" , "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"]


# NOTE(review): epochs=2500 looks like an absolute target epoch rather than a
# number of additional epochs (the disabled `epochs_to_run` argument suggests
# a relative alternative) -- confirm against model.train().
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE, 
            epochs = 2500,
#             epochs_to_run =2, 
            layers = train_layers,
            losses= loss_names
            )

## - Training heads using train_in_batches ()

We need to use this method for the time being because `fit_generator` does not provide easy access to the layer outputs from within Keras callbacks. By training in batches, we pass a batch through the network, pick up the generated RoI detections and bounding boxes, and generate our semantic / gaussian tensors ...


In [None]:
# Batch-wise training of the head layers for 3 epochs, at one sixth of the
# configured learning rate.
model.train_in_batches(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE/6, 
            epochs_to_run = 3,
            layers='heads')

## Fine Tuning
Fine tune all layers

In [None]:
# Fine tune all layers
# Passing layers="all" trains all layers. You can also 
# pass a regular expression to select which layers to
# train by name pattern.
# The learning rate is reduced to a tenth of the configured value.
model.train(dataset_train, dataset_val, 
            learning_rate=config.LEARNING_RATE / 10,
            epochs=211,
            layers="all")

## Save 

In [None]:
# Save weights manually.
# Typically not needed because callbacks save after every epoch.
# This cell is live: running it writes the current weights to
# MODEL_DIR/mask_rcnn_shapes_2500.h5 (the file name is fixed, so an existing
# file of that name is replaced). Skip the cell to rely on the checkpoints.
model_path = os.path.join(MODEL_DIR, "mask_rcnn_shapes_2500.h5")
model.keras_model.save_weights(model_path)

###  Define Data Generators, get next shapes from generator and display loaded shapes

### Define Data Generator

In [None]:
# Build shuffled, non-augmented batch generators over the training and
# validation datasets, using the batch size from the model's config.
train_generator = data_generator(dataset_train, model.config, shuffle=True,
                                 batch_size=model.config.BATCH_SIZE,
                                 augment = False)
val_generator = data_generator(dataset_val, model.config, shuffle=True, 
                                batch_size=model.config.BATCH_SIZE,
                                augment=False)

### Get next shapes from generator and display loaded shapes

In [None]:
# Pull one batch from the training generator; re-running this cell advances
# the generator, so the cells below then operate on a different batch.
train_batch_x, train_batch_y = next(train_generator)

In [None]:
# Show every image of the current batch together with its masks.
# train_batch_x, train_batch_y = next(train_generator)

# Locate the image-meta input by name instead of by a hard-coded position.
imgmeta_idx = model.keras_model.input_names.index('input_image_meta')
img_meta    = train_batch_x[imgmeta_idx]

# Build the class legend from the dataset itself. The previous hard-coded
# "circle / square / triangle" legend belonged to the old Shapes dataset and
# does not match NewShapes (cloud, building, person, ...).
# Assumes class_names is ordered by class id -- TODO confirm in mrcnn.dataset.
class_legend = ', '.join('{}: {}'.format(class_id, class_name)
                         for class_id, class_name in enumerate(dataset_train.class_names))

for img_idx in range(config.BATCH_SIZE):
    # Column 0 of the image meta row holds the image id.
    image_id = img_meta[img_idx,0]
    image = dataset_train.load_image(image_id)
    mask, class_ids = dataset_train.load_mask(image_id)
    print('Image id: ',image_id)
    print('Image meta', img_meta[img_idx])
    print('Classes ({}): '.format(class_legend), class_ids)
    visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)



### Push Data thru model using get_layer_output()

In [None]:

# Push the current training batch through the Keras model and collect the
# layer outputs as a list; later cells index into `layers_out` by position.
# NOTE(review): the meaning of the trailing argument `1` is not visible here
# (presumably the Keras learning-phase flag) -- confirm in mrcnn.callbacks.
layers_out = get_layer_output_2(model.keras_model, train_batch_x, 1)


In [None]:
# Give names to selected model inputs/outputs of the current batch.
# The numeric positions depend on the order of the model's inputs/outputs and
# must be updated whenever that order changes.
input_gt_class_ids = train_batch_x[4]

# NOTE(review): later cells read target_class_ids from layers_out[1] and the
# class logits from layers_out[8] -- confirm which positions are current.
target_class_ids = layers_out[5]
mrcnn_class_logits = layers_out[9]
rpn_class_loss   = layers_out[13]
rpn_bbox_loss    = layers_out[14]
mrcnn_class_loss = layers_out[15]
mrcnn_bbox_loss  = layers_out[16]
mrcnn_mask_loss  = layers_out[17]
active_class_ids = layers_out[20]
# pred_masks = tf.identity(layers_out[18])
# gt_masks   = tf.identity(layers_out[19])

# shape = KB.int_shape(pred_masks)

In [None]:
# Print the loss values and class-id arrays captured in the previous cell.
print(rpn_class_loss, rpn_bbox_loss)
print(mrcnn_class_loss, mrcnn_bbox_loss, mrcnn_mask_loss)
print(active_class_ids)
print()
# Index 1 selects the second entry along the first axis (presumably the
# second image of the batch).
print(target_class_ids[1])
print()
print(mrcnn_class_logits[1])
print('gt class ids')
print(input_gt_class_ids)

###  Simulate `mrcnn_class_loss` computation

In [None]:
# Replay of the mrcnn_class_loss graph on the arrays captured from the model,
# written as plain cell code so that it can actually be executed (a `return`
# statement is only valid inside a function).
#
# Inputs come from the "unpack layer outputs" cell above:
#   target_class_ids, mrcnn_class_logits, active_class_ids
# NOTE(review): mrcnn_class_logits was read from layers_out[9]; another cell
# reads the logits from layers_out[8] -- confirm which position is correct.
pred_class_logits = mrcnn_class_logits

print('\n>>> mrcnn_class_loss_graph ' )
print('    target_class_ids  size :', target_class_ids.shape)
print('    pred_class_logits size :', pred_class_logits.shape)
print('    active_class_ids  size :', active_class_ids.shape)
target_class_ids = tf.cast(target_class_ids, 'int64')

# Find predictions of classes that are not in the dataset.
pred_class_ids = tf.argmax(pred_class_logits, axis=2)

# TODO: Update this line to work with batch > 1. Right now it assumes all
#       images in a batch have the same active_class_ids
# Cast to float so the mask can be multiplied with the float loss below.
pred_active = tf.to_float(tf.gather(active_class_ids[0], pred_class_ids))

# Loss
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
    labels=target_class_ids, logits=pred_class_logits)

# Erase losses of predictions of classes that are not in the active
# classes of the image.
loss = loss * pred_active

# Compute loss mean. Use only predictions that contribute
# to the loss to get a correct mean.
loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active)
loss = KB.reshape(loss, [1, 1])

# Evaluate in the Keras session and show the resulting [1, 1] loss value.
print(KB.eval(loss))


## Plot Predicted and Ground Truth Probability Heatmaps `pred_gaussian` and `gt_gaussian` (Tensorflow)

`pred_gaussian2` and `gt_gaussian2` from Tensorflow PCN layer

In [None]:
# Plot, for one image of the batch, the ground-truth and predicted gaussian
# heatmaps of every class.
# Previously used positions (kept for reference):
# gt_heatmap  = layers_out[27]     # gt_gaussian
# pred_heatmap= layers_out[24]  # pred_gaussian
gt_heatmap  = layers_out[21]     # gt_gaussian
pred_heatmap= layers_out[18]  # pred_gaussian
print('gt_gaussian heatmap shape : ', gt_heatmap.shape, ' pred_gaussian heatmap shape: ', pred_heatmap.shape)
num_images = 1 # config.IMAGES_PER_GPU  (not read anywhere in this cell)
num_classes = config.NUM_CLASSES

# Position of the image inside the batch.
img = 2

image_id = img_meta[img,0]
print('Image id: ',image_id)
# NOTE(review): this legend names the old Shapes classes and prints no class
# ids (they are only loaded two lines further down) -- update for NewShapes.
print('Classes (1: circle, 2: square, 3: triangle ): ')
image = dataset_train.load_image(image_id)
mask, class_ids = dataset_train.load_mask(image_id)
visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)


# The heatmaps are indexed [image, :, :, class]; one plot pair per class.
for cls in range(num_classes):
    ttl = 'GROUND TRUTH HEATMAP - image :  {} class: {} '.format(img,cls)
    print(' *** Zout  ', gt_heatmap[img,:,:,cls].shape, ttl)   
    plot_gaussian( gt_heatmap[img,:,:,cls], title = ttl)
    
    ttl = 'PREDICTED heatmap  - image :  {} class: {} '.format(img,cls)     
    print(' *** pred_heatmap ', pred_heatmap[img,:,:,cls].shape, ttl)   
    plot_gaussian(pred_heatmap[img,:,:,cls], title = ttl)  



### Plot Output from FCN network `fcn_bilinear` and compare with `pred_gaussian`

In [None]:
from mrcnn.visualize import plot_gaussian
import matplotlib as plt

%matplotlib inline
img = 2
image_id = img_meta[img,0]
print('Image id: ',image_id)
print('Classes (1: circle, 2: square, 3: triangle ): ')
image = dataset_train.load_image(image_id)
mask, class_ids = dataset_train.load_mask(image_id)
visualize.display_top_masks(image, mask, class_ids, dataset_train.class_names)


Zout  = layers_out[21]     # gt_gaussiam 
Zout2 = layers_out[12]     # fcn_bilinear

print(Zout.shape, Zout2.shape)
num_images = config.IMAGES_PER_GPU
num_classes = config.NUM_CLASSES


for cls in range(num_classes):
    ttl = 'GroundTruth - image :  {} class: {} '.format(img,cls)
    print(' *** Zout  ', Zout[img,:,:,cls].shape, ttl)   
    plot_gaussian( Zout[img,:,:,cls], title = ttl)
    
    ttl = 'FCN_Bilinear- image :  {} class: {} '.format(img,cls)     
    print(' *** Zout2 ', Zout2[img,:,:,cls].shape, ttl)   
    plot_gaussian(Zout2[img,:,:,cls], title = ttl)  


### Display ground truth bboxes from Shapes database (using `load_image_gt` )

Here we are displaying the ground truth bounding boxes as provided by the dataset

In [None]:
# Draw ground-truth boxes of the first batch image, as returned by the
# dataset loader.
img = 0
image_id = img_meta[img,0]
print('Image id: ',image_id)
# load_image_gt returns the image, its meta record, class ids, boxes and
# masks (mini-masks requested here, augmentation off).
p_original_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
            load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)
# print(p_gt_class_id.shape, p_gt_bbox.shape, p_gt_mask.shape)
# Only the first three boxes are printed and drawn.
print(p_gt_bbox[0:3,:])
print(p_gt_class_id)
visualize.draw_boxes(p_original_image, p_gt_bbox[0:3])

# Variant that prints and draws all boxes (disabled):
# image_id = img_meta[img,0]
# print('Image id: ',image_id)
# p_original_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
#             load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)
# # print(p_gt_class_id.shape, p_gt_bbox.shape, p_gt_mask.shape)
# print(p_gt_bbox)
# print(p_gt_class_id)
# visualize.draw_boxes(p_original_image, p_gt_bbox)

### Display Predicted  Ground Truth Bounding Boxes  `gt_tensor` and `gt_tensor2`

layers_out[22]  `gt_tensor` is based on input_gt_class_ids and input_normlzd_gt_boxes
layers_out[28]  `gt_tensor2` is based on input_gt_class_ids and input_normlzd_gt_boxes, generated using Tensorflow

Display the Ground Truth bounding boxes from the tensor we've constructed

In [None]:
# Draw the ground-truth boxes stored in the `gt_tensor` layer output for the
# first batch image.
from mrcnn.utils  import stack_tensors, stack_tensors_3d
# print(gt_bboxes)
# visualize.display_instances(p_original_image, p_gt_bbox, p_gt_mask, p_gt_class_id, 
#                             dataset_train.class_names, figsize=(8, 8))
# pp.pprint(gt_bboxes)
img = 0
image_id = img_meta[img,0]

print('Image id: ',image_id)
p_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
            load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)   
# layers_out[22] is `gt_tensor` according to the markdown above this cell.
gt_bboxes_stacked = stack_tensors_3d(layers_out[22][img])
print(gt_bboxes_stacked)
# Draw the first two rows; columns 2:6 are passed as the box coordinates
# (presumably y1, x1, y2, x2 -- confirm against the tensor layout).
visualize.draw_boxes(p_image, gt_bboxes_stacked[0:2,2:6])


## Display RoI proposals `pred_bboxes` generated for one class

Display bounding boxes from tensor of proposals produced by the network 
Square: 1 , Circle:2 , Triangle 3

In [None]:
# Draw the proposals stored in `pred_tensor` for one image and one class.
img = 0
cls = 1 # <==== Class to display
pred_tensor = layers_out[19]   # numpy pred_tensor
# pred_tensor = layers_out[25]   # tensorflow pred_tensor 

image_id = img_meta[img,0]
print('Image id: ',image_id)
p_image, p_image_meta, p_gt_class_id, p_gt_bbox, p_gt_mask =  \
            load_image_gt(dataset_train, config, image_id, augment=False, use_mini_mask=True)
print(p_image_meta)
print(pred_tensor[img,cls,:].shape)
print(pred_tensor[img,cls])
# Caption per row: "<int(column 0)>-<column 1 rounded to 3 decimals>"
# (presumably an index and a score -- confirm against the tensor layout).
# Earlier caption variants:
#+'-'+str(np.around(int(x[1]),decimals = 3))
# class id: str(int(x[6]))+'-'+
caps = [str(int(x[0]))+'-'+str(np.around(x[1],decimals = 3))  for x in pred_tensor[img,cls,:].tolist() ]
print(caps)

# Columns 2:6 are passed as the box coordinates.
visualize.draw_boxes(p_image, pred_tensor[img,cls,:,2:6], captions = caps)

In [None]:
# Scale the first image's output_rois by 128 per coordinate (128 matches the
# 128x128 input image shape reported when the model was built); presumably
# this converts normalized boxes to pixels -- confirm.
layers_out[0][0] * [128, 128,128,128]   #output_rois*

### Calculate  mrcnn_bbox_loss

In [None]:
# Step-by-step recomputation of the mrcnn_bbox_loss for the first image of the
# batch, printing every intermediate tensor.
# The `.eval()` calls need a default TensorFlow session to be active.
import keras.backend as K

# NOTE(review): neither of these two imports is used by the live code below.
from mrcnn.utils import apply_box_deltas
from mrcnn.loss  import smooth_l1_loss

# [0:1] keeps the batch axis while selecting only the first image.
target_class_ids = layers_out[1][0:1]
target_bbox      = layers_out[2][0:1]
mrcnn_bbox       = layers_out[10][0:1]
mrcnn_class_ids  = np.argmax(layers_out[9][0:1],axis = -1)     # mrcnn_class_ids

print('target_class_ids', target_class_ids.shape)
print(target_class_ids)  # tgt_class_ids
print(' class with max probability', mrcnn_class_ids.shape)
print(mrcnn_class_ids)
print('target_bboxes', target_bbox.shape)
# print(target_bbox)  # tgt_bounding boxes
print('mrcnn_bboxes',mrcnn_bbox.shape)
# print(mrcnn_bbox)  #mrcnn_bboxes
pred_bbox = mrcnn_bbox

# calc mrcnn_bbox_loss
# Flatten the batch and RoI axes so every row is one RoI.
target_class_ids = K.reshape(target_class_ids, (-1,))
print(target_class_ids.shape)
target_bbox      = K.reshape(target_bbox, (-1, 4))
print('target_bboxx: ', target_bbox.shape)
pred_bbox        = K.reshape(pred_bbox, (-1, pred_bbox.shape[2], 4))
print('pred_bbox : ', pred_bbox.shape)

# Rows whose target class id is > 0, paired with that class id so that
# gather_nd below picks, per selected RoI, the box predicted for its class.
positive_roi_ix        = tf.where(target_class_ids > 0)[:, 0]
print(positive_roi_ix.eval())
positive_roi_class_ids = tf.cast( tf.gather(target_class_ids, positive_roi_ix), tf.int64)
print(positive_roi_class_ids.eval())
indices                = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1)
print(indices.eval())


target_bbox = tf.gather(target_bbox, positive_roi_ix)
print(target_bbox.eval())
pred_bbox   = tf.gather_nd(pred_bbox, indices)
print(pred_bbox.eval())

print('tf.size ',tf.size(target_bbox).eval())

# Element-wise piecewise loss: 0.5*d^2 where |d| < 1, |d| - 0.5 otherwise
# (presumably the same form as the smooth_l1_loss helper in the disabled call).
diff = K.abs(target_bbox - pred_bbox)
print(diff.eval())

less_than_one = K.cast(K.less(diff, 1.0), "float32")
# print(less_than_one.eval())

loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
# print( (1-less_than_one).eval())



# loss        = K.switch(tf.size(target_bbox) > 0,
#                 smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox),
#                 tf.constant(0.0))
print(loss.eval())
sumloss = K.sum(loss)
print(sumloss.eval())
# NOTE(review): 40 is a hard-coded divisor (presumably the element count of
# one particular batch); K.mean below is the general form.
print((sumloss/40).eval())
meanloss        = K.mean(loss)
print(meanloss.eval())

###  Calculate mrcnn_class_loss

In [None]:
# Step-by-step recomputation of the mrcnn_class_loss for the first image of
# the batch, printing every intermediate value.
# The `.eval()` calls need a default TensorFlow session to be active.
import keras.backend as K

from mrcnn.utils import apply_box_deltas
from mrcnn.loss  import smooth_l1_loss

# [0:1] keeps the batch axis while selecting only the first image.
target_class_ids  = layers_out[1][0:1]
pred_class_logits = layers_out[8][0:1]
# One "active" flag per class the model predicts. The length is taken from
# the logits' last axis: a fixed 4-entry vector is too short for this model
# (its head outputs have 7 classes), so the gather below could index past the
# end of the vector.
active_class_ids  = np.ones(pred_class_logits.shape[-1], dtype=int)

# mrcnn_class_ids  = np.argmax(layers_out[9][0:1],axis = -1)     # mrcnn_class_ids

print(' target_class_ids', target_class_ids.shape)
print(target_class_ids)  # target class ids
print(' class logits', pred_class_logits.shape)
print(pred_class_logits)
print(' active, class_ids ', active_class_ids.shape)
print(active_class_ids)  # active class flags

# Class with the highest logit for every RoI.
pred_class_ids = tf.argmax(pred_class_logits, axis=2)
print(pred_class_ids.eval())
# Same argmax computed with numpy on layers_out[9], printed for comparison.
mrcnn_class_ids  = np.argmax(layers_out[9][0:1],axis = -1)     # mrcnn_class_ids
print(mrcnn_class_ids)

# 1.0 where the predicted class is active, 0.0 otherwise.
pred_active = tf.to_float(tf.gather(active_class_ids, pred_class_ids))
print(pred_active.eval())

# calc mrcnn_class_loss
# The labels are cast to int64, as in the class-loss simulation cell earlier
# in the notebook.
loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
       labels=tf.cast(target_class_ids, 'int64'), logits=pred_class_logits)
print(loss.eval())

# Erase the loss of predictions whose class is not active.
loss = loss * pred_active
print(loss.eval())

# Mean over the predictions that contribute to the loss.
print(tf.reduce_sum(loss).eval())
print(tf.reduce_sum(pred_active).eval())
loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active)
print(loss.eval())

###  Calculate mrcnn_mask_loss

In [None]:
# Step-by-step recomputation of the mrcnn_mask_loss for the first three
# images of the batch, printing the shape of every intermediate tensor.
# The `.eval()` calls need a default TensorFlow session to be active.
import keras.backend as K

# NOTE(review): neither of these two imports is used by the code below.
from mrcnn.utils import apply_box_deltas
from mrcnn.loss  import smooth_l1_loss

target_class_ids    = layers_out[1][0:3]
target_masks        = layers_out[3][0:3]
pred_masks          = layers_out[11][0:3]
# mrcnn_class_ids  = np.argmax(layers_out[9][0:1],axis = -1)     # mrcnn_class_ids
print('    target_class_ids shape :', target_class_ids.shape)
print('    target_masks     shape :', target_masks.shape)
print('    pred_masks       shape :', pred_masks.shape)    


# Merge the leading (image, RoI) axes so every row is one RoI.
target_class_ids = K.reshape(target_class_ids, (-1,))
print('    target_class_ids shape :', target_class_ids.shape, '\n', target_class_ids.eval())

mask_shape       = tf.shape(target_masks)
print('    mask_shape       shape :', mask_shape.shape, mask_shape.eval())    

target_masks     = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3]))
print('    target_masks     shape :', tf.shape(target_masks).eval())        

pred_shape       = tf.shape(pred_masks)
print('    pred_shape       shape :', pred_shape.shape, pred_shape.eval())        

pred_masks       = K.reshape(pred_masks, (-1, pred_shape[2], pred_shape[3], pred_shape[4]))
print('    pred_masks       shape :', tf.shape(pred_masks).eval())        


# Move the last axis (classes) to position 1 so gather_nd below can select
# by (RoI index, class id).
pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2])
print('    pred_masks       shape :', tf.shape(pred_masks).eval())        

# Only positive ROIs contribute to the loss. And only
# the class specific mask of each ROI.
positive_ix        = tf.where(target_class_ids > 0)[:, 0]
positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix), tf.int64)
indices            = tf.stack([positive_ix, positive_class_ids], axis=1)
print(indices.eval())



y_true = tf.gather(target_masks, positive_ix)
print('     y_true shape:', tf.shape(y_true).eval())
y_pred = tf.gather_nd(pred_masks, indices)
print('     y_pred shape:', tf.shape(y_pred).eval())

# Binary cross-entropy over the selected masks; falls back to the constant
# 0.0 when no RoI was selected (y_true is empty).
loss = K.switch(tf.size(y_true) > 0,
                K.binary_crossentropy(target=y_true, output=y_pred),
                tf.constant(0.0))
print(tf.shape(loss).eval())

# Reduce to a scalar, then reshape to [1, 1].
loss = K.mean(loss)
print('     final loss shape:', tf.shape(loss).eval())
print(loss.eval())
loss = K.reshape(loss, [1, 1])
print('     final loss shape:', tf.shape(loss).eval())
print(loss.eval())

### Calculate a pixel loss on fcn_gaussian and gt_gaussian 

In [None]:
import keras.backend as K

from mrcnn.utils import apply_box_deltas
from mrcnn.loss  import smooth_l1_loss
# Pixel-wise smooth-L1 loss between two heatmap tensors, computed twice:
# first by hand (Part 1), then through mrcnn.loss.smooth_l1_loss (Part 2).
# Only the first three entries along axis 0 of each layer output are used.
# Every `.eval()` below needs a default TensorFlow session to be active.
pred_masks          = layers_out[12][0:3]
target_masks        = layers_out[27][0:3]

print('    target_masks     shape :', tf.shape(target_masks).eval())
print('    pred_masks       shape :', tf.shape(pred_masks).eval())    

# --- Part 1: manual smooth-L1 -------------------------------------------
diff = K.abs(target_masks - pred_masks)
print(tf.shape(diff).eval())

# 1.0 where |diff| < 1 (quadratic branch), 0.0 elsewhere (linear branch).
less_than_one = K.cast(K.less(diff, 1.0), "float32")
print(tf.shape(less_than_one).eval())

# 0.5 * diff^2 where |diff| < 1, |diff| - 0.5 otherwise.
loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
print(tf.shape(loss).eval())

# print( (1-less_than_one).eval())

meanloss = K.mean(loss)
print(tf.shape(meanloss).eval())
print(meanloss.eval())


# --- Part 2: same loss via smooth_l1_loss on reshaped tensors -------------
mask_shape       = tf.shape(target_masks)
# Prints the shape *of the shape vector* (i.e. the rank), not the mask shape.
print('    mask_shape       shape :', tf.shape(mask_shape).eval())    

# NOTE(review): the reshapes below fold every axis after axis 2 into the
# leading axis WITHOUT transposing first. Both tensors receive the same
# reshape, so an element-wise loss followed by a global mean is unaffected,
# but if the layout is [img, height, width, class] the resulting slices are
# presumably not per-class maps -- confirm before reusing them elsewhere.
target_masks     = K.reshape(target_masks, (-1, mask_shape[1], mask_shape[2]))
print('    target_masks     shape :', tf.shape(target_masks).eval())        

pred_shape       = tf.shape(pred_masks)
print('    pred_shape       shape :', tf.shape(pred_shape).eval())        

pred_masks       = K.reshape(pred_masks, (-1, pred_shape[1], pred_shape[2]))
print('    pred_masks       shape :', tf.shape(pred_masks).eval())
# No permutation to [N, num_classes, height, width] is performed here.

# smooth_l1_loss when the target is non-empty, otherwise a constant 0.0.
# presumably smooth_l1_loss implements the same formula as Part 1 -- verify in mrcnn.loss
loss = K.switch(tf.size(target_masks) > 0,
                smooth_l1_loss(y_true=target_masks, y_pred=pred_masks),
                tf.constant(0.0))
# Reduce to a scalar, then reshape to [1, 1].
loss = K.mean(loss)
loss = K.reshape(loss, [1, 1])
print('     final loss shape:', loss.get_shape())
print(loss.eval())

###  Mean values of GT, Pred, and FCN heatmaps 

In [None]:
# Per-image / per-class mean of the predicted, ground-truth and FCN heatmaps.
# The tensors are indexed as [img, :, :, cls], i.e. axes 1 and 2 are the ones
# averaged over.
# Requires a default TensorFlow session for `.eval()`.
pred_masks = tf.identity(layers_out[24])
gt_masks   = tf.identity(layers_out[27])
fcn_masks  = tf.identity(layers_out[12])
print(gt_masks.shape, fcn_masks.shape)

# Reduce over axes 1 and 2 once per tensor and evaluate each result a single
# time. The previous version built a new slice + mean op and ran a separate
# session call for every (img, cls, tensor) combination, which grows the graph
# on every re-run of the cell. Values may differ in the last float digit only.
gt_means   = K.mean(gt_masks,   axis=[1, 2]).eval()
fcn_means  = K.mean(fcn_masks,  axis=[1, 2]).eval()
pred_means = K.mean(pred_masks, axis=[1, 2]).eval()

# Show at most the first 5 images and 4 classes (as before), but never index
# past what the tensors actually contain.
num_imgs = min(5, gt_means.shape[0], fcn_means.shape[0], pred_means.shape[0])
num_cls  = min(4, gt_means.shape[1], fcn_means.shape[1], pred_means.shape[1])

for img in range(num_imgs):
    for cls in range(num_cls):
        print('Img/Cls: ', img, '/', cls,'    gtmean: ', gt_means[img, cls], '\t fcn : ' , fcn_means[img, cls], '\t pred :', pred_means[img, cls])

In [None]:
# Inspect the classifier / box-regression head outputs for one image and apply
# the predicted box deltas of the most probable class to each ROI.
# Relies on `apply_box_deltas` (imported in the smooth-L1 loss cell) and on
# `config` / `layers_out` defined in earlier cells.
img  = 0
class_probs = layers_out[9][img]        # mrcnn_class
deltas      = layers_out[10][img]       # mrcnn_bbox

print(class_probs.shape)
print('class probabilities')
print(class_probs)
class_ids = np.argmax(class_probs, axis = 1)     # mrcnn_class_ids
print(' class with max probability')
print(class_ids)


# layers_out[10][2,0,3]
print('deltas.shape :', deltas.shape)
print(deltas[0:4])

# For every ROI pick the delta row of its arg-max class. The row index is
# derived from class_ids instead of a hard-coded 32 so the cell keeps working
# when the number of ROIs per image changes.
roi_ix = np.arange(class_ids.shape[0])
deltas_specific = deltas[roi_ix, class_ids]
print('deltas of max prob class: ', deltas_specific.shape)
print(deltas_specific[0:5])

# presumably 128 is the image side length used to scale normalized ROI
# coordinates to pixels -- TODO confirm and replace with the config value
output_rois = layers_out[0][img]*[128,128,128,128]
print('output_rois: ', output_rois.shape)
print(output_rois[0:])

# Deltas are scaled by config.BBOX_STD_DEV before being applied.
refined_rois    = apply_box_deltas(output_rois, deltas_specific * config.BBOX_STD_DEV)
print('refined rois: ',refined_rois.shape)
print(refined_rois)

In [None]:
# Per-channel sum, min and max of one image's layers_out[12] output.
# fcn_out is indexed as [:, :, cls], so axis 2 is the channel looped over.
img = 0
fcn_out = layers_out[12][img]
fcn_sum = np.sum(fcn_out, axis=(0,1))
print(fcn_sum)
# Iterate over every channel present (matching the length of fcn_sum above)
# rather than a hard-coded 4.
for cls in range(fcn_out.shape[2]):
    print('min :', np.min(fcn_out[:,:,cls]), 'max :', np.max(fcn_out[:,:,cls]))

In [None]:
# Show entry 2 of two of the training-batch inputs; the second one is divided
# element-wise by 128 on each of its four columns.
# presumably [4] holds GT class ids and [5] GT boxes in pixels -- TODO confirm
sample_ix = 2
batch_input_4 = train_batch_x[4]
batch_input_5 = train_batch_x[5]
print(batch_input_4[sample_ix])
print(batch_input_5[sample_ix] / [128] * 4 if False else batch_input_5[sample_ix] / ([128] * 4))