# Mask R-CNN - Image comparison on the Inria Aerial Dataset

<i> Sébastien Ohleyer </i>

Comparing FCN, MLP and Mask R-CNN.

Python 3

In [None]:
import os
import sys
import random
import math
import re
import time
import numpy as np
import tensorflow as tf
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from scipy.misc import imsave

import utils
import visualize
from visualize import display_images
import model as modellib
from model import log

%matplotlib inline 

# Root directory of the project (the current working directory)
ROOT_DIR = os.getcwd()

# Directory to save logs and trained model
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

# Directory of Aerial dataset
# NOTE: absolute, machine-specific path; adjust it to the local dataset location.
AERIAL_DIR = "/Users/sebastienohleyer/Documents/ENS MVA/Object recognition/AerialImageDataset/"

# Directory containing the trained Mask R-CNN weight files
# NOTE: absolute, machine-specific path; adjust it to the local weights location.
AERIAL_MODEL_PATH = "/Users/sebastienohleyer/Documents/ENS MVA/Object recognition/mask_rcnn/trained_model"

## Configurations

In [None]:
import aerial

# Start from the configuration class provided by the aerial module.
config = aerial.AerialConfig()


# Inference variant of the configuration above: every setting is inherited
# except the two below, so that detection runs on one image at a time
# (one GPU, one image per GPU).
class InferenceConfig(type(config)):
    GPU_COUNT, IMAGES_PER_GPU = 1, 1


# From here on `config` is the inference configuration; show its settings.
config = InferenceConfig()
config.display()

## Notebook Preferences

In [None]:
# Device to load the neural network on.
# Useful if you're training a model on the same
# machine, in which case use CPU and leave the
# GPU for training.
# This value is handed to tf.device() when the model is created.
DEVICE = "/cpu:0"  # /cpu:0 or /gpu:0

# Inspect the model in training or inference modes
# values: 'inference' or 'training'
# TODO: code for 'training' test mode not ready yet
# NOTE(review): no code visible in this notebook reads TEST_MODE; the model is
# created with a literal mode="inference" -- confirm whether that is intended.
TEST_MODE = "inference"

In [None]:
def get_ax(rows=1, cols=1, size=16):
    """Create a ``rows`` x ``cols`` grid of subplots and return its axes.

    This is the single place that decides how large the notebook's
    visualizations are rendered: the figure is created with
    ``figsize=(size * cols, size * rows)``, so a larger ``size`` gives
    larger images.

    Returns the axes object(s) produced by ``plt.subplots``; the figure
    itself is discarded.
    """
    figure_size = (size * cols, size * rows)
    axes = plt.subplots(rows, cols, figsize=figure_size)[1]
    return axes

def find_idx(image_info, image_name):
    """Return the position of the first entry whose name is ``image_name``.

    Args:
        image_info: iterable of mappings, each providing an "image_name" key.
        image_name: the name to look for.

    Returns:
        The zero-based index of the first matching entry, or ``None`` when no
        entry matches. Callers that use the result as an index should check
        for ``None`` first.
    """
    for i, image in enumerate(image_info):
        if image["image_name"] == image_name:
            return i
    # No entry matched: state the "not found" result explicitly.
    return None

In [None]:
# Every (i, j) pair with i and j in 0..4, i.e. the 25 cells of a 5 x 5 grid,
# ordered with i varying slowest. Passed as `subimage_list` when the datasets
# are loaded.
SUBIMAGE_LIST = [(i, j) for i in range(5) for j in range(5)]

## Ground Truth

In [None]:
# Towns to load; shared by the dataset cells that pass town_list.
TOWN_LIST = ["chicago"]

# Ground-truth dataset (subset "train"), limited to the towns above and to
# the sub-images listed in SUBIMAGE_LIST.
dataset = aerial.AerialDataset()
dataset.load_aerial(
    dataset_dir=AERIAL_DIR,
    subset="train",
    subimage_list=SUBIMAGE_LIST,
    town_list=TOWN_LIST,
)
dataset.prepare()

# Two-line summary of what was loaded.
print(
    "Image Count: {}".format(len(dataset.image_ids)),
    "Class Info: {}".format(dataset.class_info),
    sep="\n",
)

In [None]:
#dataset.image_info

In [None]:
# Load a test image, selected by file name, together with its ground-truth masks
#image_id = np.random.choice(dataset.image_ids)
im_name = "chicago5_01.tif"
# find_idx returns None when no entry carries this name; the indexing on the
# next line would then fail.
image_idx = find_idx(dataset.image_info, im_name)
image_name = dataset.image_info[image_idx]['image_name']
image = dataset.load_image(image_idx)
# mask_gt: ground-truth masks of the image; class_ids_gt: the matching class ids
mask_gt, class_ids_gt = dataset.load_mask(image_idx)

In [None]:
# Side by side: the image on the left, its ground-truth masks summed along
# axis 2 on the right.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(20, 10))
for panel, content, suffix in (
    (ax1, image, ' | image'),
    (ax2, np.sum(mask_gt, 2), ' | mask'),
):
    panel.imshow(content)
    panel.axis('off')
    panel.set_title(image_name + suffix, fontsize=20)
plt.show()

In [None]:
# Compute bounding boxes from the ground-truth masks
# (utils.extract_bboxes is a project helper).
bbox_gt = utils.extract_bboxes(mask_gt)

In [None]:
# Sum the ground-truth masks along axis 2 into a single map; this map is what
# the comparison cells pass to compute_iou / compute_accuracy as reference.
full_mask_gt = np.sum(mask_gt,2)
# Distinct values in the mask stack and in the summed map (sanity check)
print(np.unique(mask_gt))
print(np.unique(full_mask_gt))

In [None]:
# Display image and additional stats
print("image_id ", image_idx, dataset.image_info[image_idx]['image_name'])
log("image", image)
log("mask", mask_gt)
# NOTE(review): this logs the dataset-wide class_ids, not the per-image
# class_ids_gt returned by load_mask -- confirm which one is intended.
log("class_ids", dataset.class_ids)
log("bbox", bbox_gt)
# Display image and instances; the returned image is kept for the final
# comparison figure.
masked_image_gt = visualize.display_instances(image, bbox_gt,  mask_gt, class_ids_gt, dataset.class_names)

## FCN

In [None]:
# FCN dataset (subset "fcn"), loaded with the same towns and sub-images as
# the ground-truth dataset.
dataset_fcn = aerial.AerialDataset()
dataset_fcn.load_aerial(
    dataset_dir=AERIAL_DIR,
    subset="fcn",
    subimage_list=SUBIMAGE_LIST,
    town_list=TOWN_LIST,
)
dataset_fcn.prepare()

# Two-line summary of what was loaded.
print(
    "Image Count: {}".format(len(dataset_fcn.image_ids)),
    "Class Info: {}".format(dataset_fcn.class_info),
    sep="\n",
)

In [None]:
#dataset_fcn.image_info

In [None]:
# Load the same test image (im_name) from the FCN dataset.
# find_idx returns None when the name is absent; the indexing below would then fail.
image_idx = find_idx(dataset_fcn.image_info, im_name)
image_name = dataset_fcn.image_info[image_idx]['image_name']
# Rebinds the notebook-wide `image` variable.
image = dataset_fcn.load_image(image_idx)
mask_fcn, class_ids_fcn = dataset_fcn.load_mask(image_idx)

In [None]:
# Side by side: the image on the left, the FCN masks summed along axis 2 on
# the right.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(20, 10))
for panel, content, suffix in (
    (ax1, image, ' | image'),
    (ax2, np.sum(mask_fcn, 2), ' | mask'),
):
    panel.imshow(content)
    panel.axis('off')
    panel.set_title(image_name + suffix, fontsize=20)
plt.show()

In [None]:
# Compute bounding boxes from the FCN masks
bbox_fcn = utils.extract_bboxes(mask_fcn)
# Display image and additional stats
print("image_id ", image_idx, dataset_fcn.image_info[image_idx]['image_name'])
log("image", image)
log("mask", mask_fcn)
# NOTE(review): this logs the dataset-wide class_ids, not the per-image
# class_ids_fcn returned by load_mask -- confirm which one is intended.
log("class_ids", dataset_fcn.class_ids)
log("bbox", bbox_fcn)
# Display image and instances; the returned image is kept for the final
# comparison figure.
masked_image_fcn = visualize.display_instances(image, bbox_fcn,  mask_fcn, class_ids_fcn, dataset_fcn.class_names)

In [None]:
# Sum the FCN masks along axis 2 into a single map, used later for the
# IoU / accuracy comparison against the ground truth.
full_mask_fcn = np.sum(mask_fcn,2)
# Distinct values in the mask stack and in the summed map (sanity check)
print(np.unique(mask_fcn))
print(np.unique(full_mask_fcn))

In [None]:
#imsave(image_name_list[0]+'_fcn'+'.jpg', masked_image_fcn)

## MLP

In [None]:
# Load the MLP dataset (subset "mlp").
# town_list is passed here as in the ground-truth and FCN cells, so that all
# three datasets are restricted to the same towns.
dataset_mlp = aerial.AerialDataset()
dataset_mlp.load_aerial(dataset_dir=AERIAL_DIR, subset="mlp", subimage_list=SUBIMAGE_LIST, town_list=TOWN_LIST)
dataset_mlp.prepare()

# Summary of what was loaded
print("Image Count: {}".format(len(dataset_mlp.image_ids)))
print("Class Info: {}".format(dataset_mlp.class_info))

In [None]:
# Load the same test image (im_name) from the MLP dataset.
# find_idx returns None when the name is absent; the indexing below would then fail.
image_idx = find_idx(dataset_mlp.image_info, im_name)
image_name = dataset_mlp.image_info[image_idx]['image_name']
# Rebinds the notebook-wide `image` variable.
image = dataset_mlp.load_image(image_idx)
mask_mlp, class_ids_mlp = dataset_mlp.load_mask(image_idx)

In [None]:
# Side by side: the image on the left, the MLP masks summed along axis 2 on
# the right.
f, (ax1, ax2) = plt.subplots(1, 2, sharey=True, figsize=(20, 10))
for panel, content, suffix in (
    (ax1, image, ' | image'),
    (ax2, np.sum(mask_mlp, 2), ' | mask'),
):
    panel.imshow(content)
    panel.axis('off')
    panel.set_title(image_name + suffix, fontsize=20)
plt.show()

In [None]:
# Compute bounding boxes from the MLP masks
bbox_mlp = utils.extract_bboxes(mask_mlp)
# Display image and additional stats
print("image_id ", image_idx, dataset_mlp.image_info[image_idx]['image_name'])
log("image", image)
log("mask", mask_mlp)
# NOTE(review): this logs the dataset-wide class_ids, not the per-image
# class_ids_mlp returned by load_mask -- confirm which one is intended.
log("class_ids", dataset_mlp.class_ids)
log("bbox", bbox_mlp)
# Display image and instances; the returned image is kept for the final
# comparison figure.
masked_image_mlp = visualize.display_instances(image, bbox_mlp,  mask_mlp, class_ids_mlp, dataset_mlp.class_names)

In [None]:
# Sum the MLP masks along axis 2 into a single map, used later for the
# IoU / accuracy comparison against the ground truth.
full_mask_mlp = np.sum(mask_mlp,2)
# Distinct values in the mask stack and in the summed map (sanity check)
print(np.unique(mask_mlp))
print(np.unique(full_mask_mlp))

In [None]:
#imsave(image_name_list[0]+'_mlp'+'.jpg', masked_image_mlp)

## Mask R-CNN

#### Load model

In [None]:
# Weight file to load, located inside AERIAL_MODEL_PATH
initial_weights = '9_mask_rcnn_aerial_0040.h5'
weights_path = os.path.join(AERIAL_MODEL_PATH, initial_weights)

# Build the Mask R-CNN model in inference mode on the configured device
with tf.device(DEVICE):
    model = modellib.MaskRCNN(
        mode="inference",
        model_dir=MODEL_DIR,
        config=config,
    )

# Report which file is used, then load its weights with by_name=True
print("Loading weights ", weights_path)
model.load_weights(weights_path, by_name=True)

#### Load image

In [None]:
# Locate the test image in the ground-truth dataset and load it, with its
# ground-truth annotations, through the model's own loading routine.
image_idx = find_idx(dataset.image_info, im_name)
(image, image_meta, gt_class_id, gt_bbox, gt_mask) = modellib.load_image_gt(
    dataset, config, image_idx, use_mini_mask=False
)

# One-line identification of the loaded image.
info = dataset.image_info[image_idx]
print(
    "image ID: {}.{} ({}) {}".format(
        info["source"],
        info["id"],
        image_idx,
        dataset.image_reference(image_idx),
    )
)

#### Run detection

In [None]:
# Run object detection on the single loaded image
results = model.detect([image], verbose=1)

# Display results
ax = get_ax(1)
# One image was submitted, so the first result is the one of interest
r = results[0]
# Draw the predicted instances; the returned image is kept for the final
# comparison figure.
masked_image_maskrcnn = visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], dataset.class_names, r['scores'], ax=ax,
                            title="Predictions")
# Log the ground-truth annotations of the same image for reference
log("gt_class_id", gt_class_id)
log("gt_bbox", gt_bbox)
log("gt_mask", gt_mask)

In [None]:
# Predicted instance masks of the detection result
mask_rcnn = r["masks"]
# Sum the masks along axis 2 into a single map ...
full_mask_maskrcnn = np.sum(mask_rcnn,2)
# ... and replace every value that is not below 2 by 1 (values below 2 are
# kept), so positions covered by several masks are not counted more than once.
full_mask_maskrcnn = np.where(full_mask_maskrcnn<2,full_mask_maskrcnn,1)
# Distinct values in the mask stack and in the clamped map (sanity check)
print(np.unique(mask_rcnn))
print(np.unique(full_mask_maskrcnn))

In [None]:
#imsave(image_name_list[0]+'_maskrcnn'+'.jpg', masked_image_maskrcnn)

## Comparison

In [None]:
# Metric helpers from the project's compute_stats module
from compute_stats import compute_iou,compute_accuracy
# FCN map scored against the ground-truth map (ground truth passed first)
print("FCN")
print("IoU:", compute_iou(full_mask_gt,full_mask_fcn))
print("Accuracy:", compute_accuracy(full_mask_gt,full_mask_fcn))

In [None]:
# MLP map scored against the ground-truth map (ground truth passed first)
print("MLP")
print("IoU:", compute_iou(full_mask_gt,full_mask_mlp))
print("Accuracy:", compute_accuracy(full_mask_gt,full_mask_mlp))

In [None]:
# Mask R-CNN map scored against the ground-truth map (ground truth passed first)
print("Mask-RCNN")
print("IoU:", compute_iou(full_mask_gt,full_mask_maskrcnn))
print("Accuracy:", compute_accuracy(full_mask_gt,full_mask_maskrcnn))

## For LaTeX

In [None]:
import imageio
# NOTE: this rebinds the notebook-wide im_name. Cells that look the image up
# with find_idx(..., im_name) will search for this name if re-run afterwards.
im_name = 'tyrol-w3.tif'
# Image read from the "train/images" folder of the dataset
plt_image = imageio.imread(os.path.join(AERIAL_DIR,"train/images/"+im_name))
# Maps read from the "gt", "fcn", "mlp" and Mask R-CNN output folders, each
# divided by 255 (presumably they are stored with values 0/255 -- confirm).
plt_gt = imageio.imread(os.path.join(AERIAL_DIR,"train/gt/"+im_name))/255
plt_fcn = imageio.imread(os.path.join(AERIAL_DIR,"train/fcn/"+im_name))/255
plt_mlp = imageio.imread(os.path.join(AERIAL_DIR,"train/mlp/"+im_name))/255
plt_maskrcnn = imageio.imread(os.path.join(AERIAL_DIR,"train/maskrcnn_aerial20180115T1637/"+im_name))/255



In [None]:
# 5 x 2 grid of panels. Left column: the images read from disk for the LaTeX
# figure. Right column: the current `image` and the instance overlays returned
# by visualize.display_instances earlier in the notebook.
f, ((ax00, ax01), (ax10, ax11), (ax20, ax21), (ax30, ax31), (ax40, ax41)) = plt.subplots(
    5, 2, figsize=(20, 50)
)

# (axes, picture, title) for every panel, listed column by column.
panels = [
    (ax00, plt_image.astype(np.uint8), 'image'),
    (ax10, plt_gt.astype(np.uint8), 'ground truth'),
    (ax20, plt_fcn.astype(np.uint8), 'fcn'),
    (ax30, plt_mlp.astype(np.uint8), 'mlp'),
    (ax40, plt_maskrcnn.astype(np.uint8), 'maskrcnn'),
    (ax01, image, 'image'),
    (ax11, masked_image_gt.astype(np.uint8), 'ground truth'),
    (ax21, masked_image_fcn.astype(np.uint8), 'fcn'),
    (ax31, masked_image_mlp.astype(np.uint8), 'mlp'),
    (ax41, masked_image_maskrcnn.astype(np.uint8), 'maskrcnn'),
]
for panel_ax, picture, heading in panels:
    panel_ax.imshow(picture)
    panel_ax.axis('off')
    panel_ax.set_title(heading, fontsize=40)

plt.tight_layout()
# File name: im_name up to its first '.', followed by 'comparison.eps'.
plt.savefig(im_name.split('.')[0]+'comparison.eps', format='eps', dpi=200)

In [None]:
#f, (ax1, ax2, ax3, ax4) = plt.subplots(4, 1, sharey=True,figsize=(10,50))
#ax1.imshow( masked_image_gt.astype(np.uint8) )
#ax1.axis('off')
#ax1.set_title('ground truth', fontsize=20)
#ax2.imshow( masked_image_fcn.astype(np.uint8) )
#ax2.axis('off')
#ax2.set_title('fcn', fontsize=20)
#ax3.imshow( masked_image_mlp.astype(np.uint8) )
#ax3.axis('off')
#ax3.set_title('mlp', fontsize=20)
#ax4.imshow( masked_image_maskrcnn.astype(np.uint8) )
#ax4.axis('off')
#ax4.set_title('maskrcnn', fontsize=20)
#plt.savefig('colorscomparison.eps', format='eps', dpi=200)

In [None]:
# Show whatever is currently bound to `image` (last assigned when the image
# was loaded for Mask R-CNN detection, if the cells are run in order).
plt.imshow(image)