In [None]:
#Required import for web cam integration with model

import cv2
import numpy as np
import os
import sys

In [2]:
# Root directory of the project: the parent of the directory the notebook runs in.
ROOT_DIR = os.path.abspath("../")

# Import Mask RCNN.
# ROOT_DIR must be on sys.path *before* the imports below so that the local
# checkout of the `mrcnn` package is found.
sys.path.append(ROOT_DIR)  # To find local version of the library
from mrcnn import utils
from mrcnn import model as modellib
from mrcnn import visualize
# NOTE(review): `visual` is not referenced in any visible cell -- confirm this
# module exists in the local mrcnn package, otherwise this import will fail.
from mrcnn import visual

# Import COCO config.
# The COCO sample directory is added to sys.path so `coco` can be imported
# as a top-level module.
sys.path.append(os.path.join(ROOT_DIR, "samples/coco/"))  # To find local version
import coco

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [3]:
# Local path to the trained weights file, expected directly under ROOT_DIR.
COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco_0001.h5")
# Download the COCO-trained weights to that path, but only when no file
# exists there yet, so re-running this cell does not download again.
if not os.path.exists(COCO_MODEL_PATH):
    utils.download_trained_weights(COCO_MODEL_PATH)


In [4]:
%matplotlib inline 

# Directory to save logs and trained model; passed as `model_dir` when the
# MaskRCNN model object is created further down.
MODEL_DIR = os.path.join(ROOT_DIR, "logs")

In [5]:
#Defining the colours to be added when recognizing the objects
def random_colors(N, bright = True):
    """Return ``N`` evenly spaced hues as RGB tuples, in random order.

    Args:
        N: Number of colours to generate. ``0`` yields an empty list.
        bright: When true the HSV value component is 1.0, otherwise 0.7.

    Returns:
        A list of ``N`` ``(r, g, b)`` tuples whose components are floats in
        the range [0, 1] (as produced by ``colorsys.hsv_to_rgb``).
    """
    # Imported here rather than relying on module-level names: the notebook
    # only imports `colorsys` and `random` in a much later cell, so calling
    # this function before that cell has run would raise NameError.
    import colorsys
    import random

    brightness = 1.0 if bright else 0.7
    # Spread the hues uniformly around the colour wheel at full saturation.
    hsv = [(i / N, 1, brightness) for i in range(N)]
    colors = [colorsys.hsv_to_rgb(*c) for c in hsv]
    # Shuffle so that neighbouring instances do not get neighbouring hues.
    random.shuffle(colors)
    return colors

In [6]:
#Applying the mask to the objects inside the rectangular model
def apply_mask(image, mask, color, alpha=0.5):
    """Blend ``color`` into ``image`` wherever ``mask`` equals 1.

    The image is modified in place, one channel per entry of ``color``, and
    the same array is returned. Pixels outside the mask keep their value;
    pixels inside become ``pixel * (1 - alpha) + alpha * colour_component``.
    """
    selected = mask == 1
    for channel, component in enumerate(color):
        plane = image[:, :, channel]
        blended = plane * (1 - alpha) + alpha * component
        image[:, :, channel] = np.where(selected, blended, plane)
    return image

In [7]:
#Defining the objects in the frame to be mapped by the mask, box and label 

def display_instances(image, boxes, masks, ids, names, scores):
    """Draw the mask, bounding box and caption of every detection onto ``image``.

    Args:
        image: Frame to draw on; it is modified in place by the drawing calls.
        boxes: Array with one row per instance, unpacked as (y1, x1, y2, x2).
        masks: Array indexed as ``masks[:, :, i]`` for instance ``i``.
        ids: Per-instance class ids, used as indices into ``names``.
        names: Sequence mapping a class id to its label.
        scores: Per-instance confidence scores, or ``None`` to omit them
            from the captions.

    Returns:
        The annotated image.
    """
    n_instances = boxes.shape[0]

    if not n_instances:
        print('NO INSTANCES TO DISPLAY')
        return image
    assert boxes.shape[0] == masks.shape[-1] == ids.shape[0]

    # random_colors() yields floats in [0, 1], but the mask blend and the
    # OpenCV drawing calls below work in 0-255 pixel values (compare the fixed
    # colours in `label_styles`). Without scaling, every randomly coloured
    # overlay would be rendered almost black.
    colors = [
        tuple(int(255 * channel) for channel in rgb)
        for rgb in random_colors(n_instances)
    ]

    # label -> (mask colour, box/text colour, font).
    # None means "use this instance's random colour".
    # NOTE(review): 'car' keeps the random colour for its mask but a fixed
    # colour for its box and text, exactly as before -- confirm this is intended.
    # NOTE(review): OpenCV interprets colour tuples as (B, G, R) -- confirm the
    # fixed colours below were chosen with that channel order in mind.
    label_styles = {
        'person': ((135, 206, 250), (135, 206, 250), cv2.FONT_HERSHEY_DUPLEX),
        'car': (None, (255, 165, 0), cv2.FONT_HERSHEY_COMPLEX),
    }
    default_style = (None, None, cv2.FONT_HERSHEY_SIMPLEX)

    for i, color in enumerate(colors):
        # Skip boxes whose coordinates are all zero.
        if not np.any(boxes[i]):
            continue

        # Plain Python ints: OpenCV point arguments should not be NumPy scalars.
        y1, x1, y2, x2 = (int(coord) for coord in boxes[i])
        label = names[ids[i]]
        score = scores[i] if scores is not None else None
        # `is not None` so that a legitimate score of 0.0 is still shown.
        caption = '{} {:.2f}'.format(label, score) if score is not None else label
        mask = masks[:, :, i]

        mask_color, line_color, font = label_styles.get(label, default_style)
        if mask_color is None:
            mask_color = color
        if line_color is None:
            line_color = color

        image = apply_mask(image, mask, mask_color)
        image = cv2.rectangle(image, (x1, y1), (x2, y2), line_color, 1)
        image = cv2.putText(
            image, caption, (x1, y1), font, 1, line_color, 2
        )

    return image

In [8]:

# Frame counter, initialised to zero.
# NOTE(review): not read or incremented in any visible cell -- confirm it is
# still needed.
frame_number = 0
    

In [9]:
class InferenceConfig(coco.CocoConfig):
    """COCO configuration overridden for single-image inference."""
    # Set batch size to 1 since we'll be running inference on
    # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

# Instantiate the inference configuration; the model cell below passes this
# object to MaskRCNN. display() prints the resulting settings.
config = InferenceConfig()
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE                     [28, 28]
MAX_GT_INSTANCES               100
MEAN_PIXEL                     [123.7 116.8 103.9

In [11]:
# Create model object in inference mode, using the `config` and MODEL_DIR
# defined in earlier cells.
# NOTE(review): `config` must still be the InferenceConfig instance when this
# cell runs; later cells rebind `config`, so re-running this cell out of order
# would build the model from a different configuration.
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

# Load weights trained on MS-COCO from the file at COCO_MODEL_PATH.
model.load_weights(COCO_MODEL_PATH, by_name=True)



In [12]:
#Importing coco and adding the directory 
import coco
# Use a separate name for the dataset configuration. Rebinding the global
# `config` here would replace the InferenceConfig instance the model was built
# with, so re-running the model cell afterwards would pick up the base
# CocoConfig instead of the single-image inference settings.
dataset_config = coco.CocoConfig()
#Enter coco path directory
COCO_DIR = ROOT_DIR 
print("Dir is",COCO_DIR)
# Load the COCO "train" subset; later cells use the resulting `dataset`
# (for example to obtain the class names).
if dataset_config.NAME == "coco":
    dataset = coco.CocoDataset()
    dataset.load_coco(COCO_DIR, "train")


Dir is C:\Python34\Mask_RCNN
loading annotations into memory...
Done (t=22.77s)
creating index...
index created!


In [13]:
#Importing coco and adding the directory 
import coco
# Use a separate name for the dataset configuration so the global `config`
# (the InferenceConfig instance the model was built with) is not overwritten.
dataset_config = coco.CocoConfig()
#Enter coco path directory
COCO_DIR = ROOT_DIR 
print("Dir is",COCO_DIR)
# NOTE(review): the recorded output of this cell shows only the "Dir is" line,
# i.e. this branch did not run for CocoConfig. The cell looks like a leftover
# duplicate of the previous one -- confirm whether it can be removed.
if dataset_config.NAME == "last":
    dataset = coco.CocoDataset()
    dataset.load_coco(COCO_DIR, "train")

Dir is C:\Python34\Mask_RCNN


In [14]:
# The dataset has to be prepared before any of its attributes are used.
dataset.prepare()

# Class names are looked up by class id when captions are drawn on the frames.
class_names = dataset.class_names

# Report how many classes there are, then list each one with its index.
print("Class Count: {}".format(dataset.num_classes))
for i, info in enumerate(dataset.class_info):
    print("{:3}. {:50}".format(i, info['name']))

Class Count: 81
  0. BG                                                
  1. person                                            
  2. bicycle                                           
  3. car                                               
  4. motorcycle                                        
  5. airplane                                          
  6. bus                                               
  7. train                                             
  8. truck                                             
  9. boat                                              
 10. traffic light                                     
 11. fire hydrant                                      
 12. stop sign                                         
 13. parking meter                                     
 14. bench                                             
 15. bird                                              
 16. cat                                               
 17. dog                        

In [15]:

import logging
import random
import itertools
import colorsys

import numpy as np
from skimage.measure import find_contours
import matplotlib.pyplot as plt
from matplotlib import patches,  lines
from matplotlib.patches import Polygon
import IPython.display


In [16]:
#Integrating the model with the webcam 

capture = cv2.VideoCapture(0)


#fourcc = cv2.VideoWriter_fourcc(*'XVID')
#out = cv2.VideoWriter('output_catpure.avi',fourcc, 20.0, (640,480))


# try/finally guarantees the camera and the preview window are released even
# when detection or drawing raises, so the device is not left locked and the
# kernel does not need a restart.
try:
    #Defining the frame for the model to detect
    while True:
        ret, frame = capture.read()
        # A failed read (camera busy, unplugged, not opened) returns no frame;
        # passing that on to the model would fail with a NoneType error.
        if not ret or frame is None:
            print("Failed to read a frame from the webcam")
            break

        # OpenCV delivers frames in BGR channel order, whereas the Mask R-CNN
        # COCO weights expect RGB input, so convert before running detection.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        results = model.detect([rgb_frame], verbose=0)

        r = results[0]
        # Draw on the original BGR frame, which is what cv2.imshow expects.
        frame = display_instances(
                frame, r['rois'], r['masks'], r['class_ids'], class_names, r['scores']
            )

        cv2.imshow('frame', frame)
        # Pressing q ends the loop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            print("Quit Display")
            break
finally:
    #Releases the webcam and closes the preview window
    capture.release()
    cv2.destroyAllWindows()

Quit Display


In [None]:
# Manual cleanup: run this cell if the webcam loop above errored out, to
# release the camera and close the preview window.
# If a NoneType error still occurs afterwards, restart the kernel and clear
# all outputs; the camera resource and frame array were not freed.
capture.release()
cv2.destroyAllWindows()