# Applied Computer Vision using API
> ## Mask R-CNN
>> ### Group 8

## Mount Google drive

In [63]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


> Project configuration:
* Clone from other repository (https://github.com/alsombra/Mask_RCNN-TF2)
* Add compatibility code for TensorFlow v1
* Upload more photos from local drive if needed
* Copy photos from local drive to repository images folder

## Downloading the repository
* Clone from other repository (https://github.com/alsombra/Mask_RCNN-TF2)

In [64]:
!git clone https://github.com/alsombra/Mask_RCNN-TF2

fatal: destination path 'Mask_RCNN-TF2' already exists and is not an empty directory.


In [65]:
# change directory
%cd Mask_RCNN-TF2

/content/Mask_RCNN-TF2


In [66]:
# install repository libraries
!pip install -r requirements.txt



In [67]:
# install setup file
!python setup.py install

!!

        ********************************************************************************
        Usage of dash-separated 'description-file' will not be supported in future
        versions. Please use the underscore name 'description_file' instead.

        This deprecation is overdue, please update your project and remove deprecated
        calls to avoid build errors in the future.

        See https://setuptools.pypa.io/en/latest/userguide/declarative_config.html for details.
        ********************************************************************************

!!
  opt = self.warn_dash_deprecation(opt, section)
!!

        ********************************************************************************
        Usage of dash-separated 'license-file' will not be supported in future
        versions. Please use the underscore name 'license_file' instead.

        This deprecation is overdue, please update your project and remove deprecated
        calls to avoid build errors in

In [68]:
# move one level-up
%cd ..

/content


In [69]:
%pwd

'/content'

## Importing the libraries

In [70]:
import shutil
import os
import sys
import cv2
import numpy as np
import skimage.io
import time
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
import glob
import tensorflow as tf

In [9]:
tf.__version__

'2.15.0'

In [71]:
ROOT_DIR = os.path.abspath('./Mask_RCNN-TF2')
ROOT_DIR

'/content/Mask_RCNN-TF2'

In [72]:
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/Mask_RCNN-TF2',
 '/',
 '/content/Mask_RCNN-TF2/samples/coco/',
 '/']

In [73]:
sys.path.append(ROOT_DIR)

In [74]:
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/Mask_RCNN-TF2',
 '/',
 '/content/Mask_RCNN-TF2/samples/coco/',
 '/',
 '/content/Mask_RCNN-TF2']

## Importing Model

In [75]:
from mrcnn import utils
from mrcnn import visualize
import mrcnn.model as modellib

## Importing MS COCO dataset

In [76]:
# https://cocodataset.org/#home
sys.path.append(os.path.join(ROOT_DIR, 'samples/coco/'))

In [77]:
sys.path

['/content',
 '/env/python',
 '/usr/lib/python310.zip',
 '/usr/lib/python3.10',
 '/usr/lib/python3.10/lib-dynload',
 '',
 '/usr/local/lib/python3.10/dist-packages',
 '/usr/lib/python3/dist-packages',
 '/usr/local/lib/python3.10/dist-packages/IPython/extensions',
 '/root/.ipython',
 '/content/Mask_RCNN-TF2',
 '/',
 '/content/Mask_RCNN-TF2/samples/coco/',
 '/',
 '/content/Mask_RCNN-TF2',
 '/content/Mask_RCNN-TF2/samples/coco/']

In [78]:
import coco

In [79]:
MODEL_DIR = os.path.join(ROOT_DIR, 'logs')
IMAGE_DIR = os.path.join(ROOT_DIR, 'images')

In [80]:
MODEL_DIR, IMAGE_DIR

('/content/Mask_RCNN-TF2/logs', '/content/Mask_RCNN-TF2/images')

### [ ! ] Compatibility Update
Run the code below to avoid any issues when running the latest versions of Tensorflow.

In [81]:
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
config = ConfigProto()
config.gpu_options.allow_growth = True
session = InteractiveSession(config=config)

np.bool = np.bool_



## Loading the pre-trained neural network

In [82]:
COCO_MODEL_PATH = os.path.join(ROOT_DIR, 'mask_rcnn_coco.h5')

In [83]:
utils.download_trained_weights(COCO_MODEL_PATH)

Downloading pretrained model to /content/Mask_RCNN-TF2/mask_rcnn_coco.h5 ...
... done downloading pretrained model!


In [84]:
class InferenceConfig(coco.CocoConfig):
  GPU_COUNT = 1
  IMAGES_PER_GPU = 1


In [85]:
config = InferenceConfig()

In [86]:
config.display()


Configurations:
BACKBONE                       resnet101
BACKBONE_STRIDES               [4, 8, 16, 32, 64]
BATCH_SIZE                     1
BBOX_STD_DEV                   [0.1 0.1 0.2 0.2]
COMPUTE_BACKBONE_SHAPE         None
DETECTION_MAX_INSTANCES        100
DETECTION_MIN_CONFIDENCE       0.7
DETECTION_NMS_THRESHOLD        0.3
FPN_CLASSIF_FC_LAYERS_SIZE     1024
GPU_COUNT                      1
GRADIENT_CLIP_NORM             5.0
IMAGES_PER_GPU                 1
IMAGE_CHANNEL_COUNT            3
IMAGE_MAX_DIM                  1024
IMAGE_META_SIZE                93
IMAGE_MIN_DIM                  800
IMAGE_MIN_SCALE                0
IMAGE_RESIZE_MODE              square
IMAGE_SHAPE                    [1024 1024    3]
LEARNING_MOMENTUM              0.9
LEARNING_RATE                  0.001
LOSS_WEIGHTS                   {'rpn_class_loss': 1.0, 'rpn_bbox_loss': 1.0, 'mrcnn_class_loss': 1.0, 'mrcnn_bbox_loss': 1.0, 'mrcnn_mask_loss': 1.0}
MASK_POOL_SIZE                 14
MASK_SHAPE         

In [87]:
MODEL_DIR

'/content/Mask_RCNN-TF2/logs'

In [88]:
# Create model object in inference mode
model = modellib.MaskRCNN(mode="inference", model_dir=MODEL_DIR, config=config)

In [89]:
# Load pre-trained weights into the model
model.load_weights(COCO_MODEL_PATH, by_name=True)

## Detecting objects

### Labels, or categories

In [90]:
class_names = ['BG', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
               'bus', 'train', 'truck', 'boat', 'traffic light',
               'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird',
               'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear',
               'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
               'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
               'kite', 'baseball bat', 'baseball glove', 'skateboard',
               'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
               'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
               'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
               'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
               'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
               'keyboard', 'cell phone', 'microwave', 'oven', 'toaster',
               'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors',
               'teddy bear', 'hair drier', 'toothbrush']

In [94]:
print("Quantity of Labels = ", len(class_names))

Quantity of Labels =  81


In [95]:
print("Accessing Label with index 1")
class_names[1], class_names.index('person')

Accessing Label with index 1


('person', 1)

In [96]:
# Define image path
image_path = '/content/Mask_RCNN-TF2/images/262985539_1709e54576_z.jpg'

In [97]:
# Load an RGB color image
image = skimage.io.imread(image_path)

In [118]:
# Perform detection
results = model.detect([image], verbose=1)

Processing 1 images
image                    shape: (375, 500, 3)         min:    0.00000  max:  255.00000  uint8
molded_images            shape: (1, 1024, 1024, 3)    min: -123.70000  max:  151.10000  float64
image_metas              shape: (1, 93)               min:    0.00000  max: 1024.00000  float64
anchors                  shape: (1, 261888, 4)        min:   -0.35390  max:    1.29134  float32


In [119]:
r = results[0]

In [121]:
output_dir = '/content/drive/MyDrive/Group-8-Deep-Learning/Mask_RCNN_results'

In [132]:
visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'],
                  class_names, r['scores'])


Output hidden; open in https://colab.research.google.com to view.

In [47]:
print(r)

{'rois': array([[119,   0, 148,  48],
       [ 68, 152, 253, 493],
       [211, 163, 305, 361],
       [249, 336, 293, 497],
       [ 94,   0, 125,  47],
       [246, 288, 368, 499],
       [ 15, 110, 187, 264],
       [153, 104, 197, 210],
       [142, 204, 159, 215],
       [227, 364, 254, 497]], dtype=int32), 'class_ids': array([9, 9, 9, 9, 9, 9, 9, 9, 1, 9], dtype=int32), 'scores': array([0.97456235, 0.9725628 , 0.97184324, 0.9652634 , 0.9584816 ,
       0.9583912 , 0.89046764, 0.76310897, 0.72845   , 0.7212261 ],
      dtype=float32), 'masks': array([[[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, .

In [48]:
class_names[9], class_names[1]

('boat', 'person')

## Upload images
> Note: If needed you can upload images from local drive directly to the repository images folder.

In [None]:
# from google.colab import files
# # Upload images
# uploaded = files.upload()

# # Define the path to the gallery directory
# gallery_path = '/content/Mask_RCNN-TF2/images'

# # Move uploaded images to the gallery directory
# for filename in uploaded.keys():
#     os.rename(filename, os.path.join(gallery_path, filename))

## Copy images from local drive to repository

## Loop through images folder

In [144]:
# Define the gallery path
gallery_path = '/content/drive/MyDrive/Group-8-Deep-Learning/images'

# List all images in the gallery directory
gallery = [os.path.join(gallery_path, f) for f in os.listdir(gallery_path) if os.path.isfile(os.path.join(gallery_path, f))]

# Print Gallery list
gallery

['/content/drive/MyDrive/Group-8-Deep-Learning/images/ukraine_war_2013_tank-flag.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/ukraine-war-bomb-survivor.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/ukraine-war-tank-buses.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/ukraine-war-hospital-scared-people.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/airplane_far.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/premium_photo-airplane-watermark.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/front-airplane.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/3-4-airplane.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/airplane-at-gate.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/1045023827_4ec3e8ba5c_z.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/9118579087_f9ffa19e63_z.jpg',
 '/content/drive/MyDrive/Group-8-Deep-Learning/images/25691390_f9944f61b5_z.jpg',
 '/conte

In [145]:
print("Image Quantity of Gallery = ", len(gallery))

Image Quantity of Gallery =  38


## Batch Processing

In [151]:
import os
import time
import skimage.io
from PIL import Image
import matplotlib.pyplot as plt
# import visualize  # Assuming you have a visualize module with the display_instances function

# Define the output directory for Mask R-CNN results
output_dir = '/content/drive/MyDrive/Group-8-Deep-Learning/Mask_RCNN_results'

# Initialize prediction time counter
t_prediction = 0

# Record start time
t_start = time.time()

# Process each image in the gallery
for image_path in gallery:
    try:
        # Extract the filename from the image path
        image_name = os.path.basename(image_path)

        # Read the current image
        current_image = skimage.io.imread(image_path)

        # Perform detection
        start_prediction_time = time.time()
        results = model.detect([current_image], verbose=0)
        r = results[0]
        t_prediction += (time.time() - start_prediction_time)

        # Display the instances
        visualize.display_instances(current_image, r['rois'], r['masks'],
                                     r['class_ids'], class_names, r['scores'])

        # Save the image with results
        output_image_path = os.path.join(output_dir, image_name)
        plt.savefig(output_image_path)  # Save the displayed image with segmentation results

        # Print prediction details
        print(f"Image reference: {image_name}")
        print(f"Prediction time: {t_prediction / 60}. Average {t_prediction / len(gallery) / 60} per image")
        total_time = (time.time() - t_start) / 60
        print(f"Total Time: {total_time}")
        print("Output path: ", output_image_path)

    except Exception as e:
        # Log any errors that occur during processing
        print(f"Error processing {image_name}: {e}")


Output hidden; open in https://colab.research.google.com to view.

## Image Results
> 38 Images were processed with
Total Time: 10 minutes 56 seconds.

# Video Segmentation

In [159]:
video_footage = cv2.VideoCapture('/content/drive/MyDrive/Group-8-Deep-Learning/videos/dog_birds.mp4')
video_connected, frame = video_footage.read()
video_connected


True

In [160]:
frame.shape

(1080, 1920, 3)

## Save Codec Video file to specific path

In [161]:
save_video_path = '/content/drive/MyDrive/Group-8-Deep-Learning/videos/dog_birds_results--test-results.mp4'
save_video_codec = cv2.VideoWriter_fourcc(*'XVID')
frame_width, frame_height = frame.shape[1], frame.shape[0]
save_video = cv2.VideoWriter(save_video_path, save_video_codec, 24, (frame_width, frame_height))


In [182]:
source_video_function = '/content/drive/MyDrive/Group-8-Deep-Learning/videos/video_functions.py'
destination_video_function = '/content/Mask_RCNN-TF2/mrcnn'

shutil.copy(source_video_function, destination_video_function)

'/content/Mask_RCNN-TF2/mrcnn/video_functions.py'

In [183]:
from mrcnn import video_functions

In [184]:
object_colors = video_functions.random_colors(len(class_names), seed=99)
len(object_colors)

81

In [185]:
print(object_colors)

[(0.0, 0.5925925925925926, 1.0), (0.518518518518519, 0.0, 1.0), (1.0, 0.2222222222222222, 0.0), (1.0, 0.8888888888888888, 0.0), (1.0, 0.5925925925925926, 0.0), (0.0, 1.0, 0.5185185185185182), (0.0, 1.0, 0.8148148148148149), (0.7407407407407405, 0.0, 1.0), (0.0, 0.9629629629629628, 1.0), (1.0, 0.0, 0.29629629629629584), (1.0, 0.7407407407407407, 0.0), (1.0, 0.5185185185185185, 0.0), (0.0, 1.0, 0.8888888888888888), (1.0, 0.0, 0.8148148148148149), (0.8888888888888888, 1.0, 0.0), (0.0, 1.0, 0.9629629629629628), (0.6666666666666667, 1.0, 0.0), (1.0, 0.0, 0.0740740740740744), (1.0, 0.0, 0.14814814814814792), (0.0, 1.0, 0.7407407407407405), (0.07407407407407418, 1.0, 0.0), (1.0, 0.0, 0.7407407407407405), (1.0, 0.0, 0.518518518518519), (1.0, 0.2962962962962963, 0.0), (0.44444444444444464, 0.0, 1.0), (1.0, 0.07407407407407407, 0.0), (0.962962962962963, 1.0, 0.0), (0.5925925925925926, 1.0, 0.0), (1.0, 0.4444444444444444, 0.0), (0.0, 1.0, 0.07407407407407396), (0.8148148148148149, 1.0, 0.0), (0.2

In [186]:
def show(img):
  fig = plt.gcf()
  fig.set_size_inches(16,10)
  plt.axis('off')
  plt.imshow(img)
  plt.show()
  plt.close()


In [187]:
frame_show = 30
current_frame = 0

In [188]:
while (cv2.waitKey(1) < 0):
  video_connected, frame = video_footage.read()

  if not video_connected:
    break

  results = model.detect([frame], verbose=0)
  r = results[0]

  processed_frame = video_functions.display_instances(frame, r['rois'], r['masks'],
                                                      r['class_ids'], class_names, r['scores'], colors=object_colors)

  if current_frame <= frame_show:
    show(processed_frame)
    current_frame += 1

  save_video.write(cv2.cvtColor(processed_frame, cv2.COLOR_BGR2RGB))
save_video.release()

Output hidden; open in https://colab.research.google.com to view.

## Video Results
- Video of 7 seconds was processed in 49 minutes, 30 seconds.
- Video of 2 seconds was processed in 17 minutes.

# Convert the Python Model to a Web-Friendly Format

In [None]:
!pip install tensorflowjs


In [None]:
import tensorflowjs as tfjs
from mrcnn import model as mrcnn_model

# Create an instance of the Mask R-CNN model
mrcnn_model = mrcnn_model.MaskRCNN(mode='inference', config=config, model_dir=MODEL_DIR)

# Load only the weights from the pre-trained model file
mrcnn_model.load_weights(COCO_MODEL_PATH, by_name=True)

In [None]:
# Define the directory where you want to save the TensorFlow.js model
tfjs_target_dir = '/content/drive/MyDrive/Group-8-Deep-Learning/exports/mask_rcnn_tfjs_model'

# Access the underlying Keras model from MaskRCNN
keras_model = mrcnn_model.keras_model

# Convert the Keras model to TensorFlow.js format
tfjs.converters.save_keras_model(keras_model, tfjs_target_dir)

print("Model converted to TensorFlow.js format successfully.")