In [None]:
# You need to update OpenCV if you are using Colab.
# Uncomment this line if you are using Colab.
!pip install opencv-python --upgrade

In [None]:
!pip install pycocotools

In [None]:
if not 'models' in os.listdir():
    !git clone --depth 1 https://github.com/tensorflow/models

In [None]:
%%bash
# The `%%bash` cell magic has to be the very first line of the cell; it runs the
# rest of the cell as a bash script.
# Note: this requires a `bash` executable on the host (available on Colab).

# Stop at the first failing command so a broken step is not silently skipped.
set -e

# Change the directory to models/research
cd models/research/

# Compile the API's Protobuf files
protoc object_detection/protos/*.proto --python_out=.

# Copy the required Setup file
cp object_detection/packages/tf2/setup.py .

# Install the API using the setup.py file
python -m pip install .

In [None]:
# To fix:
# - TypeError: Descriptors cannot be created directly,

!pip install protobuf==3.20.0 -q

In [None]:
# Maps each selectable model name to the Dropbox URL of its zip archive.
TRAINED_MODELS = dict(
    CenterNet_HourGlass='https://www.dropbox.com/s/oilbvq2nzkwwgg9/CenterNet_HourGlass.zip?dl=1',
    EffDet_D3='https://www.dropbox.com/s/0ql06hmwnjbryzz/EffDet_D3.zip?dl=1',
    FasterRCNN_ResNet101='https://www.dropbox.com/s/nxv1s0geuu5pafx/FasterRCNN_ResNet101.zip?dl=1',
    RetinaNet_101='https://www.dropbox.com/s/4mpu10gjosbsdbc/RetinaNet_101.zip?dl=1',
)

In [None]:
#@title Model Selection { display-mode: "form", run: "auto" }
model_display_name = 'FasterRCNN_ResNet101' # @param ['CenterNet_HourGlass', 'EffDet_D3', 'FasterRCNN_ResNet101', 'RetinaNet_101']
# Look up the download URL registered for the chosen model name.
model_handle = TRAINED_MODELS[model_display_name]

print(f'Selected model:{model_display_name}')

In [None]:
import os
from zipfile import ZipFile
import requests

In [None]:
def download_file(url, save_name):
    """Download `url` with an HTTP GET request and save the body to `save_name`.

    Args:
        url: Address to download from.
        save_name: Path of the local file to create (or overwrite).

    Raises:
        requests.HTTPError: If the server answers with an error status. The
            output file is not created in that case, so an error page is never
            saved as if it were the requested file.
    """
    # Stream the body in chunks instead of holding the whole payload in memory.
    with requests.get(url, stream=True) as response:
        response.raise_for_status()
        # The `with` block guarantees the file is flushed and closed.
        with open(save_name, 'wb') as out_file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)

In [None]:
def unzip(zip_file=None):
    """Extract every member of the zip archive `zip_file` into the current directory.

    Prints "Extracted all" on success. If the archive cannot be opened or
    extracted, prints "Invalid file" instead of raising.

    Args:
        zip_file: Path (or file object) of the archive to extract.
    """
    try:
        with ZipFile(zip_file) as archive:
            archive.extractall("./")
            print("Extracted all")
    # Catch `Exception` instead of using a bare `except:` so that
    # KeyboardInterrupt / SystemExit still propagate (e.g. when the user
    # interrupts a long extraction).
    except Exception:
        print("Invalid file")

In [None]:
# Download and extract the selected model unless a path with its name already exists.
if not os.path.exists(model_display_name):
    download_file(model_handle, f'{model_display_name}.zip')
    unzip(zip_file=f'{model_display_name}.zip')

In [None]:
import os
# Opt in to legacy Keras 2 for backwards compatibility with TensorFlow.
# NOTE(review): presumably this has to be set before TensorFlow is imported — confirm.
os.environ.update({"TF_USE_LEGACY_KERAS": "1"})

In [None]:
# Importing required libraries and files
import time

import numpy as np
import tensorflow as tf

from PIL import Image
import cv2

from object_detection.utils import (
                                    dataset_util,
                                    label_map_util,
                                    config_util
                                   )

from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder
from object_detection.utils import ops as utils_ops

%matplotlib inline

In [None]:
# Download and extract the `val` archive unless a `val` path already exists.
if not os.path.exists('val'):
    download_file('https://www.dropbox.com/s/gzb7a83ov5u1rf5/val.zip?dl=1', 'val.zip')
    unzip(zip_file='val.zip')

In [None]:
# Get paths of our test images (every `.jpg` file directly inside `val`).
# `os.listdir` returns entries in arbitrary order, so sort them to make the
# images selected further down the same on every run.
TEST_IMAGE_PATHS = sorted('val/' + f for f in os.listdir('val') if f.endswith('.jpg'))

In [None]:
# This function returns a callable that can be used directly
# to run a forward pass on new images.
def get_model_detection_function(model):
    """Get a tf.function for detection.

    Args:
        model: Detection model exposing `preprocess`, `predict` and
            `postprocess`.

    Returns:
        A `tf.function` that takes an image tensor and returns the dictionary
        produced by `model.postprocess`.
    """

    # `reduce_retracing` is the current name of the deprecated
    # `experimental_relax_shapes` argument of `tf.function`.
    @tf.function(reduce_retracing=True)
    def detect_fn(image):
        """
        Returns the post-processed detections for an image passed to the model.
        """

        # Automatically preprocess according to the trained model configuration.
        image, shapes = model.preprocess(image)

        # Perform the prediction.
        prediction_dict = model.predict(image, shapes)

        # Postprocess the predictions so they can be visualized.
        detection_dict = model.postprocess(prediction_dict, shapes)

        return detection_dict

    return detect_fn

In [None]:
# Names of the pipeline config file and of the checkpoint directory.
pipeline_config_filename, ckpt_dirname = 'pipeline.config', 'training_of_model'

In [None]:
# Both paths live under the directory named after the selected model.
pipeline_config_path = os.path.join(model_display_name, pipeline_config_filename)
model_ckpt_path = os.path.join(model_display_name, ckpt_dirname)

In [None]:
# Read the pipeline configuration file (pipeline.config) and pick out the model configuration.
configs = config_util.get_configs_from_pipeline_file(pipeline_config_path)
model_config = configs['model']

# Build the detection model from the model configuration.
detection_model = model_builder.build(model_config=model_config, is_training=False)

# Restore the checkpoint model.
# `tf.train.latest_checkpoint` returns None when the directory holds no checkpoint,
# and restoring from None does not load any weights, so fail loudly instead of
# silently running inference with an untrained model.
latest_ckpt_path = tf.train.latest_checkpoint(model_ckpt_path)
if latest_ckpt_path is None:
    raise FileNotFoundError(f'No checkpoint found in {model_ckpt_path!r}')
ckpt = tf.train.Checkpoint(model=detection_model)
ckpt.restore(latest_ckpt_path).expect_partial()

# Wrap the restored model in a detection function.
ckpt_inference_fn = get_model_detection_function(detection_model)

In [None]:
# Load the label map referenced by the eval input config, relative to the model directory.
label_map_path = os.path.join(
    model_display_name,
    configs['eval_input_config'].label_map_path,
)
label_map = label_map_util.load_labelmap(label_map_path)

# Build the category index (with display names) used to label the predictions.
category_index = label_map_util.create_category_index_from_labelmap(
    label_map_path,
    use_display_name=True,
)

In [None]:
def get_visualizations_and_inf_time(image, model_det_fn, category_index, use_saved_model=True):
    """Run detection on one image, draw the detections on it and time the model call.

    Args:
        image: Image array to run detection on. It is handed to the
            visualization helper and returned afterwards, so the boxes are
            expected to be drawn onto this same array.
        model_det_fn: Callable that takes the batched image tensor and returns
            a dictionary of detection tensors containing at least
            'num_detections', 'detection_boxes', 'detection_classes' and
            'detection_scores'.
        category_index: Category index from the label map, forwarded to the
            visualization helper.
        use_saved_model: When True the image is converted to a tensor without
            an explicit dtype and class ids are used as-is; when False the
            image is converted to float32 and class ids are shifted by 1.

    Returns:
        A tuple `(image, seconds)`: the image passed in and the duration of
        the `model_det_fn` call in seconds.
    """

    # Convert image into a tensor.
    if use_saved_model:
        input_tensor = tf.convert_to_tensor(image)
        label_id_offset = 0
    else:
        input_tensor = tf.convert_to_tensor(image, dtype=tf.float32)
        # Set a label id offset because the model will start predictions from 0 but
        # 0 is the background index for ckpt model format.
        label_id_offset = 1

    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]

    # `perf_counter` is monotonic and high-resolution, which makes it the right
    # clock for measuring a short interval.
    start = time.perf_counter()
    # Predict on the tensor image.
    output_dict = model_det_fn(input_tensor)
    end = time.perf_counter()

    # Get total no. of detections in the image.
    num_detections = int(output_dict.pop('num_detections'))

    # Keep only the first batch entry and its first `num_detections` rows,
    # converted to numpy arrays.
    output_dict = {key: value[0, :num_detections].numpy() for key, value in output_dict.items()}
    output_dict['num_detections'] = num_detections

    # Use the TensorFlow visualization_utils.py to draw the bounding boxes along with the class labels and the confidences.
    viz_utils.visualize_boxes_and_labels_on_image_array(
        # Image to draw boxes on.
        image,
        # Array of detection boxes.
        output_dict['detection_boxes'],
        # Array of classes.
        (output_dict['detection_classes'] + label_id_offset).astype(int),
        # Array of prediction scores of each detection.
        output_dict['detection_scores'],
        # Category index from label map.
        category_index,
        # Box coordinates are normalized.
        use_normalized_coordinates=True,
        # Max boxes to draw on the image.
        max_boxes_to_draw=30,
        # Minimum level of confidence for each detection to be considered valid.
        min_score_thresh=.50,
        # Show classes with scores.
        agnostic_mode=False)

    return image, end - start

In [None]:
# Run the checkpoint-restored model on at most the first 12 test images.
for i, image_path in enumerate(TEST_IMAGE_PATHS[:12]):

    # Read the image as a numpy array.
    image = np.array(Image.open(image_path))

    image_pred, _ = get_visualizations_and_inf_time(
        image,
        ckpt_inference_fn,
        category_index,
        use_saved_model=False,
    )

    # Displaying the image along with bounding box predictions.
    display(Image.fromarray(image_pred))

In [1]:
# Name of the directory used for the exported model.
output_directory = "saved_model"

In [None]:
# Run the script to export the model into saved_model format
# You will need to pass, your trained model ckpt directory, pipeline path, output dir.
!python ./models/research/object_detection/exporter_main_v2.py \
    --trained_checkpoint_dir {model_ckpt_path} \
    --output_directory {output_directory} \
    --pipeline_config_path {pipeline_config_path}

In [None]:
# Helper that loads a model back from the SavedModel format
def load_saved_model(trained_model_path):
    '''
    Load the model stored under `trained_model_path` and return its serving signature.

    Args:
    trained_model_path: Directory that contains the "saved_model" folder.

    Returns:
    The 'serving_default' signature of the loaded model.
    '''
    saved_model_dir = os.path.join(trained_model_path, "saved_model")
    loaded = tf.saved_model.load(saved_model_dir)
    return loaded.signatures['serving_default']

In [None]:
# Load the exported model's inference function from the output directory
saved_model_inference_fn = load_saved_model(trained_model_path=output_directory)

In [None]:
# Run the exported model on at most the first 12 test images and show the predictions.
for i, image_path in enumerate(TEST_IMAGE_PATHS[:12]):

    # Read the image as a numpy array.
    image = np.array(Image.open(image_path))

    image_pred, _ = get_visualizations_and_inf_time(
        image,
        saved_model_inference_fn,
        category_index,
        use_saved_model=True,
    )

    # Displaying the image along with bounding box predictions.
    display(Image.fromarray(image_pred))

In [None]:
def inference_on_video(video_path, model_det_fn, model_name, category_index, use_saved_model=True):
    """
    Run detection on every frame of a video and write the annotated frames to a new video.

    Each frame gets the detections, the inference FPS of that frame and the
    model name drawn on it. The result is written next to the input as
    '<video path without extension>_<model_name>_out.mp4'.

    Args:
        video_path: Path of the input video.
        model_det_fn: Detection callable forwarded to `get_visualizations_and_inf_time`.
        model_name: Name drawn on every frame and embedded in the output file name.
        category_index: Category index forwarded to `get_visualizations_and_inf_time`.
        use_saved_model: Forwarded to `get_visualizations_and_inf_time`. Defaults to
            True, the value this function always used before it became a parameter.

    Returns:
        None. Prints "Error opening video file" and returns early when the
        input cannot be opened.
    """

    video = cv2.VideoCapture(video_path)

    # Check if the video opened successfully
    if not video.isOpened():
        print("Error opening video file")
        video.release()
        return

    output_file = None
    try:
        # create video writer with the same size and frame rate as the input
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frames_per_second = video.get(cv2.CAP_PROP_FPS)

        output_fname = '{}_{}_out.mp4'.format(os.path.splitext(video_path)[0], model_name)

        output_file = cv2.VideoWriter(
            filename=output_fname,
            fourcc=cv2.VideoWriter_fourcc(*'mp4v'),
            fps=float(frames_per_second),
            frameSize=(width, height),
            isColor=True,
        )

        while video.isOpened():
            ret, frame = video.read()
            if not ret:
                break

            # Reverse the channel order (presumably BGR -> RGB) and make the result
            # contiguous so it can be drawn on directly.
            rgb_frame = np.ascontiguousarray(frame[..., ::-1])
            output_frame, inf_time = get_visualizations_and_inf_time(
                rgb_frame, model_det_fn, category_index, use_saved_model=use_saved_model
            )

            # Guard against a zero elapsed time reported by a coarse clock.
            fps = 1. / inf_time if inf_time > 0 else float('inf')
            disp_fps = "FPS: " + str(round(fps, 2))
            output_frame = np.ascontiguousarray(output_frame)
            output_frame = cv2.putText(output_frame, disp_fps, (10, 50), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (100, 255, 255), 3)
            output_frame = cv2.putText(output_frame, f'Model: {model_name}', (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (100, 255, 255), 3)

            # Reverse the channels back before handing the frame to the writer.
            output_file.write(np.ascontiguousarray(output_frame[..., ::-1]))
    finally:
        # Always release the capture and the writer, even if a frame fails.
        video.release()
        if output_file is not None:
            output_file.release()

    return

In [None]:
# Run the exported model on the sample video; the selected model's name is used as the label.
video_file = "video_sample.mp4"
inference_on_video(
    video_path=video_file,
    model_det_fn=saved_model_inference_fn,
    model_name=model_display_name,
    category_index=category_index,
)

In [None]:
from IPython.display import YouTubeVideo, display
# Embed the YouTube video in the cell output at 800x450.
video = YouTubeVideo(id="X2wnWkT5XsE", width=800, height=450)
display(video)