<a href="https://colab.research.google.com/github/nyp-sit/sdaai-pdc2-students/blob/master/iti107/session-5/od_using_tfod_api/object_detection_using_tfod_api.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab" align="left"/></a>

## 1. Imports

In [None]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
from shutil import copy2

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image
from tqdm import tqdm

## 2. Environment setup

In [None]:
# Root of the TensorFlow models `research` directory. Set the
# TF_MODELS_RESEARCH_DIR environment variable to point at your own checkout,
# or edit the fallback path below.
TF_MODELS_RESEARCH_DIR = os.environ.get(
    'TF_MODELS_RESEARCH_DIR',
    'C:\\Users\\user\\Documents\\tensorflow\\models\\research')
TF_SLIM_DIR = os.path.join(TF_MODELS_RESEARCH_DIR, 'slim')
TF_OD_DIR = os.path.join(TF_MODELS_RESEARCH_DIR, 'object_detection')

# Make the TFOD API directories importable. The membership check keeps
# sys.path free of duplicate entries when this cell is run more than once.
for _tfod_path in (TF_MODELS_RESEARCH_DIR, TF_SLIM_DIR, TF_OD_DIR):
    if _tfod_path not in sys.path:
        sys.path.append(_tfod_path)

### TFOD API imports
Here are the imports of the required object detection modules in TFOD API

In [None]:
from utils import ops as utils_ops
from utils import label_map_util
from utils import visualization_utils as vis_util

## 3. Model preparation 
Choose the model to use for detection. This step is for downloading pre-trained models such as SSD, YOLO, Faster R-CNN, etc. Skip this step if you have your own frozen inference graph and wish to try that instead.
In that case, go to the next step and give the path where your model (`frozen_inference_graph.pb`) is stored.


In [None]:
# Name of the pre-trained model archive to download.
MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
MODEL_FILE = '{}.tar.gz'.format(MODEL_NAME)
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

# Frozen detection graph: the actual model used for the object detection.
PATH_TO_FROZEN_GRAPH = '{}/frozen_inference_graph.pb'.format(MODEL_NAME)

# Label map: the strings used to put the correct label on each box.
LABEL_FILE = 'mscoco_label_map.pbtxt'
PATH_TO_LABELS = os.path.join(TF_OD_DIR, 'data', LABEL_FILE)

# Keep a copy of the label map in the current working directory.
copy2(src=PATH_TO_LABELS, dst=LABEL_FILE)


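If you are using your own model, give the paths to two things here:
1. the label map
2. the frozen inference graph

Skip this cell if you are using the downloaded model, because it overrides `PATH_TO_FROZEN_GRAPH` and `PATH_TO_LABELS`.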

In [None]:
# Location of a custom model: both files live in the same directory.
# Raw strings keep the Windows backslashes readable.
_CUSTOM_MODEL_DIR = r'C:\Users\user\Documents\tensorflow\models\research\object_detection\ssd_mobilenet_v1_coco_2018_03_29'
PATH_TO_FROZEN_GRAPH = _CUSTOM_MODEL_DIR + r'\frozen_inference_graph_final.pb'
PATH_TO_LABELS = _CUSTOM_MODEL_DIR + r'\my_label_map.pbtxt'

### Download Model

This downloads the frozen inference graph from the model zoo. Skip this as well if you have a custom one.

In [None]:
# Download the model archive. urlretrieve replaces the deprecated URLopener
# class and performs the same fetch-to-file operation.
urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Extract only the frozen inference graph. The context manager closes the
# archive even if extraction fails.
extract_root = os.path.realpath(os.getcwd())
with tarfile.open(MODEL_FILE) as tar_file:
    for member in tar_file.getmembers():
        if 'frozen_inference_graph.pb' not in os.path.basename(member.name):
            continue
        # The archive comes from the network, so refuse anything that is not
        # a regular file or that would be written outside extract_root.
        target_path = os.path.realpath(os.path.join(extract_root, member.name))
        inside_root = os.path.commonpath([extract_root, target_path]) == extract_root
        if not member.isfile() or not inside_root:
            continue
        tar_file.extract(member, extract_root)

### Load the (frozen) Tensorflow model into memory.



In [None]:
# Build a dedicated graph and import the frozen model into it.
detection_graph = tf.Graph()
with detection_graph.as_default():
    # Read the serialized GraphDef bytes from disk first...
    with tf.compat.v1.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as graph_file:
        serialized_graph = graph_file.read()

    # ...then parse them and import the nodes into detection_graph.
    od_graph_def = tf.compat.v1.GraphDef()
    od_graph_def.ParseFromString(serialized_graph)
    # name='' so tensors can be looked up by their unprefixed names
    # (e.g. 'image_tensor:0') later on.
    tf.import_graph_def(od_graph_def, name='')

### Loading label map


In [None]:
# Category index built from the label map; it is passed to the visualization
# calls so that detections can be labelled.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

## 4. Object Detection on Image

In [None]:
# This is needed to display the images.
%matplotlib inline

### Helper code

The image is read using Pillow as an `Image` object. `Image.size` gives the dimensions of the image in (width, height) order. `Image.getdata()` gives a flattened sequence of pixel values, so we need to reshape it to `(height, width, channels)`.

In [None]:
def load_image_into_numpy_array(image):
    """Convert a Pillow image into a ``(height, width, 3)`` uint8 array.

    Args:
        image: A Pillow ``Image`` (or any object exposing ``mode``, ``size``,
            ``convert`` and ``getdata`` in the same way).

    Returns:
        A ``numpy.ndarray`` of dtype ``uint8`` and shape
        ``(height, width, 3)``.
    """
    # Grayscale, palette and RGBA images do not carry exactly three values
    # per pixel, which would make the reshape below fail; convert them to RGB.
    if image.mode != 'RGB':
        image = image.convert('RGB')
    # Image.size is (width, height), while the array is laid out rows first.
    (im_width, im_height) = image.size
    return np.array(image.getdata(), dtype=np.uint8).reshape(
        (im_height, im_width, 3))

In [None]:
def run_inference_for_single_image(image_path, graph):
    """Run detection on one image file and plot the annotated result.

    Uses the module-level ``category_index`` to label the detections.

    Args:
        image_path: Path of the image file to open with Pillow.
        graph: Graph containing the imported frozen detection model; it must
            expose the 'image_tensor', 'detection_boxes', 'detection_scores',
            'detection_classes' and 'num_detections' tensors.

    Returns:
        None. The annotated image is shown on a new matplotlib figure.
    """
    # Decode the pixels inside a context manager so the image file is closed
    # as soon as the array has been built.
    with Image.open(image_path) as image:
        image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape:
    # [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)

    with graph.as_default():
        with tf.compat.v1.Session() as sess:
            # Get handles to input and output tensors
            image_tensor = graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = graph.get_tensor_by_name('detection_boxes:0')
            detection_scores = graph.get_tensor_by_name('detection_scores:0')
            detection_classes = graph.get_tensor_by_name('detection_classes:0')
            num_detections = graph.get_tensor_by_name('num_detections:0')

            # Run inference
            (boxes, scores, classes, _num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})

    # Draw the boxes and labels onto image_np; only detections scoring at
    # least min_score_thresh are drawn.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        min_score_thresh=0.4,
        use_normalized_coordinates=True,
        line_thickness=10)

    # Size, in inches, of the output images.
    IMAGE_SIZE = (12, 8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    

In [None]:
# Image file to run detection on (raw string keeps the backslashes literal).
image = r'C:\Users\user\Documents\tensorflow\models\research\object_detection\150.jpg'
run_inference_for_single_image(image_path=image, graph=detection_graph)

## 5. Object Detection on Video (Optional) 

The following code performs detection on a video in real time. It reads the video frame by frame, performs detection, draws the bounding boxes on each frame (image), and then displays the frame directly using `cv2.imshow()`.

Only run this when you are using a local computer, as the cv2 video player window is shown as a separate window on local computer, not within the notebook. 

In [None]:
import cv2

def run_inference_for_video(video_filepath, graph):
    """Run detection on a video and show the annotated frames in a window.

    Frames are read one at a time, passed through the detector, annotated and
    displayed with ``cv2.imshow``. Playback stops at the end of the video or
    when the Enter key is pressed. Uses the module-level ``category_index``
    to label the detections.

    Args:
        video_filepath: Path (or source) handed to ``cv2.VideoCapture``.
        graph: Graph containing the imported frozen detection model.

    Returns:
        None.
    """
    video_player = cv2.VideoCapture(video_filepath)

    try:
        with graph.as_default():
            # tf.compat.v1.Session matches the other cells of this notebook;
            # plain tf.Session is not available under TensorFlow 2.x.
            with tf.compat.v1.Session() as sess:
                image_tensor = graph.get_tensor_by_name('image_tensor:0')
                detection_boxes = graph.get_tensor_by_name('detection_boxes:0')
                detection_scores = graph.get_tensor_by_name('detection_scores:0')
                detection_classes = graph.get_tensor_by_name('detection_classes:0')
                num_detections = graph.get_tensor_by_name('num_detections:0')

                while video_player.isOpened():
                    ret, image_np = video_player.read()
                    if not ret:
                        # End of the video or a failed read.
                        break

                    # OpenCV decodes frames in BGR channel order; feed the
                    # detector RGB, as the still-image path (Pillow) does.
                    image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                    image_np_expanded = np.expand_dims(image_rgb, axis=0)

                    (boxes, scores, classes, _num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    # Annotate the original BGR frame, which is what
                    # cv2.imshow expects.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=10)

                    cv2.imshow('Object Detection', image_np)
                    if cv2.waitKey(1) == 13:  # 13 is the Enter Key
                        break
    finally:
        # Release camera and close windows, even if inference raised.
        video_player.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)

The following code is slightly modified to read the video file frame by frame, perform detection on each frame, and write the annotated frame to a video file using the `VideoWriter` class provided by OpenCV.

In [None]:
def write_video(video_in_filepath, video_out_filepath, graph):
    """Run detection on every frame of a video and save the annotated copy.

    Uses the module-level ``category_index`` to label the detections.

    Args:
        video_in_filepath: Path of the video file to read.
        video_out_filepath: Path of the annotated video file to create.
        graph: Graph containing the imported frozen detection model.

    Returns:
        None. If ``video_in_filepath`` does not exist, a message is printed
        and nothing is written.
    """
    if not os.path.exists(video_in_filepath):
        print('video filepath not valid')
        # Nothing to read: stop here instead of creating an empty output file.
        return

    video_reader = cv2.VideoCapture(video_in_filepath)
    video_writer = None

    try:
        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))

        # Keep the source frame rate so the output plays at the original
        # speed; fall back to 30 fps when the reader cannot report one.
        fps = video_reader.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = 30.0

        video_writer = cv2.VideoWriter(video_out_filepath,
                                       cv2.VideoWriter_fourcc(*'XVID'),
                                       fps,
                                       (frame_w, frame_h))

        with graph.as_default():
            with tf.compat.v1.Session() as sess:
                image_tensor = graph.get_tensor_by_name('image_tensor:0')
                detection_boxes = graph.get_tensor_by_name('detection_boxes:0')
                detection_scores = graph.get_tensor_by_name('detection_scores:0')
                detection_classes = graph.get_tensor_by_name('detection_classes:0')
                num_detections = graph.get_tensor_by_name('num_detections:0')

                for _ in tqdm(range(nb_frames)):
                    ret, image_np = video_reader.read()
                    if not ret:
                        # The reported frame count can exceed the frames that
                        # can actually be decoded; stop at the first failure.
                        break

                    # OpenCV decodes frames in BGR channel order; feed the
                    # detector RGB, as the still-image path (Pillow) does.
                    image_rgb = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                    image_np_expanded = np.expand_dims(image_rgb, axis=0)

                    (boxes, scores, classes, _num) = sess.run(
                        [detection_boxes, detection_scores, detection_classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    # Annotate the original BGR frame, which is what
                    # VideoWriter expects.
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        use_normalized_coordinates=True,
                        line_thickness=10)

                    video_writer.write(np.uint8(image_np))
    finally:
        # Release camera and close windows, even if inference raised.
        video_reader.release()
        if video_writer is not None:
            video_writer.release()
        cv2.destroyAllWindows()
        cv2.waitKey(1)

Run this code to create a video file.

In [None]:
# Source clip and destination of the annotated copy (raw strings keep the
# backslashes literal).
video_in_file = r'C:\Users\user\Documents\tensorflow\models\research\object_detection\pro.mp4'
video_out_file = r'C:\Users\user\Documents\tensorflow\models\research\object_detection\pro_detect.mp4'

write_video(
    video_in_filepath=video_in_file,
    video_out_filepath=video_out_file,
    graph=detection_graph,
)

Run this code (uncomment it first) to detect and display in real time.

In [None]:
#video_in_file = 'data/tube.mp4'
#run_inference_for_video(video_in_file, detection_graph)