# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
import time 

import sys

# Temporarily hide the ROS Kinetic Python 2.7 dist-packages directory while
# importing cv2 (presumably so that the cv2 build shipped with ROS does not
# shadow the one installed for this interpreter -- verify on your machine).
# The entry is only removed/restored when it is actually on sys.path:
# list.remove() raises ValueError on machines without ROS installed.
_ROS_DIST_PACKAGES = '/opt/ros/kinetic/lib/python2.7/dist-packages'
_ros_path_was_present = _ROS_DIST_PACKAGES in sys.path
if _ros_path_was_present:
    sys.path.remove(_ROS_DIST_PACKAGES)
import cv2
if _ros_path_was_present:
    sys.path.append(_ROS_DIST_PACKAGES)

from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.12.0'):
    raise ImportError('Please upgrade your TensorFlow installation to v1.12.*.')


## Env setup

In [2]:
%matplotlib inline
%pylab inline 
# Added locally: %pylab populates the interactive namespace from numpy and matplotlib.

Populating the interactive namespace from numpy and matplotlib


## Object detection imports
Here are the imports from the object detection module.

In [3]:
from utils import label_map_util
from utils import visualization_utils as vis_util

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [4]:
# Which pre-trained model to use.
#
# To switch models, change MODEL_NAME and place the matching .tar.gz archive
# in the 'Iterations/' directory; the frozen graph (.pb) is looked up under
# 'Iterations/<MODEL_NAME>/'.
# (An earlier run used MODEL_NAME = 'ssd_mobilenet_v1_coco'.)
MODEL_NAME = 'ssd_mobilenet_v1_coco_2018_01_28'

# Shared prefix of the archive path and the extracted model directory.
_model_prefix = 'Iterations/' + MODEL_NAME

# Local archive holding the exported model. Remote download is disabled
# (DOWNLOAD_BASE was 'http://download.tensorflow.org/models/object_detection/').
MODEL_FILE = _model_prefix + '.tar.gz'

# Frozen detection graph: the actual model used for the object detection.
PATH_TO_FROZEN_GRAPH = _model_prefix + '/frozen_inference_graph.pb'

# Label map: the strings used to attach the correct label to each box.
PATH_TO_LABELS = os.path.join(
    'data',
    'mscoco_label_map.pbtxt',
)

## Extract Model (from a local archive; the download step is disabled)

In [5]:
# The remote download is disabled: the archive at MODEL_FILE must already
# exist on disk. Only the frozen inference graph is unpacked from it.
#opener = urllib.request.URLopener()
#opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)

# Open the archive in a `with` block so the file handle is closed even if
# extraction fails (the handle was previously left open).
with tarfile.open(MODEL_FILE) as tar_file:
    print(MODEL_FILE)
    for member in tar_file.getmembers():
        # Match on the base name; the member keeps its in-archive directory
        # when it is extracted below 'Iterations/'.
        if 'frozen_inference_graph.pb' in os.path.basename(member.name):
            tar_file.extract(member, 'Iterations/')

Iterations/ssd_mobilenet_v1_coco_2018_01_28.tar.gz


## Load a (frozen) Tensorflow model into memory.

In [6]:
# Build a dedicated graph and import the frozen model into it; later cells
# open their session on `detection_graph`.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()

    # Read the serialized GraphDef bytes, then release the file handle before
    # parsing them.
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as graph_file:
        serialized_graph = graph_file.read()

    od_graph_def.ParseFromString(serialized_graph)
    # name='' imports the nodes without a name prefix, so tensors can be
    # fetched as e.g. 'image_tensor:0'.
    tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolutional network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine.

In [None]:
# Lookup from class index to category, built from the label map file; it is
# handed to the visualization helper when detections are drawn.
category_index = label_map_util.create_category_index_from_labelmap(
    PATH_TO_LABELS,
    use_display_name=True,
)

# Detection

In [8]:
def run_inference_for_single_image(image, graph):
    """Run the detection model on one image and return its unbatched outputs.

    This function depends on two notebook-level globals that the detection
    loop cell creates before calling it: ``sess`` (an open ``tf.Session``)
    and ``tensor_dict`` (output name -> tensor handle).

    Args:
        image: Image array. ``image.shape[0]`` and ``image.shape[1]`` are
            passed as height and width when masks are reframed, and a leading
            batch dimension is added before feeding ``image_tensor:0``.
        graph: Unused; kept so that existing callers keep working. Tensors
            are looked up on the current default graph.

    Returns:
        dict with ``'num_detections'`` (int), ``'detection_classes'`` (int64
        array), ``'detection_boxes'``, ``'detection_scores'`` and, when the
        model provides them, ``'detection_masks'`` -- each with the batch
        dimension removed.
    """
    # Work on a copy: the shared `tensor_dict` must keep pointing at the raw
    # model outputs. Overwriting its 'detection_masks' entry (as was done
    # before) makes every later call reframe an already reframed tensor.
    fetches = dict(tensor_dict)
    if 'detection_masks' in fetches:
        # The following processing is only for a single image.
        detection_boxes = tf.squeeze(fetches['detection_boxes'], [0])
        detection_masks = tf.squeeze(fetches['detection_masks'], [0])
        # Reframing translates masks from box coordinates to image
        # coordinates and fits them to the image size.
        real_num_detection = tf.cast(fetches['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension.
        # NOTE(review): these ops are still created on every call; for mask
        # models consider building them once next to `tensor_dict`.
        fetches['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

    # Run inference on a batch of one image.
    output_dict = sess.run(fetches,
                           feed_dict={image_tensor: np.expand_dims(image, 0)})

    # All outputs are float32 numpy arrays, so convert types as appropriate
    # and drop the batch dimension.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    # int64 rather than uint8 so that class ids above 255 do not wrap around.
    output_dict['detection_classes'] = output_dict[
        'detection_classes'][0].astype(np.int64)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]

    return output_dict

In [9]:
%matplotlib inline
%pylab inline 
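# Added locally: %pylab populates the interactive namespace from numpy and matplotlib (same magics as the "Env setup" cell).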

Populating the interactive namespace from numpy and matplotlib


In [10]:
# CHANGE HERE: `filename` is the video source handed to cv2.VideoCapture.
# NOTE(review): it is not assigned anywhere above in this notebook, so it must
# be defined before this cell is run.
cap = cv2.VideoCapture(filename)
counter = 1
timings = []

# Number of frames to process: 100 for time measurements, 50 for IoU measurements.
MAX_FRAMES = 100
# Class ids that count as a hit when they are the class of the first detection.
TARGET_CLASS_IDS = (47, 87)
# Frame size used to turn normalized box coordinates into pixel coordinates.
# NOTE(review): assumes 720x480 frames -- confirm against the input video.
FRAME_HEIGHT, FRAME_WIDTH = 480, 720

# CHANGE HERE: optional per-frame logs (time, box coordinates, fps) were
# written by redirecting sys.stdout to '<filename>framewise_*.txt' files;
# that redirection is currently disabled.

# Both stay None until the timed loop actually starts/ends, so the summary
# below can never hit an undefined name.
absolute_start = None
absolute_end = None

try:
    with detection_graph.as_default():
        with tf.Session() as sess:
            # Get handles to the output tensors that exist in this graph.
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                        tensor_name)

            absolute_start = time.time()
            while True:
                ret, image_np = cap.read()
                if not ret:
                    # No frame was returned (end of the video or a read
                    # failure): stop instead of feeding None to the model.
                    break

                # Actual detection; the batch dimension is added inside the helper.
                output_dict = run_inference_for_single_image(image_np, detection_graph)
                # Visualization of the results of a detection (drawn onto image_np).
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=1)
                #cv2.imshow('object_detection', image_np)

                detected_classes = output_dict['detection_classes']
                if len(detected_classes) > 0 and detected_classes[0] in TARGET_CLASS_IDS:
                    end_time = time.time()
                    # Box layout used here: [y_min, x_min, y_max, x_max], normalized.
                    first_box = output_dict['detection_boxes'][0]
                    y_min = int(first_box[0] * FRAME_HEIGHT)
                    x_min = int(first_box[1] * FRAME_WIDTH)
                    y_max = int(first_box[2] * FRAME_HEIGHT)
                    x_max = int(first_box[3] * FRAME_WIDTH)
                    #print(x_min,y_min,x_max,y_max)

                counter += 1

                if counter > MAX_FRAMES:
                    break
                if cv2.waitKey(25) & 0xFF == ord('q'):
                    break

            # Stop the clock on every regular exit path, not only the frame limit.
            absolute_end = time.time()

except Exception as e:
    # Best effort: report the problem and still print the summary below.
    print(e)
finally:
    if absolute_start is not None and absolute_end is None:
        # The loop was interrupted by an exception; close the timing window here.
        absolute_end = time.time()
    # Always free the capture device and any OpenCV windows.
    cap.release()
    cv2.destroyAllWindows()

# `counter` starts at 1 and is incremented once per fully processed frame.
frames_processed = counter - 1
if absolute_start is not None and frames_processed > 0:
    elapsed = absolute_end - absolute_start
    print(elapsed)
    # Frames per second over the frames that were actually processed.
    print(frames_processed / elapsed)
else:
    print('No frames were processed; check that the video source could be opened.')
# plt.plot(np.linspace(0,101,99),timings[1:],'-b')
# plt.xlabel('Frame Number')

# plt.ylabel('Time (in ms)')
# plt.grid()
# plt.show()