# Object Detection Demo
Welcome to the object detection inference walkthrough!  This notebook will walk you step by step through the process of using a pre-trained model to detect objects in an image. Make sure to follow the [installation instructions](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md) before you start.

# Imports

In [1]:
import numpy as np
import os
import six.moves.urllib as urllib
import sys
import tarfile
import tensorflow as tf
import zipfile
%matplotlib inline 
import cv2
from distutils.version import StrictVersion
from collections import defaultdict
from io import StringIO
from matplotlib import pyplot as plt
from PIL import Image

# This is needed since the notebook is stored in the object_detection folder.
sys.path.append("..")
from object_detection.utils import ops as utils_ops

if StrictVersion(tf.__version__) < StrictVersion('1.9.0'):
  raise ImportError('Please upgrade your TensorFlow installation to v1.9.* or later!')


                the kernel may be left running.  Please let us know
                about your system (bitness, Python, etc.) at
                ipython-dev@scipy.org
  ipython-dev@scipy.org""")


## Env setup

In [2]:
# This is needed to display the images.
%matplotlib inline

## Object detection imports
Here are the imports from the object detection module.

In [3]:
from utils import label_map_util

from utils import visualization_utils as vis_util

# Model preparation 

## Variables

Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_FROZEN_GRAPH` to point to a new .pb file.  

By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

In [4]:
# What model to download.
MODEL_NAME = 'potholes_graph'

# Path to frozen detection graph. This is the actual model that is used for the object detection.
PATH_TO_FROZEN_GRAPH = MODEL_NAME + '/frozen_inference_graph.pb'

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join('training', 'object-detection.pbtxt')

## Download Model

In [5]:
# opener = urllib.request.URLopener()
# opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
# tar_file = tarfile.open(MODEL_FILE)
# for file in tar_file.getmembers():
#   file_name = os.path.basename(file.name)
#   if 'frozen_inference_graph.pb' in file_name:
#     tar_file.extract(file, os.getcwd())

## Load a (frozen) Tensorflow model into memory.

In [6]:
detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

## Loading label map
Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

In [7]:
category_index = label_map_util.create_category_index_from_labelmap(PATH_TO_LABELS, use_display_name=True)

## Helper code

In [8]:
def load_image_into_numpy_array(image):
  (im_width, im_height) = image.size
  return np.array(image.getdata()).reshape(
      (im_height, im_width, 3)).astype(np.uint8)

# Detection

In [19]:
# For the sake of simplicity we will use only 2 images:
# image1.jpg
# image2.jpg
# If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
PATH_TO_TEST_IMAGES_DIR = 'test_images'
TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'frame{}.jpg'.format(i)) for i in range(1, 4) ]

# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

In [20]:
# import matplotlib
# matplotlib.get_backend()

In [21]:
# with detection_graph.as_default():
#     with tf.Session(graph=detection_graph) as sess:
#         for image_path in TEST_IMAGE_PATHS:
#             image = Image.open(image_path)
#             # the array based representation of the image will be used later in order to prepare the
#             # result image with boxes and labels on it.
#             image_np = load_image_into_numpy_array(image)
#             # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
#             image_np_expanded = np.expand_dims(image_np, axis=0)
#             image_tensor=detection_graph.get_tensor_by_name('image_tensor:0')
#             boxes=detection_graph.get_tensor_by_name('detection_boxes:0')
#             scores = detection_graph.get_tensor_by_name('detection_scores:0')
#             classes = detection_graph.get_tensor_by_name('detection_classes:0')
#             num_detections = detection_graph.get_tensor_by_name('num_detections:0')
#             (boxes, scores, classes, num_detections) = sess.run(
#               [boxes, scores, classes, num_detections],
#               feed_dict={image_tensor: image_np_expanded})
#             vis_util.visualize_boxes_and_labels_on_image_array(
#               image_np,
#               np.squeeze(boxes),
#               np.squeeze(classes).astype(np.int32),
#               np.squeeze(scores),
#               category_index,
#               use_normalized_coordinates=True,
#               line_thickness=8, min_score_thresh=.2)
#             plt.figure(figsize=IMAGE_SIZE)
#             plt.imshow(image_np)
#             plt.show()

In [22]:
def run_inference_for_single_image(image, graph):
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[1], image.shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: image})

      # all outputs are float32 numpy arrays, so convert types as appropriate
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict

In [23]:
for image_path in TEST_IMAGE_PATHS:
  image = Image.open(image_path)
  
    # the array based representation of the image will be used later in order to prepare the
  # result image with boxes and labels on it.
  image_np = load_image_into_numpy_array(image)
  print(image)
  print("frame type",type(image))
  print(image_np)
  print("image np",type(image_np))
        
  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
  image_np_expanded = np.expand_dims(image_np, axis=0)
  # Actual detection.
  output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
  # Visualization of the results of a detection.
  vis_util.visualize_boxes_and_labels_on_image_array(
      image_np,
      output_dict['detection_boxes'],
      output_dict['detection_classes'],
      output_dict['detection_scores'],
      category_index,
      instance_masks=output_dict.get('detection_masks'),
      use_normalized_coordinates=True,
      line_thickness=8)
  plt.figure(figsize=IMAGE_SIZE)
  plt.imshow(image_np)
  plt.show()

<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1280x720 at 0x1EE5A18E2B0>
frame type <class 'PIL.JpegImagePlugin.JpegImageFile'>
[[[ 62  65  72]
  [ 95  98 105]
  [129 132 139]
  ...
  [122 123 128]
  [ 94  95 100]
  [ 63  64  69]]

 [[107 110 117]
  [147 150 157]
  [189 192 199]
  ...
  [184 185 190]
  [143 144 149]
  [102 103 108]]

 [[118 121 128]
  [163 166 173]
  [212 215 222]
  ...
  [208 209 214]
  [158 159 164]
  [112 113 118]]

 ...

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 41  43  42]
  [ 33  35  34]
  [ 25  27  26]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 36  38  37]
  [ 29  31  30]
  [ 22  24  23]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 28  30  29]
  [ 23  25  24]
  [ 16  18  17]]]
image np <class 'numpy.ndarray'>


  % get_backend())


<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1280x720 at 0x1EE61BB2F98>
frame type <class 'PIL.JpegImagePlugin.JpegImageFile'>
[[[ 62  65  72]
  [ 95  98 105]
  [129 132 139]
  ...
  [122 123 128]
  [ 94  95 100]
  [ 63  64  69]]

 [[107 110 117]
  [147 150 157]
  [189 192 199]
  ...
  [184 185 190]
  [143 144 149]
  [102 103 108]]

 [[118 121 128]
  [163 166 173]
  [212 215 222]
  ...
  [208 209 214]
  [158 159 164]
  [112 113 118]]

 ...

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 42  44  43]
  [ 36  38  37]
  [ 26  28  27]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 38  40  39]
  [ 32  34  33]
  [ 23  25  24]]

 [[  0   0   0]
  [  0   0   0]
  [  0   0   0]
  ...
  [ 29  31  30]
  [ 25  27  26]
  [ 16  18  17]]]
image np <class 'numpy.ndarray'>
<PIL.JpegImagePlugin.JpegImageFile image mode=RGB size=1280x720 at 0x1EE61BF60B8>
frame type <class 'PIL.JpegImagePlugin.JpegImageFile'>
[[[ 62  65  72]
  [ 95  98 105]
  [129 132 139]
  ...
  [121 12

In [40]:
capture=cv2.VideoCapture(r'C:\Users\mayan\Desktop\potholes.mp4')
colors=[tuple(255*np.random.rand(3)) for i in range(5)]
while (capture.isOpened()):
    ret, frame = capture.read()
    if ret:
#         image_np = load_image_into_numpy_array(frame)
#         print(frame)
#         print("frame type",type(frame))
#         print(image_np)
#         print("image np",type(image_np))
        image_np_expanded = np.expand_dims(frame, axis=0)
  # Actual detection.
        output_dict = run_inference_for_single_image(image_np_expanded, detection_graph)
  # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
        cv2.imshow('frame', frame)
#         print('FPS {:.1f}'.format(1 / (time.time() - stime)))
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        capture.release()
        cv2.destroyAllWindows()
        break

InternalError: cuDNN launch failure : input shape([1,3,301,301]) filter shape([3,3,3,32])
	 [[node FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_0/Conv2D (defined at <ipython-input-19-d55b98fd5a78>:7)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_0/Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer, FeatureExtractor/MobilenetV1/Conv2d_0/weights)]]

Caused by op 'FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_0/Conv2D', defined at:
  File "C:\ProgramData\Anaconda3\envs\gpu_tf\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\ProgramData\Anaconda3\envs\gpu_tf\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\kernelapp.py", line 505, in start
    self.io_loop.start()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\platform\asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "C:\ProgramData\Anaconda3\envs\gpu_tf\lib\asyncio\base_events.py", line 438, in run_forever
    self._run_once()
  File "C:\ProgramData\Anaconda3\envs\gpu_tf\lib\asyncio\base_events.py", line 1451, in _run_once
    handle._run()
  File "C:\ProgramData\Anaconda3\envs\gpu_tf\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 1233, in inner
    self.run()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 346, in wrapper
    runner = Runner(result, future, yielded)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 1080, in __init__
    self.run()
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 1147, in run
    yielded = self.gen.send(value)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tornado\gen.py", line 326, in wrapper
    yielded = next(result)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\ipykernel\zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\interactiveshell.py", line 2819, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\interactiveshell.py", line 2845, in _run_cell
    return runner(coro)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\interactiveshell.py", line 3020, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\interactiveshell.py", line 3185, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\IPython\core\interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-19-d55b98fd5a78>", line 7, in <module>
    tf.import_graph_def(od_graph_def, name='')
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\importer.py", line 442, in import_graph_def
    _ProcessNewOps(graph)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\importer.py", line 234, in _ProcessNewOps
    for new_op in graph._add_new_tf_operations(compute_devices=False):  # pylint: disable=protected-access
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\ops.py", line 3440, in _add_new_tf_operations
    for c_op in c_api_util.new_tf_operations(self)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\ops.py", line 3440, in <listcomp>
    for c_op in c_api_util.new_tf_operations(self)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\ops.py", line 3299, in _create_op_from_tf_operation
    ret = Operation(c_op, self)
  File "c:\programdata\anaconda3\envs\gpu_tf\lib\site-packages\tensorflow\python\framework\ops.py", line 1770, in __init__
    self._traceback = tf_stack.extract_stack()

InternalError (see above for traceback): cuDNN launch failure : input shape([1,3,301,301]) filter shape([3,3,3,32])
	 [[node FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_0/Conv2D (defined at <ipython-input-19-d55b98fd5a78>:7)  = Conv2D[T=DT_FLOAT, data_format="NCHW", dilations=[1, 1, 1, 1], padding="SAME", strides=[1, 1, 2, 2], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/device:GPU:0"](FeatureExtractor/MobilenetV1/MobilenetV1/Conv2d_0/Conv2D-0-TransposeNHWCToNCHW-LayoutOptimizer, FeatureExtractor/MobilenetV1/Conv2d_0/weights)]]


In [24]:
capture=cv2.VideoCapture(r'C:\Users\mayan\Desktop\potholes.mp4')
colors=[tuple(255*np.random.rand(3)) for i in range(5)]
while (capture.isOpened()):
    ret, frame = capture.read()
    if ret:
        cv2.imshow('frame', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    else:
        capture.release()
        cv2.destroyAllWindows()
        break

In [25]:
image_np = load_image_into_numpy_array(frame)
        

TypeError: 'int' object is not iterable

In [35]:
type(frame)

numpy.ndarray