# Simple object detection using YOLOv2 and OpenCV
#### This notebook opens the webcam, captures each frame, and passes it to a YOLO model to detect the presence of any object class the model was trained on (COCO dataset).

In [1]:
import cv2
import numpy as np
import os
from darkflow.net.build import TFNet

In [2]:
import tensorflow as tf
import tensorflow.contrib.slim as slim

In [None]:
# Convert image data to a TF Example proto
def image_to_tfexample(image_data, image_format, height, width, class_id):
  """Wrap one image and its metadata in a ``tf.train.Example``.

  Args:
    image_data: value stored under 'image/encoded' via ``bytes_feature``.
    image_format: value stored under 'image/format' via ``bytes_feature``.
    height: value stored under 'image/height' via ``int64_feature``.
    width: value stored under 'image/width' via ``int64_feature``.
    class_id: value stored under 'image/class/label' via ``int64_feature``.

  Returns:
    A ``tf.train.Example`` whose features are the five entries above.

  NOTE(review): ``bytes_feature`` and ``int64_feature`` are not defined in the
  cells visible here -- confirm they are defined or imported before this
  function is called.
  """
  feature_map = {
      'image/encoded': bytes_feature(image_data),
      'image/format': bytes_feature(image_format),
      'image/class/label': int64_feature(class_id),
      'image/height': int64_feature(height),
      'image/width': int64_feature(width),
  }
  example_features = tf.train.Features(feature=feature_map)
  return tf.train.Example(features=example_features)

In [1]:
from matplotlib import pyplot as plt
from PIL import Image
from utils import label_map_util
from utils import visualization_utils as vis_util

This call to matplotlib.use() has no effect because the backend has already
been chosen; matplotlib.use() must be called *before* pylab, matplotlib.pyplot,
or matplotlib.backends is imported for the first time.

The backend was *originally* set to 'module://ipykernel.pylab.backend_inline' by the following code:
  File "c:\program files\python36\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "c:\program files\python36\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\program files\python36\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "c:\program files\python36\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "c:\program files\python36\lib\site-packages\ipykernel\kernelapp.py", line 478, in start
    self.io_loop.start()
  File "c:\program files\python36\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    

In [4]:
# Open capture device 0 once for the whole notebook; grabVideoFeed() reads from it.
# NOTE(review): index 0 is presumably the default webcam -- confirm on the target machine.
cap = cv2.VideoCapture(0)

def grabVideoFeed():
    """Read one frame from the module-level capture ``cap``.

    Returns:
        The frame returned by ``cap.read()`` when its status flag is truthy,
        otherwise ``None``.
    """
    # cap.read() yields (status flag, frame); the flag says whether a frame was grabbed.
    ok, image = cap.read()
    if not ok:
        return None
    return image

In [None]:
def initialSetup(graph_path='tensorflow_inception_graph.pb'):
    """Lower TensorFlow's native log verbosity and import a frozen graph.

    Args:
        graph_path: Path to the serialized ``GraphDef`` file. Defaults to the
            file name this notebook originally hard-coded, so existing
            zero-argument calls behave as before.

    Side effects:
        Sets the ``TF_CPP_MIN_LOG_LEVEL`` environment variable to ``'2'`` and
        imports the parsed graph (with an empty name prefix) via
        ``tf.import_graph_def``.

    NOTE(review): tensorflow is already imported when this runs -- confirm the
    environment variable still takes effect at this point.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Unpersist graph from file.
    # tf.gfile.GFile is the documented replacement for the deprecated FastGFile.
    with tf.gfile.GFile(graph_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        # name='' keeps the node names exactly as stored in the file (no prefix).
        tf.import_graph_def(graph_def, name='')

# Run the one-time setup: sets the TF log level and imports the frozen graph.
initialSetup()

# Debug aid: uncomment the next line to print the name of every node in the default graph.
#print([n.name for n in tf.get_default_graph().as_graph_def().node])

### References:
https://github.com/tensorflow/models/tree/master/research/slim

https://towardsdatascience.com/building-a-real-time-object-recognition-app-with-tensorflow-and-opencv-b7a2b4ebdc32

In [6]:
# Load the pretrained YOLO model (COCO dataset) to detect common objects.

# Windows configuration (active).
# NOTE(review): "threshold" is presumably the minimum detection confidence and
# "gpu" the share of GPU memory darkflow may use -- confirm against darkflow's options.
options = {
    "model": "D:/scripts/darkflow-master/cfg/yolo.cfg",
    "load": "D:/scripts/darkflow-master/cfg/yolo.weights",
    "threshold": 0.5,
    "gpu": 1.0,
}

# Ubuntu configuration (alternative; note it has no "gpu" entry).
#options = {"model": "/media/sf_scripts/darkflow-master/cfg/yolo.cfg", "load": "/media/sf_scripts/darkflow-master/cfg/yolo.weights", "threshold": 0.5}

# Build the network from the chosen options; later cells call tfnet.return_predict().
tfnet = TFNet(options)

  cfg_path, FLAGS.model))


Parsing D:/scripts/darkflow-master/cfg/yolo.cfg
Loading D:/scripts/darkflow-master/cfg/yolo.weights ...
Successfully identified 203934260 bytes
Finished in 0.07137560844421387s
Model has a coco model name, loading coco labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 608, 608, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 608, 608, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 304, 304, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 304, 304, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | conv 1x1p0_1  +bnorm  leaky      | (?, 152, 152, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 152, 152, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | 

In [7]:
# Show the live feed and print the detections for each frame until 'q' is pressed
# in the preview window. The try/finally guarantees the camera and the window
# are released even if a frame cannot be read or the kernel is interrupted.
try:
    while True:
        # Capture frame-by-frame
        frame = grabVideoFeed()
        if frame is None:
            raise SystemError('ERROR: Cannot receive video frame')

        cv2.imshow('Main', frame)
        # waitKey(1) polls the keyboard for ~1 ms; the mask keeps only the low byte
        # of the key code before comparing it with 'q'.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        result = tfnet.return_predict(frame)
        print(result)
finally:
    # When everything is done (or on any error), release the capture
    cap.release()
    cv2.destroyAllWindows()

[{'label': 'person', 'confidence': 0.71099794, 'topleft': {'x': 33, 'y': 154}, 'bottomright': {'x': 541, 'y': 477}}]
[{'label': 'person', 'confidence': 0.78867757, 'topleft': {'x': 89, 'y': 116}, 'bottomright': {'x': 614, 'y': 474}}]
[{'label': 'person', 'confidence': 0.77109385, 'topleft': {'x': 71, 'y': 122}, 'bottomright': {'x': 500, 'y': 468}}]
[{'label': 'person', 'confidence': 0.81680351, 'topleft': {'x': 48, 'y': 120}, 'bottomright': {'x': 463, 'y': 471}}]
[{'label': 'person', 'confidence': 0.7676301, 'topleft': {'x': 68, 'y': 120}, 'bottomright': {'x': 499, 'y': 466}}]
[{'label': 'person', 'confidence': 0.80965489, 'topleft': {'x': 60, 'y': 126}, 'bottomright': {'x': 511, 'y': 462}}]
[{'label': 'person', 'confidence': 0.79530334, 'topleft': {'x': 65, 'y': 128}, 'bottomright': {'x': 508, 'y': 459}}]
[{'label': 'person', 'confidence': 0.85722095, 'topleft': {'x': 71, 'y': 113}, 'bottomright': {'x': 511, 'y': 474}}]
[{'label': 'person', 'confidence': 0.82554942, 'topleft': {'x': 5

In [6]:
# sample image snapshot
# NOTE(review): this is an absolute Ubuntu-style path, while the active `options`
# cell uses Windows paths -- adjust for the machine the notebook runs on.
sample_img_path = "/media/sf_scripts/darkflow-master/sample_img/sample_computer.jpg"
imgcv = cv2.imread(sample_img_path)
# cv2.imread does not raise on a missing or unreadable file; it returns None.
# Fail here with a clear message instead of deep inside return_predict.
if imgcv is None:
    raise FileNotFoundError('Cannot read sample image: ' + sample_img_path)
result = tfnet.return_predict(imgcv)
print(result)

[{'label': 'tvmonitor', 'confidence': 0.124221556, 'topleft': {'x': 323, 'y': 147}, 'bottomright': {'x': 409, 'y': 313}}, {'label': 'cup', 'confidence': 0.458379, 'topleft': {'x': 318, 'y': 145}, 'bottomright': {'x': 423, 'y': 317}}, {'label': 'cup', 'confidence': 0.3655611, 'topleft': {'x': 374, 'y': 346}, 'bottomright': {'x': 421, 'y': 374}}, {'label': 'spoon', 'confidence': 0.12179081, 'topleft': {'x': 464, 'y': 259}, 'bottomright': {'x': 499, 'y': 357}}, {'label': 'bowl', 'confidence': 0.24720773, 'topleft': {'x': 419, 'y': 316}, 'bottomright': {'x': 484, 'y': 364}}, {'label': 'tvmonitor', 'confidence': 0.88161147, 'topleft': {'x': 157, 'y': 94}, 'bottomright': {'x': 345, 'y': 280}}, {'label': 'keyboard', 'confidence': 0.79866266, 'topleft': {'x': 123, 'y': 263}, 'bottomright': {'x': 333, 'y': 371}}, {'label': 'refrigerator', 'confidence': 0.487068, 'topleft': {'x': 0, 'y': 19}, 'bottomright': {'x': 130, 'y': 351}}]
