# Webcam bounding box detection in notebook

Use OpenCV to read the webcam feed, TensorFlow to perform bounding box detection, and ipywidgets to display the video stream.

The TensorFlow detection model is currently the [`ssdlite_mobilenet_v2_coco`](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) pretrained model from the TensorFlow [object detection API](https://github.com/tensorflow/models/tree/master/research/object_detection).

This notebook is inspired by the [object detection demo notebook](https://github.com/tensorflow/models/blob/master/research/object_detection/object_detection_tutorial.ipynb).

In [None]:
import os
import sys
import time
import tarfile
import zipfile
import six.moves.urllib as urllib
from collections import defaultdict
from io import StringIO
import threading

import numpy as np
import tensorflow as tf
import cv2

from ipywidgets import widgets
from IPython.display import display

## Download the model from the tensorflow repo

In [None]:
# Threshold used for prediction: detections scoring below it are not drawn
PREDICT_THRESHOLD = 0.3

# Where the pretrained model archive is downloaded from.
DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
# Which model to download, its archive name, and the frozen graph inside it.
MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'
MODEL_FILE = '{}.tar.gz'.format(MODEL_NAME)
PATH_TO_CKPT = '{}/frozen_inference_graph.pb'.format(MODEL_NAME)

In [None]:
def download_model():
    """
    Download pretrained model from the tensorflow repo.

    Fetches ``DOWNLOAD_BASE + MODEL_FILE`` and stores it as ``MODEL_FILE``
    (a relative path), then extracts every archive member whose base name
    contains ``frozen_inference_graph.pb`` below the current working
    directory, keeping the member's path inside the archive.
    """
    # URLopener is deprecated; urlretrieve does the same download-to-file.
    urllib.request.urlretrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # The context manager closes the archive even when extraction fails.
    with tarfile.open(MODEL_FILE) as tar_file:
        for member in tar_file.getmembers():
            if 'frozen_inference_graph.pb' in os.path.basename(member.name):
                # NOTE(review): the archive is fetched over plain HTTP and the
                # member path is not validated before extraction - confirm
                # the source is trusted.
                tar_file.extract(member, os.getcwd())


In [None]:
# Fetch and unpack the model; this runs unconditionally each time the cell
# is executed.
print('Downloading model')
download_model()
print('Model downloaded')

## Model loading functionality

In [None]:
def get_model_graph(model_path):
    """
    Build a Tensorflow graph from the serialized graph definition stored
    at `model_path` and return it.
    """
    graph = tf.Graph()
    with graph.as_default():
        graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_path, 'rb') as model_file:
            graph_def.ParseFromString(model_file.read())
            # Empty name: imported nodes keep their names without a prefix.
            tf.import_graph_def(graph_def, name='')
    return graph


def get_tf_tensors(graph):
    """
    Look up the detection output tensors and the image input tensor.

    Returns a dict of the output tensors that exist in `graph` (keyed by
    'detection_boxes', 'detection_scores', 'detection_classes') together
    with the 'image_tensor:0' input tensor.
    """
    # Collect the names of every tensor produced by any operation.
    available_names = set()
    for operation in graph.get_operations():
        for produced in operation.outputs:
            available_names.add(produced.name)

    wanted_keys = ('detection_boxes', 'detection_scores', 'detection_classes')
    # Outputs missing from the graph are silently left out of the dict.
    outputs = {
        key: graph.get_tensor_by_name(key + ':0')
        for key in wanted_keys
        if key + ':0' in available_names
    }
    image_input = graph.get_tensor_by_name('image_tensor:0')
    return outputs, image_input


def get_graph_tensors(model_path):
    """
    Load the model at `model_path` and return the graph together with its
    output tensor dict and its input image tensor.
    """
    loaded_graph = get_model_graph(model_path)
    outputs, image_input = get_tf_tensors(loaded_graph)
    return loaded_graph, outputs, image_input

In [None]:
class WebcamBoundingBoxThread(threading.Thread):
    """
    Background thread to read images from the webcam, detect objects, and
    update the interactive display.

    Every loop iteration reads one frame, draws the detected bounding boxes
    on it, writes the PNG-encoded result to `interactive_img.value` and the
    measured frame rate to `interactive_framerate_text.value`.
    """

    def __init__(self, interactive_img, interactive_framerate_text, model_path):
        """
        Store the display targets and the model location.

        The model is not loaded and the camera is not opened here; that
        happens in `setup`, which `run` calls.

        :param interactive_img: object whose `value` receives PNG bytes.
        :param interactive_framerate_text: object whose `value` receives the
            frame rate formatted as text.
        :param model_path: path handed to `get_graph_tensors`.
        """
        super(WebcamBoundingBoxThread, self).__init__()
        self._stop_event = threading.Event()
        self.model_path = model_path
        self.interactive_img = interactive_img
        self.interactive_framerate_text = interactive_framerate_text

    def setup(self):
        """
        Setup tensorflow graph and webcam connection.
        """
        # Setup Tensorflow detector
        (self.detection_graph, self.tensor_dict,
         self.input_image_tensor) = get_graph_tensors(self.model_path)
        # Setup camera capture (device index 0)
        self.camera = cv2.VideoCapture(0)

    def stop(self):
        """
        Request the thread to stop; the loop in `run` exits before reading
        the next frame.
        """
        self._stop_event.set()

    def stopped(self):
        """
        True iff a stop has been requested via `stop`.
        """
        return self._stop_event.is_set()

    def run_tf_inference(self, image):
        """
        Run tf inference to detect bounding boxes.

        Requires `self.session` to be set, which `run` does.

        :param image: a single image array; a leading batch axis is added
            before it is fed to the input tensor.
        :return: dict with 'detection_classes' (cast to uint8),
            'detection_boxes' and 'detection_scores', each reduced to the
            first (only) batch entry.
        """
        output_dict = self.session.run(
            self.tensor_dict,
            feed_dict={self.input_image_tensor: np.expand_dims(image, 0)})
        # all outputs are float32 numpy arrays, so convert types as appropriate
        output_dict['detection_classes'] = (
            output_dict['detection_classes'][0].astype(np.uint8))
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        return output_dict

    @staticmethod
    def _pixel_boxes(output_dict, height, width, threshold):
        """
        Convert detections scoring at least `threshold` to pixel rectangles.

        :param output_dict: mapping with 'detection_scores' and
            'detection_boxes'; each box is (ymin, xmin, ymax, xmax) as a
            fraction of the image height/width.
        :param height: image height in pixels.
        :param width: image width in pixels.
        :param threshold: minimum score for a detection to be kept.
        :return: list of ((xmin, ymin), (xmax, ymax)) integer pixel corners.
        """
        rectangles = []
        # Iterate over the detections actually returned rather than assuming
        # a fixed count, so a shorter result cannot raise an IndexError.
        for score, box in zip(output_dict['detection_scores'],
                              output_dict['detection_boxes']):
            # Assume predictions are ordered by probability
            if score < threshold:
                break
            ymin, xmin, ymax, xmax = box
            rectangles.append((
                (int(xmin * width), int(ymin * height)),
                (int(xmax * width), int(ymax * height)),
            ))
        return rectangles

    def process_frame(self, frame):
        """
        Process a single frame for bounding box detection.

        :param frame: BGR image as read from the camera.
        :return: BGR image with a green rectangle drawn for every detection
            scoring at least PREDICT_THRESHOLD.
        """
        # Inference and drawing are done on an RGB copy of the frame.
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        output_dict = self.run_tf_inference(image)
        height, width = image.shape[0], image.shape[1]
        rectangles = self._pixel_boxes(
            output_dict, height, width, PREDICT_THRESHOLD)
        for top_left, bottom_right in rectangles:
            cv2.rectangle(image, top_left, bottom_right, (0, 255, 0), 3)
        return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    def run(self):
        """
        Start thread.
        Creates loop that reads frame from webcam, detects bounding boxes,
         and updates interactive image until thread is stopped or a frame
         can no longer be read.
        """
        self.setup()
        try:
            if not self.camera.isOpened():
                return
            with self.detection_graph.as_default():
                with tf.Session() as session:
                    self.session = session
                    while not self.stopped():
                        start_time = time.time()
                        is_capturing, frame = self.camera.read()
                        if not is_capturing:
                            # A failed read yields no usable frame; stop
                            # instead of passing it on to OpenCV.
                            break
                        processed_frame = self.process_frame(frame)
                        # tobytes() replaces the deprecated tostring().
                        self.interactive_img.value = cv2.imencode(
                            '.png', processed_frame)[1].tobytes()
                        elapsed = time.time() - start_time
                        # Skip the update when the clock did not advance, to
                        # avoid dividing by zero.
                        if elapsed > 0:
                            self.interactive_framerate_text.value = (
                                '{:.2f}'.format(1 / elapsed))
        finally:
            # Release the camera even when the loop raised.
            self.camera.release()

In [None]:
print('Start')
# Create interactive image
interactive_img = widgets.Image(
    value=b'',
    format='png',
    width=800,
    height=600,
)

# Create interactive text to display framerate
interactive_framerate_text = widgets.Text(
    value='0',
    placeholder='0',
    description='Fps:',
    disabled=False
)


# Create thread to update interactive image with webcam
print('Setup thread')
thread = WebcamBoundingBoxThread(
    interactive_img, interactive_framerate_text, PATH_TO_CKPT)
# Daemon thread: it does not keep the interpreter alive on exit.
thread.daemon = True
print('Display image')
display(interactive_img)
display(interactive_framerate_text)
print('Starting thread')
thread.start()


# Keep the cell busy while the thread is running. Leave the loop when the
# cell is interrupted, or when the thread ends on its own, and always stop
# and join the thread before finishing.
print('Running loop')
try:
    while thread.is_alive():
        time.sleep(1)
except KeyboardInterrupt:
    # Interrupting the cell is the expected way to end the stream.
    pass
finally:
    thread.stop()
    thread.join()


print('Finish')