# Real-time person detection with Tensorflow

Let's do something similar with TensorFlow on CPU/GPU and compare the performance. Note that this is not the fastest possible implementation, but it is one that requires about the same effort as before...

In [19]:
import numpy as np
import cv2
from time import time
import tensorflow as tf
from tensorflow import InteractiveSession, ConfigProto
# Session configuration with GPU memory growth enabled (allow_growth=True).
config = ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
# Interactive session created with the configuration above.
session = InteractiveSession(config=config)
from PIL import Image
from PIL import ImageDraw
from PIL import ImageFont


Get an object detection model. In this demo we will use a generic object detection model and keep only the person detections. This will be slower (due to more detections), but it will provide a comparison between the two approaches.

In [20]:
!wget http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz -P data

--2019-05-06 11:30:33--  http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 172.217.22.80, 172.217.16.144, 2a00:1450:401b:805::2010
Connecting to download.tensorflow.org (download.tensorflow.org)|172.217.22.80|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 187925923 (179M) [application/x-tar]
Saving to: ‘data/ssd_mobilenet_v2_coco_2018_03_29.tar.gz’


2019-05-06 11:30:41 (23,1 MB/s) - ‘data/ssd_mobilenet_v2_coco_2018_03_29.tar.gz’ saved [187925923/187925923]



In [21]:
!tar zxvf data/ssd_mobilenet_v2_coco_2018_03_29.tar.gz

ssd_mobilenet_v2_coco_2018_03_29/checkpoint
ssd_mobilenet_v2_coco_2018_03_29/model.ckpt.meta
ssd_mobilenet_v2_coco_2018_03_29/pipeline.config
ssd_mobilenet_v2_coco_2018_03_29/saved_model/saved_model.pb
ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb
ssd_mobilenet_v2_coco_2018_03_29/saved_model/
ssd_mobilenet_v2_coco_2018_03_29/saved_model/variables/
ssd_mobilenet_v2_coco_2018_03_29/model.ckpt.index
ssd_mobilenet_v2_coco_2018_03_29/
ssd_mobilenet_v2_coco_2018_03_29/model.ckpt.data-00000-of-00001


Set up the graph and load the model!

In [22]:
# Locations and devices used by the detector.
FROZEN_GRAPH_PATH = 'ssd_mobilenet_v2_coco_2018_03_29/frozen_inference_graph.pb'
FONT_PATH = "/usr/share/fonts/truetype/freefont/FreeMonoBold.ttf"
FONT_SIZE = 20
CAMERA_INDEX = 0

detection_graph = tf.Graph()

with detection_graph.as_default():
    # Read the frozen graph from disk; the file is closed before the graph is imported.
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(FROZEN_GRAPH_PATH, 'rb') as fid:
        serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')

# Bind the session to the detection graph explicitly and reuse the GPU
# configuration (allow_growth) defined in the first cell.
sess = tf.Session(graph=detection_graph, config=config)

# Set the input/output tensors
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
tensor_dict = {
    name: detection_graph.get_tensor_by_name(name + ':0')
    for name in ('detection_boxes', 'detection_classes', 'detection_scores')
}

# Font for the FPS overlay; fall back to Pillow's built-in font when the
# FreeFont file is not installed on this machine.
try:
    font = ImageFont.truetype(FONT_PATH, FONT_SIZE, encoding="unic")
except (IOError, OSError):
    font = ImageFont.load_default()

# Open the camera and fail early with a clear message if it is unavailable.
cap = cv2.VideoCapture(CAMERA_INDEX)
if not cap.isOpened():
    raise IOError('Cannot open camera with index %d' % CAMERA_INDEX)

Let's perform the person detection!

In [23]:
  
# Detection settings used by the loop below.
PERSON_CLASS_ID = 1          # label 1 corresponds to "person"
SCORE_THRESHOLD = 0.1        # minimum score for a detection to be drawn
INFERENCE_SIZE = (320, 320)  # frames are shrunk to fit this box before inference
# Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter under its current name.
RESAMPLE_FILTER = Image.LANCZOS
MIN_ELAPSED = 1e-6           # guards the FPS division against a zero time delta

try:
    with detection_graph.as_default():
        while True:
            # Start of the full per-frame timing (capture + preprocessing + inference + drawing).
            start_time = time()

            # Get the frame; stop cleanly if the camera delivers nothing.
            ok, frame = cap.read()
            if not ok or frame is None:
                print('Could not read a frame from the camera, stopping.')
                break

            # Convert to PIL image (OpenCV delivers BGR, PIL expects RGB)
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            pil_im = Image.fromarray(rgb_frame)
            draw = ImageDraw.Draw(pil_im)
            im_width, im_height = pil_im.size

            # Build the network input: a downscaled copy with a leading batch axis.
            # thumbnail() keeps the aspect ratio, so the box coordinates (used below
            # as fractions of width/height) can be scaled to the full-size frame.
            small_im = pil_im.copy()
            small_im.thumbnail(INFERENCE_SIZE, RESAMPLE_FILTER)
            image_np = np.asarray(small_im).astype(np.uint8)
            image_np_expanded = np.expand_dims(image_np, axis=0)

            # Run inference
            start_time_inference = time()
            output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image_np_expanded})
            end_time_inference = time()

            boxes = output_dict['detection_boxes'][0]
            scores = output_dict['detection_scores'][0]
            classes = output_dict['detection_classes'][0].astype(np.int64)

            # Draw every sufficiently confident person detection
            for box, score, label in zip(boxes, scores, classes):
                if score > SCORE_THRESHOLD and label == PERSON_CLASS_ID:
                    ymin, xmin, ymax, xmax = box
                    # Pillow expects (x0, y0, x1, y1) with y0 <= y1, i.e. top edge first.
                    draw.rectangle((xmin * im_width, ymin * im_height,
                                    xmax * im_width, ymax * im_height),
                                   outline='red', width=4)
            end_time = time()

            # Calculate some statistics
            fps = 1. / max(end_time - start_time, MIN_ELAPSED)
            fps_inf = 1. / max(end_time_inference - start_time_inference, MIN_ELAPSED)
            draw.text((0, 0), 'FPS:  %3.2f , Inference FPS:  %3.2f' % (fps, fps_inf), font=font)

            bgr_frame = cv2.cvtColor(np.array(pil_im), cv2.COLOR_RGB2BGR)

            # Display the frame
            cv2.imshow('frame', bgr_frame)

            # Quit on q
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
finally:
    # Always release the camera, the window and the session, even when the
    # loop is interrupted or an exception is raised.
    cap.release()
    cv2.destroyAllWindows()
    sess.close()