In [7]:
import tensorflow as tf 
import os
from tensorflow.python.platform import gfile
import numpy as np
import time
import cv2

import matplotlib.pyplot as plt
%matplotlib inline

In [8]:
def draw_boxes(frame, out, width, height, threshold=0.9, color=(0, 0, 255), thickness=1):
    '''
    Draw bounding boxes onto the frame.

    Parameters
    ----------
    frame : image handed to ``cv2.rectangle``; it is drawn on in place.
    out : indexable detector output where ``out[0][0]`` is the number of
        detections, ``out[1][0][i]`` is the score of detection ``i`` and
        ``out[2][0][i]`` is its box as four fractions in the order
        (ymin, xmin, ymax, xmax).
    width, height : factors used to scale the fractional box coordinates
        (x values are multiplied by ``width``, y values by ``height``).
    threshold : only detections with a score strictly greater than this
        value are drawn (default 0.9, the previously hard-coded value).
    color, thickness : forwarded to ``cv2.rectangle`` (defaults keep the
        previous hard-coded ``(0, 0, 255)`` and ``1``).

    Returns
    -------
    (frame, center_x, center_y)
        The same frame object plus the center of the LAST box that passed
        the threshold, or ``None, None`` when no box passed.
    '''
    scores = out[1][0]
    boxes = out[2][0]
    # Never index past the data actually supplied, even if the reported
    # detection count is larger than the score/box arrays.
    num_detections = min(int(out[0][0]), len(scores), len(boxes))

    center_x = None
    center_y = None
    for i in range(num_detections):
        score = float(scores[i])
        if score <= threshold:
            continue

        # Box fractions arrive as (ymin, xmin, ymax, xmax).
        y0, x0, y1, x1 = (float(v) for v in boxes[i])
        xmin = int(x0 * width)
        ymin = int(y0 * height)
        xmax = int(x1 * width)
        ymax = int(y1 * height)

        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), color, thickness)
        # Later qualifying boxes overwrite the center of earlier ones.
        center_x = (xmin + xmax) / 2
        center_y = (ymin + ymax) / 2

    return frame, center_x, center_y

In [9]:
input_ = 'Pedestrian_Detect_2_1_1.mp4'

# Passing the filename to the constructor already opens the file, so the
# extra capture.open(input_) call that used to follow was redundant.
capture = cv2.VideoCapture(input_)
if not capture.isOpened():
    # Fail fast: otherwise width/height come back as 0 and the processing
    # loop silently does nothing.
    raise IOError('Could not open video file: {}'.format(input_))

# Named property ids instead of the bare integers 3 and 4.
width = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [10]:
# Writer for the annotated video; its frame size matches the input capture.
output_path = 'out.mp4'
# Codec given as a raw integer code.
# NOTE(review): presumably chosen to get an MP4-compatible codec on the
# machine this was run on - confirm before running elsewhere.
fourcc = 0x00000021
frames_per_second = 24
out = cv2.VideoWriter(output_path, fourcc, frames_per_second, (width, height))

In [11]:
# Optional sanity check (uncomment to run): total number of frames in the input video.
# int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

In [12]:
# Run the frozen detection graph over every frame of `capture`, draw the
# detections, write the annotated frames to `out`, and log the frame index
# whenever the tracked detection appears ("frame in") or disappears
# ("frame out").
start = time.time()

path_to_pb = "frozen_inference_graph.pb"
model_input_size = (300, 300)   # size every frame is resized to before inference
min_center_shift = 110          # a detection counts as a new entry only if its box
                                # center is farther than this from the last counted one

# Read the serialized frozen graph into a GraphDef.
# tf.gfile.GFile is the non-deprecated spelling of tf.gfile.FastGFile.
with tf.gfile.GFile(path_to_pb, "rb") as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# tf.Session() without a `graph` argument wraps the current default graph, so
# import_graph_def adds the nodes to sess.graph. The bare
# `sess.graph.as_default()` call that used to sit here never entered the
# context manager and therefore did nothing.
with tf.Session(config=tf.ConfigProto()) as sess:
    tf.import_graph_def(graph_def, name='')

    # Resolve the output tensors once instead of on every frame.
    fetches = [sess.graph.get_tensor_by_name(tensor_name)
               for tensor_name in ('num_detections:0',
                                   'detection_scores:0',
                                   'detection_boxes:0',
                                   'detection_classes:0')]

    current_count = 0
    last_count = 0
    total_count = 0
    center_x_old = 0            # x component of the last counted box center
    center_y_old = 0            # y component of the last counted box center
    ii = 0                      # 1-based index of the frame being processed

    try:
        while capture.isOpened():
            ii += 1
            flag, frame = capture.read()
            if not flag:
                break

            p_frame = cv2.resize(frame, model_input_size)
            p_frame = p_frame[:, :, [2, 1, 0]]  # BGR2RGB
            # Add the leading batch axis expected by the feed.
            img = p_frame.reshape(1, p_frame.shape[0], p_frame.shape[1], 3)

            # Run inference on the current frame.
            outputs = sess.run(fetches, feed_dict={'image_tensor:0': img})

            out_frame, center_x, center_y = draw_boxes(frame, outputs, width, height)

            # Compare against None explicitly: a center coordinate of 0.0 is a
            # valid detection and must not be mistaken for "nothing detected".
            if center_x is not None:
                shift = np.sqrt((center_x - center_x_old) ** 2 + (center_y - center_y_old) ** 2)
                if shift > min_center_shift:
                    current_count = 1
                    center_x_old = center_x
                    center_y_old = center_y
                # Otherwise current_count deliberately keeps its previous value.
            else:
                current_count = 0

            if current_count > last_count:
                start_time = time.time()
                total_count = total_count + current_count - last_count
                print('frame in', ii)

            if current_count < last_count:
                # start_time is always set here: the count can only drop after
                # it has risen, and rising assigns start_time.
                duration = int(time.time() - start_time)
                print('frame out', ii)

            last_count = current_count

            out.write(out_frame)
    finally:
        # Release the writer and capture even if a frame fails mid-run, so the
        # output file is finalized and the input handle is not leaked.
        out.release()
        capture.release()
        cv2.destroyAllWindows()

# Note: the measured span also includes loading the graph from disk.
print('Inference time for all frames was', time.time() - start)

frame in 70
frame out 72
frame in 188
frame out 191
frame in 236
frame out 241
frame in 438
frame out 441
frame in 442
frame out 443
frame in 512
frame out 516
frame in 689
frame out 691
frame in 759
frame out 850
frame in 854
frame out 859
frame in 860
frame out 863
frame in 1182
frame out 1183
frame in 1243
frame out 1251
frame in 1344
frame out 1349
frame in 1352
frame out 1353
Inference time for all frames was 187.15742993354797
