In [1]:
import cv2
from darkflow.net.build import TFNet
import numpy as np
import time
import sys
import matplotlib.pyplot as plt
import os

In [2]:
# Settings handed to darkflow when the network is constructed.
#   model     - network definition file (the log below shows it being parsed)
#   load      - pre-trained weights file (the log below shows it being loaded)
#   threshold - presumably the minimum detection confidence; confirm in darkflow docs
#   gpu       - presumably the share of the GPU darkflow may use; confirm in darkflow docs
option = dict(
    model='cfg/tiny-yolo-voc.cfg',
    load='bin/tiny-yolo-voc.weights',
    threshold=0.4,
    gpu=1.0,
)

# Build the detector from the settings above; this parses the cfg file and
# loads the weights, printing the layer table shown in the cell output.
tfnet = TFNet(option)

Parsing ./cfg/tiny-yolo-voc.cfg
Parsing cfg/tiny-yolo-voc.cfg
Loading bin/tiny-yolo-voc.weights ...
Successfully identified 63471556 bytes
Finished in 0.011000394821166992s
Model has a VOC model name, loading VOC labels.

Building net ...
Source | Train? | Layer description                | Output size
-------+--------+----------------------------------+---------------
       |        | input                            | (?, 416, 416, 3)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 416, 416, 16)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 208, 208, 16)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 208, 208, 32)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 104, 104, 32)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 104, 104, 64)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 52, 52, 64)
 Load  |  Yep!  | conv 3x3p1_1  +bnorm  leaky      | (?, 52, 52, 128)
 Load  |  Yep!  | maxp 2x2p0_2                     | (?, 26, 26,

In [3]:
# Start from a clean slate: drop the recording left behind by a previous run.
stale_recording_present = os.path.isfile('output.mp4')
if stale_recording_present:
    os.remove('output.mp4')

In [6]:
# Open the capture device with index 0.
cap = cv2.VideoCapture(0)
if not cap.isOpened():
    # Fail early with a clear message instead of letting the processing loop
    # crash later on an empty frame or a 0x0 video writer.
    cap.release()
    raise RuntimeError('Could not open the camera (device index 0)')

# The writer is created with the frame size the camera reports, so the frames
# read from `cap` can be written to `out` without resizing.
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create the VideoWriter object.
# NOTE(review): the file is tagged as 20 FPS, while the loop output recorded in
# this notebook shows roughly 1-2 FPS, so playback will look sped up - confirm
# whether that is intended.
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # Be sure to use lower case
out = cv2.VideoWriter('output.mp4', fourcc, 20.0, (width, height))

# Per-target state kept as parallel lists (index i describes the same target
# in every list); they are filled and pruned by the processing loop.
bbox_on_use = []  # last known bounding box of each tracked target
ok = []           # last success flag reported by each tracker
tracker = []      # one KCF tracker object per target
areas = []        # bounding-box area of each target on the previous frame

In [7]:
# A detection whose top-left corner is closer than this many pixels (on both
# axes) to an already tracked box is considered the same target.
TRACK_MATCH_THRESHOLD_PX = 100

# Displacement constant used by the area-based distance estimate below.
# NOTE(review): the unit is not stated anywhere in this notebook - confirm.
MOVEMENT_DISTANCE = 0.01


def _is_near(corner, tracked_corner, th=TRACK_MATCH_THRESHOLD_PX):
    """Return True if `corner` is strictly within `th` pixels of `tracked_corner` on both axes."""
    return (abs(corner[0] - tracked_corner[0]) < th
            and abs(corner[1] - tracked_corner[1]) < th)


def _distance_text(prev_area, new_area, movement_distance=MOVEMENT_DISTANCE):
    """Return the label drawn next to a box, derived from the change in box area.

    Both areas are expected to be non-negative. When they are equal (e.g. no
    change since the previous frame) there is nothing to compute and the
    placeholder 'loading' is returned.
    """
    # The distance was calculated following the steps shown at:
    # https://zone.biblio.laurentian.ca/bitstream/10219/2458/1/Peyman%20Alizadeh%20MSc.%20Thesis%20Corrected_2_2.pdf
    if new_area > prev_area:
        return '%.2f' % (movement_distance / (1 - prev_area / new_area))
    if new_area < prev_area:
        return '%.2f' % (movement_distance / (1 - new_area / prev_area))
    return 'loading'


try:
    while True:
        stime = time.time()
        ret, frame = cap.read()  # ret is True if a frame was read, False otherwise
        if not ret:
            # No frame available (camera unplugged / stream ended): stop
            # instead of feeding an empty frame to the detector forever.
            break

        results = tfnet.return_predict(frame)

        #   If there are two people in the picture, results holds the following information:
        #   [{'bottomright': {'x': 593, 'y': 320},
        #  'confidence': 0.42716929,
        #  'label': 'person',
        #  'topleft': {'x': 417, 'y': 124}},
        #  {'bottomright': {'x': 283, 'y': 373},
        #  'confidence': 0.76005679,
        #  'label': 'person',
        #  'topleft': {'x': 177, 'y': 104}}]

        # Convert every detection to (x, y, width, height): that is the layout
        # the trackers return from update() and the one used for drawing below,
        # so the same layout is used when a tracker is initialised.
        bbox = []
        for result in results:
            tl_x, tl_y = result['topleft']['x'], result['topleft']['y']          # Top left corner
            br_x, br_y = result['bottomright']['x'], result['bottomright']['y']  # Bottom right corner
            box_w, box_h = br_x - tl_x, br_y - tl_y
            if box_w <= 0 or box_h <= 0:
                continue  # degenerate box: nothing to track
            bbox.append((tl_x, tl_y, box_w, box_h))

        # Start a tracker for every detection that is not close to a target
        # that is already being tracked.
        for box in bbox:
            if any(_is_near(box[:2], tracked[:2]) for tracked in bbox_on_use):
                continue
            new_tracker = cv2.TrackerKCF_create()
            confir = new_tracker.init(frame, box)
            tracker.append(new_tracker)
            # Some OpenCV builds return None from init(); treat that as success.
            ok.append(confir is None or bool(confir))
            bbox_on_use.append(box)
            areas.append(box[2] * box[3])

        # Delete all the trackers that are dead (failed on the previous frame).
        # The four lists are parallel, so they are filtered with the same indices.
        alive = [idx for idx, flag in enumerate(ok) if flag]
        if len(alive) != len(ok):
            ok[:] = [ok[idx] for idx in alive]
            bbox_on_use[:] = [bbox_on_use[idx] for idx in alive]
            tracker[:] = [tracker[idx] for idx in alive]
            areas[:] = [areas[idx] for idx in alive]

        # Update the trackers and annotate the frame.
        for a in range(len(ok)):
            ok[a], new_box = tracker[a].update(frame)
            if not ok[a]:
                # Target not located on this frame: the returned box is not
                # reliable, so draw nothing; the entry is pruned next iteration.
                continue

            bbox_on_use[a] = new_box
            x, y, w, h = new_box
            ptl = (int(x), int(y))
            pbr = (int(x + w), int(y + h))
            frame = cv2.rectangle(frame, ptl, pbr, (255, 0, 0), 2, 1)

            # Boxes are (x, y, w, h), so the area is simply w * h.
            area_b = w * h
            text = _distance_text(areas[a], area_b)
            areas[a] = area_b
            frame = cv2.putText(frame, text, ptl, cv2.FONT_HERSHEY_COMPLEX, 1, (0, 0, 0), 1)

        out.write(frame)
        cv2.imshow('frame', frame)
        print('FPS {:.1f}'.format(1 / (time.time() - stime)))

        # Press 'q' in the preview window to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera, finalise the video file and close the window,
    # even if the loop is interrupted or an error is raised.
    cap.release()
    out.release()
    cv2.destroyAllWindows()

FPS 1.7
FPS 1.1
FPS 0.7
FPS 0.9
FPS 1.6
FPS 1.0
FPS 1.1
FPS 1.7
FPS 1.3
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.3
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.1
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.4
FPS 1.6
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.4
FPS 1.2
FPS 1.2
FPS 1.3
FPS 1.3
FPS 1.3
FPS 1.3
FPS 1.2
FPS 1.2
FPS 1.2
FPS 1.3
FPS 1.3
FPS 1.3
FPS 1.2
FPS 1.3
FPS 1.3
FPS 1.3
FPS 1.3
FPS 1.5
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.7
FPS 1.6
FPS 1.7
FPS 1.3
