In [1]:
#The MIT License (MIT)

#Copyright (c) 2020 Juliana T.C. Marcos

#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
#THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
#CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#This code use methods of the ParticleFilter (PF) class in order to track an object in a video. The PF uses as
#measurements provider the You Only Look Once (YOLO) object detector. 

#The particles are represented in red, the PF estimate of
#the tracked animal is represented in green and the animal's bb is represented in blue. 

#Thanks to Nayak for the nice tutorial about using YOLOv3 with OpenCv which is available at this address:
#https://www.learnopencv.com/deep-learning-based-object-detection-using-yolov3-with-opencv-python-c/


In [2]:
#Import of useful librairies
import cv2
import math
import numpy as np
from skimage import measure
from ParticleFilter import ParticleFilter
import time

In [3]:
"""Some variables initialization """

#The total number of trials
Tot=1
#Lists for the Tot running outputs averages 
BB_avg=[]
ROI_avg=[]
anchor_avg=[]
xy_est_avg=[]
particles_avg=[]
#number of particles
n_particles=2000
#noise in sensors' measurements
meas_noise=0
#Read weights and config files to create YOLO(v3) net
net = cv2.dnn.readNet("./Inputs/yolov3.weights", "./Inputs/yolov3.cfg")
#Fetch the three output layers names of YOLO net, they are the ones not connected to 
#any following layers since they are the last layers
layer_names = net.getLayerNames()
output_layer_names = [layer_names[i[0] - 1] for i in net.getUnconnectedOutLayers()]
#Read the classes in coco.names file for YOLO net
coco_classes = []
with open ('./Inputs/coco.names','r') as file:
    coco_classes=[line.strip("\n") for line in file.readlines()]

font = cv2.FONT_HERSHEY_SIMPLEX
text_coord=(10,40)
t_size=0.8
t_thick=2
#This value was chosen according to a paper experiment
N_thresh=(2*n_particles)/30
#Scale for YOLO's inputs preprocessing
scale=1/255
#Video frame width and height
frame_width=1920
frame_height=1080
#Lists to contain the counters for each trial
yolo_det_l=[]
n_resampl_l=[]
#Initialize variable for shifting the anchor update between first measurements and first estimations
anchor_shift=60

In [4]:
#Data for video cows
anchorS=(1250,350)
videoIn_name="./Inputs/cows.avi"
path="./Outputs/"
videoOut_name=path+"cows-pf-yolo.avi"
#std in the prediction of particles for the object's position
std=10
#These are YOLO parameters
conf=0.8
nms=0.7
height, width, channels =224,320,3
blob_x,blob_y=224,320
#Motion model's speed in x and y directions
v_x=0.01
v_y=0.01

In [5]:
start_time = time.time()
for num in range(Tot):

    #List for averaging running outputs
    BB_l=[]
    ROI_l=[]
    anchor_l=[]
    xy_est_l=[]
    particles_l=[]
    #Counters
    yolo_det=0
    it=0
    n_resampling=0
    #A single program variables initialization
    anchor=anchorS
    #Initialization of measurements variables
    x_objMeasure,y_objMeasure=anchor[0],anchor[1]
    #BB variable initialization
    BB=0,0,0,0
    #Capture video where object tracking should be performed
    video = cv2.VideoCapture(videoIn_name)
    #Video output
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_output = cv2.VideoWriter(videoOut_name,fourcc,60,(frame_width,frame_height))
    #Particles Instantiation
    particles=ParticleFilter(frame_width,frame_height, n_particles)
    
    #Loop through the entire video
    while (True):
        #Take the video and break it frame by frame
        _,frame=video.read()
        #Check if frames are captured
        if(_ == False ): break

        it+=1

        #Particles prediction update
        particles.particles_update(v_x,v_y,std,frame_width,frame_height)
        particles_l.append(particles.particles.copy())
        anchor_l.append(anchor)
    
        #Coordinates of ROI around anchor
        dy1=int(anchor[1]-height/2)
        #if(dy1<0):dy1=0
        dy2=int(anchor[1]+height/2)
        #if (dy2>frame_height):dy2=frame_height
        dx1=int(anchor[0]-width/2)
        #if (dx1<0): dx1=0
        dx2=int(anchor[0]+width/2)
        #if (dx2>frame_width):dx2=frame_width

        frame_roi=frame[dy1:dy2,dx1:dx2]
        ROI_l.append((dx1,dy1,dx2,dy2))
            

        # Detecting objects
        scale=1/255
        blob = cv2.dnn.blobFromImage(frame_roi,scale,(blob_x, blob_y),(0, 0, 0), True, crop=False)
        net.setInput(blob)
        #Run forward pass to get predictions from output layers selected
        outputs = net.forward(output_layer_names)
        # Showing informations on the screen
        classes = []
        confidences = []
        boxes = []
        yolo_center=[]
        yolo_center_anchor_dist=[]

        #for each output of the YOLO last layers
        for output in outputs:
            for detection in output:
            #Each detection contains center point x,y,width,height,object probability
            #and 80 class probability
                scores = detection[5:]
                class_id = np.argmax(scores)
                confidence = scores[class_id]
                if confidence > conf:
                    print("step ",str(it),"confidence ",confidence,"\n")
                    #non normalized detected object's center, width and height 
                    center_x,center_y = int(detection[0] *blob_x),int(detection[1] * blob_y)
                    w,h = int(detection[2] * blob_x),int(detection[3] * blob_y)
                    # Rectangle coordinates for drawing
                    x,y = int(center_x - w / 2),int(center_y - h / 2)
                    boxes.append([x, y, w, h])
                    confidences.append(float(confidence))
                    classes.append(class_id)

                    #Get coordinates in initial video frame reference instead of 
                    #ROI reference
                    center_x_reverse=center_x+(anchor[0]-width/2)
                    center_y_reverse=center_y+(anchor[1]-height/2)
                    yolo_center.append((center_x_reverse,center_y_reverse))

        #Search and keep relevant bounding boxes given their scores 
        indexes = cv2.dnn.NMSBoxes(boxes, confidences, conf, nms)
        #Only compute the distances between anchor and the yolo predictions
        #if confidence is good and indexes is not empty
        if (len(indexes)!=0):
            yolo_center_anchor_dist=[math.sqrt((yolo_center[i][0]-anchor[0])**2+\
            (yolo_center[i][1]-anchor[1])**2) for i in range (len(yolo_center))\
                                    if i in indexes]
            #Keep the relevant indices in a list to retrieve the relevant index latter
            nms_indexes=[i for i in range (len(yolo_center)) if i in indexes]

            print("step ",str(it),"indexes ",indexes," nms ",nms_indexes,\
            " yolo_center ",yolo_center," distances ", yolo_center_anchor_dist, "\n")

            #Save and return the coordinates of the most suitable center and its BB
            #Keep the index of the minimum distance between prediction and anchor
            index=yolo_center_anchor_dist.index(min(yolo_center_anchor_dist))
            #Retrieve relevant index to select the yolo index and the corresponding BB
            index_yolo=nms_indexes[index]
            print("step ",str(it)," index ",index," index_yolo ",index_yolo,"\n")
            BB_x=int(boxes[index_yolo][0]+(anchor[0]-width/2))
            BB_y=int(boxes[index_yolo][1]+(anchor[1]-height/2))
            BB =BB_x,BB_y,boxes[index_yolo][2],boxes[index_yolo][3]

            #Update measurements and anchor  
            x_objMeasure,y_objMeasure=yolo_center[index_yolo][0],yolo_center[index_yolo][1]
            yolo_det+=1

        #Save BB in a list
        BB_l.append(BB)
        
        #Draw the ROI in red
        cv2.rectangle(frame, (dx1,dy1), (dx2,dy2), (0,0,250), 2)

        #Update the particles weights with the new measurements (Object center)
        particles.weigth_update(x_objMeasure,y_objMeasure)

        #Estimation of object center position
        x_estimation,y_estimation=particles.position_estimation()
        
        #Update the anchor with either the current measurements or the x and y estimates
        if it > anchor_shift:
            anchor=x_estimation,y_estimation
        else:
            anchor=x_objMeasure,y_objMeasure
        
        #Save the x and y estimated in a list
        xy_est_l.append((x_estimation,y_estimation))

        #Draw the particles
        particles.draw_box_particles(frame,BB,x_estimation,y_estimation)
        #Draw the position estimation
        cv2.circle(frame,(x_estimation,y_estimation),5,[0,255,0],3)

        #Resample the particles
        if (particles.effective_particles() < N_thresh):
            n_resampling+=1
            particles.resampling()

        if (num==Tot-1):
            video_output.write(frame)

        #if it==50:
        #    break
    
    yolo_det_l.append(yolo_det) 
    n_resampl_l.append(n_resampling)
    BB_avg.append(BB_l)
    xy_est_avg.append(xy_est_l)
    particles_avg.append(particles_l)
    ROI_avg.append(ROI_l)
    anchor_avg.append(anchor_l)
    

step  1 confidence  0.95814544 

step  1 indexes  [[0]]  nms  [0]  yolo_center  [(1291.0, 358.0)]  distances  [41.773197148410844] 

step  1  index  0  index_yolo  0 

step  2 confidence  0.9327465 

step  2 indexes  [[0]]  nms  [0]  yolo_center  [(1319.0, 364.0)]  distances  [28.635642126552707] 

step  2  index  0  index_yolo  0 

step  3 confidence  0.92327935 

step  3 indexes  [[0]]  nms  [0]  yolo_center  [(1339.0, 388.0)]  distances  [31.240998703626616] 

step  3  index  0  index_yolo  0 

step  11 confidence  0.8650678 

step  11 indexes  [[0]]  nms  [0]  yolo_center  [(1349.0, 379.0)]  distances  [13.45362404707371] 

step  11  index  0  index_yolo  0 

step  12 confidence  0.9655613 

step  12 indexes  [[0]]  nms  [0]  yolo_center  [(1353.0, 381.0)]  distances  [4.47213595499958] 

step  12  index  0  index_yolo  0 

step  13 confidence  0.8092951 

step  13 indexes  [[0]]  nms  [0]  yolo_center  [(1356.0, 384.0)]  distances  [4.242640687119285] 

step  13  index  0  index_y

step  217 confidence  0.8632263 

step  217 indexes  [[0]]  nms  [0]  yolo_center  [(1436.0, 432.0)]  distances  [15.033296378372908] 

step  217  index  0  index_yolo  0 

step  236 confidence  0.8490345 

step  236 indexes  [[0]]  nms  [0]  yolo_center  [(1366.0, 405.0)]  distances  [74.67261881037788] 

step  236  index  0  index_yolo  0 

step  237 confidence  0.87863696 

step  237 indexes  [[0]]  nms  [0]  yolo_center  [(1365.0, 407.0)]  distances  [74.33034373659252] 

step  237  index  0  index_yolo  0 

step  238 confidence  0.89922297 

step  238 indexes  [[0]]  nms  [0]  yolo_center  [(1363.0, 403.0)]  distances  [74.09453421137081] 

step  238  index  0  index_yolo  0 

step  239 confidence  0.91390616 

step  239 confidence  0.87020814 

step  239 indexes  [[0]
 [1]]  nms  [0, 1]  yolo_center  [(1359.0, 400.0), (1307.0, 434.0)]  distances  [74.46475676452586, 121.1486689980538] 

step  239  index  0  index_yolo  0 

step  240 confidence  0.8722295 

step  240 confidence  0

step  461 confidence  0.85928047 

step  461 indexes  [[0]]  nms  [0]  yolo_center  [(1520.0, 480.0)]  distances  [21.540659228538015] 

step  461  index  0  index_yolo  0 

step  462 confidence  0.80291593 

step  462 indexes  [[0]]  nms  [0]  yolo_center  [(1388.0, 475.0)]  distances  [119.41942890501528] 

step  462  index  0  index_yolo  0 

step  463 confidence  0.924453 

step  463 indexes  [[0]]  nms  [0]  yolo_center  [(1389.0, 474.0)]  distances  [123.25988804148737] 

step  463  index  0  index_yolo  0 

step  464 confidence  0.8833646 

step  464 indexes  [[0]]  nms  [0]  yolo_center  [(1389.0, 474.0)]  distances  [122.2620137246234] 

step  464  index  0  index_yolo  0 

step  469 confidence  0.81925374 

step  469 confidence  0.8799132 

step  469 indexes  [[1]
 [0]]  nms  [0, 1]  yolo_center  [(1443.0, 437.0), (1506.0, 478.0)]  distances  [57.62811813689564, 27.294688127912362] 

step  469  index  1  index_yolo  1 

step  470 confidence  0.8246545 

step  470 indexes  [[0

step  510 confidence  0.821316 

step  510 indexes  [[0]]  nms  [0]  yolo_center  [(1328.0, 457.0)]  distances  [91.0823802938856] 

step  510  index  0  index_yolo  0 

step  511 confidence  0.8610138 

step  511 confidence  0.9261849 

step  511 indexes  [[1]
 [0]]  nms  [0, 1]  yolo_center  [(1418.0, 426.0), (1325.0, 458.0)]  distances  [7.615773105863909, 90.75791976461338] 

step  511  index  0  index_yolo  0 

step  512 confidence  0.818053 

step  512 confidence  0.886293 

step  512 indexes  [[1]
 [0]]  nms  [0, 1]  yolo_center  [(1419.0, 422.0), (1327.0, 456.0)]  distances  [7.0710678118654755, 91.706052144883] 

step  512  index  0  index_yolo  0 

step  513 confidence  0.820643 

step  513 indexes  [[0]]  nms  [0]  yolo_center  [(1329.0, 455.0)]  distances  [92.97311439335567] 

step  513  index  0  index_yolo  0 

step  514 confidence  0.86669165 

step  514 indexes  [[0]]  nms  [0]  yolo_center  [(1422.0, 420.0)]  distances  [4.123105625617661] 

step  514  index  0  index

step  557 confidence  0.8374702 

step  557 indexes  [[0]]  nms  [0]  yolo_center  [(1288.0, 428.0)]  distances  [54.037024344425184] 

step  557  index  0  index_yolo  0 

step  561 confidence  0.84967244 

step  561 confidence  0.83432037 

step  561 indexes  [[0]
 [1]]  nms  [0, 1]  yolo_center  [(1263.0, 430.0), (1195.0, 469.0)]  distances  [26.1725046566048, 102.95630140987001] 

step  561  index  0  index_yolo  0 

step  563 confidence  0.8161851 

step  563 confidence  0.8241069 

step  563 indexes  [[1]
 [0]]  nms  [0, 1]  yolo_center  [(1254.0, 432.0), (1184.0, 470.0)]  distances  [22.360679774997898, 101.13357503816425] 

step  563  index  0  index_yolo  0 

step  564 confidence  0.8374012 

step  564 indexes  [[0]]  nms  [0]  yolo_center  [(1174.0, 472.0)]  distances  [96.60745312862771] 

step  564  index  0  index_yolo  0 

step  569 confidence  0.8356444 

step  569 indexes  [[0]]  nms  [0]  yolo_center  [(1170.0, 475.0)]  distances  [78.79086241436883] 

step  569  index

step  681 confidence  0.8109803 

step  681 indexes  [[0]]  nms  [0]  yolo_center  [(1269.0, 446.0)]  distances  [2.23606797749979] 

step  681  index  0  index_yolo  0 

step  683 confidence  0.8072555 

step  683 indexes  [[0]]  nms  [0]  yolo_center  [(1269.0, 446.0)]  distances  [2.0] 

step  683  index  0  index_yolo  0 

step  685 confidence  0.83449435 

step  685 indexes  [[0]]  nms  [0]  yolo_center  [(1272.0, 444.0)]  distances  [3.605551275463989] 

step  685  index  0  index_yolo  0 

step  686 confidence  0.8957893 

step  686 indexes  [[0]]  nms  [0]  yolo_center  [(1272.0, 445.0)]  distances  [2.0] 

step  686  index  0  index_yolo  0 

step  687 confidence  0.93420583 

step  687 indexes  [[0]]  nms  [0]  yolo_center  [(1273.0, 444.0)]  distances  [2.0] 

step  687  index  0  index_yolo  0 

step  688 confidence  0.937246 

step  688 indexes  [[0]]  nms  [0]  yolo_center  [(1273.0, 445.0)]  distances  [2.23606797749979] 

step  688  index  0  index_yolo  0 

step  689 c

step  741 confidence  0.87204975 

step  741 indexes  [[0]]  nms  [0]  yolo_center  [(1281.0, 485.0)]  distances  [37.05401462729781] 

step  741  index  0  index_yolo  0 

step  742 confidence  0.81423557 

step  742 indexes  [[0]]  nms  [0]  yolo_center  [(1281.0, 454.0)]  distances  [1.4142135623730951] 

step  742  index  0  index_yolo  0 

step  743 confidence  0.8267624 

step  743 indexes  [[0]]  nms  [0]  yolo_center  [(1283.0, 490.0)]  distances  [37.05401462729781] 

step  743  index  0  index_yolo  0 

step  744 confidence  0.8962436 

step  744 indexes  [[0]]  nms  [0]  yolo_center  [(1277.0, 454.0)]  distances  [7.211102550927978] 

step  744  index  0  index_yolo  0 

step  746 confidence  0.9095092 

step  746 indexes  [[0]]  nms  [0]  yolo_center  [(1279.0, 447.0)]  distances  [7.0710678118654755] 

step  746  index  0  index_yolo  0 

step  748 confidence  0.8199672 

step  748 indexes  [[0]]  nms  [0]  yolo_center  [(1280.0, 446.0)]  distances  [2.23606797749979] 

st

step  803 confidence  0.91613036 

step  803 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 451.0)]  distances  [1.4142135623730951] 

step  803  index  0  index_yolo  0 

step  804 confidence  0.91058135 

step  804 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 450.0)]  distances  [1.4142135623730951] 

step  804  index  0  index_yolo  0 

step  805 confidence  0.8644005 

step  805 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 449.0)]  distances  [1.4142135623730951] 

step  805  index  0  index_yolo  0 

step  806 confidence  0.91590744 

step  806 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 449.0)]  distances  [1.0] 

step  806  index  0  index_yolo  0 

step  807 confidence  0.89520746 

step  807 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 449.0)]  distances  [1.4142135623730951] 

step  807  index  0  index_yolo  0 

step  808 confidence  0.9434294 

step  808 indexes  [[0]]  nms  [0]  yolo_center  [(1297.0, 451.0)]  distances  [2.23606797749979] 

step  808  i

step  852 confidence  0.93859303 

step  852 indexes  [[0]]  nms  [0]  yolo_center  [(1307.0, 455.0)]  distances  [1.4142135623730951] 

step  852  index  0  index_yolo  0 

step  853 confidence  0.89898443 

step  853 indexes  [[0]]  nms  [0]  yolo_center  [(1309.0, 455.0)]  distances  [2.0] 

step  853  index  0  index_yolo  0 

step  854 confidence  0.8884861 

step  854 indexes  [[0]]  nms  [0]  yolo_center  [(1310.0, 455.0)]  distances  [1.4142135623730951] 

step  854  index  0  index_yolo  0 

step  855 confidence  0.872558 

step  855 indexes  [[0]]  nms  [0]  yolo_center  [(1310.0, 455.0)]  distances  [1.4142135623730951] 

step  855  index  0  index_yolo  0 

step  856 confidence  0.8582723 

step  856 indexes  [[0]]  nms  [0]  yolo_center  [(1311.0, 455.0)]  distances  [1.0] 

step  856  index  0  index_yolo  0 

step  857 confidence  0.83459723 

step  857 indexes  [[0]]  nms  [0]  yolo_center  [(1312.0, 456.0)]  distances  [1.0] 

step  857  index  0  index_yolo  0 

step 

step  931 confidence  0.8882434 

step  931 indexes  [[0]]  nms  [0]  yolo_center  [(1286.0, 456.0)]  distances  [12.041594578792296] 

step  931  index  0  index_yolo  0 

step  932 confidence  0.834198 

step  932 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 457.0)]  distances  [2.23606797749979] 

step  932  index  0  index_yolo  0 

step  933 confidence  0.82878226 

step  933 indexes  [[0]]  nms  [0]  yolo_center  [(1296.0, 457.0)]  distances  [2.8284271247461903] 

step  933  index  0  index_yolo  0 

step  934 confidence  0.8124289 

step  934 indexes  [[0]]  nms  [0]  yolo_center  [(1298.0, 457.0)]  distances  [2.23606797749979] 

step  934  index  0  index_yolo  0 

step  935 confidence  0.8582415 

step  935 indexes  [[0]]  nms  [0]  yolo_center  [(1284.0, 455.0)]  distances  [13.038404810405298] 

step  935  index  0  index_yolo  0 

step  941 confidence  0.81467915 

step  941 indexes  [[0]]  nms  [0]  yolo_center  [(1285.0, 458.0)]  distances  [4.47213595499958] 

step

In [6]:
prog_duration= time.time() - start_time
prog_duration

394.99127984046936

In [7]:
prog_duration/60

6.583187997341156

In [8]:
prog_duration/(60*Tot)

6.583187997341156

In [9]:
sum(n_resampl_l)/Tot

402.0

In [10]:
len(BB_l),len(xy_est_l),len(BB_avg),len(xy_est_avg),len(particles_avg),len(anchor_avg)

(980, 980, 1, 1, 1, 1)

In [11]:
sum(yolo_det_l)/Tot

444.0

In [12]:
BB_l_avg=[] 
BB_avg=np.array(BB_avg)
BB_l_avg=np.sum(BB_avg,0)/Tot

In [13]:
xy_est_l_avg=[] 
xy_est_avg=np.array(xy_est_avg)
xy_est_l_avg=np.sum(xy_est_avg,0)/Tot
xy_est_l_avg

array([[1143.,  471.],
       [1270.,  404.],
       [1327.,  366.],
       ...,
       [1279.,  455.],
       [1281.,  455.],
       [1281.,  455.]])

In [14]:
ROI_l_avg=[] 
ROI_avg=np.array(ROI_avg)
ROI_l_avg=np.sum(ROI_avg,0)/Tot

In [15]:
particles_l_avg=[] 
particles_avg=np.array(particles_avg)
particles_l_avg=np.sum(particles_avg,0)/Tot

In [16]:
anchor_l_avg=[] 
anchor_avg=np.array(anchor_avg)
anchor_l_avg=np.sum(anchor_avg,0)/Tot
anchor_l_avg

array([[1250.,  350.],
       [1291.,  358.],
       [1319.,  364.],
       ...,
       [1278.,  455.],
       [1279.,  455.],
       [1281.,  455.]])

In [17]:
#xy_est_l

In [18]:
#BB_l

In [19]:
np.save(path+'xy_data3.npy',np.array(xy_est_l_avg))
np.save(path+'BB_data3.npy',np.array(BB_l_avg))
np.save(path+'ROI_data3.npy',np.array(ROI_l_avg))
np.save(path+'part_data3.npy',np.array(particles_l_avg))
np.save(path+'anchor_data3.npy',np.array(anchor_l_avg))