In [None]:
#The MIT License (MIT)

#Copyright (c) 2020 Juliana T.C. Marcos

#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO
#THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 
#CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

#This code uses the methods of the ParticleFilter (PF) class to track an object in a video. The PF uses
#the You Only Look Once (YOLO) object detector as its measurement provider.

#The particles are drawn in red, the PF estimate of the tracked animal's position
#is drawn in green and the animal's bounding box (BB) is drawn in blue.

#Thanks to Nayak for the nice tutorial about using YOLOv3 with OpenCv which is available at this address:
#https://www.learnopencv.com/deep-learning-based-object-detection-using-yolov3-with-opencv-python-c/


In [None]:
#Import the required libraries
import cv2
import math
import numpy as np
from skimage import measure
from ParticleFilter import ParticleFilter
import time

In [None]:
"""Some variables initialization """

#The total number of trials
Tot=1
#Cross-trial accumulators: one entry (the per-frame history of a trial) is appended per trial
BB_avg=[]
ROI_avg=[]
anchor_avg=[]
xy_est_avg=[]
particles_avg=[]
#number of particles
n_particles=2000
#noise in sensors' measurements
meas_noise=0
#Read weights and config files to create YOLO(v3) net
net = cv2.dnn.readNet("./Inputs/yolov3.weights", "./Inputs/yolov3.cfg")
#Fetch the output layers names of YOLO net, they are the ones not connected to
#any following layers since they are the last layers
layer_names = net.getLayerNames()
#getUnconnectedOutLayers() returns 1-based layer ids. Depending on the OpenCV version the
#result is either an Nx1 array (each element is a 1-element array) or a flat 1-D array
#(each element is a scalar); flattening makes the lookup work with both layouts, whereas
#the previous i[0] indexing raised an IndexError on the flat layout.
output_layer_ids = np.asarray(net.getUnconnectedOutLayers()).flatten()
output_layer_names = [layer_names[layer_id - 1] for layer_id in output_layer_ids]
#Read the classes in coco.names file for YOLO net (one class name per line)
with open ('./Inputs/coco.names','r') as file:
    coco_classes=[line.strip("\n") for line in file]

#Text rendering settings (font, position in pixels, size and thickness)
font = cv2.FONT_HERSHEY_SIMPLEX
text_coord=(10,40)
t_size=0.8
t_thick=2
#Resampling threshold compared with the effective number of particles
#This value was chosen according to a paper experiment
N_thresh=(2*n_particles)/30
#Scale for YOLO's inputs preprocessing (maps 8-bit pixel values to [0,1])
scale=1/255
#Video frame width and height
frame_width=1920
frame_height=1080
#Lists to contain the counters for each trial
yolo_det_l=[]
n_resampl_l=[]
#Number of initial frames during which the anchor follows the measurements;
#after that many frames the anchor follows the PF estimate instead
anchor_shift=60

In [None]:
#Data for video cows
#Initial anchor (x,y) in frame pixels: centre of the first region of interest (ROI)
anchorS=(1250,350)
videoIn_name="./Inputs/cows.avi"
path="./Outputs/"
videoOut_name=path+"cows-pf-yolo.avi"
#std in the prediction of particles for the object's position
std=10
#These are YOLO parameters
#conf is used both as the minimum class score of a detection and as the NMS score threshold
conf=0.8
#nms is the overlap threshold passed to the non-maximum suppression
nms=0.7
#ROI size around the anchor: height is the number of rows, width the number of columns
height, width, channels =224,320,3
#Network input size. cv2.dnn.blobFromImage takes its size argument as (width, height),
#so blob_x must be the ROI width and blob_y the ROI height. They were previously swapped
#(224,320), which fed the net an ROI with inverted aspect ratio and made the detection
#coordinates, scaled by blob_x/blob_y, inconsistent with the ROI they are mapped back to.
blob_x,blob_y=width,height
#Motion model's speed in x and y directions
v_x=0.01
v_y=0.01

In [None]:
#Wall-clock start of the whole experiment (all trials); read by the timing cells below
start_time = time.time()

#Start every execution of this cell from empty cross-trial accumulators. Without this,
#re-running the cell appends to the results of the previous execution (so the averages
#computed later are wrong) or fails once a later cell has turned these lists into arrays.
BB_avg=[]
ROI_avg=[]
anchor_avg=[]
xy_est_avg=[]
particles_avg=[]
yolo_det_l=[]
n_resampl_l=[]

for num in range(Tot):

    #Per-trial histories, one entry per processed frame
    BB_l=[]
    ROI_l=[]
    anchor_l=[]
    xy_est_l=[]
    particles_l=[]
    #Counters: frames with a YOLO detection, processed frames, resampling steps
    yolo_det=0
    it=0
    n_resampling=0
    #The anchor (centre of the searched ROI) starts at the configured position
    anchor=anchorS
    #Until YOLO detects something, the measurement is the initial anchor
    x_objMeasure,y_objMeasure=anchor[0],anchor[1]
    #BB variable initialization (x, y, w, h)
    BB=0,0,0,0
    #Capture video where object tracking should be performed
    video = cv2.VideoCapture(videoIn_name)
    if not video.isOpened():
        video.release()
        #Fail early: an unreadable input would otherwise look like an empty video
        raise IOError("Cannot open input video: "+videoIn_name)
    #Video output
    fourcc = cv2.VideoWriter_fourcc(*'XVID')
    video_output = cv2.VideoWriter(videoOut_name,fourcc,60,(frame_width,frame_height))
    #Particles Instantiation
    particles=ParticleFilter(frame_width,frame_height, n_particles)

    try:
        #Loop through the entire video
        while True:
            #Read the next frame; grabbed is False once no more frames are available
            grabbed,frame=video.read()
            if not grabbed or frame is None:
                break

            it+=1

            #Particles prediction update
            particles.particles_update(v_x,v_y,std,frame_width,frame_height)
            particles_l.append(particles.particles.copy())
            anchor_l.append(anchor)

            #ROI of nominal size width x height centred on the anchor, clamped to the
            #image: a negative start index would otherwise be interpreted by numpy as
            #"counted from the end" and yield an empty or wrong crop near the borders
            img_h,img_w=frame.shape[:2]
            dx1=min(max(int(anchor[0]-width/2),0),img_w)
            dx2=min(max(int(anchor[0]+width/2),0),img_w)
            dy1=min(max(int(anchor[1]-height/2),0),img_h)
            dy2=min(max(int(anchor[1]+height/2),0),img_h)
            #Actual size of the crop (smaller than nominal when clamped)
            roi_w,roi_h=dx2-dx1,dy2-dy1
            frame_roi=frame[dy1:dy2,dx1:dx2]
            ROI_l.append((dx1,dy1,dx2,dy2))

            #Detecting objects (skipped when the ROI lies completely outside the image)
            if roi_w>0 and roi_h>0:
                blob = cv2.dnn.blobFromImage(frame_roi,scale,(blob_x, blob_y),(0, 0, 0), True, crop=False)
                net.setInput(blob)
                #Run forward pass to get predictions from output layers selected
                outputs = net.forward(output_layer_names)
            else:
                outputs = []

            classes = []
            confidences = []
            boxes = []
            yolo_center=[]
            yolo_center_anchor_dist=[]

            #for each output of the YOLO last layers
            for output in outputs:
                for detection in output:
                    #Each detection contains center point x,y,width,height,object probability
                    #and the class probabilities
                    scores = detection[5:]
                    class_id = np.argmax(scores)
                    confidence = scores[class_id]
                    if confidence > conf:
                        print("step ",str(it),"confidence ",confidence,"\n")
                        #The detection is normalized to the network input, which is the
                        #resized ROI, so it is scaled by the actual ROI size to obtain
                        #pixel coordinates inside the ROI
                        center_x,center_y = int(detection[0] * roi_w),int(detection[1] * roi_h)
                        w,h = int(detection[2] * roi_w),int(detection[3] * roi_h)
                        # Rectangle coordinates (top-left corner) for drawing
                        x,y = int(center_x - w / 2),int(center_y - h / 2)
                        boxes.append([x, y, w, h])
                        confidences.append(float(confidence))
                        classes.append(class_id)

                        #Get coordinates in initial video frame reference instead of
                        #ROI reference: shift by the actual top-left corner of the ROI
                        yolo_center.append((center_x+dx1,center_y+dy1))

            #Search and keep relevant bounding boxes given their scores.
            #NMSBoxes returns an Nx1 array, a flat array or an empty tuple depending on
            #the OpenCV version; flatten() gives a uniform 1-D view of the kept indices
            if boxes:
                indexes = np.asarray(cv2.dnn.NMSBoxes(boxes, confidences, conf, nms)).flatten()
            else:
                indexes = np.array([],dtype=int)
            #Kept detection indices in increasing order
            nms_indexes=sorted(int(i) for i in indexes)

            #Only compute the distances between anchor and the yolo predictions
            #if at least one detection survived the suppression
            if (len(nms_indexes)!=0):
                yolo_center_anchor_dist=[math.hypot(yolo_center[i][0]-anchor[0],\
                                                    yolo_center[i][1]-anchor[1]) for i in nms_indexes]

                print("step ",str(it),"indexes ",indexes," nms ",nms_indexes,\
                " yolo_center ",yolo_center," distances ", yolo_center_anchor_dist, "\n")

                #Keep the index of the minimum distance between prediction and anchor
                index=yolo_center_anchor_dist.index(min(yolo_center_anchor_dist))
                #Retrieve relevant index to select the yolo index and the corresponding BB
                index_yolo=nms_indexes[index]
                print("step ",str(it)," index ",index," index_yolo ",index_yolo,"\n")
                #BB top-left corner expressed in the full frame reference
                BB_x=boxes[index_yolo][0]+dx1
                BB_y=boxes[index_yolo][1]+dy1
                BB =BB_x,BB_y,boxes[index_yolo][2],boxes[index_yolo][3]

                #Update measurements with the centre of the selected detection
                x_objMeasure,y_objMeasure=yolo_center[index_yolo][0],yolo_center[index_yolo][1]
                yolo_det+=1

            #Save BB in a list (the previous BB is kept when nothing was detected)
            BB_l.append(BB)

            #Draw the ROI in red
            cv2.rectangle(frame, (dx1,dy1), (dx2,dy2), (0,0,250), 2)

            #Update the particles weights with the new measurements (Object center)
            particles.weigth_update(x_objMeasure,y_objMeasure)

            #Estimation of object center position
            x_estimation,y_estimation=particles.position_estimation()

            #Update the anchor with either the current measurements or the x and y estimates
            if it > anchor_shift:
                anchor=x_estimation,y_estimation
            else:
                anchor=x_objMeasure,y_objMeasure

            #Save the x and y estimated in a list
            xy_est_l.append((x_estimation,y_estimation))

            #Draw the particles
            particles.draw_box_particles(frame,BB,x_estimation,y_estimation)
            #Draw the position estimation. OpenCV drawing functions require integer pixel
            #coordinates; the cast is a no-op if position_estimation already returns integers
            cv2.circle(frame,(int(round(x_estimation)),int(round(y_estimation))),5,[0,255,0],3)

            #Resample the particles when the effective number of particles is too low
            if (particles.effective_particles() < N_thresh):
                n_resampling+=1
                particles.resampling()

            #Only the last trial is written to the output video
            if (num==Tot-1):
                video_output.write(frame)
    finally:
        #Always release the capture and the writer, even if a frame raised an error,
        #so that the file handles are freed and the output video is finalized
        video.release()
        video_output.release()

    #Store this trial's counters and histories
    yolo_det_l.append(yolo_det)
    n_resampl_l.append(n_resampling)
    BB_avg.append(BB_l)
    xy_est_avg.append(xy_est_l)
    particles_avg.append(particles_l)
    ROI_avg.append(ROI_l)
    anchor_avg.append(anchor_l)
    

In [None]:
#Elapsed wall-clock time, in seconds, since start_time was recorded
prog_duration = time.time() - start_time
prog_duration

In [None]:
#Total duration expressed in minutes
prog_duration / 60

In [None]:
#Mean duration of one trial, in minutes
prog_duration / (Tot * 60)

In [None]:
#Mean number of resampling steps per trial
sum(n_resampl_l) / Tot

In [None]:
#Sanity check: sizes of the last trial's histories and of the cross-trial accumulators
tuple(len(history) for history in (BB_l, xy_est_l, BB_avg, xy_est_avg, particles_avg, anchor_avg))

In [None]:
#Mean number of frames with a YOLO detection per trial
sum(yolo_det_l) / Tot

In [None]:
#Average the per-frame bounding boxes over the trials (axis 0 indexes the trial)
BB_avg=np.array(BB_avg)
#The mean divides by the number of trials actually stored in BB_avg, so the result stays
#correct even if that number differs from Tot (e.g. after the tracking cell was re-run)
BB_l_avg=BB_avg.mean(axis=0)

In [None]:
#Average the per-frame position estimates over the trials (axis 0 indexes the trial)
xy_est_avg=np.array(xy_est_avg)
#The mean divides by the number of trials actually stored in xy_est_avg, so the result
#stays correct even if that number differs from Tot (e.g. after the tracking cell was re-run)
xy_est_l_avg=xy_est_avg.mean(axis=0)
xy_est_l_avg

In [None]:
#Average the per-frame ROI corners over the trials (axis 0 indexes the trial)
ROI_avg=np.array(ROI_avg)
#The mean divides by the number of trials actually stored in ROI_avg, so the result stays
#correct even if that number differs from Tot (e.g. after the tracking cell was re-run)
ROI_l_avg=ROI_avg.mean(axis=0)

In [None]:
#Average the per-frame particle sets over the trials (axis 0 indexes the trial)
particles_avg=np.array(particles_avg)
#The mean divides by the number of trials actually stored in particles_avg, so the result
#stays correct even if that number differs from Tot (e.g. after the tracking cell was re-run)
particles_l_avg=particles_avg.mean(axis=0)

In [None]:
#Average the per-frame anchors over the trials (axis 0 indexes the trial)
anchor_avg=np.array(anchor_avg)
#The mean divides by the number of trials actually stored in anchor_avg, so the result
#stays correct even if that number differs from Tot (e.g. after the tracking cell was re-run)
anchor_l_avg=anchor_avg.mean(axis=0)
anchor_l_avg

In [None]:
#xy_est_l

In [None]:
#BB_l

In [None]:
#Persist every trial-averaged output as a .npy file inside the output folder
averaged_outputs=(
    ('xy_data3.npy',xy_est_l_avg),
    ('BB_data3.npy',BB_l_avg),
    ('ROI_data3.npy',ROI_l_avg),
    ('part_data3.npy',particles_l_avg),
    ('anchor_data3.npy',anchor_l_avg),
)
for file_name,values in averaged_outputs:
    np.save(path+file_name,np.array(values))