<h1 style="font-size:30px;">Object Tracking Analysis</h1> 

In this notebook we will demonstrate how to execute multiple trackers and compare the results in a multi-view output video. This technique can be very valuable for assessing which tracking algorithm may be best suited for your particular application. 

<br>
<center>
<img src = "https://opencv.org/wp-content/uploads/2021/09/c0-m16-02-Object-Tracker-Feature-Image.png" alt="Object Tracker Feature Image">
</center>
<br>

In [None]:
# Colab-only setup: the branch runs when the string form of the active IPython
# shell contains 'google.colab'.
if 'google.colab' in str(get_ipython()):
    print("Downloading Code to Colab Environment")
    # Fetch the module archive as module-code.zip, then unpack it into the
    # current working directory.
    !wget https://www.dropbox.com/sh/uklrcxd2d4zfcp3/AAAQiMxAHkBlQUP-6wCzTa58a?dl=1 -O module-code.zip -q --show-progress
    !unzip -qq module-code.zip
    # Upgrade the contrib build of OpenCV.
    # NOTE(review): presumably required for the cv2.legacy trackers used later
    # in this notebook -- confirm.
    !pip install --upgrade opencv-contrib-python
else:
    # Outside Colab nothing is downloaded or installed.
    pass

In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
from moviepy.editor import VideoFileClip

# 1.  Define Data Structures and Helper Functions

In [None]:
# Registry mapping a tracker name to a tracker object. Every object is
# constructed exactly once, when this cell is executed.
tracker_dict = {
    'BOOSTING':   cv2.legacy.TrackerBoosting_create(),
    'CSRT':       cv2.legacy.TrackerCSRT_create(),
    'KCF':        cv2.legacy.TrackerKCF_create(),
    'MEDIANFLOW': cv2.legacy.TrackerMedianFlow_create(),
    'MIL':        cv2.legacy.TrackerMIL_create(),
    'MOSSE':      cv2.legacy.TrackerMOSSE_create(),
    'TLD':        cv2.legacy.TrackerTLD_create(),
}

# Frame size, as (width, height) in pixels, for each supported resolution label.
resolution_dict = dict([
    ('360p',  (480, 360)),
    ('480p',  (858, 480)),
    ('720p',  (1280, 720)),
    ('1080p', (1920, 1080)),
])

class VideoSpec:
    """Plain container for the inputs that describe one tracking run.

    Attributes:
        video_filename: Value given as `video_filename`.
        res: Value given as `resolution`.
        bbox: Value given as `bbox`.
    """

    def __init__(self,video_filename, resolution, bbox):
        # Store the three arguments unchanged; note `resolution` is kept as `res`.
        self.video_filename, self.res, self.bbox = video_filename, resolution, bbox

def draw_bounding_box(frame, bbox, ok, color=(0, 255, 255), thickness=2):
    """Annotate `frame` in place with the tracking outcome.

    Args:
        frame: Image to draw on.
        bbox: Indexable whose first four items are read as x, y, width, height.
        ok: Truthy to draw the box; falsy to write a failure message instead.
        color: Rectangle color passed to cv2.rectangle.
        thickness: Rectangle line thickness passed to cv2.rectangle.
    """
    if not ok:
        # No box to draw: report the failure at a fixed position on the frame.
        cv2.putText(frame, "Tracking failure detected", (10,80), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
        return

    x, y, w, h = bbox[0], bbox[1], bbox[2], bbox[3]
    top_left = (int(x), int(y))
    bottom_right = (int(x + w), int(y + h))
    cv2.rectangle(frame, top_left, bottom_right, color, thickness)


def draw_banner_text(frame, text, banner_height_percent = 0.08, font_scale=1.5, font_thickness=2,
                    text_alignment = "center",  text_color = (0,255,0)):
    """Draw a black filled banner across the top of `frame` and write `text` on it.

    The frame is modified in place.

    Args:
        frame: Image array; shape[0] is used as its height and shape[1] as its width.
        text: String to render on the banner.
        banner_height_percent: Banner height as a fraction of the frame height.
        font_scale: Font scale passed to cv2.putText.
        font_thickness: Stroke thickness passed to cv2.putText.
        text_alignment: One of "left", "center" or "right".
        text_color: Text color passed to cv2.putText.

    Raises:
        KeyError: If `text_alignment` is not "left", "center" or "right".
    """
    frame_height, frame_width = frame.shape[0], frame.shape[1]

    # Draw a black filled banner across the top of the image frame.
    banner_height = int(banner_height_percent * frame_height)
    cv2.rectangle(frame, (0,0), (frame_width, banner_height), (0,0,0), thickness=-1)

    # Measure the rendered text so the alignment is computed from the real
    # frame width and the real text width (cv2.putText anchors the text at its
    # left edge, so the anchor has to be shifted by the text width).
    font_face = cv2.FONT_HERSHEY_PLAIN
    (text_width, _), _ = cv2.getTextSize(text, font_face, font_scale, font_thickness)
    margin = 10
    alignment_dict = dict(
        left   = margin,
        center = (frame_width - text_width) // 2,
        right  = frame_width - text_width - margin,
    )

    # Clamp at 0 so text wider than the frame still starts inside the image.
    left_offset = max(alignment_dict[text_alignment], 0)
    location = (left_offset, banner_height - 10)
    cv2.putText(frame, text, location, font_face, font_scale, text_color,
                font_thickness, cv2.LINE_AA)


def draw_text(frame, text, location=(20,20), font_scale=1, color=(50,170,50), font_thickness=2):
    """Write `text` onto `frame` in place at `location`.

    Uses the Hershey Simplex font with anti-aliased lines; the remaining
    arguments are forwarded to cv2.putText unchanged.
    """
    font_face = cv2.FONT_HERSHEY_SIMPLEX
    line_type = cv2.LINE_AA
    cv2.putText(frame, text, location, font_face, font_scale, color, font_thickness, line_type)

def get_trackers(tracker_names, tracker_dict):
    """Look up each requested name in `tracker_dict`.

    Args:
        tracker_names: Iterable of keys to look up.
        tracker_dict: Mapping from name to tracker object.

    Returns:
        A new list with the mapped objects, in the order of `tracker_names`.
        The objects themselves are the ones stored in the mapping, not copies.

    Raises:
        KeyError: If a name is not present in `tracker_dict`.
    """
    return [tracker_dict[name] for name in tracker_names]

def initialize_trackers(tracker_objects, frame, bbox):
    """Call `init(frame, bbox)` on every tracker, in order.

    Args:
        tracker_objects: Iterable of objects exposing an `init` method.
        frame: First argument forwarded to each `init` call.
        bbox: Second argument forwarded to each `init` call.
    """
    for current_tracker in tracker_objects:
        current_tracker.init(frame, bbox)


def get_tracker_results(tracker_objects, frame, tracker_names):
    """Run every tracker on its own copy of `frame`.

    Args:
        tracker_objects: Sequence of tracker objects.
        frame: Source image; it is copied once per tracker via `frame.copy()`.
        tracker_names: Sequence of names, indexed in parallel with
            `tracker_objects`.

    Returns:
        List of the frames returned by `update_tracker`, one per tracker and
        in the same order as `tracker_objects`.
    """
    tracker_count = len(tracker_objects)

    # Make all copies up front so each tracker is handed a separate image.
    working_copies = [frame.copy() for _ in range(tracker_count)]

    annotated_frames = []
    for idx, working_frame in enumerate(working_copies):
        _, annotated = update_tracker(tracker_objects[idx], working_frame, tracker_names[idx])
        annotated_frames.append(annotated)

    return annotated_frames


def update_tracker(tracker, frame, tracker_type):
    """Advance one tracker by a frame and annotate that frame in place.

    Args:
        tracker: Object whose `update(frame)` returns an `(ok, bbox)` pair.
        frame: Image handed to the tracker and then drawn on.
        tracker_type: Name shown in the banner text.

    Returns:
        Tuple `(ok, frame)`: the success flag from `tracker.update` and the
        same frame object after drawing.
    """
    # Time only the tracker update itself.
    start_ticks = cv2.getTickCount()
    ok, bbox = tracker.update(frame)
    elapsed_ticks = cv2.getTickCount() - start_ticks

    # Ticks per second divided by elapsed ticks gives updates per second.
    fps = cv2.getTickFrequency() / elapsed_ticks

    # Box on success, failure message otherwise.
    draw_bounding_box(frame, bbox, ok)

    # Banner label: tracker name followed by the integer FPS.
    banner_label = tracker_type + ' Tracker' + ', FPS : ' + str(int(fps))
    draw_banner_text(frame, banner_label)

    return ok, frame


def get_output_video_dims(tracker_names, resolution_specs):
    """Compute the (width, height) of the multi-view output video.

    The per-tracker views are tiled on a grid chosen from the number of
    trackers: 1 -> 1x1, 2 -> 1x2, 4 -> 2x2, 6 -> 2x3, 8 -> 2x4 (rows x cols).

    Args:
        tracker_names: Sized collection of tracker names; only its length is used.
        resolution_specs: `(width, height)` of a single view.

    Returns:
        Tuple `(width * cols, height * rows)`.

    Raises:
        ValueError: If the number of trackers has no defined grid layout.
            Previously this case fell through and returned None.
    """
    width, height = resolution_specs

    # (rows, cols) for each supported number of trackers.
    grid_by_count = {1: (1, 1), 2: (1, 2), 4: (2, 2), 6: (2, 3), 8: (2, 4)}

    n = len(tracker_names)
    if n not in grid_by_count:
        raise ValueError(
            "Unsupported number of trackers: {} (supported: {})".format(
                n, sorted(grid_by_count)))

    rows, cols = grid_by_count[n]
    return width * cols, height * rows

    
def align_frames(frames_list):
    """Tile the per-tracker frames into a single multi-view image.

    The grid is chosen from the number of frames: 1 -> 1x1, 2 -> 1x2,
    4 -> 2x2, 6 -> 2x3, 8 -> 2x4 (rows x cols). Frames fill the grid row by
    row, in list order.

    Args:
        frames_list: Sequence of image arrays. They are joined with
            np.hstack / np.vstack, so they must have compatible shapes.

    Returns:
        The single frame itself when there is only one; otherwise a new array
        with the frames stacked horizontally within a row and the rows
        stacked vertically.

    Raises:
        ValueError: If the number of frames has no defined grid layout.
            Previously this case fell through and returned None.
    """
    # (rows, cols) for each supported number of frames.
    grid_by_count = {1: (1, 1), 2: (1, 2), 4: (2, 2), 6: (2, 3), 8: (2, 4)}

    n = len(frames_list)
    if n not in grid_by_count:
        raise ValueError(
            "Unsupported number of frames: {} (supported: {})".format(
                n, sorted(grid_by_count)))

    # A single view needs no stacking; hand back the original object.
    if n == 1:
        return frames_list[0]

    rows, cols = grid_by_count[n]
    row_images = [
        np.hstack(list(frames_list[r * cols:(r + 1) * cols]))
        for r in range(rows)
    ]

    # One row is already the final image; otherwise stack the rows.
    if rows == 1:
        return row_images[0]
    return np.vstack(row_images)

# 2. Define the Main Controller for Tracking

In [None]:
def run_tracker(tracker_names, video_spec, video_output_file_name):
    """Run the named trackers over a video and write a multi-view result video.

    Every input frame is resized to the resolution named by `video_spec.res`,
    passed to each tracker, and the annotated per-tracker frames are tiled
    into one output frame that is appended to the output video.

    The tracker objects are taken from the module-level `tracker_dict`, so the
    same objects are reused by every call to this function.

    Args:
        tracker_names: List of keys into `tracker_dict`.
        video_spec: Object with `video_filename`, `res` (a key of
            `resolution_dict`) and `bbox` (initial bounding box) attributes.
        video_output_file_name: Path of the video file to write.

    Raises:
        ValueError: If `video_spec.res` is not a known resolution or the
            number of trackers has no output layout.
        IOError: If the input video cannot be opened or read, or the output
            video cannot be opened for writing.
    """
    # Validate the cheap inputs before touching any file.
    resolution_specs = resolution_dict.get(video_spec.res)
    if resolution_specs is None:
        raise ValueError(
            f"Unknown resolution {video_spec.res!r}; expected one of {sorted(resolution_dict)}")

    output_video_dim = get_output_video_dims(tracker_names, resolution_specs)
    if output_video_dim is None:
        raise ValueError(f"Unsupported number of trackers: {len(tracker_names)}")

    # Create the video capture object.
    video_cap = cv2.VideoCapture(video_spec.video_filename)
    video_out = None

    try:
        # Confirm video file can be opened.
        if not video_cap.isOpened():
            raise IOError(f"Could not open video: {video_spec.video_filename}")

        # Keep the frame rate as a float: truncating e.g. 29.97 to 29 would
        # change the playback speed of the output.
        fps = video_cap.get(cv2.CAP_PROP_FPS)
        # Fall back to a nominal rate when the reported frame rate is not positive.
        fps_write = fps if fps > 0 else 30.0

        # Set up video writer object for mp4.
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_out = cv2.VideoWriter(video_output_file_name, fourcc, fps_write, output_video_dim)
        if not video_out.isOpened():
            raise IOError(
                f"Could not open output video for writing: {video_output_file_name} "
                "(check that the output directory exists)")

        # Read the first frame.
        ok, frame = video_cap.read()
        if not ok:
            raise IOError(f"Cannot read video file: {video_spec.video_filename}")

        # Resize the image frame to the specified resolution.
        frame = cv2.resize(frame, resolution_specs, interpolation = cv2.INTER_AREA)

        # Get the list of tracker objects.
        tracker_objects = get_trackers(tracker_names, tracker_dict)

        # Initialize trackers on the first (resized) frame.
        initialize_trackers(tracker_objects, frame, video_spec.bbox)

        #-----------------------
        # Process video frames.
        #-----------------------
        while True:

            ok, frame = video_cap.read()
            if not ok:
                break

            # Resize the frame to the specified resolution.
            frame = cv2.resize(frame, resolution_specs, interpolation = cv2.INTER_AREA)

            # Retrieve the results for each tracker.
            frames_list = get_tracker_results(tracker_objects, frame, tracker_names)

            # Compose the final results in a multi-view layout.
            result = align_frames(frames_list)

            video_out.write(result)

    finally:
        # Always release the capture and writer, including on errors.
        video_cap.release()
        if video_out is not None:
            video_out.release()

# 3. Input Specification and Execution

The following table summarizes the input specifications for the `race_car.mp4` test video clip. The initial bounding box is specified depending on the video resolution desired. Executing at full 1080p will likely cause significant latency in the video playback. It is therefore recommended that a lower resolution be specified when experimenting with several trackers (e.g., 480p). This can be helpful when making a rough assessment of the pros and cons of each tracker. However, be aware that the resolution will also potentially affect the actual tracking results, so the final testing you perform should be at a resolution that you expect for your particular application.

`#------------------------------------------------------------------`<br>
`# video_obj = VideoSpec(input_video, '360p',   (205, 170, 110, 60))    `<br>
`# video_obj = VideoSpec(input_video, '480p',   (370, 225, 180, 80))    `<br>
`# video_obj = VideoSpec(input_video, '720p',   (550, 340, 230, 115))   `<br>
`# video_obj = VideoSpec(input_video, '1080p', (820, 510, 420, 180))`<br>
`#------------------------------------------------------------------`<br>

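**Note**: The code cell below should only be executed once, because the tracker objects stored in `tracker_dict` are created only once and are reused by every call to `run_tracker`. If you want to execute it multiple times, you should first re-run the previous code cells in this notebook so that fresh tracker objects are created.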

In [None]:
# Source clip to analyse.
input_video = './race_car.mp4'

# The output file is written to the output directory and named after the prefix.
video_output_prefix = 'test_1x2'
video_output_file_name = f'tracking_analysis_output_videos/{video_output_prefix}.mp4'

# Trackers to compare side by side (keys of tracker_dict).
trackers = ['BOOSTING', 'CSRT']

# Input clip, working resolution and initial bounding box for that resolution.
video_obj = VideoSpec(
    input_video,
    '480p',
    (370, 225, 180, 80),
)

# Track the object with every listed tracker and write the comparison video.
run_tracker(trackers, video_obj, video_output_file_name)

### <font style="color:rgb(50,120,229)">Display the tracking results: 1x2</font>

In [None]:
# Load the 1x2 comparison video (the path the execution cell above writes with
# the 'test_1x2' prefix) and display it in the notebook with width=1000.
clip = VideoFileClip('tracking_analysis_output_videos/test_1x2.mp4')
clip.ipython_display(width=1000)

### <font style="color:rgb(50,120,229)">Display the tracking results: 2x2</font>

In [None]:
# Load the 2x2 comparison video and display it in the notebook with width=1000.
# NOTE(review): no visible cell writes test_2x2.mp4 -- presumably it comes from
# re-running the execution cell with four trackers and the 'test_2x2' prefix,
# or it ships with the downloaded files; confirm.
clip = VideoFileClip('tracking_analysis_output_videos/test_2x2.mp4')
clip.ipython_display(width=1000)

### <font style="color:rgb(50,120,229)">Display the tracking results: 2x3</font>

In [None]:
# Load the 2x3 comparison video and display it in the notebook with width=1000.
# NOTE(review): no visible cell writes test_2x3.mp4 -- presumably it comes from
# re-running the execution cell with six trackers and the 'test_2x3' prefix,
# or it ships with the downloaded files; confirm.
clip = VideoFileClip('tracking_analysis_output_videos/test_2x3.mp4')
clip.ipython_display(width=1000)