In [2]:
# boring technical stuff
import os, sys
import random
import time
import imutils
import math
import numpy as np
import pandas as pd
import skimage.io
import matplotlib
import matplotlib.pyplot as plt
import cv2
from matplotlib.patches import Polygon

### List of trackers to run through for each video file

In [None]:
# Short tracker name -> tracker object, listed in the order they are evaluated.
# Each constructor is called exactly once, here, so every clip processed later
# receives the same instance for a given name.
# NOTE(review): confirm that re-using one instance across clips is intended.
trackers = dict(
    boosting=cv2.TrackerBoosting_create(),
    mil=cv2.TrackerMIL_create(),
    kcf=cv2.TrackerKCF_create(),
    tld=cv2.TrackerTLD_create(),
    medianflow=cv2.TrackerMedianFlow_create(),
    goturn=cv2.TrackerGOTURN_create(),
    mosse=cv2.TrackerMOSSE_create(),
    csrt=cv2.TrackerCSRT_create(),
)
# Candidates that are not wired in yet:
#   're3': re3_tracker.Re3Tracker()
#   'pysot': base_tracker.BaseTracker()

In [None]:
# Directory that holds the raw evaluation clips, and the names of the clips to
# evaluate (each clip is stored as <name>.mp4 inside that directory).
_RAW_CLIP_DIR = '/Users/kierandonnelly/thesis/raw_clips'
_CLIP_NAMES = (
    'Bike02', 'Bike03', 'Bike05', 'Bike07', 'Bike08', 'Bike09',
    'Ski01', 'Ski02', 'Ski03',
    'Snowboard01', 'Sup01', 'Surf01', 'Wake01',
)

# clip name -> path of its video file
clips = {name: _RAW_CLIP_DIR + '/' + name + '.mp4' for name in _CLIP_NAMES}

### Iterate over videos in directory and run tracker

In [None]:
def run_track_test(input1, input2, *extra_inputs):
    '''
    Placeholder for the tracking routine: prints whatever it is given.

    The driver loop calls this with four arguments (tracker name, tracker
    instance, clip path, frame data), so any number of additional positional
    arguments is accepted after the first two.

    Parameters:

    input1: first value to print
    input2: second value to print
    *extra_inputs: any further values to print on the same line

    Returns:

    None
    '''
    print(input1, input2, *extra_inputs)

In [None]:
frame_list = []

# iterate over clips
for clip, clip_loc in clips.items():

    # retrieve ground truth frame data for this clip; the CSV is named after
    # the clip key (e.g. ./frame_data/Bike05.csv)
    frames = pd.read_csv('./frame_data/' + str(clip) + '.csv')

    # iterate over tracker dictionary
    for tracker, tracker_inst in trackers.items():

        # call tracking function
        # need this to output the results: save to CSV from within fn
        # NOTE(review): the function called here must accept these four
        # arguments (tracker name, tracker instance, clip path, frame data).
        run_track_test(tracker, tracker_inst, clip_loc, frames)

### Import ground truth annotation data for each video

In [3]:
# load the annotation file for clip Bike05 and preview its first rows
videos_df = pd.read_csv("./frame_data/Bike05.csv")
videos_df.head()

Unnamed: 0,frame,x1,y1,w,h
0,1.0,0.479388,0.439716,0.035904,0.143026
1,2.0,0.479388,0.439716,0.034574,0.166667
2,3.0,0.478723,0.44208,0.03391,0.14539
3,4.0,0.475399,0.44208,0.038564,0.140662
4,5.0,0.472739,0.443262,0.039229,0.150118


In [4]:
# sample frame number to look up, and the annotation file for clip Bike03
frame_count = 3
frames = pd.read_csv('./frame_data/Bike03.csv')

In [5]:
# preview the first rows of the Bike03 annotations loaded into `frames`
frames.head()

Unnamed: 0,frame,x1,y1,w,h
0,1.0,0.006649,0.178487,0.067819,0.117021
1,2.0,0.018617,0.186761,0.067154,0.112293
2,3.0,0.034574,0.193853,0.064495,0.106383
3,4.0,0.043883,0.198582,0.068484,0.115839
4,5.0,0.057181,0.210402,0.068484,0.105201


In [6]:
# select the row annotated for frame `frame_count`, collapse it with squeeze(),
# then drop the first entry positionally so only the box values remain
init_bb = frames.loc[frames['frame'].eq(frame_count)].squeeze()[1:]
init_bb

x1    0.034574
y1    0.193853
w     0.064495
h     0.106383
Name: 2, dtype: float64

In [9]:
# is there at least one annotation row for frame `frame_count`?
frames['frame'].eq(frame_count).any()

True

In [17]:
# plain Python bool used by the next cell to try out boolean negation
test = False

In [19]:
# `~` is bitwise NOT: on a plain Python bool it yields -1 (for False) or -2
# (for True), both of which are truthy, so `if ~test:` prints for either value.
# `not` performs the intended logical negation.
if not test:
    print('success')

success


### Function to implement tracking on one video sequence

In [None]:
# this runs once per clip per tracker
# counters and lists declared and modified here pertain to one clip
def run_track(tracker_name, tracker_inst, video_file, frames_df, resize=1,
              output_dir='output', metrics_csv='metrics.csv'):
    '''
    Runs a specified tracking algorithm on a specified video file and
    evaluates tracker performance by comparing system output to the
    specified ground truth bounding boxes.

    The annotated video is written to
    <output_dir>/<clip name>_<tracker_name>.mp4 and one row of metrics is
    appended to <metrics_csv> (the header is written only when the file does
    not exist yet, so successive clip/tracker runs accumulate in one file).

    Parameters:

    tracker_name (str): short name of tracking algo
    tracker_inst: instantiation of tracker class; it is used through
        init(frame, bbox) and update(frame) -> (ok, bbox)
    video_file (str): path to video file
    frames_df (pd.DataFrame): DF of ground truth objects in video clip, with
        columns frame, x1, y1, w, h. Box values are multiplied by the frame
        width/height, i.e. they are treated as relative coordinates.
        NOTE(review): `frame` is assumed to be the 1-based index of the
        frame within the video - confirm against the annotation tool.
    resize (int): fraction to which video should be resized (1, 2 or 4).
        1 = unchanged (Def), 2 = 1/2, 4 = 1/4
    output_dir (str): directory for the annotated output video; created if
        missing
    metrics_csv (str): path of the CSV file the metrics row is appended to

    Returns:

    pd.DataFrame: single-row frame holding the metrics of this run

    Raises:

    IOError: if the video cannot be opened or no frame can be read from it
    '''

    # CONSTANTS
    # THRESHOLD_IOU: overlap at or above which a frame counts as "covered"
    # THRESHOLD_DIST: centroid distance (as a fraction of the frame diagonal)
    #                 below which a hypothesis counts as a true positive
    THRESHOLD_IOU, THRESHOLD_DIST = 0.5, 0.2

    # initialise vars
    clip_name = os.path.splitext(os.path.basename(video_file))[0]
    N_objects = len(frames_df.index)  # one annotation row per ground truth object
    N_frames = 0                      # frames read so far == number of current frame
    fn_count, fp_count, fp_thresh_count, tp_count, tn_count, covered_count = 0, 0, 0, 0, 0, 0
    update_count, update_seconds = 0, 0.0
    iou_list, dist_list = [], []
    resolution = None
    tracker, writer = None, None

    # Initialize the video stream
    vs = cv2.VideoCapture(video_file)
    try:
        if not vs.isOpened():
            raise IOError("Could not open video file: " + str(video_file))

        # run until no more frames
        while True:
            # read the next frame from the file
            grabbed, frame = vs.read()

            # if no frame grabbed, reached end of clip
            if not grabbed:
                print("Not grabbed!")
                break

            N_frames += 1

            # resize the frame for faster processing
            # ie if resize is 2, width resized to orig_width/2
            frame = imutils.resize(frame, int(frame.shape[1] / resize))
            height, width = frame.shape[:2]
            # NOTE(review): the 'Resolution' metric is reported as the
            # processed frame size "WxH" - confirm this is the wanted label.
            resolution = str(width) + "x" + str(height)

            # ground truth for this frame, if any (relative values)
            gt_rows = frames_df.loc[frames_df['frame'] == N_frames, ['x1', 'y1', 'w', 'h']]
            object_exists = not gt_rows.empty
            gt_bb = None
            if object_exists:
                gt_rel = gt_rows.iloc[0]
                # multiply with resized frame width and height (absolute values)
                gt_bb = (float(gt_rel['x1']) * width, float(gt_rel['y1']) * height,
                         float(gt_rel['w']) * width, float(gt_rel['h']) * height)

            # initialize the writer once the processed frame size is known
            if writer is None:
                os.makedirs(output_dir, exist_ok=True)
                fourcc = cv2.VideoWriter_fourcc(*"MP4V")
                fps = vs.get(cv2.CAP_PROP_FPS)
                print("FPS = " + str(fps))
                writer = cv2.VideoWriter(
                    os.path.join(output_dir, clip_name + "_" + tracker_name + ".mp4"),
                    fourcc, fps, (width, height), True)

            if tracker is None:
                # the tracker can only start on a frame that has a ground
                # truth box to initialise it with; earlier frames are only
                # copied to the output video
                if object_exists:
                    tracker = tracker_inst
                    tracker.init(frame, tuple(int(round(v)) for v in gt_bb))
            else:
                # update the tracker and grab the tracked object
                started = time.perf_counter()
                tracking, trk_bb = tracker.update(frame)
                update_seconds += time.perf_counter() - started
                update_count += 1

                # will need modifications for trackers outside of OpenCV-8
                if not tracking:
                    if object_exists:
                        # false negative: object present, no hypothesis
                        fn_count += 1
                        cv2.putText(frame, "Tracking failure detected", (100, 80),
                                    cv2.FONT_HERSHEY_SIMPLEX, 0.75, (0, 0, 255), 2)
                    else:
                        # true negative: no object, no hypothesis
                        tn_count += 1
                else:
                    if object_exists:
                        # pass them in (x1, y1, w, h) format, in pixels
                        iou_t, dist_px = compute_iou_dist(gt_bb, trk_bb)

                        # THRESHOLD_DIST is a relative value while the boxes
                        # are in pixels, so express the distance as a fraction
                        # of the frame diagonal; this also keeps it comparable
                        # across `resize` settings.
                        # NOTE(review): confirm the intended normalisation.
                        dist_t = dist_px / math.hypot(width, height)

                        if dist_t < THRESHOLD_DIST:
                            # true positive: hypothesis close to ground truth
                            tp_count += 1
                        else:
                            # false positive: hypothesis too far away
                            fp_thresh_count += 1

                        # completeness: only frames with a measured overlap count
                        if iou_t >= THRESHOLD_IOU:
                            covered_count += 1

                        iou_list.append(iou_t)
                        dist_list.append(dist_t)
                    else:
                        # false positive: hypothesis without an object
                        fp_count += 1

                    # tracking taken place, so draw the hypothesis
                    p1 = (int(trk_bb[0]), int(trk_bb[1]))
                    p2 = (int(trk_bb[0] + trk_bb[2]), int(trk_bb[1] + trk_bb[3]))
                    cv2.rectangle(frame, p1, p2, (255, 0, 0), 2, 1)

            # write the frame to disk
            writer.write(frame)
    finally:
        # always free the capture and flush the output video
        vs.release()
        if writer is not None:
            writer.release()

    if N_frames == 0:
        raise IOError("No frames could be read from: " + str(video_file))

    # average tracker speed over all update() calls
    tracking_fps = update_count / update_seconds if update_seconds > 0 else 0.0

    # call metrics function once per run - returns dictionary of metrics
    metrics = compute_metrics(fp_thresh_count, fp_count, tp_count, tn_count, fn_count,
                              covered_count, dist_list, iou_list, N_objects, N_frames,
                              tracking_fps, clip_name, resolution, tracker_name)

    # column names match the keys of the metrics dictionary
    results_df = pd.DataFrame([metrics],
                              columns=['Clip', 'Resolution', 'Tracker', 'Duration', 'SOTP',
                                       'SOTA', 'Recall', 'Precision', 'FPF', 'SODP', 'MT',
                                       'ML', 'PT', 'FM', 'FPS'])

    # append to CSV so earlier clip/tracker runs are not overwritten
    write_header = not os.path.exists(metrics_csv)
    results_df.to_csv(metrics_csv, mode='a', header=write_header, index=False)

    return results_df

## Evaluation Metrics
This section defines functions to compute the various evaluation metrics detailed in Chapter 3 in the report. The metrics to compute are as follows:
- Recall: Correctly matched detections as proportion of total ground truth objects (of a sequence).
- Precision/N-SODA: Correctly matched detections as a proportion of total detections (of a sequence).
- FPF (also known as FAF): Number of false alarms (incorrect detections) per frame, averaged over a sequence.
- SODP: Average overlap between ground truth and system output.
- SOTA: Combines false negatives and false positives without weighting factors.
- SOTP: Average distance between centroids of ground truth and system output.
- TDE: Distance between the ground-truth annotation and the tracking result.
- MT: The ground-truth trajectory is covered by the tracker output for more than 80% of its length.
- ML: The ground-truth trajectory is covered by the tracker output for less than 20% of its length.
- PT: The ground-truth trajectory is covered by the tracker output for between 20% and 80% of its length.
- FM: Number of times that a ground-truth trajectory is interrupted in the tracking result, normalised over sequence.
- RS: Ratio of tracks which correctly recover from short term occlusion.
- RL: Ratio of tracks which correctly recover from long term occlusion.

### Terminology
- **Success**: aka True Positive. Overlap between ground truth and tracker hypothesis is non-zero and within threshold.
- **Success**: aka True Negative. No object and no hypothesis.
- **Miss**: aka False Negative. No hypothesis from tracker.
- **False Positive**: Overlap between tracker output and ground truth is less than threshold.
- **Distance**: Euclidean distance between centroids of tracker output and ground truth.
- **Covered**: Overlap exceeds THRESHOLD_IOU

### Function to Compute Each Metric

#### Intersection over Union (IoU) and Distance Between Centroids

In [1]:
def compute_iou_dist(box_a, box_b):
    '''
    Computes the Intersection over Union (IoU) of two boxes and the
    Euclidean distance between their centroids.

    Parameters:

    box_a, box_b: boxes in (x1, y1, w, h) format. Any iterable of four
        numbers works (tuple, list, array, pandas Series); both boxes must
        use the same units.

    Returns:

    (iou, dist): iou is in [0, 1] and is 0.0 when the boxes do not overlap or
        when both boxes have zero area; dist is in the units of the inputs.
    '''
    # unpack by iteration rather than by position so label-indexed containers
    # such as a pandas Series are handled as well
    ax1, ay1, aw, ah = (float(v) for v in box_a)
    bx1, by1, bw, bh = (float(v) for v in box_b)

    # compute centroids (x1 + half width, y1 + half height)
    centroid_a = (ax1 + 0.5 * aw, ay1 + 0.5 * ah)
    centroid_b = (bx1 + 0.5 * bw, by1 + 0.5 * bh)

    # (x1, y1, w, h) -> (x1, y1, x2, y2)
    ax2, ay2 = ax1 + aw, ay1 + ah
    bx2, by2 = bx1 + bw, by1 + bh

    # extent of the intersection rectangle. x2/y2 are exclusive edges
    # (x1 + w), so no "+ 1" pixel correction is applied: with it, boxes that
    # merely touch would be reported as overlapping.
    overlap_w = max(0.0, min(ax2, bx2) - max(ax1, bx1))
    overlap_h = max(0.0, min(ay2, by2) - max(ay1, by1))
    area_overlap = overlap_w * overlap_h

    # union = sum of both areas minus the intersection counted twice
    area_union = aw * ah + bw * bh - area_overlap

    # two degenerate (zero-area) boxes have no meaningful overlap
    iou = area_overlap / area_union if area_union > 0 else 0.0

    # distance is hypotenuse of the two centroid coordinate differences
    dist = math.hypot(centroid_b[0] - centroid_a[0], centroid_b[1] - centroid_a[1])

    return iou, dist

#### 1. SOTP - Single Object Tracking Precision, or Average Distance
Average distance between boxes only on frames with both an object and a hypothesis, i.e.:
- `fp_thresh_count`
- `tp_count`

`dist_t` should be forced to zero for frames with any results outside of the above (fp, tn, fn).

In [3]:
def metric_sotp(dist_list, fp_thresh_count, tp_count):
    '''
    SOTP: average centroid distance over the frames that have both a ground
    truth object and a tracker hypothesis.

    Parameters:

    dist_list (list): distances recorded on those frames
    fp_thresh_count (int): matched frames classed as false positive by threshold
    tp_count (int): matched frames classed as true positive

    Returns:

    float: sum of distances divided by (fp_thresh_count + tp_count), or 0.0
        when there is no such frame
    '''
    positives = fp_thresh_count + tp_count

    # no frame with both object and hypothesis: nothing to average
    if positives == 0:
        return 0.0

    return sum(dist_list) / positives

#### 2. SOTA

In [13]:
def metric_sota(fp_thresh_count, fp_count, fn_count, N_objects):
    '''
    SOTA: one minus the share of erroneous frames (both kinds of false
    positive plus false negatives) relative to the number of ground truth
    objects.
    '''
    error_count = fp_thresh_count + fp_count + fn_count
    error_rate = error_count / N_objects
    return 1 - error_rate

#### 3. Recall

In [None]:
def metric_recall(tp_count, N_objects):
    '''
    Recall: true positives as a proportion of the ground truth objects.
    '''
    recall = tp_count / N_objects
    return recall

#### 4. Precision

In [6]:
def metric_precision(tp_count, fp_thresh_count, fp_count):
    '''
    Precision: true positives as a proportion of all tracker detections.

    Named metric_precision (it used to be a second definition of
    metric_recall, which replaced the real recall function and left the
    metric_precision call in compute_metrics unresolved).

    Parameters:

    tp_count (int): true positives
    fp_thresh_count (int): false positives by threshold
    fp_count (int): false positives with no ground truth object

    Returns:

    float: tp / (tp + fp + fp_thresh), or 0.0 when there was no detection
    '''
    detections = tp_count + fp_count + fp_thresh_count

    # tracker never produced a hypothesis
    if detections == 0:
        return 0.0

    return tp_count / detections

#### 5. FPF, False Positives per Frame

In [8]:
def metric_fpf(fp_count, fp_thresh_count, N_frames):
    '''
    FPF: number of false positives per frame, averaged over a sequence.

    Named metric_fpf (it used to be a third definition of metric_recall and
    divided an undefined tp_count by the frame count).

    Parameters:

    fp_count (int): false positives with no ground truth object
    fp_thresh_count (int): false positives by threshold
    N_frames (int): number of frames in the sequence

    Returns:

    float: (fp_count + fp_thresh_count) / N_frames, or 0.0 for an empty
        sequence
    '''
    if N_frames == 0:
        return 0.0

    return (fp_count + fp_thresh_count) / N_frames

#### 6. SODP, Single Object Detection Precision

In [9]:
def metric_sodp(iou_list, N_match):
    '''
    SODP: average overlap (IoU) between ground truth and system output.

    Parameters:

    iou_list (list): IoU values recorded on matched frames
    N_match (int): number of matched frames

    Returns:

    float: sum of IoU values divided by N_match, or 0.0 when N_match is 0
    '''
    # a tracker that never matched the object has no overlap to average
    if N_match == 0:
        return 0.0

    return sum(iou_list) / N_match

#### 7. MT

In [None]:
def metric_mt(completeness):
    '''
    MT flag: 1 when completeness is at least 0.8, otherwise 0.
    '''
    return 1 if completeness >= 0.8 else 0

#### 8. ML

In [11]:
def metric_ml(completeness):
    '''
    ML flag: 1 when the ground truth trajectory is covered for at most 20%
    of its length, otherwise 0.

    The 0.2 boundary belongs to ML, so MT (>= 0.8), PT (between) and ML
    (<= 0.2) split the range without gaps or overlap.
    '''
    ml = 0
    if completeness <= 0.2:
        ml = 1
    return ml

#### 9. PT

In [12]:
def metric_pt(completeness):
    '''
    PT flag: 1 when the ground truth trajectory is covered for between 20%
    and 80% of its length (both bounds exclusive), otherwise 0.

    The bounds are exclusive because 0.8 counts as MT and 0.2 as ML.
    '''
    pt = 0
    if (completeness > 0.2) and (completeness < 0.8):
        pt = 1
    return pt

#### 10. FM, Fragmentation

In [None]:
def metric_fm(fp_thresh_count, fp_count, fn_count, N_objects):
    '''
    FM: erroneous frames (both kinds of false positive plus false negatives)
    divided by the number of ground truth objects.

    NOTE(review): this is the error rate (1 - SOTA), not a count of
    trajectory interruptions - confirm this is the intended definition.
    '''
    error_count = fp_thresh_count + fp_count + fn_count
    return error_count / N_objects

### Metrics Function
This function calculates all metrics and returns them in a dictionary

In [None]:
def compute_metrics(fp_thresh_count, fp_count, tp_count, tn_count, fn_count,
                    covered_count, dist_list, iou_list, N_objects, N_frames,
                    tracking_fps, clip_name, resolution, tracker_name):
    '''
    Calculates all evaluation metrics for one tracker run on one clip.

    Parameters:

    fp_thresh_count (int): false positives by threshold
    fp_count (int): false positives with no ground truth object
    tp_count (int): true positives
    tn_count (int): true negatives (accepted but not used by any metric)
    fn_count (int): false negatives
    covered_count (int): matched frames counted as covered
    dist_list (list): centroid distances on matched frames
    iou_list (list): IoU values on matched frames
    N_objects (int): number of ground truth objects
    N_frames (int): number of frames in the clip
    tracking_fps: tracker speed, stored unchanged under 'FPS'
    clip_name, resolution, tracker_name: labels stored unchanged

    Returns:

    dict: keys Clip, Resolution, Tracker, Duration, SOTP, SOTA, Recall,
        Precision, FPF, SODP, MT, ML, PT, FM, FPS
    '''
    # frames with both a ground truth object and a tracker hypothesis
    N_match = fp_thresh_count + tp_count

    # with no matched frame nothing was covered and there are no distances
    # or overlaps to average, so these values fall back to zero
    completeness = covered_count / N_match if N_match else 0.0
    sotp = metric_sotp(dist_list, fp_thresh_count, tp_count) if N_match else 0.0
    sodp = metric_sodp(iou_list, N_match) if N_match else 0.0

    sota = metric_sota(fp_thresh_count, fp_count, fn_count, N_objects)
    recall = metric_recall(tp_count, N_objects)
    precision = metric_precision(tp_count, fp_thresh_count, fp_count)
    fpf = metric_fpf(fp_count, fp_thresh_count, N_frames)
    mt = metric_mt(completeness)
    ml = metric_ml(completeness)
    pt = metric_pt(completeness)
    fm = metric_fm(fp_thresh_count, fp_count, fn_count, N_objects)

    results_dict = {'Clip': clip_name,
                    'Resolution': resolution,
                    'Tracker': tracker_name,
                    'Duration': N_frames,
                    'SOTP': sotp,
                    'SOTA': sota,
                    'Recall': recall,
                    'Precision': precision,
                    'FPF': fpf,
                    'SODP': sodp,
                    'MT': mt,
                    'ML': ml,
                    'PT': pt,
                    'FM': fm,
                    'FPS': tracking_fps}

    return results_dict