In [1]:
import os, sys
import random
import time
import imutils
import math
import numpy as np
import pandas as pd
import skimage.io
import cv2
from tqdm.auto import trange, tqdm
from re3_tensorflow.tracker import re3_tracker

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Function to implement one tracker on one video

In [2]:
def run_track(tracker_name, tracker_inst, video_name, video_file, frames_df, resize=0):
    '''
    Runs a specified tracking algorithm on a specified video
    file and calls function to calculate evaluation metrics.

    The tracker is initialised with the ground-truth box on the first frame
    read and then updated on every frame (including that first one). Each
    frame is classified as TP / FP / FN / TN against the ground truth and
    written, with the tracked box drawn on it, to
    './output/<video_name>_<tracker_name>_<width>.mp4'.

    Parameters:

    tracker_name (str): short name of tracking algo. 're3' selects the Re3
        code path (tracker.track); any other name is driven through
        tracker.init / tracker.update.
    tracker_inst: instantiation of tracker class
    video_name (str): short name of video clip (used for the output file
        name and as the clip name in the returned metrics)
    video_file (str): path to video file
    frames_df (pd.DataFrame): DF of ground truth objects in video clip. Needs
        a 'frame' column; the values after the first column of a row are
        scaled by (width, height, width, height), so they are presumably
        relative (x, y, w, h) - TODO confirm against the CSV schema.
    resize (int): target frame width in pixels; the height is set to 9/16 of
        it. 0 (Def) keeps the native frame size.

    Returns:

    dict: metrics dictionary produced by compute_metrics
    '''

    # CONSTANTS
    THRESHOLD_IOU, THRESHOLD_DIST = 0.4, 0.1

    # initialise vars
    frame_count = frames_df['frame'].min()
    fn_count, fp_count, fp_thresh_count, tp_count, tn_count, covered_count = 0, 0, 0, 0, 0, 0
    iou_list, dist_list = [], []
    tracker = None
    writer = None
    re3 = False
    new_width = None  # stays None if the clip yields no frames

    # number of rows in frames determine number of objects, N_objects
    # (loop-invariant, so computed once up front)
    N_objects = len(frames_df.index)

    # cv2.VideoWriter does not create missing directories, so make sure the
    # output folder exists before the first frame is written
    os.makedirs('./output', exist_ok=True)

    # Initialize the video stream and pointer to output video file
    vs = cv2.VideoCapture(video_file)
    total_frames = int(vs.get(cv2.CAP_PROP_FRAME_COUNT))

    # start clock for FPS
    tracker_time = start_timer()

    try:
        for _ in tqdm(range(total_frames), desc=tracker_name + ' progress'):
            # drawn rect flag
            drawn = False

            # read the next frame from the file
            grabbed, frame = vs.read()

            # if no frame grabbed, reached end of clip
            if not grabbed:
                print ('[INFO] End of clip')
                break

            # whether there is a ground truth object in the frame; coerced to
            # a plain bool so that `not` can be used safely below
            object_exists = bool((frames_df['frame'] == frame_count).any())

            # optionally resize the frame for faster processing
            if resize != 0:
                frame = cv2.resize(frame, (resize, int(resize*(9/16))))

            new_width = frame.shape[1]
            new_height = frame.shape[0]

            # bb for ground truth obj, scaled from relative to pixel values
            if object_exists:
                gt_row = frames_df.loc[frames_df['frame'] == frame_count].squeeze()
                gt_bb = gt_row[1:]*(new_width, new_height, new_width, new_height)
            else:
                gt_bb = 0

            # initialize the writer
            if writer is None:
                print()
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                fps = vs.get(cv2.CAP_PROP_FPS)
                writer = cv2.VideoWriter('./output/' + video_name + '_' + tracker_name + \
                                         '_' + str(new_width) + '.mp4', fourcc, fps, \
                                         (frame.shape[1], frame.shape[0]), True)

            if tracker is None:
                # create tracker object
                tracker = tracker_inst

                # initialise with bounding box
                if tracker_name == 're3': # special case for re3
                    # NB: format is (x1, y1, x2, y2) for re3; frame channels
                    # are reversed before being handed to the tracker
                    re3 = True
                    tracker.track('video', frame[:,:,::-1],
                                  (gt_bb[0], gt_bb[1], gt_bb[0] + gt_bb[2], gt_bb[1] + gt_bb[3]))

                else: # normal case for opencv trackers
                    tracker.init(frame, tuple(gt_bb))

                # first frame given, so always draw it
                drawn = draw_rect(frame, gt_bb, frame_count, drawn, False)

            # update the tracker and grab the tracked object
            # (this also runs on the initialisation frame)
            if re3:
                tracking, trk_bb = tracker.track('video', frame[:,:,::-1])
                # convert from (x1, y1, x2, y2) to (x1, y1, w, h)
                trk_bb = (trk_bb[0], trk_bb[1], trk_bb[2] - trk_bb[0], trk_bb[3] - trk_bb[1])
            else:
                tracking, trk_bb = tracker.update(frame)

            # will need modifications for trackers outside of OpenCV-8
            if not tracking:
                if object_exists:
                    # false negative
                    fn_count += 1
                else:
                    # true negative
                    tn_count += 1

            else:
                if object_exists:
                    # pass them in (x1, y1, w, h) format
                    iou_t, dist_rel_t = compute_iou_dist(gt_bb, trk_bb, new_width, new_height)

                    # completeness metrics
                    if iou_t >= THRESHOLD_IOU:
                        covered_count += 1

                    if (dist_rel_t < THRESHOLD_DIST):
                        # true positive
                        tp_count += 1

                    elif (dist_rel_t >= THRESHOLD_DIST):
                        # false positive
                        fp_thresh_count += 1

                    iou_list.append(iou_t)
                    dist_list.append(dist_rel_t)

                else:
                    # false positive
                    fp_count += 1

                # tracking taken place, so draw frame
                drawn = draw_rect(frame, trk_bb, frame_count, drawn, re3)

            # increment frame counter
            frame_count += 1

            # write the frame to disk
            writer.write(frame)

        # finalise FPS
        tracking_fps = stop_and_report(frame_count, tracker_time)

    finally:
        # always hand the capture and writer back, even if tracking raised
        vs.release()
        if writer is not None:
            writer.release()

    # call metrics function - returns dictionary of metrics
    # (video_name, not the notebook-level `clip_name`, identifies the clip)
    metrics = compute_metrics(fp_thresh_count, fp_count, tp_count, tn_count, fn_count, \
                              covered_count, dist_list, iou_list, N_objects, frame_count, \
                              tracking_fps, video_name, str(new_width), tracker_name, \
                              THRESHOLD_IOU, THRESHOLD_DIST)

    return metrics

In [3]:
def draw_rect(frame, box, frame_count, drawn, re3):
    """
    Draw `box` onto `frame` (in place) unless a rectangle was already drawn.

    Parameters:

    frame: image the rectangle is drawn on
    box: (x, y, w, h) sequence; values are truncated to int
    frame_count: not used by this function
    drawn: flag telling whether a rectangle has already been drawn on this frame
    re3: not used by this function

    Returns:

    bool: always True, to be stored back into the caller's `drawn` flag
    """
    top_left = (int(box[0]), int(box[1]))
    bottom_right = (int(box[0] + box[2]), int(box[1] + box[3]))

    # explicit comparison with True is kept on purpose: only an exact True
    # value suppresses the drawing
    if drawn != True:
        cv2.rectangle(frame, top_left, bottom_right, (255, 0, 0), 2, 1)

    return True

In [4]:
def start_timer():
    """Return the current time.time() reading, to be passed to stop_and_report later."""
    started_at = time.time()
    return started_at

In [5]:
def stop_and_report(frames, start_time):
    """
    Compute the average frames-per-second since `start_time`.

    Parameters:

    frames: number of frames processed
    start_time: time.time() reading taken when processing started

    Returns:

    float: frames divided by elapsed seconds, or NaN when no measurable time
    has elapsed (avoids a ZeroDivisionError on very short runs or coarse clocks)
    """
    elapsed = time.time() - start_time
    if elapsed == 0:
        return float('nan')
    return frames/elapsed

## Evaluation Metrics
This section defines functions to compute the various evaluation metrics detailed in Chapter 3 in the report. The metrics to compute are as follows:
- Recall: Correctly matched detections as proportion of total ground truth objects (of a sequence).
- Precision/N-SODA: Correctly matched detections as a proportion of total detections (of a sequence).
- FAF: Number of false alarms (incorrect detections) per frame averaged over a sequence.
- SODP: Average overlap between ground truth and system output.
- SOTA: Combines false negatives and false positives without weighting factors.
- SOTP: Average distance between centroids of ground truth and system output.
- TDE: Distance between the ground-truth annotation and the tracking result.
- MT: The ground-truth trajectory is covered by the tracker output for at least 80% of its length.
- ML: The ground-truth trajectory is covered by the tracker output for at most 20% of its length.
- PT: The ground-truth trajectory is covered by the tracker output for strictly between 20% and 80% of its length.
- FM: Number of times that a ground-truth trajectory is interrupted in the tracking result, normalised over sequence.
- RS: Ratio of tracks which correctly recover from short term occlusion.
- RL: Ratio of tracks which correctly recover from long term occlusion.

### Terminology
- **True Positive**: Object and hypothesis both exist, and the frame-normalised distance between their centroids is below `THRESHOLD_DIST`.
- **True Negative**: No object and no hypothesis.
- **False Negative**: No hypothesis from tracker but object exists.
- **False Positive**: Hypothesis but no object exists.
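- **False Positive - Exceeds Threshold**: Object and hypothesis both exist, but the frame-normalised distance between their centroids is at least `THRESHOLD_DIST`.
- **Distance**: Euclidean distance between centroids of tracker output and ground truth, with the x offset divided by the frame width and the y offset divided by the frame height.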
- **Covered**: Overlap (IoU) between tracker output and ground truth is at least `THRESHOLD_IOU`.

### Function to Compute Each Metric

#### Intersection over Union (IoU) and Distance Between Centroids

In [6]:
def compute_iou_dist(box_a, box_b, frame_width, frame_height):
    """
    Compute the intersection over union of two boxes and the relative
    distance between their centroids.

    Parameters:

    box_a, box_b: boxes in (x1, y1, w, h) format
    frame_width, frame_height: frame size used to normalise the x and y
        centroid offsets respectively

    Returns:

    tuple: (iou, dist_rel), where dist_rel is the hypotenuse of the two
    normalised centroid offsets
    """
    a_x1, a_y1, a_w, a_h = box_a[0], box_a[1], box_a[2], box_a[3]
    b_x1, b_y1, b_w, b_h = box_b[0], box_b[1], box_b[2], box_b[3]

    # centroids: top-left corner plus half the box size
    a_cx, a_cy = a_x1 + 0.5*a_w, a_y1 + 0.5*a_h
    b_cx, b_cy = b_x1 + 0.5*b_w, b_y1 + 0.5*b_h

    # bottom-right corners, i.e. (x1, y1, w, h) -> (x1, y1, x2, y2)
    a_x2, a_y2 = a_x1 + a_w, a_y1 + a_h
    b_x2, b_y2 = b_x1 + b_w, b_y1 + b_h

    # extent of the intersection rectangle, clamped at zero when disjoint;
    # every extent in this function is measured as (far - near + 1)
    inter_w = max(0, min(a_x2, b_x2) - max(a_x1, b_x1) + 1)
    inter_h = max(0, min(a_y2, b_y2) - max(a_y1, b_y1) + 1)
    area_overlap = inter_w * inter_h

    # areas of the two individual boxes
    area_box_a = (a_x2 - a_x1 + 1) * (a_y2 - a_y1 + 1)
    area_box_b = (b_x2 - b_x1 + 1) * (b_y2 - b_y1 + 1)

    # union = sum of both areas minus the part counted twice
    area_union = float(area_box_a + area_box_b - area_overlap)
    iou = area_overlap / area_union

    # centroid offsets as fractions of the frame size
    offset_x = (b_cx - a_cx)/frame_width
    offset_y = (b_cy - a_cy)/frame_height

    return iou, math.hypot(offset_x, offset_y)

#### 1. SOTP - Single Object Tracking Precision, or Average Distance
Average distance between boxes only on frames with both an object and a hypothesis, i.e.:
- `fp_thresh_count`
- `tp_count`

`dist_t` should be forced to zero for frames with any results outside of the above (fp, tn, fn).

In [7]:
def metric_sotp(dist_list, N_match):
    """
    Average centroid distance over matched frames.

    Returns sum(dist_list) / N_match, or NaN when N_match is 0.
    """
    total_dist = sum(dist_list)
    if N_match != 0:
        return total_dist/N_match
    return np.nan

#### 2. SOTA

In [8]:
def metric_sota(fp_thresh_count, fp_count, fn_count, N_objects):
    """
    Tracking accuracy: one minus the share of ground-truth objects that were
    either missed (fn_count) or tracked beyond the distance threshold
    (fp_thresh_count). `fp_count` is accepted but does not enter the result.
    """
    error_rate = (fp_thresh_count + fn_count)/(N_objects)
    return 1 - error_rate

#### 3. Recall

In [9]:
def metric_recall(tp_count, N_objects):
    """Recall: true positives as a fraction of the number of ground-truth objects."""
    recall = tp_count/N_objects
    return recall

#### 4. Precision

In [10]:
def metric_precision(tp_count, fp_thresh_count, fp_count):
    """
    Precision: true positives as a fraction of all detections (true positives
    plus both kinds of false positive). Returns 0 when there are no true
    positives, which also covers the no-detections case.
    """
    if tp_count != 0:
        total_detections = tp_count + fp_count + fp_thresh_count
        return tp_count/total_detections
    return 0

#### 5. FPF, False Positives per Frame

In [11]:
def metric_fpf(fp_count, fp_thresh_count, N_frames):
    """False positives per frame: both kinds of false positive divided by N_frames."""
    false_positives = fp_count + fp_thresh_count
    return (false_positives)/N_frames

#### 6. SODP, Single Object Detection Precision

In [12]:
def metric_sodp(iou_list, N_match):
    """
    Average overlap (IoU) over matched frames.

    Returns sum(iou_list) / N_match, or 0 when N_match is 0.
    """
    total_iou = sum(iou_list)
    if N_match == 0:
        return 0
    return total_iou/N_match

#### 7. MT

In [13]:
def metric_mt(completeness):
    """Mostly tracked flag: 1 when completeness is at least 0.8, otherwise 0."""
    if completeness >= 0.8:
        return 1
    return 0

#### 8. PT

In [14]:
def metric_pt(completeness):
    """Partially tracked flag: 1 when completeness is strictly between 0.2 and 0.8, otherwise 0."""
    if (completeness > 0.2) and (completeness < 0.8):
        return 1
    return 0

#### 9. ML

In [15]:
def metric_ml(completeness):
    """Mostly lost flag: 1 when completeness is at most 0.2, otherwise 0."""
    if completeness <= 0.2:
        return 1
    return 0

#### 10. FM, Fragmentation

In [16]:
def metric_fm(fp_thresh_count, fp_count, fn_count, N_objects):
    """
    Fragmentation: threshold false positives plus false negatives, divided by
    the number of ground-truth objects. `fp_count` is accepted but does not
    enter the result.
    """
    interruptions = fp_thresh_count + fn_count
    return (interruptions)/(N_objects)

### Metrics Function
This function calculates all metrics and returns them in a dictionary

In [17]:
def compute_metrics(fp_thresh_count, fp_count, tp_count, tn_count, fn_count,
                    covered_count, dist_list, iou_list, N_objects, N_frames,
                    tracking_fps, clip_name, width, tracker_name, THRESHOLD_IOU,
                    THRESHOLD_DIST):
    """
    Derive every evaluation metric for one tracker run and collect them,
    together with the raw counts and run identifiers, in a dictionary.

    N_match is the number of frames with both an object and a hypothesis
    (true positives plus threshold false positives); completeness is the
    share of those frames counted as covered, or 0 when there are none.

    Returns:

    dict: one entry per metric / count, keyed by column name
    """
    N_match = fp_thresh_count + tp_count

    if N_match != 0:
        completeness = covered_count/N_match
    else:
        completeness = 0

    # metrics are evaluated in the same order in which they appear in the
    # dictionary; ratio metrics are rounded to 4 places, the rest to 2
    results_dict = {
        'Clip': clip_name,
        'Res': width,
        'Tracker': tracker_name,
        'IOU_Thresh': THRESHOLD_IOU,
        'Dist_Thresh': THRESHOLD_DIST,
        'SOTP': np.round(metric_sotp(dist_list, N_match), 4),
        'SOTA': np.round(metric_sota(fp_thresh_count, fp_count, fn_count, N_objects), 4),
        'Recall': np.round(metric_recall(tp_count, N_objects), 4),
        'Precision': np.round(metric_precision(tp_count, fp_thresh_count, fp_count), 4),
        'FPF': np.round(metric_fpf(fp_count, fp_thresh_count, N_frames), 4),
        'SODP': np.round(metric_sodp(iou_list, N_match), 4),
        'MT': metric_mt(completeness),
        'PT': metric_pt(completeness),
        'ML': metric_ml(completeness),
        'Completeness': np.round(completeness, 2),
        'FM': np.round(metric_fm(fp_thresh_count, fp_count, fn_count, N_objects), 2),
        'FPS': np.round(tracking_fps, 2),
        'fp_count': fp_count,
        'fp_thresh_count': fp_thresh_count,
        'tp_count': tp_count,
        'fn_count': fn_count,
        'tn_count': tn_count,
        'N_objects': N_objects,
        'N_frames': N_frames,
        'N_match': N_match,
    }

    return results_dict

### List of trackers to run through for each video file

In [18]:
%%capture
# Trackers to evaluate, keyed by the short name that is passed to run_track()
# as `tracker_name` (the key 're3' switches run_track onto its Re3 code path).
# Only Re3 is enabled here; the commented-out entries below are alternative
# trackers that are currently disabled.
trackers = {
            're3': re3_tracker.Re3Tracker()}
#             'mil': cv2.TrackerMIL_create()}
#             'boosting': cv2.TrackerBoosting_create(),
#             'kcf': cv2.TrackerKCF_create(),
#             'tld': cv2.TrackerTLD_create(),
#             'medianflow': cv2.TrackerMedianFlow_create(),
#             'goturn': cv2.TrackerGOTURN_create(),
#             'mosse': cv2.TrackerMOSSE_create(),
#             'csrt': cv2.TrackerCSRT_create()}
#                 # 'pysot': base_tracker.BaseTracker()}

INFO:tensorflow:Restoring parameters from /Users/kierandonnelly/thesis/re3_tensorflow/logs/checkpoints/model.ckpt-260946


In [19]:
# Directory holding the raw video clips. Defaults to the original location;
# set the RAW_CLIPS_DIR environment variable to run on another machine.
CLIP_DIR = os.environ.get('RAW_CLIPS_DIR', '/Users/kierandonnelly/thesis/raw_clips')

# Short names of the clips to evaluate, in run order. Each name maps to
# '<CLIP_DIR>/<name>.mp4'. To run a single clip (or a subset), trim this
# list, e.g. CLIP_NAMES = ['Bike02'].
CLIP_NAMES = ['Bike02', 'Bike03', 'Bike04', 'Bike05', 'Bike07', 'Bike08', 'Bike09',
              'Ski01', 'Ski02', 'Ski03', 'Ski04',
              'Snowboard01', 'Sup01', 'Surf01', 'Wake01',
              'Car01', 'Human01', 'Slalom01']

# clip short name -> path to its video file
clips = {name: CLIP_DIR + '/' + name + '.mp4' for name in CLIP_NAMES}

### Iterate over videos in directory and run tracker

In [20]:
metrics_list = []
resize = 0  # 0 keeps the native frame size (see run_track)

# iterate over clips
for clip_name, clip_loc in clips.items():

    # retrieve frame data (ground truth) for clip
    frames = pd.read_csv('./frame_data/' + str(clip_name) + '.csv', sep=';')

    # iterate over tracker dictionary
    for tracker_name, tracker_inst in tqdm(trackers.items(), desc=clip_name + ' progress: '):

        # call tracking function - returns one metrics dict per (clip, tracker)
        metrics = run_track(tracker_name, tracker_inst, clip_name, clip_loc, frames, resize)

        metrics_list.append(metrics)

print ('[INFO] Finished')

# convert list of metrics dicts to df
results_df = pd.DataFrame(metrics_list, columns=['Clip', 'Res', 'Tracker', 'IOU_Thresh', 'Dist_Thresh', 'SOTP',\
                                                 'SOTA', 'Recall','Precision', 'FPF', 'SODP', 'MT', 'PT', 'ML',\
                                                 'Completeness','FM', 'FPS', 'fp_count', 'fp_thresh_count',\
                                                 'tp_count','fn_count', 'tn_count', 'N_objects', 'N_frames', 'N_match'])

res = 'native' if resize == 0 else resize

# The CSV holds the results of every clip that was run, so name it after the
# clip only when exactly one clip was processed. Relying on the loop variable
# would label a multi-clip file with the last clip's name and would fail with
# a NameError when `clips` is empty.
clip_label = next(iter(clips)) if len(clips) == 1 else 'all'

# save to CSV
results_df.to_csv('metrics_' + clip_label + '_' + str(res)  + '.csv', index=False)

HBox(children=(IntProgress(value=0, description='Bike02 progress: ', max=1, style=ProgressStyle(description_wi…

HBox(children=(IntProgress(value=0, description='re3 progress', max=98, style=ProgressStyle(description_width=…


Current tracking speed:   4.993 FPS
Current image read speed: 117.468 FPS
Mean tracking speed:      0.000 FPS





HBox(children=(IntProgress(value=0, description='Bike03 progress: ', max=1, style=ProgressStyle(description_wi…

HBox(children=(IntProgress(value=0, description='re3 progress', max=92, style=ProgressStyle(description_width=…


Current tracking speed:   18.344 FPS
Current image read speed: 134.774 FPS
Mean tracking speed:      17.062 FPS

[INFO] End of clip

[INFO] Finished
