# Helmet Mapping + Deepsort

This notebook attempts to:

- make the mapping step more principled by basing it on projections and Hungarian-algorithm assignment,
- find good initial parameter values with the basin-hopping algorithm, and
- refine the parameters locally on consecutive frames, guided by a Kalman filter.

It builds on the earlier work of [Fireflies](https://www.kaggle.com/firefliesqn/tuning-deepsort-helmet-mapping).

In [None]:
# Install helmet-assignment helper code
!pip install ../input/helmet-assignment-helpers/helmet-assignment-main/ > /dev/null 2>&1
from helmet_assignment.score import NFLAssignmentScorer, check_submission
from helmet_assignment.features import add_track_features
from helmet_assignment.video import video_with_predictions
from IPython.display import Video, display

In [None]:
import numpy as np
import pandas as pd
import itertools
import glob
import os
import sys
import torch
import cv2
import traceback
import time
from sklearn.metrics import accuracy_score
from tqdm.auto import tqdm
from multiprocessing import Pool
from matplotlib import pyplot as plt
import random
import torchvision
import shutil
from joblib import Parallel, delayed
from scipy.spatial.transform import Rotation
from math import pi, ceil, sqrt
from scipy.spatial import distance_matrix
from scipy.optimize import linear_sum_assignment
from statistics import mode
from sklearn.cluster import k_means
import importlib.util

## Settings and loading data

Note I've extracted `max_iter`, `DIG_STEP` and `DIG_MAX` to the top for easy experimentation. I've also modified the code to run in debug mode if running on the public test set.

In [None]:
n_test_videos = len(os.listdir('../input/nfl-health-and-safety-helmet-assignment/test/'))
# Debug mode is on exactly when the test folder holds 6 videos (the public
# test set); during submission the count differs and debug is off.
debug = (n_test_videos == 6)

# Configurables
n_debug_samples = 1
RANDOM_STATE = 42
CONF_THRE = 0.4
max_iter = 1000
DIG_STEP = 3
DIG_MAX = DIG_STEP*10

# Read in the data.
BASE_DIR = '../input/nfl-health-and-safety-helmet-assignment'

labels = pd.read_csv(f'{BASE_DIR}/train_labels.csv')
# Debug runs read the training files; submission runs read the test files.
tracking_csv, helmets_csv = {
    True: (f'{BASE_DIR}/train_player_tracking.csv',
           f'{BASE_DIR}/train_baseline_helmets.csv'),
    False: (f'{BASE_DIR}/test_player_tracking.csv',
            f'{BASE_DIR}/test_baseline_helmets.csv'),
}[debug]
tracking = pd.read_csv(tracking_csv)
helmets = pd.read_csv(helmets_csv)
# The frame number is the last underscore-separated token of `video_frame`.
helmets['frame'] = helmets['video_frame'].map(
    lambda video_frame: int(video_frame.rsplit('_', 1)[-1]))
tracking = add_track_features(tracking)

In [None]:
def add_cols(df):
    """Add `game_play` (and `video` if absent) columns derived from `video_frame`.

    `video_frame` is split on underscores: the first two tokens form
    `game_play`; the first three tokens plus '.mp4' form `video`.
    The frame is modified in place and also returned.
    """
    tokens = df['video_frame'].str.split('_')
    df['game_play'] = tokens.str[:2].str.join('_')
    has_video = 'video' in df.columns
    if not has_video:
        df['video'] = tokens.str[:3].str.join('_') + '.mp4'
    return df
helmets = add_cols(helmets)
if debug:
    labels = add_cols(labels)
    # Select `n_debug_samples` worth of videos to debug with
    sample_videos = (
        labels['video']
        .drop_duplicates()
        .sample(n_debug_samples, random_state=RANDOM_STATE)
        .tolist()
    )
    # A game_play is the first two underscore-separated tokens of the video name.
    sample_gameplays = ['_'.join(video.split('_')[:2]) for video in sample_videos]
    # Restrict every table to the sampled videos / plays.
    in_sampled_plays = tracking['game_play'].isin(sample_gameplays)
    tracking = tracking[in_sampled_plays]
    helmets = helmets[helmets['video'].isin(sample_videos)]
    labels = labels[labels['video'].isin(sample_videos)]
tracking.shape, helmets.shape, labels.shape

In [None]:
def find_nearest(tracking, value):
    """Return the tracking rows for video frame `value`, interpolating if needed.

    Parameters
    ----------
    tracking : DataFrame with at least `est_frame` and `player` columns, plus
        the numeric columns 'x', 'y', 'a', 'dir', 's', 'o' used for interpolation.
    value : frame number (anything `int()` accepts).

    Returns
    -------
    DataFrame
        * exact hit: the rows of that frame with a fresh 0..n-1 index;
        * `value` before the first or after the last tracked frame: the rows of
          the first / last tracked frame, unmodified (original index kept);
        * otherwise: rows linearly interpolated, per player, between the two
          tracked frames that bracket `value`, with `est_frame` set to `value`.
          Players missing from either bracketing frame are dropped.

    Raises
    ------
    ValueError
        If `tracking` is empty (there is no nearest frame).
    """
    value = int(value)
    est_frames = np.asarray(tracking['est_frame']).astype(int)
    unique_frames = np.unique(est_frames)
    idx = int(np.argmin(np.abs(unique_frames - value)))
    nearest = unique_frames[idx]

    def rows_at(frame_no):
        return tracking[tracking['est_frame'] == frame_no]

    if value == nearest:
        return rows_at(nearest).reset_index(drop=True)

    if value > nearest:
        # Nothing tracked after `value`: fall back to the last tracked frame.
        if idx + 1 >= len(unique_frames):
            return rows_at(nearest)
        curr_frame = rows_at(nearest)
        next_frame = rows_at(unique_frames[idx + 1])
    else:
        # Nothing tracked before `value`: fall back to the first tracked frame.
        # This must be an explicit check: `unique_frames[idx - 1]` with idx == 0
        # silently wraps around to the LAST frame instead of raising IndexError.
        if idx == 0:
            return rows_at(nearest)
        curr_frame = rows_at(unique_frames[idx - 1])
        next_frame = rows_at(nearest)

    # Align both frames on the player so the arithmetic below is per player.
    next_frame = next_frame.set_index('player')
    curr_frame = curr_frame.set_index('player')

    curr_est = curr_frame.est_frame.iloc[0]
    diff = next_frame.est_frame.iloc[0] - curr_est
    cols = ['x', 'y', 'a', 'dir', 's', 'o', 'est_frame']
    if diff != 0:
        speed = (next_frame[cols] - curr_frame[cols]) / diff
    else:
        speed = 0
    ret = next_frame.copy()
    ret[cols] = curr_frame[cols] + (value - curr_est) * speed
    # Players absent from one of the two frames come out as NaN; drop them.
    ret = ret.dropna(axis=0, subset=['est_frame'])
    ret['est_frame'] = ret['est_frame'].astype(int)
    return ret.reset_index()


def norm_arr(a):
    """Min-max normalise `a`: subtract its minimum, then divide by the new maximum."""
    shifted = a - a.min()
    return shifted / shifted.max()
    
def dist(a1, a2):
    """Return the norm (`np.linalg.norm` default) of the element-wise difference `a1 - a2`."""
    delta = a1 - a2
    return np.linalg.norm(delta)

def dist_for_different_len(a1, a2):
    """Best distance between `a2` and `a1` after deleting surplus entries of `a1`.

    `a1` must be at least as long as `a2`. When the lengths match, the plain
    distance is returned with an empty deletion tuple. Otherwise every way of
    deleting `len(a1) - len(a2)` positions from `a1` is a candidate; if there
    are more than the module-level `max_iter` candidates, a random sample of
    `max_iter` of them is tried instead (so the result is then not guaranteed
    optimal, nor deterministic unless `random` is seeded).

    Returns
    -------
    (min_dist, delete_idx)
        `delete_idx` is the tuple of positions of `a1` whose removal gave
        `min_dist`. It is `None` only if no candidate produced a comparable
        (non-NaN) distance.
    """
    assert len(a1) >= len(a2), f'{len(a1)}, {len(a2)}'
    len_diff = len(a1) - len(a2)
    if len_diff == 0:
        return dist(a1, a2), ()

    del_list = list(itertools.combinations(range(len(a1)), len_diff))
    if len(del_list) > max_iter:
        del_list = random.sample(del_list, max_iter)

    # Start from +inf rather than a magic number so that a winner is always
    # recorded, however large the distances are.
    min_dist = np.inf
    min_delete_idx = None
    for delete_idx in del_list:
        this_dist = dist(np.delete(a1, delete_idx), a2)
        if this_dist < min_dist:
            min_dist = this_dist
            min_delete_idx = delete_idx

    return min_dist, min_delete_idx
        
def rotate_arr(u, t, deg=True):
    """Rotate `u` counter-clockwise by angle `t` using a 2x2 rotation matrix.

    `t` is converted from degrees to radians when `deg == True`; otherwise it
    is used as given. `u` is left-multiplied by the matrix via `np.dot`.
    """
    angle = np.deg2rad(t) if deg == True else t
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    rotation = np.array([[cos_t, -sin_t],
                         [sin_t,  cos_t]])
    return np.dot(rotation, u)

def dist_rot(tracking_df, a2):
    """Find the field rotation whose sorted x-coordinates best match `a2`.

    The (x, y) positions in `tracking_df` are rotated by every angle from
    `-DIG_MAX` to `DIG_MAX` degrees in steps of `DIG_STEP`. For each angle the
    sorted rotated x values are compared with `a2` through
    `dist_for_different_len`, which may drop surplus players.

    Returns
    -------
    (min_dist, players)
        `min_dist` is the best distance found; `players` are the `player`
        values ordered by rotated x at the best angle, with the dropped
        players removed.
    """
    tracking_df = tracking_df.sort_values('x')
    x = tracking_df['x']
    y = tracking_df['y']
    # +inf (not a magic number) so the first comparable candidate always wins
    # and `min_x` / `min_idx` are populated regardless of the distance scale.
    min_dist = np.inf
    min_idx = None
    min_x = None
    for dig in range(-DIG_MAX, DIG_MAX + 1, DIG_STEP):
        arr = rotate_arr(np.array((x, y)), dig)
        this_dist, this_idx = dist_for_different_len(np.sort(arr[0]), a2)
        if this_dist < min_dist:
            min_dist = this_dist
            min_idx = this_idx
            min_x = arr[0]
    # `min_idx` indexes the players sorted by rotated x, so sort the same way
    # before deleting.
    tracking_df['x_rot'] = min_x
    player_arr = tracking_df.sort_values('x_rot')['player'].values
    players = np.delete(player_arr, min_idx)
    return min_dist, players

def dist_matrix(points, dense_view=True):
    """Pairwise Euclidean distances between 2-D points.

    Each point is encoded as a complex number so a pairwise difference's
    magnitude is the distance. With `dense_view` the strictly-upper-triangular
    entries are returned as a flat array; otherwise the full square matrix.
    """
    as_complex = np.array([complex(pt[0], pt[1]) for pt in points])
    pairwise = np.abs(as_complex[..., np.newaxis] - as_complex)
    if not dense_view:
        return pairwise
    upper = np.triu_indices(len(as_complex), 1)
    return pairwise[upper]


In [None]:
def mapping_df_fallback(tracking, df, previous_mapped=None):
    """Fallback helmet-to-player mapping based on 1-D ordering along the image x axis.

    The horizontal centres of the helmet boxes in `df` (one video frame) are
    matched against rotated tracking x-coordinates via `dist_rot`, once in
    left-to-right order and once mirrored; the orientation with the smaller
    distance wins.

    Parameters
    ----------
    tracking : full tracking table (needs `gameKey`, `playID` and the columns
        used by `find_nearest`).
    df : helmet boxes of a single frame; `video_frame` of the first row
        identifies game, play, view and frame. Not modified.
    previous_mapped : accepted for signature compatibility; unused here.

    Returns
    -------
    (mapped, this_tracking, info, unmatched)
        `mapped` is the kept boxes (including the `center_h_p` / `center_h_m`
        helper columns) with a `label` column; `this_tracking` is the tracking
        rows used for this frame; `info` is `{'fallback_mapping_used': True}`;
        `unmatched` are tracking rows whose player received no box.
    """
    gameKey, playID, view, frame = df.video_frame.iloc[0].split('_')
    gameKey = int(gameKey)
    playID = int(playID)
    frame = int(frame)
    this_tracking = tracking[(tracking['gameKey'] == gameKey) & (tracking['playID'] == playID)]
    # Copy: the Endzone swap below must not write into a slice of `tracking`.
    this_tracking = find_nearest(this_tracking, frame).copy()
    len_this_tracking = len(this_tracking)

    # Copy: the helper columns must not leak into the caller's frame.
    df = df.copy()
    df['center_h_p'] = (df['left'] + df['width'] / 2).astype(int)
    df['center_h_m'] = (df['left'] + df['width'] / 2).astype(int) * -1
    if 'conf' in df.columns:
        df = df[df['conf'] > CONF_THRE].copy()
    # There cannot be more helmets than tracked players; keep the trailing rows.
    if len(df) > len_this_tracking:
        df = df.tail(len_this_tracking)
    df_p = df.sort_values('center_h_p').copy()
    df_m = df.sort_values('center_h_m').copy()

    if view == 'Endzone':
        # For the Endzone view the field axes are swapped before matching.
        this_tracking['x'], this_tracking['y'] = this_tracking['y'].copy(), this_tracking['x'].copy()
    a2_p = df_p['center_h_p'].values
    a2_m = df_m['center_h_m'].values

    min_dist_p, players_p = dist_rot(this_tracking, a2_p)
    min_dist_m, players_m = dist_rot(this_tracking, a2_m)
    if min_dist_p < min_dist_m:
        players = players_p
        tgt_df = df_p
    else:
        players = players_m
        tgt_df = df_m
    # `players` is ordered like the sorted boxes, so assignment is positional.
    tgt_df['label'] = players
    unmatched = this_tracking[~this_tracking['player'].isin(tgt_df['label'])]
    return tgt_df[df.columns.tolist() + ['label']], this_tracking, {'fallback_mapping_used':True}, unmatched

In [None]:
from pykalman import KalmanFilter
from typing import Dict, Iterable, List
from numpy import ma
def cartesian_product(arrays):
    """Return every combination of one element from each 1-D input sequence.

    The result has shape `(prod(len(a) for a in arrays), len(arrays))`; row
    order is the same as nested loops with the last sequence varying fastest.
    The output dtype is the common promoted dtype of the inputs.
    """
    la = len(arrays)
    # `np.find_common_type` was deprecated in NumPy 1.25 and removed in 2.0;
    # `np.result_type` performs the same dtype promotion for array dtypes.
    dtype = np.result_type(*[np.asarray(a).dtype for a in arrays])
    arr = np.empty([len(a) for a in arrays] + [la], dtype=dtype)
    for i, a in enumerate(np.ix_(*arrays)):
        arr[..., i] = a
    return arr.reshape(-1, la)


def get_observation_matrix(params_len):
    """Return the `params_len x 3*params_len` matrix `[I | 0 | 0]`.

    It selects the first `params_len` entries of a state vector that is three
    times as long.
    """
    identity = np.eye(params_len)
    trailing_zeros = np.zeros((params_len, 2 * params_len))
    return np.hstack([identity, trailing_zeros])
def get_transition_matrix(params_len):
    """Return the `3n x 3n` block matrix `[[I, I, I/2], [0, I, I], [0, 0, I]]`.

    With `n = params_len`, the matrix has ones on the main diagonal and on the
    n-th super-diagonal, and 0.5 on the 2n-th super-diagonal.
    """
    block_coefficients = np.array([[1.0, 1.0, 0.5],
                                   [0.0, 1.0, 1.0],
                                   [0.0, 0.0, 1.0]])
    # The Kronecker product expands each coefficient into coefficient * I_n.
    return np.kron(block_coefficients, np.eye(params_len))

class KalmanFilterRoutine:
    """Incrementally filters a fixed list of named parameters with a Kalman filter.

    The filter state has ``3 * len(params)`` entries. After warm-up its three
    consecutive thirds are exposed as ``updated_params``,
    ``updated_params_der`` and ``updated_params_sder``. Only the first third is
    observed (see ``get_observation_matrix``); the transition matrix comes from
    ``get_transition_matrix``.

    The first ``init_frames`` calls to ``update`` only buffer the observations;
    the filter is created and run over that buffer on call number
    ``init_frames``. ``updated_*`` attributes do not exist before that point,
    so check ``is_ready`` first.
    """
    def __init__(self, params: List[str], init_frames=5):
        # Number of `update` calls to buffer before the filter is initialised.
        self.init_frames = init_frames
        # The pykalman KalmanFilter instance; created lazily in `update`.
        self.kf = None
        # Raw observations (dicts) collected during warm-up.
        self.params_buffer = []
        # Number of `update` calls seen so far.
        self.frame = 0
        # Lists of filtered state means / covariances, one entry per filtered step.
        self.means = None
        self.covariances = None
        self.observation_matrix = None
        self.transition_matrix = None
        # Ordered parameter names; fixes the layout of the state vector.
        self.params = params
        self.params_len = len(params)

    @property
    def is_ready(self):
        """True once at least `init_frames` updates were seen (filter initialised)."""
        return self.frame >= self.init_frames

    def update(self, **params):
        """Feed one observation (`name=value` pairs) to the routine.

        During warm-up the observation is only buffered. On the
        `init_frames`-th call the filter is built and run over the whole
        buffer. On later calls a single `filter_update` step is performed;
        parameters missing from `params` are treated as masked (unobserved).
        """
        self.frame += 1
        if self.frame < self.init_frames:
            self.params_buffer.append(params)
        else:
            if self.frame == self.init_frames:
                self.params_buffer.append(params)
                self.transition_matrix = get_transition_matrix(self.params_len)
                self.observation_matrix = get_observation_matrix(self.params_len)
                params_df = pd.DataFrame(self.params_buffer)
                # Parameters never observed during warm-up become columns of 0
                # (NaN first, then filled); other gaps are filled with 0 too.
                missing = [p for p in self.params if p not in params_df.columns]
                params_df[missing] = np.nan
                params_df = params_df.fillna(0)
                # Enforce the canonical parameter order.
                params_df = params_df[self.params]
                # Initial state: warm-up means for the observed third, zeros for
                # the two remaining thirds.
                initial_state_mean = np.pad(params_df.mean(axis=0), (
                    (0, 2*len(self.params))))
                self.kf = KalmanFilter(transition_matrices=self.transition_matrix, 
                                       observation_matrices=self.observation_matrix,
                                      initial_state_mean=initial_state_mean, random_state=RANDOM_STATE)
                # Pad the warm-up buffer to at least 3 rows by repeating the last one.
                if len(params_df) < 3:
                    params_df = pd.concat(
                        [params_df] + [params_df.iloc[[-1]] for _ in range(3 - len(params_df))])
                self.means, self.covariances = self.kf.filter(params_df.values)
                # Lists so that later steps can be appended.
                self.means = self.means.tolist()
                self.covariances = self.covariances.tolist()
            if self.frame > self.init_frames:
                # Missing parameters are NaN here and masked below.
                observation = ma.asarray(np.array([params[k] if k in params else np.nan for k  in self.params]))
                observation[np.isnan(observation)] = ma.masked
                state_means, state_covs = self.kf.filter_update(
                    self.means[-1],
                    self.covariances[-1],
                    observation =observation)
                self.means.append(state_means)
                self.covariances.append(state_covs)
            # Split the latest state mean into its three thirds.
            self.updated_params =  np.array(self.means[-1][:len(self.params)])
            self.updated_params_der = np.array(self.means[-1][len(self.params): 2 * len(self.params)])
            self.updated_params_sder = np.array(self.means[-1][2 * len(self.params): 3 * len(self.params)])
        

In [None]:
from collections import deque

class ParamsCombinationsGenerator:
    """Produces per-parameter search bounds for the next frame's optimisation.

    Until the embedded `KalmanFilterRoutine` is ready (or when `use_kalman`
    is False), `get_bounds` returns either the configured `params_ranges` or a
    range derived from recently used bounds. Once ready, bounds are centred on
    the last accepted parameters with a half-width derived from the filter
    state.

    NOTE(review): `strictly_positive_params`, `allowed_change_ratio`,
    `n_steps`, `max_perturbations`, `ranges_limits` and `ori_options` are
    stored but not read by any method visible in this class — confirm whether
    they are used elsewhere.
    """
    # Uses Kalman Filter with Taylor expansion up to the 2nd derivative
    def __init__(self, params_ranges : Dict[str, Iterable],
                 strictly_positive_params:List[str]=None,
                 min_perturbations: Dict[str, float]=None,
                 max_perturbations: Dict[str, float]=None,
                 kalman_init=5,
                 allowed_change_ratio=1, n_steps=5, 
                 obey_original_ranges=True, use_kalman=True, previous_frames_to_keep=10):
        self.use_kalman = use_kalman
        # Number of updates the Kalman routine buffers before it is ready.
        self.kalman_init = kalman_init
        self.allowed_change_ratio = allowed_change_ratio
        self.params_ranges = params_ranges
        self.strictly_positive_params = strictly_positive_params
        self.n_steps = n_steps
        self.previous_frames_to_keep = previous_frames_to_keep
        # Rolling history of the lower / upper bounds produced while the filter was ready.
        self.buffer_starts = deque(maxlen=previous_frames_to_keep)
        self.buffer_ends = deque(maxlen=previous_frames_to_keep)
        self.params_buffer = []
        self.ori_options = [0, 1]
        self.frame = 0
        # Parameter order follows the key order of `params_ranges`.
        self.params = list(params_ranges.keys())
        
        self.means = None
        self.covariances = None
        self.observation_matrix = None
        self.transition_matrix = None
        self.obey_original_ranges = obey_original_ranges
        if obey_original_ranges:
            # [min, max] of every configured range, in parameter order.
            self.ranges_limits = np.array([[np.min(params_ranges[p]),
                                            np.max(params_ranges[p])] for p in self.params])
        # Per-parameter lower limit on the bound half-width (default 0).
        if min_perturbations is None:
            self.min_perturbations = np.zeros(len(self.params))
        else:
            self.min_perturbations = np.array([min_perturbations[x]  if x in min_perturbations else 0 for x in self.params])
        # Per-parameter upper limit (default +inf).
        if max_perturbations is not None:
            self.max_perturbations = np.array([max_perturbations[x]  if x in max_perturbations else
                                               np.inf for x in self.params])
        else:
            self.max_perturbations = np.zeros(len(self.params)) + np.inf
        self.kf = KalmanFilterRoutine(self.params, self.kalman_init)
        
    
    def reset(self):
        """Discard the Kalman state by replacing the routine with a fresh one.

        The bounds history (`buffer_starts` / `buffer_ends`) is kept.
        """
        self.kf = KalmanFilterRoutine(self.params, self.kalman_init)
    
    def zero_buffer(self):
        """Clear the bounds history."""
        self.buffer_starts = deque(maxlen=self.previous_frames_to_keep)
        self.buffer_ends = deque(maxlen=self.previous_frames_to_keep)
        
    @property
    def is_ready(self):
        """True when the embedded Kalman routine has finished its warm-up."""
        return self.kf.is_ready
    
    def update(self, **params):
        """Record the parameters accepted for the current frame.

        `params` must contain every name in `self.params` (KeyError otherwise).
        They are stored as `prev_params` and, if Kalman filtering is enabled,
        forwarded to the filter.
        """
        self.prev_params = np.array([params[k] for k in self.params])
        if not self.use_kalman:
            return
        self.kf.update(**params)

    
    def get_bounds(self):    
        """Return a list of (start, end) bounds, one per parameter, in `self.params` order."""
        if not self.use_kalman or not self.kf.is_ready:
            if not self.buffer_starts:
                # No history yet: use the configured ranges as they are.
                ranges = [self.params_ranges[x] for x in self.params]
            else:
                # Widen the mean of the remembered bounds by 20% on each side.
                # NOTE(review): for negative bounds the 0.8 / 1.2 factors move
                # the limits the other way (narrowing, or even end < start) —
                # confirm parameters such as angles can never be negative here.
                ranges = [(start,end) for start, end in zip(0.8 * np.mean(self.buffer_starts, axis=0), 
                                                            1.2 * np.mean(self.buffer_ends, axis=0))]
        else:
            # Filtered estimate minus the last accepted parameters ...
            diff1 = self.kf.updated_params - self.prev_params
            # ... plus a one-step second-order Taylor term from the filter's
            # first / second derivative estimates.
            diff2 = self.kf.updated_params_der + 0.5 * self.kf.updated_params_sder
            
            
            # Half-width: predicted change magnitude, floored by min_perturbations.
            changes =  np.maximum(np.abs(diff1 + diff2), self.min_perturbations)
            perturbations = changes
            starts = self.prev_params - perturbations
            ends = self.prev_params + perturbations
            # Fails when any half-width is exactly 0 (e.g. zero predicted change
            # with a zero minimum perturbation).
            assert np.all(ends - starts > 0), (changes, self.min_perturbations)
            ranges = [(start, end) for start, end in zip(starts, ends)]
            self.buffer_starts.append(starts)
            self.buffer_ends.append(ends)
                          
        return ranges    
    

In [None]:
# Margin in pixels: before the parameter generator is ready, `mapping_df` drops
# Sideline boxes whose `top` lies within this distance of the top or bottom of
# the 720-pixel-high frame.
SIDELINE_START_THRES = 50
# Used by `mapping_df` as `max_p` to mirror tracking coordinates
# (presumably the field extent along x and y in yards — confirm).
MAX_COORDS = (120, 53.33)
# Maximum helmet-to-player assignment cost kept by `cost_function`, per view.
MAX_COST_SIDELINE = 50
MAX_COST_ENDZONE = 100
from scipy.optimize import basinhopping, minimize
def cost_function(this_tracking, expanded, im_centers, 
                  camera_height, camera_length, max_p, 
                  xdig, zdig, scaling, max_cost_thres=None, previous_tracking=None,
                  ret_cost_only=False):
    """Project tracked players onto the image and score the match to helmet centres.

    The 3-D points in `expanded` are translated so the camera at
    `(camera_length, 0, camera_height)` is the origin, rotated about z by
    `zdig` and about x by `xdig` (rotation vectors, i.e. radians), reduced to
    their first and third coordinates, multiplied by `scaling` and moved to
    1280x720 image coordinates with the y axis flipped. Helmet centres and
    projected players are then matched with the Hungarian algorithm.

    Parameters
    ----------
    this_tracking : DataFrame aligned row-by-row with `expanded`; only used
        when `ret_cost_only` is False.
    expanded : (n_players, 3) array of field coordinates.
    im_centers : (n_helmets, 2) array of helmet centres in pixels.
    max_p : accepted for signature compatibility; not used.
    max_cost_thres : if given, matched pairs whose cost is not below it are
        discarded (this does not change the returned cost).
    previous_tracking : optional DataFrame; if it has `x2im` / `y2im` columns,
        the mean assignment distance between its finite projections and the
        current ones is blended in: `0.7 * cost + 0.3 * inertia`.
    ret_cost_only : return just the scalar cost (used inside optimisers).

    Returns
    -------
    float, or `(params, reduced_tracking, unmatched_tracking)`
        `params` is a dict of the inputs plus `cost`, the match index arrays
        (`match_from` indexes players, `match_to` indexes helmets) and their
        costs. `reduced_tracking` holds the matched players with `x2im`,
        `y2im`, `im2x`, `im2y` columns; `unmatched_tracking` the remaining
        players with `x2im`, `y2im`.
    """
    assert not np.any(np.isnan([xdig]))

    #camera sits somewhere near the middle of the appropriate x side of the field
    camera_pos = np.array([camera_length,0,camera_height])
    expanded = expanded - camera_pos 

    z_rot = Rotation.from_rotvec([0, 0, zdig]).as_matrix()
    z_rotated = (z_rot @ expanded.T).T
    x_rot = Rotation.from_rotvec([xdig,0,0]).as_matrix()
    x_rotated =  (x_rot @ z_rotated.T).T
    if not ret_cost_only:
        # sort the closer to the observer to be the latter in the list of players coordinates
        eliminated_dim = x_rotated[:, 1]
        sorting_order = np.argsort(eliminated_dim)[::-1]
        x_rotated = x_rotated[sorting_order, :]
        this_tracking = this_tracking.iloc[sorting_order]
    x_rotated = x_rotated[:, [0, 2]]

    scaled = scaling * x_rotated
    # the origin is now assumed to be at the center of the image, so we need to move it to the bottom left first
    scaled = scaled + np.array([1280,720]) / 2
    # and then revert the y axis
    scaled[:,1] = 720 - scaled[:,1]

    d = distance_matrix(im_centers,
                        scaled)
    match_to, match_from = linear_sum_assignment(d)
    if not ret_cost_only:
        # Order the pairs by player index.
        match_to = match_to[np.argsort(match_from)]
        match_from = np.sort(match_from)

    as_costs = d[match_to, match_from]
    cost = np.mean(as_costs)

    if max_cost_thres is not None:
        mask = as_costs < max_cost_thres
        match_from = match_from[mask]
        match_to = match_to[mask]
        as_costs = as_costs[mask]
    if not ret_cost_only:
        reduced_tracking = this_tracking.iloc[match_from].copy()
        reduced_tracking[['x2im', 'y2im']] = scaled[match_from, :]
        reduced_tracking[['im2x', 'im2y']] = im_centers[match_to, :]
        um_index = list(set(range(len(this_tracking))) - set(match_from)) 
        unmatched_tracking = this_tracking.iloc[um_index].copy()
        unmatched_tracking[['x2im', 'y2im']] = scaled[um_index, :]
    assert np.all(np.isfinite(scaled))
    adraneia = None  # inertia term; stays None when no usable history exists
    p = None
    if previous_tracking is not None and 'x2im' in previous_tracking.columns:
        if match_from.size > 0:
            previous_scaled = previous_tracking[['x2im', 'y2im']].values
            # there are nans because of image points not matched to GPS points (shown as nan)
            previous_scaled = previous_scaled[np.all(np.isfinite(previous_scaled), axis=1), :]
            # With no finite previous projection the assignment is empty and
            # its mean would be NaN, poisoning the cost; skip the blend then.
            if previous_scaled.shape[0] > 0:
                d_prev = distance_matrix(previous_scaled,
                                         scaled)
                adraneia = d_prev[linear_sum_assignment(d_prev)].mean()

                cost = 0.7 * cost + 0.3 * adraneia

    if ret_cost_only:
        return cost

    params = dict(xdig=xdig, zdig=zdig, scaling=scaling,
                  camera_height=camera_height,
                  camera_length=camera_length,
                  cost=cost,
                  p=p,
                  c=np.mean(scaled, axis=0),
                  adraneia=adraneia,
                  match_from=match_from,
                  match_to=match_to,
                  match_costs=as_costs,
                  max_cost=(as_costs.max() if np.any(as_costs)
                            else max_cost_thres))
    return params, reduced_tracking, unmatched_tracking

In [None]:
def check_duplicates(x, cols=['left','top']):
    """Assert that every combination of `cols` values occurs exactly once in `x`.

    Raises AssertionError (carrying the frame and the group sizes) otherwise.
    """
    group_sizes = x.groupby(cols).size()
    all_unique = np.all(group_sizes == 1)
    assert all_unique, (x, x.groupby(cols).size())
def mapping_df(combs_generator, tracking, df, previous_mapped=None, 
               available_oris=(0,1), ignore_starting_preproc=False,
               force_local_minimize=True, check_mapping=False, ratio = 0.8):
    """Map the helmet boxes of one video frame to tracked players.

    For every candidate field orientation in `available_oris` the projection
    parameters (`xdig`, `zdig`, `scaling`, `camera_height`, `camera_length`)
    are optimised against `cost_function`; the orientation with the lowest
    cost wins and its assignment is returned. The winning parameters are fed
    back into `combs_generator`.

    Parameters
    ----------
    combs_generator : object exposing `params`, `params_ranges`, `get_bounds`,
        `is_ready`, `buffer_starts`, `reset` and `update`
        (see `ParamsCombinationsGenerator`).
    tracking : full tracking table.
    df : helmet boxes of a single frame (`video_frame`, `left`, `top`,
        `width`, `height`, optionally `conf`).
    previous_mapped : result of the previous frame, used as an inertia term in
        the cost and to keep earlier labels when they were cheaper.
    available_oris : orientation flags to try.
    ignore_starting_preproc : skip dropping Sideline boxes near the top/bottom
        image edges while the generator is not ready.
    force_local_minimize : always use a bounded local `minimize`; otherwise
        basin hopping is used until the generator is ready.
    check_mapping : accepted for signature compatibility; unused.
    ratio : fraction of the per-view cost threshold above which a basin-hopping
        result is considered unsuccessful (the generator is then reset).

    Returns
    -------
    (ret, opt_tracking, opt_params, opt_unmatched)
        `ret` holds the boxes with `label`, `cost`, `x2im`, `y2im` (unmatched
        boxes have a null label and infinite cost); `opt_tracking` /
        `opt_unmatched` are the matched / unmatched tracking rows and
        `opt_params` the winning parameter dict from `cost_function`.

    Raises
    ------
    RuntimeError
        If no orientation produced a cost below the initial ceiling.
    """
    gameKey,playID,view,frame = df.video_frame.iloc[0].split('_')
    gameKey = int(gameKey)
    playID = int(playID)
    frame = int(frame)
    this_tracking = tracking[(tracking['gameKey']==gameKey) & (tracking['playID']==playID)]
    this_tracking = find_nearest(this_tracking, frame)
    df = df.reset_index(drop=True)
    
    max_p = MAX_COORDS
    if view == 'Endzone':
        max_cost_thres = MAX_COST_ENDZONE
    else:
        max_cost_thres = MAX_COST_SIDELINE
    same_sgns = 0 # the projected axes on the image need to be reflected as of x or as of y
    if view == 'Endzone':
        this_tracking['x'], this_tracking['y'] = this_tracking['y'].copy(), this_tracking['x'].copy()
        max_p = max_p[::-1]
        # the projected axes need to be reflected both as of x and as of y or stay as is
        same_sgns = 1 
    if 'conf' in df.columns:
        df = df[df['conf']>CONF_THRE].copy()
    df_num = len(df)
    if not ignore_starting_preproc and (view == 'Sideline') and not combs_generator.is_ready:
        inc_mask = (df['top'] >= SIDELINE_START_THRES) & (df['top'] < 720 - SIDELINE_START_THRES)
        if not np.all(inc_mask):
            print(f"Removing {(~inc_mask).sum()} bounding boxes that reside in the top or bottom edge of the screen")
            df = df[inc_mask].copy()
        
    im_centers = df[['left', 'top']].values+ (df[['width', 'height']]/2).values
    rl_centers = this_tracking[['x','y']].values
    
    min_cost = 1e7
    opt_params = None
    opt_result = None  # last optimiser result, kept for the failure report
    for change_ori in available_oris:
        # assume 1 yard average height 
        expanded = np.hstack([rl_centers,
                              1 + np.zeros((len(rl_centers),1))])
        if same_sgns:
            c_translation = np.zeros(2)
            c_scaling = np.ones(2)
            if change_ori:
                c_translation = max_p
                c_scaling = - np.ones(2)
        else:
            if change_ori:
                c_translation = np.array([0, max_p[1]])
                c_scaling = np.array([1, -1])
            else:
                c_translation = np.array([max_p[0], 0])
                c_scaling = np.array([-1, 1])
        expanded[:, :2] = c_scaling * expanded[:, :2] + c_translation
        expanded[:, 1] = max_p[1] - expanded[:, 1]


        to_opt = combs_generator.params
        x0 = [np.mean(combs_generator.params_ranges[x]) for x in to_opt]
        bounds = combs_generator.get_bounds()
        min_func = lambda p: cost_function(
            this_tracking=this_tracking,
            expanded=expanded,
            im_centers=im_centers,
            camera_length=p[to_opt.index('camera_length')],
            camera_height=p[to_opt.index('camera_height')],
            max_p=max_p,
            zdig=p[to_opt.index('zdig')],                                 
            xdig=p[to_opt.index('xdig')],
            scaling=p[to_opt.index('scaling')],
            ret_cost_only=True,
            previous_tracking=previous_mapped,
            max_cost_thres=None)
        
        # Exactly one of the two branches below runs for any flag combination.
        if force_local_minimize or combs_generator.is_ready:
            opt_result = minimize(min_func, x0=x0,
                bounds=bounds)
        if not combs_generator.is_ready and not force_local_minimize:
            opt_result = basinhopping(min_func,
                               x0=x0,niter=100 if combs_generator.buffer_starts else 6000,
                               niter_success=5 if combs_generator.buffer_starts else 150,
                minimizer_kwargs=dict(bounds=bounds), seed=RANDOM_STATE)
            if opt_result.fun > max_cost_thres * ratio:
                print('Basin Hopping Unsuccessful! Resetting due to high cost('
                      f'{opt_result.fun} > {max_cost_thres * ratio})')
                combs_generator.reset()
                    
        cost = opt_result.fun
        if cost < min_cost:
            xdig = opt_result.x[to_opt.index('xdig')]
            zdig = opt_result.x[to_opt.index('zdig')]
            scaling = opt_result.x[to_opt.index('scaling')]
            camera_height = opt_result.x[to_opt.index('camera_height')]
            camera_length = opt_result.x[to_opt.index('camera_length')]
            # Re-evaluate at the optimum to obtain the actual assignment.
            found_params, found_tracking, unmatched_tracking = cost_function(
                this_tracking=this_tracking,
                expanded=expanded, 
                im_centers=im_centers,
                camera_height=camera_height,
                camera_length=camera_length,
                max_p=max_p,
                zdig=zdig,
                xdig=xdig,
                scaling=scaling,
                ret_cost_only=False, 
                previous_tracking=previous_mapped,
                max_cost_thres=max_cost_thres)
            min_cost = cost
            opt_params = found_params
            opt_params['change_ori'] = change_ori
            opt_tracking = found_tracking
            opt_unmatched = unmatched_tracking
    

    if opt_params is None:
        combs_generator.reset()
        if debug:
            print('Failure')
            print(opt_result)
        # A bare `raise` here has no active exception and would itself fail
        # with an uninformative RuntimeError; raise the same type explicitly.
        raise RuntimeError(
            f'mapping_df: no orientation produced a finite cost below {min_cost} '
            f'for {df.video_frame.iloc[0] if len(df) else "empty frame"}')
    match_to = opt_params['match_to']
    match_from = opt_params['match_from']
    match_costs = opt_params['match_costs']
    df['view'] = view
    df[['im_x_remapped', 'im_y_remapped']] = im_centers
    df_cols = [col for col in df.columns if col not in ['x','y','x2im', 'y2im', 'player']]
    to_double_match = opt_unmatched.copy()
    to_double_match_flag = ((to_double_match['x2im'] < 1280) &
                       (to_double_match['x2im'] >= 0) &
                       (to_double_match['y2im'] < 720) &
                       (to_double_match['y2im'] >= 0))
    # deactivating it
    to_double_match_flag = np.zeros_like(to_double_match_flag)
    double_match_aug = None
    unmatched_df_inds = list(set(range(len(df))) - set(match_to))
    if to_double_match_flag.any():
        to_double_match = to_double_match[to_double_match_flag].copy()
        dd = distance_matrix(im_centers, to_double_match[['x2im', 'y2im']].values)
        dmatch_to, dmatch_from  = linear_sum_assignment(dd)
        dcosts = dd[dmatch_to, dmatch_from]
        dflag = dcosts <= max_cost_thres
        dmatch_to = dmatch_to[dflag]
        dmatch_from = dmatch_from[dflag]
        
        double_match_aug = df[df_cols].iloc[dmatch_to].copy().reset_index(drop=True)
        double_match_aug['left'] += 1
        double_match_aug[['x','y','x2im', 'y2im', 'player']] = to_double_match.iloc[dmatch_from][
            ['x','y','x2im', 'y2im', 'player']].values
        double_match_aug['cost'] = dd[dmatch_to, dmatch_from]
        opt_unmatched = pd.concat(
            [opt_unmatched[~to_double_match_flag],
             to_double_match.iloc[list(set(range(len(to_double_match))) - set(dmatch_from))]],axis=0)
        
        
    combs_generator.update(zdig=opt_params['zdig'],
                           xdig=opt_params['xdig'],
                           scaling=opt_params['scaling'],
                           camera_height=opt_params['camera_height'],
                           camera_length=opt_params['camera_length'])
    
    # Matched boxes side by side with their matched tracking rows.
    ret = pd.concat(
        [
            df[df_cols].iloc[match_to].reset_index(drop=True),
            opt_tracking[['x','y','x2im', 'y2im', 'player']].reset_index(drop=True)
        ],
        axis=1).set_index(df.iloc[match_to].index)
    ret['cost'] = match_costs
    
    if double_match_aug is not None:
        
        ret = pd.concat([ret, double_match_aug],axis=0).reset_index(drop=True)
        
        
        opt_tracking = pd.concat(
            [opt_tracking, to_double_match.iloc[dmatch_from]],axis=0).reset_index(drop=True)
        
    
    ret = pd.concat([ret, df.iloc[unmatched_df_inds][df_cols]], axis=0).reset_index(drop=True) # null labels for unmatched
    assert df.left.isin(ret.left).all(), (sorted(df.left), sorted(ret.left))
    ret['cost'] = ret['cost'].fillna(np.inf)
    if double_match_aug is not None:
        x = set(unmatched_df_inds) | set(match_to) |set(dmatch_to)
        assert  len(x) == df_num, (len(df), x, df_num) 
        df = pd.concat([df, double_match_aug[df_cols]], axis=0).reset_index(drop=True)
        assert len(df) >= df_num, (len(df), df_num)
        assert df.left.isin(ret.left).all()
    
    ret.rename(columns={'player':'label'},inplace=True)
    
    
    if previous_mapped is not None:
        previous_mapped = previous_mapped[~previous_mapped['label'].isnull()]
    if previous_mapped is not None and (len(previous_mapped) > 0) and ('cost' in previous_mapped.columns):
        compared_ret, to_reassign_labels = compare_and_assign(ret, previous_mapped)
        # `~series.empty` is a bitwise NOT of a Python bool (-1 or -2) and is
        # therefore always truthy; the logical `not` is what is meant.
        if not to_reassign_labels.empty:
            max_permitted_costs = compared_ret['cost'].max()
            tracking_to_check = pd.concat([opt_tracking, opt_unmatched],axis=0)
            # Only players not already used by `compared_ret` are candidates.
            tracking_mask = ~tracking_to_check.player.isin(
                compared_ret['label'].values)
            to_reassign_flag = ret['label'].isin(to_reassign_labels)
            reduced_df = df.merge(ret.loc[to_reassign_flag, ['left', 'top']],
                                  on=['left', 'top'], how='inner')
            reduced_dist = distance_matrix(reduced_df[['im_x_remapped',
                                                       'im_y_remapped']],
                                           tracking_to_check[['x2im', 'y2im']])[:, tracking_mask]
            reduced_dist[np.isinf(reduced_dist)] = 1e7
            try:
                matched_to, matched_from = linear_sum_assignment(reduced_dist)
            except Exception:
                # Best effort: if the re-assignment cannot be solved, fall back
                # to the mapping computed before the comparison step.
                return ret, opt_tracking, opt_params, opt_unmatched
            matched_costs = reduced_dist[matched_to, matched_from]
            reassigned_ret = pd.concat(
                [reduced_df[df_cols].iloc[
                    matched_to].reset_index(drop=True),
                 tracking_to_check[tracking_mask].iloc[matched_from][
                     ['x','y','x2im', 'y2im', 'player']].reset_index(drop=True)],
                axis=1).set_index(reduced_df.iloc[matched_to].index)
            reassigned_ret['cost'] = matched_costs
            reassigned_ret.rename(columns={'player':'label'},inplace=True)
            # Re-assignments more expensive than any accepted match lose their label.
            reassigned_ret.loc[reassigned_ret['cost'] > max_permitted_costs, 'label'] = np.nan
            
            ret = pd.concat([compared_ret, reassigned_ret],axis=0)
        else:
            ret = compared_ret
    ret = ret.sort_values('left')
    return ret[df_cols + ['label', 'cost', 'x2im', 'y2im']], opt_tracking, opt_params, opt_unmatched
    
    
def compare_and_assign(ret, previous_mapped):
    """Blend the current frame's labels with the previous frame's labels.

    Helmets in ``ret`` are paired one-to-one with helmets in
    ``previous_mapped`` by solving a linear sum assignment on the Euclidean
    distance between their ``(left, top)`` corners. For each pair whose
    previous cost is strictly lower than the current cost, the previous label
    is carried over and the cost becomes the mean of both costs. The remaining
    rows of ``ret`` keep their own label, unless that label collides with a
    carried-over one; colliding rows are removed from the result and their
    labels are handed back so the caller can reassign them.

    Parameters
    ----------
    ret : pandas.DataFrame
        Mapping of the current frame; needs the columns ``left``, ``top``,
        ``cost`` and ``label``.
    previous_mapped : pandas.DataFrame
        Mapping of the previous frame with the same four columns.

    Returns
    -------
    tuple
        ``(merged, to_reassign_labels)``: ``merged`` holds the carried-over
        rows followed by the untouched rows; ``to_reassign_labels`` is the
        ``label`` Series of the rows that were dropped because of a collision.

    Raises
    ------
    RuntimeError
        If ``merged`` contains the same non-null label more than once.
    """
    hist_dist_mat = distance_matrix(
        ret[['left', 'top']].values,
        previous_mapped[['left', 'top']].values)

    # linear_sum_assignment returns the row indices in ascending order, so
    # selecting ret rows positionally with matched_to follows ret's row order.
    matched_to, matched_from = linear_sum_assignment(hist_dist_mat)
    costs = np.array([ret.iloc[matched_to]['cost'].values,
                      previous_mapped.iloc[matched_from]['cost'].values])
    # argmin == 1 means the previous frame's cost wins; ties keep the current
    # label because argmin returns the first minimum.
    to_keep_previous = np.argmin(costs, axis=0) == 1

    matched_from = matched_from[to_keep_previous]
    matched_to = matched_to[to_keep_previous]
    matched_to_flag = np.zeros(len(ret), dtype=bool)
    matched_to_flag[matched_to] = True

    previous_rows = previous_mapped.iloc[matched_from]
    to_change_df = ret.iloc[matched_to].copy()
    to_change_df['cost'] = (
        to_change_df['cost'].values + previous_rows['cost'].values) / 2
    to_change_df['label'] = previous_rows['label'].values

    to_keep_df = ret[~matched_to_flag].copy()
    # A label can only be used once: untouched rows whose label was just
    # handed to another helmet have to be reassigned by the caller.
    to_reassign_flag = to_keep_df['label'].isin(to_change_df['label'].values).values
    ret = pd.concat([to_change_df, to_keep_df[~to_reassign_flag]], axis=0)

    labelled = ret[~ret['label'].isnull()]
    duplicated_flag = labelled['label'].duplicated(keep=False)
    if duplicated_flag.any():
        display(to_change_df)
        display(to_keep_df[~to_reassign_flag])
        print(matched_to)
        raise RuntimeError(
            'compare_and_assign produced duplicate labels: '
            f"{labelled.loc[duplicated_flag, 'label'].unique().tolist()}")

    return ret, to_keep_df['label'][to_reassign_flag]

In [None]:
class Mapping:
    """Stateful, frame-by-frame wrapper around ``mapping_df``.

    An instance keeps the previous frame's mapping, the parameter-combination
    generator and, during the first ``init_frames`` successful calls, a buffer
    of the orientation / cost values reported by ``mapping_df``. Once the
    buffer is full and two orientations are still allowed, the orientation
    with the lowest median cost is kept as the only allowed one.

    Parameters
    ----------
    tracking
        Tracking data handed unchanged to ``mapping_df``.
    view : str
        ``'Sideline'`` selects ``MAX_COORDS[0]`` as the reference length for
        the ``camera_length`` range; any other value selects ``MAX_COORDS[1]``.
    use_kalman : bool
        Forwarded to ``ParamsCombinationsGenerator``.
    use_previous : bool
        When true, the previous frame's mapping is passed to ``mapping_df``.
    available_oris : sequence
        Orientations ``mapping_df`` may choose from.
    init_frames : int
        Number of successful calls whose results are buffered.
    ignore_starting_preproc : bool
        Forwarded to ``mapping_df``.
    """

    def __init__(self, tracking, view, use_kalman=True, use_previous=True, available_oris=(0,1),
                 init_frames=19, ignore_starting_preproc=False):
        self.tracking = tracking
        self.use_kalman = use_kalman
        self.use_previous = use_previous
        self.available_oris = available_oris
        self.init_frames = init_frames
        self.ignore_starting_preproc = ignore_starting_preproc

        # Values collected from opt_params during the first init_frames calls.
        self.buffer_max_cost = deque(maxlen=30)
        self.buffer_ori = []
        self.buffer_costs = []

        length = MAX_COORDS[0] if view=='Sideline' else MAX_COORDS[1]
        self.params_ranges = dict(zdig=[- pi / 3, pi / 3],
                                  xdig=[0, pi/3],
                                  camera_height=[15, 50],
                                  camera_length=[0.3 * length, 0.7 * length],
                                  scaling=[20, 80])
        self.min_perturbations = dict(zdig=np.deg2rad(5), xdig=np.deg2rad(5), scaling=0.1, camera_length=0.1,
                                      camera_height=0.1)
        self.max_perturbations = dict(zdig=np.deg2rad(10), xdig=np.deg2rad(10), scaling=1, camera_length=0.2,
                                      camera_height=0.2)

        self.combinations_generator = ParamsCombinationsGenerator(
            self.params_ranges, strictly_positive_params='scaling', min_perturbations=self.min_perturbations,
            max_perturbations=self.max_perturbations, use_kalman=use_kalman, kalman_init=1,
            obey_original_ranges=False)
        self.previous_df = None
        self.max_cost_thres = None
        self.frame = 0

    def __call__(self, this_df):
        """Map the helmets of one frame.

        Parameters
        ----------
        this_df : pandas.DataFrame
            Helmet boxes of a single frame.

        Returns
        -------
        tuple
            ``(labelled, this_tracking, opt_params, opt_unmatched)`` where
            ``labelled`` is a copy of the rows of the stored mapping that have
            a non-null ``label``. When ``mapping_df`` fails outside debug mode
            the values come from ``mapping_df_fallback`` and
            ``opt_params['error']`` holds the formatted traceback.
        """
        try:
            self.previous_df, this_tracking, opt_params, opt_unmatched = mapping_df(
                self.combinations_generator,
                self.tracking, this_df,
                previous_mapped=(self.previous_df
                                 if self.use_previous else None),
                available_oris=self.available_oris,
                force_local_minimize=len(self.available_oris)==2, # we dont need much of accuracy when detecting orientation
                ignore_starting_preproc=self.ignore_starting_preproc,
            )
            # Keep the unmatched rows too, restricted to the mapping's columns.
            self.previous_df = pd.concat([self.previous_df, opt_unmatched[
                    [col for col in opt_unmatched if col in self.previous_df.columns]]],axis=0)

            if len(self.buffer_ori) < self.init_frames:
                self.buffer_ori.append(opt_params['change_ori'])
                self.buffer_max_cost.append(opt_params['max_cost'])
                self.buffer_costs.append(opt_params['cost'])
            if len(self.buffer_ori) == self.init_frames:
                if len(self.available_oris) == 2:
                    ori_df = pd.DataFrame({'ori': self.buffer_ori, 'cost': self.buffer_costs})
                    median_costs = ori_df.groupby('ori')['cost'].median()
                    # Keep the orientation *key* with the lowest median cost.
                    # The previous expression stored a row of cost values
                    # (and picked the highest cost) instead of an orientation.
                    self.available_oris = [median_costs.idxmin()]
                self.max_cost_thres = 1.1 * np.max(self.buffer_max_cost)
        except Exception:
            # KeyboardInterrupt / SystemExit are not Exception subclasses and
            # therefore propagate untouched.
            if debug:
                raise
            traceback.print_exc()
            self.previous_df, this_tracking, opt_params, opt_unmatched = mapping_df_fallback(self.tracking, this_df)
            opt_params['error'] = traceback.format_exc()
        return self.previous_df[~self.previous_df.label.isnull()].copy(), this_tracking, opt_params, opt_unmatched


def apply_on_video(tracking, video_df):
    """Map every frame of one video to player labels.

    The function runs in two passes. First a throw-away ``Mapping`` (no
    Kalman filter, no previous-frame information) is evaluated on
    ``init_frames`` evenly spaced frames and the most frequent orientation in
    its buffer is taken as the video's orientation; both orientations stay
    allowed if nothing was buffered. Then a fresh ``Mapping`` restricted to
    that orientation is applied to every frame in order.

    Parameters
    ----------
    tracking
        Tracking data forwarded to ``Mapping``.
    video_df : pandas.DataFrame
        Helmet boxes of a single video with ``frame`` and ``video_frame``
        columns; the view is the third ``_``-separated field of
        ``video_frame``.

    Returns
    -------
    pandas.DataFrame
        Concatenation of the labelled rows returned for each frame.
    """
    submission_df_list = []
    df_list = list(video_df.groupby('frame'))
    view = video_df.iloc[0]['video_frame'].split('_')[2]
    ori_mapping = Mapping(tracking, view=view, use_kalman=False, use_previous=False, ignore_starting_preproc=True)
    print('Detecting video view orientation...')
    last_idx = len(df_list) - 1
    # Clipping only matters for a single-frame video, where linspace(1, 0, ...)
    # would otherwise produce the out-of-range index 1.
    sample_idx = np.clip(
        np.linspace(1, last_idx, ori_mapping.init_frames).astype(int), 0, last_idx)
    for idx in tqdm(sample_idx):
        _, this_df = df_list[idx]
        ori_mapping(this_df)
    detected_ori = [mode(ori_mapping.buffer_ori)] if ori_mapping.buffer_ori else [0,1]
    print(ori_mapping.buffer_ori)
    print('Detected orientation:',detected_ori, '. Mapping...')

    mapping = Mapping(tracking,view=view,
                      available_oris=detected_ori)
    opt_params_dict = {}
    try:
        for frame, this_df in tqdm(df_list):
            df, _, opt_params, _ = mapping(this_df)
            if debug:
                opt_params_dict[frame] = opt_params
            submission_df_list.append(df)
        submission_df = pd.concat(submission_df_list)
    except KeyboardInterrupt:
        # When interrupted in debug mode, show the parameters found so far.
        if debug:
            with pd.option_context('display.max_rows', None, 'display.max_columns', None):
                display(pd.DataFrame(opt_params_dict).T)
        raise
    return submission_df

In [None]:
# Map each video's helmets; videos are processed in parallel unless there is
# exactly one, which is handled in-process.
videos_dfs = list(helmets.groupby('video'))
if len(videos_dfs) != 1:
    submission_df_list = Parallel(n_jobs=-1)(
        delayed(apply_on_video)(tracking, video_df)
        for _, video_df in tqdm(videos_dfs)
    )
else:
    submission_df_list = [apply_on_video(tracking, videos_dfs[0][1])]
submission_df = pd.concat(submission_df_list)
submission_df.to_csv('submission-baseline.csv', index=False)

In [None]:
# Show the baseline mapping table as this cell's output.
submission_df

In [None]:
if debug:
    # Only rows that actually received a label are passed to the scorer.
    scorer = NFLAssignmentScorer(labels)
    baseline_score = scorer.score(
        submission_df.loc[submission_df['label'].notna()]
    )
    print(f"validation score {baseline_score:0.4f}")

## Score the predictions before applying deepsort postprocessing

The scores are roughly 0.3, which is similar to the public leaderboard.

# Deepsort Postprocessing

Deepsort is a popular framework for object tracking within video. 
- [This blog post](https://nanonets.com/blog/object-tracking-deepsort/) shows some examples of it being put to use.
- This notebook shows how to apply deepsort to this helmet dataset: https://www.kaggle.com/s903124/nfl-helmet-with-yolov5-deepsort-starter
- You can also read the paper for deepsort here: https://arxiv.org/pdf/1703.07402.pdf

The approach is fairly simple:
1. Step through each frame in a video and apply the deepsort algorithm. This clusters helmets across frames when it is the same player/helmet.
2. Group by each of these deepsort clusters - and pick the most common label for that cluster. Then override all of the predictions for that helmet to the same player.

## Importing Deepsort from dataset
Because your submission is not allowed to use internet access, you can reference the deepsort codebase from the attached dataset. Deepsort also has a dependency of `easydict` which I've also added as a dataset.

In [None]:
import sys
sys.path.append('../input/easydict-master/easydict-master/')
# https://github.com/mikel-brostrom/Yolov5_DeepSort_Pytorch
sys.path.append('../input/yolov5-deepsort-pytorch/Yolov5_DeepSort_Pytorch-master/Yolov5_DeepSort_Pytorch-master/deep_sort_pytorch/')
from deep_sort.deep_sort import DeepSort
from utils.parser import get_config

## Deepsort config

Deepsort uses a config yaml file for some settings. These are just the default configs and could be improved.

In [None]:
%%writefile deepsort.yaml

DEEPSORT:
  REID_CKPT: "../input/yolov5-deepsort-pytorch/ckpt.t7"
  MAX_DIST: 0.1
  MIN_CONFIDENCE: 0.4
  NMS_MAX_OVERLAP: 0.5
  MAX_IOU_DISTANCE: 0.9
  MAX_AGE: 15
  N_INIT: 1
  NN_BUDGET: 100

In [None]:
"""
Helper functions from yolov5 to plot deepsort labels.
"""

def compute_color_for_id(label):
    """
    Derive a fixed three-component color from an id, so that the same id
    always gets the same color.
    """
    # The same multiplier is shared by the three palette primes.
    factor = label ** 2 - label + 1
    return tuple(
        int((prime * factor) % 255)
        for prime in (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)
    )

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    # Draws one bounding box (plus an optional filled caption) on image 'im'
    # in place using OpenCV and returns the same image object.
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    if line_thickness:
        thickness = line_thickness
    else:
        # No thickness given: derive one from the image size.
        thickness = round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1
    if not color:
        color = [random.randint(0, 255) for _ in range(3)]
    top_left = (int(x[0]), int(x[1]))
    bottom_right = (int(x[2]), int(x[3]))
    cv2.rectangle(im, top_left, bottom_right, color, thickness=thickness, lineType=cv2.LINE_AA)
    if not label:
        return im

    font_thickness = max(thickness - 1, 1)
    text_w, text_h = cv2.getTextSize(label, 0, fontScale=thickness / 3, thickness=font_thickness)[0]
    # The caption background sits directly above the box's top-left corner.
    caption_corner = top_left[0] + text_w, top_left[1] - text_h - 3
    cv2.rectangle(im, top_left, caption_corner, color, -1, cv2.LINE_AA)  # filled
    cv2.putText(im, label, (top_left[0], top_left[1] - 2), 0, thickness / 3, [225, 255, 255],
                thickness=font_thickness, lineType=cv2.LINE_AA)
    return im

## Functions to apply deepsort to helmet boxes.

Below are two functions. `deepsort_helmets` runs deepsort across a video. There is a lot of room for improving this function: the merging of deepsort labels onto the original helmet boxes is currently done in a very crude manner.

`add_deepsort_label_col` maps the most common label to each deepsort cluster.

In [None]:
class ValidRegionTracker:
    """Builds, frame by frame, a boolean mask of the image area to keep.

    Each call works on a 512x512 resize of the frame. Bright regions that
    touch the image border are grown with a watershed, the longest straight
    edges of those regions are drawn into a boundary image, and a second
    watershed grown from that boundary yields the excluded region. The
    excluded region of the previous call (eroded) is carried over as the
    starting boundary for the next call, so the instance must be fed the
    frames of one video in order.
    """

    def __init__(self):
        self.state_surface = None  # not read anywhere in this class
        self.boundary_flag = None  # excluded-region mask from the previous get_mask call
        self.input_shape = (512,512)
        # Structuring elements for the morphological operations.
        self.mask5 = np.ones((5,5), np.uint8)
        self.mask3 = np.ones((3,3), np.uint8)
        self.mask15 = np.ones((15,15), np.uint8)
        self.large_mask = np.ones((self.input_shape[1]//5,self.input_shape[0]//5), np.uint8)

    def detect(self, image_data):
        """Return a boolean keep-mask with the same height/width as ``image_data``.

        ``image_data`` is converted with ``COLOR_RGB2HLS`` inside ``get_mask``,
        so it is expected in RGB channel order.
        """
        og_shape = image_data.shape[:2][::-1]  # cv2 sizes are (width, height)
        mask = self.get_mask(cv2.resize(image_data, self.input_shape))
        # The interpolation flag must be passed by keyword: the third to fifth
        # positional slots of cv2.resize are dst, fx and fy, so a positional
        # flag never reaches `interpolation`.
        return cv2.resize(
                    mask.astype(np.uint8), og_shape,
                    interpolation=cv2.INTER_NEAREST) > 0

    def get_mask(self, image_data):
        """Compute the keep-mask for one ``input_shape``-sized RGB image.

        Updates ``self.boundary_flag`` and returns ``self.boundary_flag == 0``.
        """
        hls_img = cv2.cvtColor(image_data,  cv2.COLOR_RGB2HLS)
        # Channel 1 of the HLS image thresholded at 150: the "white" pixels.
        white_obj_mask = cv2.threshold(hls_img[:,:,1],150,1, cv2.THRESH_BINARY)[1]
        seeds = cv2.erode(
            cv2.morphologyEx(white_obj_mask.astype(np.uint8), cv2.MORPH_OPEN, self.mask5),
            self.mask3)
        # Only seeds within 3 pixels of the image border are kept.
        seeds[3:-3,3:-3] = 0
        sure_fg = seeds
        # Pixels above the looser threshold (100) that are not seeds are left
        # for the watershed to decide.
        unknown = cv2.subtract(cv2.threshold(hls_img[:,:,1],100,1,cv2.THRESH_BINARY)[1],sure_fg)
        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers+1
        markers[unknown==1] = 0
        img = cv2.cvtColor(white_obj_mask * 255, cv2.COLOR_GRAY2RGB)
        cv2.watershed(img, markers)
        # Markers > 1 are the regions grown from the border seeds.
        white_obj_on_im_edges = (markers>1).astype(np.uint8)
        white_obj_on_im_edges = cv2.morphologyEx(white_obj_on_im_edges, cv2.MORPH_CLOSE,self.mask15)
        white_obj_on_im_edges = cv2.morphologyEx(white_obj_on_im_edges, cv2.MORPH_OPEN, self.mask15)

        # Start from an empty boundary, or from the eroded result of the
        # previous call when there is one.
        boundary_flag = np.zeros(white_obj_on_im_edges.shape[:2])
        if self.boundary_flag is not None:
            boundary_flag = cv2.erode(self.boundary_flag, self.mask15)
        to_detect_edges = white_obj_on_im_edges
        edges = cv2.morphologyEx(
                cv2.Canny(to_detect_edges * 255,0,1,apertureSize = 3),cv2.MORPH_CLOSE,
                self.mask15)
        lines = cv2.HoughLinesP(
            edges,
            1, pi/180,100,maxLineGap=3
            )
        if lines is not None:
            # keep 4 largest
            lines_lengths = [ sqrt((x2-x1)**2 + (y2-y1)**2) for (x1,y1,x2,y2) in [l[0] for l in lines]]
            lines = lines[np.argsort(lines_lengths)[-4:],:,:]
            for line in lines:
                x1,y1,x2,y2 = line[0]
                cv2.line(boundary_flag,(x1,y1),(x2,y2),1,2)

        # Second watershed: grow the boundary lines over the bright border
        # regions found above.
        sure_fg = boundary_flag.astype(np.uint8)
        unknown = cv2.subtract(white_obj_on_im_edges,sure_fg)
        _, markers = cv2.connectedComponents(sure_fg)
        markers = markers+1
        markers[unknown==1] = 0
        img = cv2.cvtColor(
            white_obj_on_im_edges*255, cv2.COLOR_GRAY2RGB)
        cv2.watershed(
            img, markers)
        markers = (markers > 1).astype(np.uint8)
        self.boundary_flag = cv2.morphologyEx(
            markers, cv2.MORPH_CLOSE, self.large_mask)

        # Everything outside the excluded region is kept.
        return self.boundary_flag == 0

In [None]:
def deepsort_helmets(video_data,
                     video_dir,
                     deepsort_config='deepsort.yaml',
                     plot=False,
                     plot_frames=[]):
    """Run deepsort over the helmet boxes of one video.

    Parameters
    ----------
    video_data : pandas.DataFrame
        Helmet boxes of a single video with the columns ``frame``,
        ``video_frame``, ``left``, ``top``, ``width`` and ``height``.
    video_dir : str
        Directory holding ``<video>.mp4``, where ``<video>`` is made of the
        first three ``_``-separated fields of ``video_frame``.
    deepsort_config : str
        Path of the YAML file merged into the deepsort configuration.
    plot : bool
        Plot every frame whose number is greater than ``DEEPSORT.N_INIT``.
    plot_frames : sequence
        Frame numbers that are plotted regardless of ``plot``. Never mutated.

    Returns
    -------
    pandas.DataFrame
        ``video_data`` with ``x``/``y`` box centres and, for frames where
        deepsort returned tracks, ``top_deepsort`` and ``deepsort_cluster``
        taken from the track whose ``left`` is nearest.

    Raises
    ------
    ValueError
        If ``video_data`` has no rows.
    """
    if video_data.empty:
        raise ValueError('video_data is empty; there are no frames to track')

    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    tracker = ValidRegionTracker()
    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    # Derive the file name from the data itself rather than from a global
    # loop variable of the calling cell.
    video_name = '_'.join(video_data['video_frame'].iloc[0].split('_')[:3])
    # One capture for the whole video, released even if a frame fails.
    cap = cv2.VideoCapture(f'{video_dir}/{video_name}.mp4')
    ds = []
    try:
        # Grouping by the plain column name keeps `frame` a scalar (a
        # one-element list yields tuple keys on recent pandas versions).
        for frame, d in tqdm(video_data.groupby('frame'), total=video_data['frame'].nunique()):
            d = d.copy()  # do not write into a slice of video_data
            d['x'] = (d['left'] + round(d['width'] / 2))
            d['y'] = (d['top'] + round(d['height'] / 2))

            xywhs = d[['x','y','width','height']].values

            cap.set(cv2.CAP_PROP_POS_FRAMES, frame-1) # optional
            success, image = cap.read()
            image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
            # Black out everything outside the tracked valid region.
            mask = tracker.detect(image)
            image = image * mask[:, :, np.newaxis]
            confs = np.ones([len(d),])
            clss =  np.zeros([len(d),])
            outputs = deepsort.update(xywhs, confs, clss, image)

            if (plot and frame > cfg.DEEPSORT.N_INIT) or (frame in plot_frames):
                im = image  # something to show even when there are no tracks
                for output in outputs:
                    bboxes = output[0:4]
                    track_id = output[4]
                    label = f'{track_id}'
                    color = compute_color_for_id(track_id)
                    im = plot_one_box(bboxes, im, label=label, color=color, line_thickness=2)
                fig, ax = plt.subplots(figsize=(15, 10))
                video_frame = d['video_frame'].values[0]
                ax.set_title(f'Deepsort labels: {video_frame}')
                plt.imshow(im)
                plt.show()

            preds_df = pd.DataFrame(outputs, columns=['left','top','right','bottom','deepsort_cluster','class'])
            if len(preds_df) > 0:
                # TODO Fix this messy merge
                d[['left','top']] = d[['left','top']].astype(int)
                preds_df[['left','top']] = preds_df[['left','top']].astype(int)
                d = pd.merge_asof(d.sort_values(['left','top']),
                                  preds_df[['left','top','deepsort_cluster']] \
                                  .sort_values(['left','top']), on='left', suffixes=('','_deepsort'),
                                  direction='nearest')
            ds.append(d)
    finally:
        cap.release()
    dout = pd.concat(ds)
    return dout

def add_deepsort_label_col(out):
    """Attach each deepsort cluster's most frequent label to its rows.

    Rows with a null ``label`` are ignored when counting. Two columns are
    added to ``out`` in place: ``label_deepsort`` (the most frequent label of
    the row's ``deepsort_cluster``) and ``label_count_deepsort`` (how often
    that label occurs in the cluster). Rows whose cluster has no labelled
    member get NaN in both columns.

    Parameters
    ----------
    out : pandas.DataFrame
        Frame with ``label`` and ``deepsort_cluster`` columns.

    Returns
    -------
    pandas.DataFrame
        The same ``out`` object, with the two columns added.
    """
    # Find the top occurring label for each deepsort_cluster.
    # The count Series is renamed explicitly before reset_index, so the result
    # does not depend on the name pandas gives to value_counts() output
    # (which differs between pandas versions).
    cum = out[~out['label'].isnull()].groupby('deepsort_cluster')['label'].value_counts() \
        .rename('label_count') \
        .sort_values(ascending=False) \
        .reset_index() \
        .groupby('deepsort_cluster') \
        .first()

    sortlabel_map = cum['label'].to_dict()
    # Find the # of times that label appears for the deepsort_cluster.
    sortlabelcount_map = cum['label_count'].to_dict()

    out['label_deepsort'] = out['deepsort_cluster'].map(sortlabel_map)
    out['label_count_deepsort'] = out['deepsort_cluster'].map(sortlabelcount_map)
    return out

def score_vs_deepsort(myvideo, out, labels):
    """Print the video's score before and after deepsort relabelling.

    Both prediction sets are reduced to the first row of every
    (``video_frame``, ``label``) pair before being scored against the ground
    truth rows of ``<myvideo>.mp4``.
    """
    def _first_per_frame_label(preds):
        # One row per (video_frame, label) pair.
        return preds.groupby(['video_frame','label']).first().reset_index()

    myvideo_mp4 = myvideo + '.mp4'
    scorer = NFLAssignmentScorer(labels.query('video == @myvideo_mp4'))
    base_video_score = scorer.score(_first_per_frame_label(out))

    # Swap the original label for the deepsort one and score again.
    relabelled = out.drop(columns='label').rename(columns={'label_deepsort':'label'})
    deepsort_video_score = scorer.score(_first_per_frame_label(relabelled))
    print(f'{base_video_score:0.5f} before --> {deepsort_video_score:0.5f} deepsort')

## Apply Deepsort to Baseline Predictions

In [None]:
# Derive the video name and the frame number from `video_frame`.
submission_df['video'] = submission_df['video_frame'].str.split('_').str[:3].str.join('_')
submission_df['frame'] = submission_df['video_frame'].str.split('_').str[-1].astype('int')

# Debug runs read the training videos, submission runs the test videos.
video_dir = ('../input/nfl-health-and-safety-helmet-assignment/train/'
             if debug else
             '../input/nfl-health-and-safety-helmet-assignment/test/')

# Apply deepsort video by video; in debug mode a few frames are plotted and
# the score change is printed.
out_ds, outs = [], []
for myvideo, video_data in tqdm(submission_df.groupby('video'), total=submission_df['video'].nunique()):
    print(f'==== {myvideo} ====')
    out = deepsort_helmets(video_data, video_dir,
                           plot_frames=[10, 150, 250] if debug else [])
    out_ds.append(out)
    out = add_deepsort_label_col(out)
    outs.append(out)
    if debug:
        # Score
        score_vs_deepsort(myvideo, out, labels)
submission_deepsort = pd.concat(outs).copy()

# Check Submission & Save
Finally we will create a submission file and check that it passes the submission requirements.
The steps are:
1. Drop the `label` and replace with `label_deepsort` predictions.
2. Remove any duplicate labels within a single video/frame. This is required to meet the submission requirements.
3. Save the results.

In [None]:
ss = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/sample_submission.csv')
# Final checks: where deepsort supplied no label, fall back to the mapped one,
# then discard rows that still have none.
submission_deepsort = submission_deepsort.reset_index(drop=True)
submission_deepsort['label_deepsort'] = submission_deepsort['label_deepsort'].fillna(submission_deepsort['label'])
submission_deepsort = submission_deepsort[submission_deepsort['label_deepsort'].notna()]
# Replace `label` by the deepsort label and keep the sample submission's columns.
submission_deepsort = (
    submission_deepsort
    .drop(columns='label')
    .rename(columns={'label_deepsort': 'label'})
    [ss.columns]
)
# Keep only the first row of every (video_frame, label) pair
submission_deepsort = submission_deepsort.drop_duplicates(subset=['video_frame', 'label'])
check_submission(submission_deepsort)
submission_deepsort = (
    submission_deepsort
    .astype({'left': int, 'width': int, 'top': int, 'height': int})
    .dropna(axis=0)
)
submission_deepsort.to_csv('submission.csv', index=False)

# Display video showing predictions

Lastly, if we want to review our predictions we can create a video to review the predictions using the `video_with_predictions` function from the `helmet_assignment` helper package.

In [None]:
if debug:
    # Rebuild the video file name (first three '_' fields of video_frame + '.mp4')
    # so predictions can be matched against the `video` column of `labels`.
    submission_deepsort['video'] = submission_deepsort['video_frame'].str.split('_').str[:3].str.join('_') + '.mp4'
    debug_videos = submission_deepsort['video'].unique()
    # Only the ground truth of the videos we predicted on is scored.
    debug_labels = labels.query('video in @debug_videos')
    scorer = NFLAssignmentScorer(debug_labels)
    # NOTE(review): the return value is discarded; the call is presumably
    # needed to populate scorer.sub_labels used below - confirm in the helper.
    scorer.score(submission_deepsort)
    for video in debug_videos:
        # Create video showing predictions for one of the videos.
        video_out = video_with_predictions(
            f'../input/nfl-health-and-safety-helmet-assignment/train/{video}',
            scorer.sub_labels.fillna(0))

        frac = 0.60 # scaling factor for display
        display(Video(data=video_out,
                      embed=True,
                      height=int(720*frac),
                      width=int(1280*frac))
               )