In [1]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2

import os
from IPython.display import Video, display
import matplotlib.pyplot as plt
import scipy.optimize
import scipy.signal
from tqdm.auto import tqdm
import math
import random

import matplotlib.animation as animation

import sys
sys.path.append('../input/easydict-master/easydict-master/')
# https://github.com/mikel-brostrom/Yolov5_DeepSort_Pytorch
sys.path.append('../input/yolov5-deepsort-pytorch/Yolov5_DeepSort_Pytorch-master/Yolov5_DeepSort_Pytorch-master/deep_sort_pytorch/')

from deep_sort.deep_sort import DeepSort
from utils.parser import get_config

from scipy.interpolate import interp1d
from statsmodels.nonparametric.smoothers_lowess import lowess

In [2]:
train = False
if train:
    data_baseline_helmets = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/train_baseline_helmets.csv')
    data_player_tracking = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/train_player_tracking.csv')
    data_labels = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/train_labels.csv')
    video_dir = '../input/nfl-health-and-safety-helmet-assignment/train/'
else:
    data_baseline_helmets = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/test_baseline_helmets.csv')
    data_player_tracking = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/test_player_tracking.csv')
    video_dir = '../input/nfl-health-and-safety-helmet-assignment/test/'


In [3]:
# Preprocessing 

def process_baseline(baseline_df):
    baseline_df['gameKey'] = baseline_df['video_frame'].apply(lambda x: int(x.split('_')[0]))
    baseline_df['playID'] = baseline_df['video_frame'].apply(lambda x: int(x.split('_')[1]))
    baseline_df['view'] = baseline_df['video_frame'].apply(lambda x: x.split('_')[2])
    baseline_df['frame'] = baseline_df['video_frame'].apply(lambda x: int(x.split('_')[3]))
    baseline_df['video'] = baseline_df['video_frame'].str.split('_').str[:3].str.join('_')
    baseline_df['x'] = baseline_df.apply(lambda x: x.left + x.width/2, axis = 1)
    baseline_df['y'] = baseline_df.apply(lambda x: x.top + x.height/2, axis = 1)
    baseline_df['label'] = 'UNK'
    return baseline_df

data_baseline_helmets = process_baseline(data_baseline_helmets)

def process_labels(labels_df):
    labels_df['x'] = labels_df.apply(lambda x: x.left + x.width/2, axis = 1)
    labels_df['y'] = labels_df.apply(lambda x: x.top + x.height/2, axis = 1)
    return labels_df

if train:
    data_labels = process_labels(data_labels)

# Copied from https://www.kaggle.com/go5kuramubon/merge-label-and-tracking-data

def add_track_features(tracks, fps=59.94, snap_frame=10):
    """
    Add column features helpful for syncing with video data.
    """
    tracks = tracks.copy()
    tracks["game_play"] = (
        tracks["gameKey"].astype("str")
        + "_"
        + tracks["playID"].astype("str").str.zfill(6)
    )
    tracks["time"] = pd.to_datetime(tracks["time"])
    snap_dict = (
        tracks.query('event == "ball_snap"')
        .groupby("game_play")["time"]
        .first()
        .to_dict()
    )
    tracks["snap"] = tracks["game_play"].map(snap_dict)
    tracks["isSnap"] = tracks["snap"] == tracks["time"]
    tracks["team"] = tracks["player"].str[0].replace("H", "Home").replace("V", "Away")
    tracks["snap_offset"] = (tracks["time"] - tracks["snap"]).astype(
        "timedelta64[ms]"
    ) / 1_000
    # Estimated video frame
    tracks["est_frame"] = (
        ((tracks["snap_offset"] * fps) + snap_frame).round().astype("int")
    )
    return tracks

data_player_tracking = add_track_features(data_player_tracking)

In [4]:
def distance(p1, p2):
    x1, y1 = p1
    x2, y2 = p2
    return np.sqrt((x1-x2)**2 + (y1-y2)**2)

def distance2(p1, p2):
    x1, y1 = p1
    x2, y2 = p2
    return (x1-x2)**2 + (y1-y2)**2

In [5]:
def deepsort_helmets(video_data, video_dir, deepsort_config='deepsort.yaml'):
    
    # Setup Deepsort
    cfg = get_config()
    cfg.merge_from_file(deepsort_config)    
    deepsort = DeepSort(cfg.DEEPSORT.REID_CKPT,
                        max_dist=cfg.DEEPSORT.MAX_DIST,
                        min_confidence=cfg.DEEPSORT.MIN_CONFIDENCE,
                        nms_max_overlap=cfg.DEEPSORT.NMS_MAX_OVERLAP,
                        max_iou_distance=cfg.DEEPSORT.MAX_IOU_DISTANCE,
                        max_age=cfg.DEEPSORT.MAX_AGE,
                        n_init=cfg.DEEPSORT.N_INIT,
                        nn_budget=cfg.DEEPSORT.NN_BUDGET,
                        use_cuda=True)
    
    # Run through frames.
    video_data = video_data.sort_values('frame').reset_index(drop=True)
    ds = []
    
    myvideo = video_data.video.unique()[0]
    cap = cv2.VideoCapture(f'{video_dir}/{myvideo}.mp4')
    
    for frame, d in tqdm(video_data.groupby(['frame']), total=video_data['frame'].nunique()):
        videoframe = d.video_frame.unique()[0]
        xywhs = d[['x','y','width','height']].values

        success, image = cap.read()
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        confs = np.ones([len(d),])
        clss =  np.zeros([len(d),])
        try:
            outputs = deepsort.update(xywhs, confs, clss, image)
        except Exception as e:
            outputs = []
            print(f'Error {e} : Skipped')

        preds_df = pd.DataFrame(outputs, columns=['left','top','right','bottom','cluster','class'])
        
        d = deepsort_merge(preds_df, d)
        ds.append(d)
        
    dout = pd.concat(ds)
    return dout


def deepsort_merge(deepsort_out, video_data):
    
    if len(deepsort_out) == 0:
        video_data.loc[:,'cluster'] = 'UNK'
        return video_data
    
    forfit = 10
    deepsort_pts   = list(zip(deepsort_out['left'], deepsort_out['top']))
    video_pts      = list(zip(video_data['left'], video_data['top']))

    # Compute matching with Hungarian algorithm in both sides
    match_cost  = np.array([ [distance(pt1, pt2) for pt1 in deepsort_pts] for pt2 in video_pts ])
    trash_cost  = np.array([ [forfit for _ in deepsort_pts] for _ in deepsort_pts ])
    
    cost_matrix = np.concatenate([match_cost, trash_cost], axis = 0) 
    idxs1, idxs2 = scipy.optimize.linear_sum_assignment(cost_matrix)
    try: 
        idxs1, idxs2 = np.array([ [idx1, idx2] for idx1, idx2 in zip(idxs1, idxs2) if idx1 < len(video_pts) ]).transpose()
    except:
        idxs1, idxs2 = [], []
    
    labels = deepsort_out.iloc[idxs2]['cluster'].copy()
    
    video_data.loc[:,'cluster'] = 'UNK'

    video_data.iloc[idxs1, video_data.columns.get_loc('cluster') ] = labels

    return video_data

In [6]:
%%writefile deepsort.yaml

DEEPSORT:
  REID_CKPT: "../input/yolov5-deepsort-pytorch/ckpt.t7"
  MAX_DIST: 0.35                          # Maximum cosine distance thold for similarity purpose
  MIN_CONFIDENCE: 0.4                   # Min confidence for entry bboxes 
  NMS_MAX_OVERLAP: 1                     # Remove boxes with overlap !! We don't want NMS, NMS already done and could destroy "Collision players"
  MAX_IOU_DISTANCE: 0.5                  # Gating IOU threshold. Associations with cost larger than this value are disregarded.
  MAX_AGE: 30                            # Maximum number of misses before a track is deleted. --> We prefer IDswitch rather that incorect re-ID
  N_INIT: 0                              # Number of consecutive detections before the track is confirmed. (0.1 s) The track state is set to `Deleted` if a miss occurs within the first `n_init` frames.
  NN_BUDGET: 100                         # If not None, fix samples per class to at most this number. Removes the oldest samples when the budget is reached.

Writing deepsort.yaml


In [7]:
"""
Helper functions from yolov5 to plot deepsort labels.
"""

def compute_color_for_id(label):
    """
    Simple function that adds fixed color depending on the id
    """
    palette = (2 ** 11 - 1, 2 ** 15 - 1, 2 ** 20 - 1)

    color = [int((p * (label ** 2 - label + 1)) % 255) for p in palette]
    return tuple(color)

def plot_one_box(x, im, color=None, label=None, line_thickness=3):
    # Plots one bounding box on image 'im' using OpenCV
    assert im.data.contiguous, 'Image not contiguous. Apply np.ascontiguousarray(im) to plot_on_box() input image.'
    tl = line_thickness or round(0.002 * (im.shape[0] + im.shape[1]) / 2) + 1  # line/font thickness
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(x[0]), int(x[1])), (int(x[2]), int(x[3]))
    cv2.rectangle(im, c1, c2, color, thickness=tl, lineType=cv2.LINE_AA)
    if label: 
        tf = max(tl - 1, 1)  # font thickness
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3
        cv2.rectangle(im, c1, c2, color, -1, cv2.LINE_AA)  # filled
        cv2.putText(im, label, (c1[0], c1[1] - 2), 0, tl / 3, [225, 255, 255], thickness=tf, lineType=cv2.LINE_AA)
    return im

In [8]:
dbh_high = data_baseline_helmets[data_baseline_helmets.conf > 0.4]
dbh_low  = data_baseline_helmets[data_baseline_helmets.conf <= 0.4]
dbh_low['cluster'] = 'UNK'

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


In [9]:
# Shortcut 
# data_baseline_helmets = pd.read_csv('../input/baselinehelmetswithdeepsort/dbh_test_deepsortclustered_conf40.csv')

In [10]:
if train:
    video_dir = '../input/nfl-health-and-safety-helmet-assignment/train/'
else:
    video_dir = '../input/nfl-health-and-safety-helmet-assignment/test/'

outs = []
for myvideo, video_data in tqdm(dbh_high.groupby('video'), total=dbh_high['video'].nunique()):
    print(f'==== {myvideo} ====')
    out = deepsort_helmets(video_data, video_dir)        
    outs.append(out)
data_baseline_helmets = pd.concat(outs).copy()
data_baseline_helmets = pd.concat([data_baseline_helmets, dbh_low]).sort_values(['video','frame','conf']).reset_index(drop = True)

  0%|          | 0/6 [00:00<?, ?it/s]

==== 57906_000718_Endzone ====


  0%|          | 0/434 [00:00<?, ?it/s]

==== 57906_000718_Sideline ====


  0%|          | 0/440 [00:00<?, ?it/s]

==== 57995_000109_Endzone ====


  0%|          | 0/529 [00:00<?, ?it/s]

==== 57995_000109_Sideline ====


  0%|          | 0/529 [00:00<?, ?it/s]

==== 58102_002798_Endzone ====


  0%|          | 0/366 [00:00<?, ?it/s]

==== 58102_002798_Sideline ====


  0%|          | 0/366 [00:00<?, ?it/s]

In [11]:
cur_id  = 100000

def id_gen():
    global cur_id
    cur_id = cur_id + 1 
    return cur_id

data_baseline_helmets.cluster = data_baseline_helmets.cluster.apply(lambda x: id_gen() if x=='UNK' else x)

In [12]:
def get_nearest_frame(frame, tracked_helmets: pd.DataFrame):
    """ Return the nearest estimated frame in the tracking data """
    
    available_frames = tracked_helmets.est_frame.unique()
    shift = min(abs(available_frames - frame))
    plus_frame = frame + shift
    minus_frmae = frame - shift
    nearest_frame = plus_frame if plus_frame in available_frames else minus_frmae
    return nearest_frame

def hungarian_matching(baseline_helmets: pd.DataFrame, tracked_helmets : pd.DataFrame, forfit = 10000):
    
    track_pts   = list(zip(tracked_helmets['xc'], tracked_helmets['yc']))
    img_pts     = list(zip(baseline_helmets['x'], baseline_helmets['y']))
    confidence  = baseline_helmets['conf']

    # Compute matching with Hungarian algorithm in both sides
    match_cost  = [ [c*distance2(pt1, pt2) for pt1 in track_pts] for (pt2,c) in zip(img_pts, confidence) ]
    trash_cost  = [ [c*forfit for _ in img_pts] for c in confidence ]
    cost_matrix = np.concatenate([match_cost, trash_cost], axis = 1) 
    idxs1, idxs2 = scipy.optimize.linear_sum_assignment(cost_matrix)
    cost  = cost_matrix[idxs1, idxs2].sum()/len(idxs1)
    try: 
        idxs1, idxs2 = np.array([ [idx1, idx2] for idx1, idx2 in zip(idxs1, idxs2) if idx2 < len(track_pts) ]).transpose()
    except:
        idxs1, idxs2 = [], []
    labels = tracked_helmets.iloc[idxs2].player.tolist().copy()
    baseline_helmets.loc[:,'label'] = 'UNK'
    baseline_helmets.iloc[idxs1, baseline_helmets.columns.get_loc('label')] = labels

    return baseline_helmets, cost

def apply_matrix(M, tracked_helmets):
    all_src = np.float32([[tracked_helmets['x'] , tracked_helmets['y']]] ).transpose().reshape(-1,1,2)
    tr_src = cv2.perspectiveTransform(all_src, M).transpose()

    tracked_helmets.loc[:,'xc'] = tr_src[0,0]
    tracked_helmets.loc[:,'yc'] = tr_src[1,0]
    return tracked_helmets

def find_CR(baseline_helmets: pd.DataFrame, tracked_helmets: pd.DataFrame, adapt_to_view = None, use_confidence = True, flip = False):
    """ 
    Find Center Reduce matrix 

    
    If adapt_to_view = Endzone  switch x and y coordinates 
    If use_confidence, weight normalisation with confidence score 
    
    """
    bh = baseline_helmets.copy()
    th = tracked_helmets.copy()
    
    # Center matrix : Align centroid
    if use_confidence:
        bh_centroid = np.array([np.average(bh.x, weights = bh.conf), np.average(bh.y, weights = bh.conf)])
    else:
        bh_centroid = np.array([np.average(bh.x), np.average(bh.y)])
    
    th_centroid = np.array([np.average(th.x), np.average(th.y)])
    
    C1 = np.float32([[1 , 0,  -th_centroid[0]],
                     [0 , 1,  -th_centroid[1]],
                     [0 , 0,         1      ]])
    
    C2 = np.float32([[1 , 0,  bh_centroid[0]],
                     [0 , 1,  bh_centroid[1]],
                     [0 , 0,         1      ]])
    
    # Reduce matrix: Align lengths 
    
    bh.loc[:,'d'] = bh.apply(lambda x: math.sqrt( (x.x - bh_centroid[0])**2
                                           +(x.y - bh_centroid[1])**2 ) , axis = 1)
    
    if use_confidence:
        bh_std = np.average(bh.d, weights = bh.conf)
    else:
        bh_std = np.average(bh.d)
                         
    
    th.loc[:,'d'] = th.apply(lambda x: math.sqrt( (x.x - th_centroid[0])**2
                                           +(x.y - th_centroid[1])**2 ) , axis = 1)

    th_std = np.average(th.d)
    
    ratio = bh_std/th_std
    
    R = np.float32([[ratio,   0  ,   0],
                    [0    , ratio,   0],
                    [0    ,   0  ,   1]])
    
    # Adaptation matrix
    
    if adapt_to_view == 'Endzone':
        """ Switch x and y because for Endzone images, x coordinate almost correspond to y coordinate of the stadium (dillate on x, squeeze on y)"""

        A = np.float32([[0    ,   1.4 ,  0],
                        [0.7  ,   0   ,  0],
                        [0    ,   0   ,  1]])
        
    if adapt_to_view == 'Sideline':
        
        A = np.float32([[1.4  ,   0    ,  0],
                        [0    ,   -0.7 ,  0],
                        [0    ,   0    ,  1]])
    
    F = np.float32([[-1  ,   0  ,  0],
                    [0   ,  -1  ,  0],
                    [0   ,   0  ,  1]])
    
    if not flip:
        CR = C2  @ A @ R @ C1
    else:
        CR = C2 @ F @ A @ R @ C1
    return CR

def find_M(baseline_helmets: pd.DataFrame, tracked_helmets: pd.DataFrame):
    conf_th = 0.7
    left   = baseline_helmets[['x' ,'y' , 'conf', 'label']].set_index('label')
    right  = tracked_helmets[['x','y','player']].set_index('player')
    merged = left.join(right, how = 'inner', rsuffix = '_r')
    merged = merged[merged['conf'] > conf_th]
    src = np.float32([ merged['x_r'] , merged['y_r'] ]).transpose().reshape(-1,1,2)
    dst = np.float32([ merged['x'] , merged['y'] ]).transpose().reshape(-1,1,2)
    if len(src) == 3:
        M = cv2.getAffineTransform(src, dst)
        M = np.vstack((M,[0, 0, 1]))
    elif len(src) < 3:
        raise Exception('not enough input pts for homography mapping')
    else:
        M, _ = cv2.findHomography(src, dst)
    return M

In [13]:
class Frame(object):
    def __init__(self, bh, th, view, M = 'identity', cost = np.inf):
        self.bh = bh
        
        # Transformation matrix from x,y to xc, yc
        if M == 'identity':
            self.matrix = np.float32([[1  ,   0  ,  0],
                                      [0  ,   1  ,  0],
                                      [0  ,   0  ,  1]])
        else:
            self.matrix = M
        
        # Add the column xc, yc to tracked helmets
        self.th          = apply_matrix(self.matrix, th)
        
        # Initialise best arguments
        self.view        = view
        self.cost        = cost
        self.best_matrix = self.matrix
        
    def projection(self, flip = False):
        """ Center reduce matrix """
        
        CR = find_CR(baseline_helmets = self.bh.copy(), tracked_helmets = self.th.copy(), adapt_to_view = self.view, flip = flip)
        self.matrix = CR
        self.th = apply_matrix(CR, self.th.copy())
        
    def homography(self, M = None):
        """ Apply homography """
        if M is None:
            try:
                M = find_M(self.bh.copy(), self.th.copy())
            except Exception as e:
                return
        self.matrix = M
        self.th = apply_matrix(M, self.th.copy())

    def match(self):
        bh, cost = hungarian_matching(self.bh.copy(), self.th.copy())
        if cost < self.cost:
            bh['map_cost'] = cost
            self.cost = cost
            self.bh = bh
            self.best_matrix = self.matrix
    
    # For visualisation 
    def get_bh_xy(self, conf_th = 0):
        bh = self.bh.query('conf > @conf_th')
        return bh.x.to_list(), bh.y.to_list()
    
    def get_th_xy(self):
        return self.th.xc.to_list(), self.th.yc.to_list()
    

In [14]:
import itertools
import operator

def most_common(L):
  # get an iterable of (item, iterable) pairs
    SL = sorted((x, i) for i, x in enumerate(L))
  # print 'SL:', SL
    groups = itertools.groupby(SL, key=operator.itemgetter(0))
    # auxiliary function to get "quality" for an item
    def _auxfun(g):
        item, iterable = g
        count = 0
        min_index = len(L)
        for _, where in iterable:
            count += 1
            min_index = min(min_index, where)
        # print 'item %r, count %r, minind %r' % (item, count, min_index)
        return count, -min_index
    # pick the highest-count/earliest item
    return max(groups, key=_auxfun)[0]

def convolution(v, r):
    frames = v['frame'].to_list()
    labels = v['label'].to_list()
    mc_labels = []
    scores = []
    for frame in frames:
        min_frame = frame - r
        max_frame = frame + r
        available_labels = [label for (frame, label) in zip(frames, labels) if min_frame <= frame <= max_frame ]
        mc = most_common(available_labels)
        score = available_labels.count(mc)/len(available_labels)
        mc_labels.append(mc)
        scores.append(score)
    v['mc_label'] = mc_labels
    v['mc_score'] = scores
    return v

r = 80

In [15]:
dpt = data_player_tracking.groupby(['playID'])
dbh = data_baseline_helmets.groupby(['playID', 'view'])

class MapNtrack(object):
    def __init__(self, playID, view):
        # Get a copy of each helmets for this video
        tracked_helmets  = dpt.get_group(playID).reset_index(drop = True).copy()
        baseline_helmets = dbh.get_group((playID, view)).reset_index(drop = True).copy()
        self.baseline_helmets = baseline_helmets
        self.tracked_helmets = tracked_helmets
        self.view = view
        
        # Group on frame
        thg = tracked_helmets.groupby('est_frame')
        bhg = baseline_helmets.groupby('frame')
        
        # Create a frame class for each frame
        thg = tracked_helmets.groupby('est_frame')
        bhg = baseline_helmets.groupby('frame')
        self.frames    = []
        for frameID in self.baseline_helmets.frame.unique():
            nearest_frameID = get_nearest_frame(frameID, tracked_helmets)
            th = thg.get_group(nearest_frameID).copy()
            bh = bhg.get_group(frameID).copy()
            self.frames.append(Frame(bh, th, view))
        self.frames_mx = []
        
        
    def animate(self):
        """ Animate points """
        # Création de la figure et de l'axe
        fig, ax = plt.subplots(figsize=(15,15))

        # Création de la ligne qui sera mise à jour au fur et à mesure
        point_th, = ax.plot([], [], ls="none", marker="o", color = 'blue')
        point_bh, = ax.plot([], [], ls="none", marker="o", color = 'orange')
        links     = [ax.plot([], [], color = 'green') for _ in range(22)]
        # Création de la function qui sera appelée à "chaque nouvelle image"
        def anim(k):
            i = min(k, len(self.frames))
            frame = self.frames[k]
            
            x,y = frame.get_bh_xy(conf_th = 0.6)
            point_bh.set_data(x, y)
            
            x,y = frame.get_th_xy()
            point_th.set_data(x, y) 
            
            if 'label' in frame.bh.columns:
                left   = frame.bh[['x' ,'y' ,'label']].set_index('label')
                right  = frame.th[['xc','yc','player']].set_index('player')
                merged = left.join(right, how = 'inner')
                pts    = [([row.x, row.xc], [row.y, row.yc]) for _, row in merged.iterrows()]
                
                for link in links:
                    link[0].set_data([], [])
                for link, pt in zip(links, pts):
                    link[0].set_data(pt[0], pt[1])
            
            return point_bh, point_th
        
        #Gestion des limites de la fenêtre
        ax.set_xlim([-10, 1400])
        ax.set_ylim([-10, 800])
        
        # Génération de l'animation, frames précise les arguments numérique reçus par func (ici animate), 
        ani = animation.FuncAnimation(fig=fig, func=anim, frames=range(len(self.frames)), interval=50, blit=True)
        return ani
    
    # Map procedures
    def update_map(self):
        self.baseline_helmets = pd.concat([frame.bh for frame in mnt.frames])
        self.frames_mx = [frame.best_matrix for frame in mnt.frames]
    
    def projection(self, flip = False):
        """ Compute transformation on tracking data for each frame """
        for frame in self.frames:
            frame.projection(flip = flip)
    
    def homography(self):
        """ Compute transformation on tracking data for each frame """
        for frame in self.frames:
            frame.homography()
            
    def match(self):
        for frame in self.frames:
            frame.match()
        
    def foward_repair(self):
        # Apply previous homography on current frame 
        cur_M    = self.frames[0].matrix
        for frame in self.frames:
            frame.homography(cur_M)
            frame.match()
            frame.homography()
            frame.match()
            cur_M = frame.best_matrix
    
    def backward_repair(self):
        # Apply next homography on current frame
        cur_M    = self.frames[-1].matrix
        for frame in reversed(self.frames):
            frame.homography(cur_M)
            frame.match()
            frame.homography()
            frame.match()
            cur_M = frame.best_matrix
        
    # Track procedures # Track on the top occuring label for each cluster
    def cluster_count_track(self):
        # Find the top occuring label for each cluster
        sortlabel_map = self.baseline_helmets.groupby('cluster')['label'].value_counts() \
            .sort_values(ascending=False).to_frame() \
            .rename(columns={'label':'label_count'}) \
            .reset_index() \
            .groupby(['cluster']) \
            .first()['label'].to_dict()
        
        # Find the # of times that label appears for the deepsort_cluster.
        sortlabelcount_map = self.baseline_helmets.groupby('cluster')['label'].value_counts() \
            .sort_values(ascending=False).to_frame() \
            .rename(columns={'label':'label_count'}) \
            .reset_index() \
            .groupby(['cluster']) \
            .first()['label_count'].to_dict()
        
        # Find the total # of label for each deepsort_cluster.
        sortlabeltotal_map = self.baseline_helmets.groupby('cluster')['label'].value_counts() \
            .sort_values(ascending=False).to_frame() \
            .rename(columns={'label':'label_count'}) \
            .reset_index() \
            .groupby(['cluster']) \
            .sum()['label_count'].to_dict()
        
        sortlabelconf_map = {k:(sortlabelcount_map[k]/sortlabeltotal_map[k]) for k in sortlabeltotal_map}

        self.baseline_helmets['label_cluster'] = self.baseline_helmets['cluster'].map(sortlabel_map)
        self.baseline_helmets['cluster_count'] = self.baseline_helmets['cluster'].map(sortlabelcount_map)
        self.baseline_helmets['cluster_conf'] = self.baseline_helmets['cluster'].map(sortlabelconf_map)
        
        # Merge baseline_helmets with the tracking clusters infos
        for _, example in self.baseline_helmets.groupby('video_frame'):
            example['cluster_score'] = example.apply(lambda x: x.cluster_count*x.cluster_conf**3, axis = 1)
            example.sort_values('cluster_score', ascending = False, inplace = True)
            assigned = set()
            for idx, row in example.iterrows():
                if row.label_cluster not in assigned or row.label_cluster == 'UNK':
                    assigned.add(row.label_cluster)
                    mnt.baseline_helmets.loc[idx, 'label'] = row.label_cluster
                elif row.label not in assigned:
                    assigned.add(row.label)
                    mnt.baseline_helmets.loc[idx, 'label'] = row.label
                else:
                    mnt.baseline_helmets.loc[idx, 'label'] = 'UNK'
                    
    def smooth_cluster_track(self):
        
        self.baseline_helmets = self.baseline_helmets.groupby('cluster').apply(convolution, r)
        # Merge baseline_helmets with the tracking clusters infos
        for _, example in self.baseline_helmets.groupby('video_frame'):
            example.sort_values('mc_score', ascending = False, inplace = True)
            assigned = set()
            for idx, row in example.iterrows():
                if row.mc_label not in assigned or row.mc_label == 'UNK':
                    assigned.add(row.mc_label)
                    mnt.baseline_helmets.loc[idx, 'label'] = row.mc_label
                elif row.label not in assigned:
                    assigned.add(row.label)
                    mnt.baseline_helmets.loc[idx, 'label'] = row.label
                else:
                    mnt.baseline_helmets.loc[idx, 'label'] = 'UNK'
    
    def update_track(self):
        # Group on frame
        thg = self.tracked_helmets.groupby('est_frame')
        bhg = self.baseline_helmets.groupby('frame')
        
        self.frames    = []
        for frameID in self.baseline_helmets.frame.unique():
            nearest_frameID = get_nearest_frame(frameID, self.tracked_helmets)
            th = thg.get_group(nearest_frameID).copy()
            bh = bhg.get_group(frameID).copy()
            self.frames.append(Frame(bh, th, self.view))
            

In [16]:
mnts = []
for playID, view in tqdm(dbh.groups.keys()):
    mnt = MapNtrack(playID, view)
    mnt.projection(flip = True)
    mnt.match()
    mnt.projection(flip = False)
    mnt.match()
    mnt.homography()
    mnt.match()
    mnt.foward_repair()
    mnt.backward_repair()
    mnt.update_map()
    mnt.cluster_count_track()
    mnt.update_track()
    mnt.homography()
    mnts.append(mnt)

  0%|          | 0/6 [00:00<?, ?it/s]

In [17]:
ss = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/sample_submission.csv')
submission = pd.concat([mnt.baseline_helmets for mnt in mnts])[ss.columns]

In [18]:
submission = submission[submission['label'] != 'UNK']
submission.to_csv('submission.csv', index = False)

In [19]:
def post_process(video_player):
    video_player["frame"] = video_player['video_frame'].apply(lambda x: int(x.split('_')[-1]))
    known = video_player.frame.unique()
    video = video_player.video.unique()[0]
    label = video_player.label.unique()[0]
    missing_frames = [frame for frame in range(video_player.frame.min(), video_player.frame.max()) if frame not in known]
    video_frame = [video.split('.')[0] + "_" + str(frame) for frame in missing_frames]
    missing_df = pd.DataFrame({'video_frame': video_frame, 'label':[label]*len(missing_frames), 'video':[video]*len(missing_frames), 'frame':missing_frames})
    video_player = pd.concat([missing_df, video_player], ignore_index = True)
    video_player.sort_values('frame', inplace= True)
    video_player = video_player.reset_index(drop = True)
    for feat in ['top','left', 'width', 'height']:
        r = lowess(video_player[feat], np.arange(video_player.frame.min(), video_player.frame.max()+1), frac=0.15)
        try:
            video_player[feat+'_reg'] = scipy.interpolate.interp1d(x = r[:, 0], y = r[:, 1])(video_player.frame)
        except Exception as e:
            return video_player
        video_player[feat] = video_player[feat].fillna(video_player[feat+'_reg'])
    return video_player

submission['video'] = submission['video_frame'].str.split('_').str[:3].str.join('_') + '.mp4'
sub2 = submission.groupby(['video', 'label']).apply(post_process)

submission2 = sub2[ss.columns].reset_index(drop = True)

submission2.to_csv('submission.csv', index = False)

In [20]:
!pip install ../input/helmet-assignment-helpers/helmet-assignment-main/ > /dev/null 2>&1
from helmet_assignment.score import NFLAssignmentScorer, check_submission

if not train:
    data_labels = pd.read_csv('../input/nfl-health-and-safety-helmet-assignment/train_labels.csv')
    data_labels = data_labels[data_labels['playID'].isin([718, 109, 2798])].copy()

scorer = NFLAssignmentScorer(data_labels)
scorer.score(submission)

0.7268619197511297

In [21]:
submission2[["left", "top", "height", "width"]] = submission2[["left", "top", "height", "width"]].astype('int')
submission2.drop_duplicates(subset = ['video_frame', 'left', 'height', 'width', 'top'], inplace=True) 
c1 = (submission2.left >= 0)
c2 = (submission2.top >= 0)
c3 = (submission2.left + submission2.width  <= 1280)
c4 = (submission2.top + submission2.height  <= 720)
c5 = (submission2.height >= 0)
c6 = (submission2.width >= 0)
submission2 = submission2[c1 & c2 & c3 & c4 & c5 & c6]
scorer.score(submission2)

0.7366442319742283

In [22]:
submission2.to_csv("submission.csv", index=False) 

In [23]:
import subprocess
def video_with_predictions(
    video_path: str, sub_labels: pd.DataFrame, max_frame=9999, freeze_impacts=True,
    verbose=True
) -> str:
    """
    Annotates a video with both the baseline model boxes and ground truth boxes.
    """
    VIDEO_CODEC = "MP4V"
    HELMET_COLOR = (0, 0, 0)  # Black
    
    INCORRECT_IMPACT_COLOR = (0, 0, 255)  # Red
    CORRECT_IMPACT_COLOR = (51, 255, 255)  # Yellow

    CORRECT_COLOR = (0, 255, 0)  # Green
    INCORRECT_COLOR = (255, 0, 128)  # Rose
    WHITE = (255, 255, 255)  # White 

    video_name = os.path.basename(video_path).replace(".mp4", "")
    if verbose:
        print(f"Running for {video_name}")
    sub_labels = sub_labels.copy()
    # Add frame and video columns:
    sub_labels['video'] = sub_labels['video_frame'].str.split('_').str[:3].str.join('_')
    sub_labels['frame'] = sub_labels['video_frame'].str.split('_').str[-1].astype('int')

    vidcap = cv2.VideoCapture(video_path)
    fps = vidcap.get(cv2.CAP_PROP_FPS)/5
    width = int(vidcap.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(vidcap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    output_path = "pred_" + video_name + ".mp4"
    tmp_output_path = "tmp_" + output_path
    output_video = cv2.VideoWriter(
        tmp_output_path, cv2.VideoWriter_fourcc(*VIDEO_CODEC), fps, (width, height)
    )
    frame = 0
    while True:
        it_worked, img = vidcap.read()
        if not it_worked:
            break
        frame += 1

        img_name = f"{frame} : {video_name}"
        cv2.putText(
            img,
            img_name,
            (5, 20),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            WHITE,
            thickness=1,
        )
        
        cv2.putText(
            img,
            str(frame),
            (1230, 710),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.8,
            WHITE,
            thickness=1,
        )
        # Get stats about current state in frame
        stats = sub_labels.query('video == @video and frame <= @frame')
        correct_nonimp = len(stats.query('weight == 1 and isCorrect'))
        total_nonimp = len(stats.query('weight == 1'))
        correct_imp = len(stats.query('weight > 1 and isCorrect'))
        total_imp = len(stats.query('weight > 1'))
        correct_weighted = correct_nonimp + (correct_imp * 1000)
        total_weighted = total_nonimp + (total_imp * 1000)
        acc_imp = correct_imp/np.max([1, total_imp])
        acc_nonimp = correct_nonimp/np.max([1, total_nonimp])
        acc_weighted = correct_weighted/np.max([1, total_weighted])
        cv2.putText(
            img,
            f'{acc_imp:0.4f} Impact Boxes Accuracy :      ({correct_imp}/{total_imp})',
            (5, 40),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            WHITE,
            thickness=1,
        )

        cv2.putText(
            img,
            f'{acc_nonimp:0.4f} Non-Impact Boxes Accuracy: ({correct_nonimp}/{total_nonimp})',
            (5, 60),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            WHITE,
            thickness=1,
        )
        
        cv2.putText(
            img,
            f'{acc_weighted:0.4f} Weighted Accuracy:     ({correct_weighted}/{total_weighted})',
            (5, 80),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            WHITE,
            thickness=1,
        )

        
        video_frame = f'{video_name}_{frame}' 
        boxes = sub_labels.query("video_frame == @video_frame")
        if len(boxes) == 0:
            return
        for box in boxes.itertuples(index=False):
            if box.isCorrect and box.weight == 1:
                # CORRECT
                box_color = CORRECT_COLOR
                gt_color = CORRECT_COLOR
                pred_thickness = 1
            elif box.isCorrect and box.weight > 1:
                box_color = CORRECT_IMPACT_COLOR
                gt_color = CORRECT_IMPACT_COLOR
                pred_thickness = 3
            elif (box.isCorrect == False) and (box.weight > 1):
                box_color = INCORRECT_IMPACT_COLOR
                gt_color = INCORRECT_IMPACT_COLOR
                pred_thickness = 3
            elif (box.isCorrect == False) and (box.weight == 1):                
                box_color = INCORRECT_COLOR
                gt_color = HELMET_COLOR
                pred_thickness = 1

            # Ground Truth Box
            cv2.rectangle(
                img,
                (box.left_gt, box.top_gt),
                (box.left_gt + box.width_gt, box.top_gt + box.height_gt),
                gt_color,
                thickness=1,
            )
            # Prediction Box
            cv2.rectangle(
                img,
                (int(box.left_sub), int(box.top_sub)),
                (int(box.left_sub + box.width_sub), int(box.top_sub + box.height_sub)),
                box_color,
                thickness=pred_thickness,
            )

            cv2.putText(
                img,
                f"{box.label_gt}:{box.label_sub}",
                (max(0, box.left_gt - box.width_gt), max(0, box.top_gt - 5)),
                cv2.FONT_HERSHEY_SIMPLEX,
                0.5,
                WHITE,
                thickness=1,
            )

        if boxes['weight'].sum() > 22 and freeze_impacts:
            for _ in range(60):
                # Freeze for 60 frames on impacts
                output_video.write(img)
        else:
            output_video.write(img)
        
        if frame >= max_frame:
            break
        
    output_video.release()
    # Not all browsers support the codec, we will re-load the file at tmp_output_path
    # and convert to a codec that is more broadly readable using ffmpeg
    if os.path.exists(output_path):
        os.remove(output_path)
    subprocess.run(
        [
            "ffmpeg",
            "-i",
            tmp_output_path,
            "-crf",
            "18",
            "-preset",
            "veryfast",
            "-vcodec",
            "libx264",
            output_path,
        ]
    )
    os.remove(tmp_output_path)

    return output_path

In [24]:
# #from helmet_assignment.video import video_with_predictions
# from IPython.display import Video, display

# submission['video'] = submission['video_frame'].str.split('_').str[:3].str.join('_') + '.mp4'
# debug_videos = submission2'video'].unique()

# # Create video showing predictions for one of the videos.
# video_out = video_with_predictions(
#     f'../input/nfl-health-and-safety-helmet-assignment/train/{debug_videos[0]}',
#     scorer.sub_labels,  freeze_impacts=False)

# frac = 1 # scaling factor for display
# display(Video(data=video_out,
#               embed=True,
#               height=int(720*frac),
#               width=int(1280*frac))
#        )