In [None]:
# Switch off the Jedi-based completer for this kernel's tab completion.
%config Completer.use_jedi = False

In [None]:
import os

# makedirs(exist_ok=True) keeps this cell re-runnable: os.mkdir raises
# FileExistsError when the directory is left over from a previous run.
os.makedirs('label_assignment', exist_ok=True)

In [None]:
%%capture
# Copy the gmmreg sources from the input dataset into the package folder, run
# their setup.py with --user from the src directory, then go back to /kaggle/working.
!cp -r /kaggle/input/gmmreglib /kaggle/working/label_assignment/gmmreg-install
%cd /kaggle/working/label_assignment/gmmreg-install/src
!python setup.py install --user
%cd /kaggle/working

In [None]:
# Place the helmet_assignment helpers inside the label_assignment package so that
# all.py can import them relatively (`from .helmet_assignment.features import ...`).
!cp -r /kaggle/input/helmet-assignment-helpers/helmet-assignment-main/helmet_assignment /kaggle/working/label_assignment/helmet_assignment

# Point cloud matching algorithm

## Main Functions

In [None]:
%%writefile label_assignment/__init__.py
# No code here: this file only marks `label_assignment` as an importable package.



In [None]:
# Make /kaggle/working the current directory before touching the package files.
%cd /kaggle/working

In [None]:
# List the helper package folder to check that its files are in place.
!ls label_assignment/helmet_assignment

In [None]:
%%writefile label_assignment/all.py

import pandas as pd
import numpy as np
import os
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist
from gmmreg._core import run_multi_level, normalize
from .helmet_assignment.features import add_track_features
from functools import partial


class DataLoader():
    """Serves per-frame helmet detections together with the nearest tracking snapshot.

    Typical use: build it once from a detections frame, call ``filter_video`` to
    select one video, then call the instance with a frame number.

    Parameters
    ----------
    preds : pandas.DataFrame
        Detections with at least ``video_frame``, ``left``, ``width``, ``top`` and
        ``height`` columns. Missing ``conf`` / ``id`` columns are filled in on an
        internal copy, so the caller's frame is left untouched.
    is_train : bool
        Chooses between the train and the test player-tracking CSV.
    flip_y : bool
        When true, the sign of the tracking ``y`` coordinate is inverted.
    top22 : bool
        When true, only the 22 highest-confidence detections of every
        ``video_frame`` are kept.
    """
    def __init__(self, preds, is_train = True, flip_y = True, top22 = False):
        if is_train:
            track = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/train_player_tracking.csv')
        else:
            track = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/test_player_tracking.csv')
        # Honour the flag: the flip used to be applied unconditionally.
        if flip_y:
            track['y'] = -track['y']
        self.track = add_track_features(track).query('est_frame > 0').reset_index()
        self.videos = pd.Series(list(map(lambda x: splitjoin(x, ':-1'), preds['video_frame'].unique()))).sort_values().unique()

        # Work on a copy so that filling defaults never alters the caller's frame.
        preds = preds.copy()
        if 'conf' not in preds.columns:
            print('"conf" column missing in "preds" DataFrame, filling with 1...')
            preds['conf'] = 1
        if 'id' not in preds.columns:
            print('"id" column missing in "preds" DataFrame, filling with unique values...')
            preds['id'] = range(len(preds))
        if top22:
            # Sort descending: head(22) must keep the most confident boxes,
            # an ascending sort would keep the 22 least confident ones.
            preds = (preds
                     .sort_values('conf', ascending = False)
                     .groupby('video_frame')
                     .head(22)
                     .sort_index()
                     .reset_index(drop = True))

        self.preds = preds

    def __nearest__(self, frame):
        """Return the tracking rows whose ``est_frame`` is closest to ``frame``.

        Always returns a DataFrame (even for a single matching row) so callers
        get 2-D coordinate arrays.
        """
        idx = abs(self.track_est_frames - frame).argmin()
        frame = self.track_est_frames[idx]
        # A list key keeps the result a DataFrame when only one row matches.
        return self.gameplay_track.set_index('est_frame').loc[[frame]]

    def filter_video(self, video):
        """Select the detections and tracking rows belonging to ``video``.

        ``video`` is split on ``_``: everything but the last token is the
        ``game_play`` key, the last token is the camera view.
        """
        gameplay, view = splitjoin(video, [':-1', '-1:'])
        self.video = video
        self.gameplay = gameplay
        self.view = view
        self.gameplay_track = self.track.query(f"game_play == '{gameplay}'")
        # video_frame keys are '<video>_<frame>'; a literal prefix test avoids
        # treating the video name as a regular expression / loose substring.
        in_video = self.preds['video_frame'].str.startswith(f'{video}_')
        self.video_preds = self.preds[in_video]
        self.track_est_frames = self.gameplay_track['est_frame'].unique()
        self.frames = (self
                       .video_preds['video_frame']
                       .drop_duplicates()
                       .apply(splitjoin, keep = '-1:')
                       .astype('int')
                       .sort_values()
                       .values
                      )

    def __call__(self, frame, method = 'nearest'):
        """Return ``(xy_video, xy_track, label_video, label_track)`` for ``frame``.

        ``xy_video`` holds the box centres of the frame's detections and
        ``label_video`` their ``id`` values; ``xy_track`` / ``label_track`` hold
        the ``x, y`` coordinates and ``player`` labels of the nearest tracking
        snapshot.

        Raises
        ------
        ValueError
            If ``filter_video`` has not been called, or ``method`` is not
            ``'nearest'``.
        """
        if not hasattr(self, 'gameplay_track'):
            raise ValueError("You must call 'filter_video' before calling the generator")
        if method == 'nearest':
            track = self.__nearest__(frame)
            xy_track = track[['x', 'y']].values
            label_track = track['player'].values
        else:
            #todo implement interpolation on frames
            raise ValueError("Only 'nearest' method is implemented so far")

        # Same key format as build_submission_for_video uses.
        video_frame = f'{self.video}_{frame}'
        # List key: a frame with a single detection must still yield a DataFrame.
        video_preds = self.video_preds.set_index('video_frame').loc[[video_frame]]
        xy_video = ltwh2xcyc(video_preds)
        label_video = video_preds['id'].values

        return xy_video, xy_track, label_video, label_track

    def bbox(self, frame):
        """Return the ``left, width, top, height`` values of ``frame`` as a 2-D array."""
        video_frame = f'{self.video}_{frame}'
        bbox = (self
                .video_preds
                .set_index('video_frame')
                .loc[[video_frame], ['left', 'width', 'top', 'height']]
                .values)
        return bbox
              
def rotate(xy, theta):
    """Rotate row-vector points ``xy`` by ``theta`` degrees.

    The points are right-multiplied by ``[[cos, -sin], [sin, cos]]``; with row
    vectors this maps ``(1, 0)`` to ``(0, -1)`` for ``theta = 90``.
    """
    radians = theta * np.pi/180
    cosine, sine = np.cos(radians), np.sin(radians)
    rotation = np.array([[cosine, -sine],
                         [sine,    cosine]])
    return xy @ rotation

def normalize(xy):
    """Centre ``xy`` on its column means and scale each column by its std.

    A column with zero spread (a single point, or all points sharing one
    coordinate) is only centred: dividing by a zero std would yield NaN/inf,
    which the downstream distance matrix and assignment cannot handle.

    Note: this definition shadows the ``normalize`` imported from
    ``gmmreg._core`` at the top of the module.
    """
    spread = xy.std(axis = 0)
    # Replace zero spreads by 1 so the division leaves those columns at 0.
    spread = np.where(spread == 0, 1.0, spread)
    return (xy - xy.mean(axis = 0))/spread

def _as_index(spec):
    """Turn ``':-1'``, ``'-1:'``, ``'1:3'``, ``'2'`` (or an int / slice) into an index object."""
    if isinstance(spec, (int, slice)):
        return spec
    text = str(spec).strip()
    if ':' not in text:
        return int(text)
    fields = text.split(':')
    if len(fields) > 3:
        raise ValueError(f'Invalid slice specification: {spec!r}')
    return slice(*(int(field) if field.strip() else None for field in fields))

def splitjoin(string, keep):
    """Split ``string`` on ``_``, index the tokens with ``keep`` and re-join with ``_``.

    ``keep`` is a slice written as text (e.g. ``':-1'`` drops the last token,
    ``'-1:'`` keeps only the last one). When ``keep`` is a list of such specs a
    tuple with one joined string per spec is returned.

    The spec is parsed explicitly instead of being pasted into ``eval``, so no
    code is ever built from the arguments.
    """
    tokens = string.split('_')
    if isinstance(keep, list):
        return tuple('_'.join(tokens[_as_index(spec)]) for spec in keep)
    return '_'.join(tokens[_as_index(keep)])

def ltwh2xcyc(df):
    """Convert ``left/top/width/height`` boxes to an ``(n, 2)`` array of box centres."""
    center_x = df['left'] + df['width']/2
    center_y = df['top'] + df['height']/2
    # One row per box: [x_center, y_center].
    return np.column_stack([center_x.values, center_y.values])

def register(xy_source, xy_target, theta, n_grid, **kwargs):
    """Normalise both point sets, rotate the source and register it onto the target.

    A regular ``n_grid x n_grid`` grid over ``[-2, 2]`` is handed to
    ``run_multi_level`` as its third positional argument (presumably the control
    points of the deformation; confirm against gmmreg). Remaining keyword
    arguments are forwarded to ``run_multi_level`` unchanged.

    Returns the transformed source points and the normalised target points.
    """
    axis = np.linspace(-2, 2, n_grid)
    grid_points = np.array(np.meshgrid(axis, axis)).T.reshape(-1,2)

    target = normalize(xy_target)
    moving = rotate(normalize(xy_source), theta)
    moving = run_multi_level(moving, target, grid_points, **kwargs)
    return moving, target

def label_matrix(matrix, label_row, label_col):
    """Wrap ``matrix`` in a DataFrame whose rows/columns carry the given labels."""
    labelled = pd.DataFrame(data = matrix, index = label_row, columns = label_col)
    return labelled

def get_optimal_theta(xy_video, xy_tracking, thetas, **kwargs):
    """Pick the rotation angle whose registration gives the best assignment.

    Parameters
    ----------
    xy_video, xy_tracking : array-like
        Point sets handed to ``register``.
    thetas : str or sequence
        Either a camera view name (``'Endzone'`` -> ``[-90, 90]``,
        ``'Sideline'`` -> ``[0, 180]``) or an explicit sequence of candidate
        angles in degrees.
    **kwargs
        Forwarded to ``register``.

    Returns
    -------
    The candidate with the highest score, where the score is the mean of the
    whole distance matrix divided by the mean distance of the optimal
    (Hungarian) assignment.

    Raises
    ------
    ValueError
        If ``thetas`` is a string that is not a known view name.
    """
    view_candidates = {'Endzone': [-90, 90], 'Sideline': [0, 180]}
    # Only strings are view names; checking the type first avoids comparing an
    # array of angles against a string, and stops an unknown name from being
    # iterated character by character.
    if isinstance(thetas, str):
        if thetas not in view_candidates:
            raise ValueError(f"Unknown view '{thetas}', expected one of {sorted(view_candidates)}")
        thetas = view_candidates[thetas]

    scores = []
    for theta in thetas:
        xy_video_r, xy_tracking_r = register(xy_video, xy_tracking, theta, **kwargs)
        dist = cdist(xy_video_r, xy_tracking_r)
        rows, cols = linear_sum_assignment(dist)
        # High score = matched pairs are much closer than an average pair.
        scores.append(dist.mean()/dist[rows, cols].mean())
    return thetas[int(np.argmax(scores))]

from statistics import StatisticsError, mode
def estimate_theta(dl, frames = [1,21,41,61,81,101,121], **kwargs):
    """Estimate the camera angle of the loader's current video by majority vote.

    ``get_optimal_theta`` is evaluated on a handful of probe frames and the
    most frequent answer is returned.

    Parameters
    ----------
    dl : callable
        Returns ``(xy_video, xy_tracking, labels_video, labels_tracking)`` for a
        frame number and exposes ``view``. If it also exposes ``frames``, only
        probe frames that actually exist are used.
    frames : sequence of int
        Requested probe frames.
    **kwargs
        Forwarded to ``get_optimal_theta``.

    Raises
    ------
    ValueError
        If the loader reports that it has no frames at all.
    """
    frames = list(frames)
    available = getattr(dl, 'frames', None)
    if available is not None:
        available = [int(f) for f in available]
        if not available:
            raise ValueError('No frames available to estimate the camera angle')
        present = set(available)
        usable = [f for f in frames if f in present]
        if frames and not usable:
            # None of the requested probes has detections: spread the same
            # number of probes evenly over the frames that do exist.
            n_probe = min(len(frames), len(available))
            picks = np.linspace(0, len(available) - 1, n_probe).astype(int)
            usable = [available[i] for i in picks]
        frames = usable

    thetas = []
    for frame in frames:
        xy_video, xy_tracking, labels_video, labels_tracking = dl(frame)
        theta = get_optimal_theta(xy_video, xy_tracking, dl.view, **kwargs)
        thetas.append(theta)

    try:
        return mode(thetas)
    except StatisticsError:
        if not thetas:
            raise
        # Python < 3.8 raises on ties; fall back to the first most common value,
        # which is what newer versions of statistics.mode return.
        return max(dict.fromkeys(thetas), key = thetas.count)

def match_video(dl, theta, **kwargs):
    """Return one labelled distance matrix per frame of the loader's current video.

    For every frame the detections are registered onto the tracking points with
    the given ``theta``; rows of the resulting matrix are detection labels,
    columns are tracking labels.
    """
    def frame_distances(frame):
        pts_video, pts_track, ids_video, ids_track = dl(frame)
        pts_video, pts_track = register(pts_video, pts_track, theta, **kwargs)
        return label_matrix(cdist(pts_video, pts_track), ids_video, ids_track)

    return [frame_distances(frame) for frame in dl.frames]

def assign_labels(dl, video_agg_dist):
    """Assign a tracking label to the detections of every frame.

    For each frame the rows/columns present in that frame are cut out of the
    aggregated distance matrix and matched with the Hungarian algorithm.

    Returns two lists aligned with ``dl.frames``: the assigned tracking labels
    and the positions of the matched detections.
    """
    assigned = []
    matched_rows = []
    for frame in dl.frames:
        _, _, ids_video, ids_track = dl(frame)
        cost = video_agg_dist.loc[ids_video, ids_track]
        rows, cols = linear_sum_assignment(cost)
        assigned.append(ids_track[cols])
        matched_rows.append(rows)
    return assigned, matched_rows

def build_submission_for_video(dl, labels, idx_video):
    """Build the submission rows of one video.

    ``labels`` and ``idx_video`` are aligned with ``dl.frames``; for each frame
    the boxes at positions ``idx_video`` are paired with the assigned labels.
    """
    box_columns = ['left', 'width', 'top', 'height']
    per_frame = []
    for frame, frame_labels, rows in zip(dl.frames, labels, idx_video):
        rows_df = pd.DataFrame({
            'video_frame': f'{dl.video}_{frame}',
            'label': frame_labels,
        })
        rows_df[box_columns] = dl.bbox(frame)[rows]
        per_frame.append(rows_df)
    return pd.concat(per_frame)


def track2sub(dl, **kwargs):
    """Label every detection of the loader's current video.

    Steps:
      1. estimate the camera angle (most frequent best angle over probe frames);
      2. register each frame and collect labelled distance matrices
         (rows: detection ids, columns: tracking labels);
      3. average those matrices per detection id;
      4. run the Hungarian algorithm per frame on the averaged distances;
      5. attach the assigned labels to the original boxes.

    Returns ``(submission_dataframe, theta)``.
    """
    theta = estimate_theta(dl, **kwargs)

    per_frame_dist = match_video(dl, theta, **kwargs)

    # Simple mean aggregation; could be replaced by something more elaborate.
    mean_dist = pd.concat(per_frame_dist).groupby(level=0).agg('mean')

    labels, rows = assign_labels(dl, mean_dist)

    submission = build_submission_for_video(dl, labels, rows)
    return submission, theta

class Register():
    """Callable wrapper around a point-set registration algorithm.

    Only ``algo='gmmreg'`` is supported. An optional ``n_grid`` keyword builds a
    fixed ``n_grid x n_grid`` grid over ``[-2, 2]``; every other keyword is bound
    to ``run_multi_level``.
    """
    def __init__(self, algo = 'gmmreg', **kwargs):
        if algo != 'gmmreg':
            raise ValueError('Only gmmreg is implemented')
        self.grid = None
        if 'n_grid' in kwargs:
            axis = np.linspace(-2, 2, kwargs.pop('n_grid'))
            self.grid = np.array(np.meshgrid(axis, axis)).T.reshape(-1,2)
        self.algo = partial(run_multi_level, **kwargs)

    def __call__(self, src, trg):
        """Register ``src`` onto ``trg``; without a fixed grid ``src`` itself is used as grid."""
        grid = src if self.grid is None else self.grid
        return self.algo(src, trg, grid)
    
    
def match_videoV2(dl, theta, **kwargs):
    """Per-frame labelled distance matrices, registering against the previous match.

    The first processed frame is registered onto its full set of tracking
    points; every later frame is registered onto the tracking points that were
    matched in the previous frame. Each distance matrix is then scaled by
    ``d_m / w_m`` where ``d_m`` is the mean matched distance of the frame and
    ``w_m`` a logistic weight of the number of matches (0.5 at 8 matches), so
    frames with poor or few matches contribute larger distances.

    ``kwargs`` are used to build the ``Register`` instance.
    """
    reg_gmm = Register(**kwargs)
    video_dist = []
    # Target of the previous frame. Initialised on the first *available* frame:
    # testing `frame == 1` left it unbound whenever frame 1 had no detections.
    prev_target = None
    for frame in dl.frames:
        xy_video, xy_tracking, labels_video, labels_tracking = dl(frame)
        xy_tracking = normalize(xy_tracking)
        xy_video = normalize(xy_video)
        xy_video = rotate(xy_video, theta)
        if prev_target is None:
            prev_target = xy_tracking
        xy_video = reg_gmm(xy_video, prev_target)

        dist = cdist(xy_video, xy_tracking)
        M = linear_sum_assignment(dist)
        prev_target = xy_tracking[M[1]]

        d_m = dist[M].mean()
        w_m = 1/(1 + np.exp(-(len(M[0])-8)/2))
        dist = label_matrix(dist, labels_video, labels_tracking)
        video_dist.append(dist * d_m / w_m)
    return video_dist

def track2subV2(dl, **kwargs):
    """Same pipeline as ``track2sub`` but with ``match_videoV2`` for the distances.

    Returns ``(submission_dataframe, theta)``.
    """
    theta = estimate_theta(dl, **kwargs)
    weighted_dist = match_videoV2(dl, theta, **kwargs)
    # Average the per-frame matrices per detection id.
    mean_dist = pd.concat(weighted_dist).groupby(level=0).agg('mean')
    labels, rows = assign_labels(dl, mean_dist)
    submission = build_submission_for_video(dl, labels, rows)
    return submission, theta

In [None]:
%%writefile label_assignment/utils.py
def fix_submission(sub, img_width = 1280, img_height = 720, max_boxes = 22):
    """Return a cleaned copy of a submission frame; the input is never modified.

    Applied in order:
      1. drop rows containing NaN;
      2. drop duplicated ``(video_frame, label)`` pairs;
      3. drop duplicated boxes within a ``video_frame``;
      4. keep at most ``max_boxes`` rows per ``video_frame`` (first rows win);
      5. clip boxes to the ``img_width x img_height`` image.

    Parameters
    ----------
    sub : pandas.DataFrame
        Needs ``video_frame``, ``label``, ``left``, ``width``, ``top``, ``height``.
    img_width, img_height : int
        Image size used for clipping (defaults: 1280 x 720).
    max_boxes : int
        Maximum number of rows kept per ``video_frame`` (default: 22).
    """
    n_na = sub.isna().any(axis = 1).sum()
    if n_na:
        sub = sub.dropna()
        print(f'Dropped {n_na} lines from submission')

    n_dupe_labels = sub[["video_frame", "label"]].duplicated().sum()
    if n_dupe_labels:
        sub = sub.drop_duplicates(['video_frame', 'label'])
        print(f'Dropped {n_dupe_labels} duplicated labels')

    n_dupe_bbox = sub[["video_frame", "left", "width", "top", "height"]].duplicated().sum()
    if n_dupe_bbox:
        sub = sub.drop_duplicates(['video_frame', 'left','width','top','height'])
        print(f'Dropped {n_dupe_bbox} duplicated bboxes')

    # Report the number of surplus *rows*; counting frames over the limit
    # under-reported what was actually dropped.
    per_frame = sub.groupby(["video_frame"])["label"].count()
    n_extra = int((per_frame - max_boxes).clip(lower = 0).sum())
    if n_extra:
        sub = sub.groupby("video_frame").head(max_boxes)
        print(f'Dropped {n_extra} extra bboxes')

    right = sub['left'] + sub['width']
    bottom = sub['top'] + sub['height']
    n_out_of_bounds = (
        (sub['left'] < 0) |
        (sub['top'] < 0) |
        (right > img_width) |
        (bottom > img_height)
    ).sum()
    if n_out_of_bounds:
        # Clip the corners, then rebuild width/height from them. assign() returns
        # a new frame, so neither the caller's data nor a slice is written to.
        left = sub['left'].clip(0, img_width - 1)
        top = sub['top'].clip(0, img_height - 1)
        right = right.clip(1, img_width)
        bottom = bottom.clip(1, img_height)
        sub = sub.assign(left = left, top = top, width = right - left, height = bottom - top)
        print(f'Clipped {n_out_of_bounds} bboxes')

    return sub

In [None]:
import sys

# Imported explicitly: the cells below use `pd` and `np`, which previously were
# only available as a side effect of the star import.
import numpy as np
import pandas as pd

# The package was written to /kaggle/working/label_assignment, so it is the
# parent directory that has to be on sys.path for `import label_assignment`.
if "/kaggle/working" not in sys.path:
    sys.path.append("/kaggle/working")
from label_assignment.all import *

# Testing

In [None]:
import sys
sys.path.append("/kaggle/input/helmet-assignment-helpers/helmet-assignment-main")
from helmet_assignment.score import NFLAssignmentScorer
from helmet_assignment.video import video_with_predictions

In [None]:
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from termcolor import colored

In [None]:
## GT Rotations
# Read the per-play rotation table (indexed by `play`) and reshape it to one row
# per video: the index becomes '<play>_Sideline' / '<play>_Endzone' and the
# `rotation` column holds the value of the matching view column. The evaluation
# loops look this up with gt_rotations.loc[video, 'rotation'].
gt_rotations = pd.read_csv('../input/nlf-helmet-safety-camera-rotations/NFL-rotations-plays.csv').set_index('play')
gt_rotations = pd.DataFrame({
    'video' : np.concatenate([(gt_rotations.index + '_Sideline').values, (gt_rotations.index + '_Endzone').values]),
    'rotation' : np.concatenate([gt_rotations['Sideline'].values, gt_rotations['Endzone'].values])
}).set_index('video').sort_index()

## GT Labels
# Ground-truth boxes and labels; filtered per video (`video == "<video>.mp4"`) for scoring.
gt_labels = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/train_labels.csv')

In [None]:
# Number of videos (taken from the start of dl.videos) each evaluation loop processes.
N_VIDEOS = 6
# Keyword arguments passed to track2subV2 for non-Sideline (Endzone) videos.
# `n_grid` is consumed by the registration wrappers in label_assignment/all.py to
# build an n_grid x n_grid grid over [-2, 2]; the remaining keys are forwarded to
# gmmreg's run_multi_level (presumably one list entry per level; confirm against gmmreg).
labeler_cfg_Endzone = {
    'level': 3, 
    'scales':  [1, 0.2, 0.1], 
    'lambdas': [0.1, 0.04, 0.02], 
    'iters':   [30, 20, 10],
    'n_grid': 5
}
# Same structure, used for videos whose name contains 'Sideline'.
# NOTE(review): unlike the Endzone config, `lambdas` is not monotonically
# decreasing here (1, 0.02, 0.25); confirm this is intended.
labeler_cfg_Sideline = {
    'level': 3, 
    'scales':  [1, 0.5, 0.25], 
    'lambdas': [1, 0.02, 0.25], 
    'iters':   [30, 20, 10],
    'n_grid': 5
}

### Testing with GT data

In [None]:
# Ground-truth boxes that carry their true label as detection `id`, so all boxes
# of one player share a single id across frames.
# .copy() makes the frame independent of the query result before adding a column.
sorted_bboxes = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/train_labels.csv').query('frame > 0').copy()
sorted_bboxes['id'] = sorted_bboxes['label']
dl = DataLoader(sorted_bboxes)

acc = []
sub = []
for video in tqdm(dl.videos[:N_VIDEOS]):

    #=#=# Ground truth angle of rotation for debugging (REMOVE ON INFERENCE) #=#=#
    gt_theta = gt_rotations.loc[video, 'rotation']
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Filtering the dataloader to return the data of a single video
    dl.filter_video(video = video)

    ## generating submission for a single video
    if 'Sideline' in video:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Sideline)
    else:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Endzone)

    sub.append(video_sub)

    #=#=# Scoring for debugging (REMOVE ON INFERENCE) #=#=#
    # Filter the ground truth once and reuse it for both scorers.
    video_gt = gt_labels.query(f'video == "{video}.mp4"')
    # impact_weight=1 weighs every box equally, i.e. plain accuracy.
    video_acc = NFLAssignmentScorer(video_gt, impact_weight=1).score(video_sub)
    video_score = NFLAssignmentScorer(video_gt).score(video_sub)
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Print video metrics
    print(f'Video: {video:<21} | Acc.:',
          colored(f'{video_acc:.3f}','green' if video_acc > 0.9 else ('yellow' if video_acc > 0.7 else 'red')),
          '| Score:',
          colored(f'{video_score:.3f}','green' if video_score > 0.9 else ('yellow' if video_score > 0.7 else 'red')),
          '| Angle:',
          colored(f'{theta:>3}°', 'green' if theta == gt_theta else 'red')
         )
    acc.append(video_acc)

scorer = NFLAssignmentScorer(gt_labels)
score = scorer.score(pd.concat(sub))
acc = np.array(acc)

print(f'Mean Accuracy: {acc.mean():.3f}')
print(f'Mean Score: {score:.3f}')
plt.hist(acc);

### Testing with GT bboxes (no tracking)

In [None]:
# Ground-truth boxes without an `id` column: DataLoader fills in a unique id per
# box, so no box is linked to another one across frames.
sorted_bboxes = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/train_labels.csv').query('frame > 0')
dl = DataLoader(sorted_bboxes)

acc = []
sub = []
for video in tqdm(dl.videos[:N_VIDEOS]):

    #=#=# Ground truth angle of rotation for debugging (REMOVE ON INFERENCE) #=#=#
    gt_theta = gt_rotations.loc[video, 'rotation']
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Filtering the dataloader to return the data of a single video
    dl.filter_video(video = video)

    ## generating submission for a single video
    if 'Sideline' in video:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Sideline)
    else:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Endzone)

    sub.append(video_sub)

    #=#=# Scoring for debugging (REMOVE ON INFERENCE) #=#=#
    # Filter the ground truth once and reuse it for both scorers.
    video_gt = gt_labels.query(f'video == "{video}.mp4"')
    # impact_weight=1 weighs every box equally, i.e. plain accuracy.
    video_acc = NFLAssignmentScorer(video_gt, impact_weight=1).score(video_sub)
    video_score = NFLAssignmentScorer(video_gt).score(video_sub)
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Print video metrics
    print(f'Video: {video:<21} | Acc.:',
          colored(f'{video_acc:.3f}','green' if video_acc > 0.9 else ('yellow' if video_acc > 0.7 else 'red')),
          '| Score:',
          colored(f'{video_score:.3f}','green' if video_score > 0.9 else ('yellow' if video_score > 0.7 else 'red')),
          '| Angle:',
          colored(f'{theta:>3}°', 'green' if theta == gt_theta else 'red')
         )
    acc.append(video_acc)

scorer = NFLAssignmentScorer(gt_labels)
score = scorer.score(pd.concat(sub))
acc = np.array(acc)

print(f'Mean Accuracy: {acc.mean():.3f}')
print(f'Mean Score: {score:.3f}')
plt.hist(acc);

## Testing with baseline helmet detections

In [None]:
# Detections read from train_baseline_helmets.csv; DataLoader fills in any
# missing `conf` / `id` column.
sorted_bboxes = pd.read_csv('/kaggle/input/nfl-health-and-safety-helmet-assignment/train_baseline_helmets.csv')
dl = DataLoader(sorted_bboxes)

acc = []
sub = []
for video in tqdm(dl.videos[:N_VIDEOS]):

    #=#=# Ground truth angle of rotation for debugging (REMOVE ON INFERENCE) #=#=#
    gt_theta = gt_rotations.loc[video, 'rotation']
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Filtering the dataloader to return the data of a single video
    dl.filter_video(video = video)

    ## generating submission for a single video
    if 'Sideline' in video:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Sideline)
    else:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Endzone)

    sub.append(video_sub)

    #=#=# Scoring for debugging (REMOVE ON INFERENCE) #=#=#
    # Filter the ground truth once and reuse it for both scorers.
    video_gt = gt_labels.query(f'video == "{video}.mp4"')
    # impact_weight=1 weighs every box equally, i.e. plain accuracy.
    video_acc = NFLAssignmentScorer(video_gt, impact_weight=1).score(video_sub)
    video_score = NFLAssignmentScorer(video_gt).score(video_sub)
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Print video metrics
    print(f'Video: {video:<21} | Acc.:',
          colored(f'{video_acc:.3f}','green' if video_acc > 0.9 else ('yellow' if video_acc > 0.7 else 'red')),
          '| Score:',
          colored(f'{video_score:.3f}','green' if video_score > 0.9 else ('yellow' if video_score > 0.7 else 'red')),
          '| Angle:',
          colored(f'{theta:>3}°', 'green' if theta == gt_theta else 'red')
         )
    acc.append(video_acc)

scorer = NFLAssignmentScorer(gt_labels)
score = scorer.score(pd.concat(sub))
acc = np.array(acc)

print(f'Mean Accuracy: {acc.mean():.3f}')
print(f'Mean Score: {score:.3f}')
plt.hist(acc);

## Testing with DeepSORT-tracked detections

In [None]:
# Detections read from tracked_detections.csv; DataLoader fills in any missing
# `conf` / `id` column.
sorted_bboxes = pd.read_csv('/kaggle/input/nfl-csv-dataset/tracked_detections.csv')
dl = DataLoader(sorted_bboxes)

acc = []
sub = []
for video in tqdm(dl.videos[:N_VIDEOS]):

    #=#=# Ground truth angle of rotation for debugging (REMOVE ON INFERENCE) #=#=#
    gt_theta = gt_rotations.loc[video, 'rotation']
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Filtering the dataloader to return the data of a single video
    dl.filter_video(video = video)

    ## generating submission for a single video
    if 'Sideline' in video:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Sideline)
    else:
        video_sub, theta = track2subV2(dl, **labeler_cfg_Endzone)

    sub.append(video_sub)

    #=#=# Scoring for debugging (REMOVE ON INFERENCE) #=#=#
    # Filter the ground truth once and reuse it for both scorers.
    video_gt = gt_labels.query(f'video == "{video}.mp4"')
    # impact_weight=1 weighs every box equally, i.e. plain accuracy.
    video_acc = NFLAssignmentScorer(video_gt, impact_weight=1).score(video_sub)
    video_score = NFLAssignmentScorer(video_gt).score(video_sub)
    #=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#=#

    ## Print video metrics
    print(f'Video: {video:<21} | Acc.:',
          colored(f'{video_acc:.3f}','green' if video_acc > 0.9 else ('yellow' if video_acc > 0.7 else 'red')),
          '| Score:',
          colored(f'{video_score:.3f}','green' if video_score > 0.9 else ('yellow' if video_score > 0.7 else 'red')),
          '| Angle:',
          colored(f'{theta:>3}°', 'green' if theta == gt_theta else 'red')
         )
    acc.append(video_acc)

scorer = NFLAssignmentScorer(gt_labels)
score = scorer.score(pd.concat(sub))
acc = np.array(acc)

print(f'Mean Accuracy: {acc.mean():.3f}')
print(f'Mean Score: {score:.3f}')
plt.hist(acc);