# Assign NGS player to FairMOT track

In [None]:
# # Developer

# %cd /kaggle
# from IPython.display import clear_output
# pat = input('GitHub Personal Access Token:')
# clear_output()
# ! git clone https://$pat@github.com/qAp/nfl_helmet_assignment_kaggle.git

# # %cd /kaggle/nfl_helmet_assignment_kaggle/
# # ! git pull

In [5]:
# User

! cp -r /kaggle/input/nfl-helmet-assignment-kaggle/nfl_helmet_assignment_kaggle /kaggle/.

In [31]:
import os, sys
from tqdm.auto import tqdm
import random
import numpy as np
import pandas as pd
import cv2
import matplotlib
import matplotlib.pyplot as plt
from IPython.display import Video, display

sys.path.append('/kaggle/nfl_helmet_assignment_kaggle/')
from helmet_tracker.utils.features import add_track_features
from helmet_tracker.models.helmet_mapping import find_nearest

In [7]:
BASE_DIR = '/kaggle/input/nfl-health-and-safety-helmet-assignment'

labels = pd.read_csv(f'{BASE_DIR}/train_labels.csv')

In [8]:
# debug-train videos
dir_video = '/kaggle/input/nfl-health-and-safety-helmet-assignment/train'
pth_ss_hmap = '/kaggle/input/nfl-mydata/submission_helmtrack_debug.csv'
dir_demo = '/kaggle/input/nfldata05-fairmot-demo-trainsample/demo_debug'

# # test videos
# video_dir = '/kaggle/input/nfl-health-and-safety-helmet-assignment/test'
# submission_df = pd.read_csv('/kaggle/input/nfl-mydata/submission_helmtrack.csv')
# dir_demo = None

In [9]:
! ls {dir_demo}

57700_001264_Endzone   57783_003374_Endzone   57997_003691_Endzone
57700_001264_Sideline  57783_003374_Sideline  57997_003691_Sideline


In [10]:
video = '57783_003374_Sideline'

# Load MOT tracks

In [11]:
# Load FairMOT inference output txt (only the first six columns are used)
pth_fmot = os.path.join(dir_demo, video, 'results.txt')
columns = ['frame', 'id', 'x1', 'y1', 'w', 'h']
df_fmot = pd.read_csv(pth_fmot, header=None, usecols=range(len(columns)),
                      names=columns)

# Find fps, height and width of video.  The capture handle is released
# as soon as the properties are read so the file is not kept open.
pth_video = os.path.join(dir_video, f'{video}.mp4')
cap = cv2.VideoCapture(pth_video)
try:
    # An unopened capture reports 0 for width/height, which would
    # silently collapse every bbox to zero in the rescaling below.
    assert cap.isOpened(), f'Could not open video: {pth_video}'
    fps    = cap.get(cv2.CAP_PROP_FPS)
    width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
    height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
finally:
    cap.release()

# Rescale output bboxes to match video's dimensions
fmot_video_width = 1920
fmot_video_height = 1080
x_scale = width / fmot_video_width
y_scale = height / fmot_video_height
df_fmot[['x1', 'w']] = x_scale * df_fmot[['x1', 'w']]
df_fmot[['y1', 'h']] = y_scale * df_fmot[['y1', 'h']]

# Rename columns to be like for DeepSORT
df_fmot.rename(columns={'id': 'fairmot_cluster', 
                        'x1': 'left', 
                        'y1': 'top', 
                        'w': 'width', 
                        'h': 'height'}, 
               inplace=True)

# Bbox coordinates are stored as integers (fractions are truncated)
for c in ['left', 'top', 'width', 'height']:
    df_fmot[c] = df_fmot[c].astype(int)

In [12]:
df_fmot.shape

(8334, 6)

# Load helmet mapping results

In [13]:
# Helmet mapping results (each row a helmet in some video frame)
ss_hmap = pd.read_csv(pth_ss_hmap)

# `video_frame` is split on '_': the first three parts are re-joined into
# the video name and the fourth part is parsed as the integer frame number,
# e.g. '57783_003374_Sideline_447' -> ('57783_003374_Sideline', 447).
video_frame = ss_hmap['video_frame'].str.split('_')
ss_hmap['video'] = video_frame.str[:3].str.join('_')
ss_hmap['frame'] = video_frame.str[3].astype(int)

In [14]:
%%time

df_hmap = ss_hmap.query('video==@video')

CPU times: user 19.2 ms, sys: 2 µs, total: 19.2 ms
Wall time: 20.2 ms


In [15]:
df_hmap.shape

(8916, 8)

# Iterate over frames, merging bounding boxes

In [16]:
frames = df_hmap['frame'].unique()

In [17]:
%%time

# Merge helmet mapping boxes with FairMOT boxes, one video frame at a time.
per_frame_tracks = []
for frame in tqdm(frames, total=len(frames)):
    # `merge_asof` needs both sides sorted on the key.  `sort_values`
    # returns new frames, so `df_hmap` / `df_fmot` are left untouched.
    hmap = df_hmap[df_hmap['frame'] == frame].sort_values(['left'], axis=0)
    fmot = df_fmot[df_fmot['frame'] == frame].sort_values(['left'], axis=0)

    # Each helmet mapping box takes the FairMOT box with the nearest 'left'
    merged = pd.merge_asof(hmap, fmot,
                           on='left',
                           direction='nearest',
                           suffixes=('', '_fairmot'))

    # For duplicated fairmot_cluster, choose the best matched top.
    # Note this removes excess baseline helmets
    merged = merged.assign(dtop=(merged['top'] - merged['top_fairmot']).abs())
    merged = (merged
              .sort_values('dtop')
              .groupby('fairmot_cluster')
              .first()
              .reset_index())

    per_frame_tracks.append(merged)

df_tracks = pd.concat(per_frame_tracks, axis=0)

  0%|          | 0/451 [00:00<?, ?it/s]

CPU times: user 5.1 s, sys: 197 ms, total: 5.29 s
Wall time: 5.01 s


In [18]:
df_hmap.shape, df_tracks.shape, df_fmot.shape

((8916, 8), (7617, 14), (8334, 6))

# Assign player number to track

`fairmot_cluster` is the MOT id. Across different frames, the same MOT id ought to have the same player number (`label`), as it represents one player moving through spacetime.  At this stage it doesn't, so we count the number of occurrences of each unique player number for a MOT id, then assign the one that occurs most frequently to that MOT id.  This results in each MOT id having *one* player number.

In [19]:
%%time
# For every MOT id, count how often each helmet mapping label occurs,
# order the (MOT id, label) pairs by descending count, and group by MOT id
# so that the first row of each group is that id's most frequent label.
#
# `reset_index(name='label_count')` names the count column explicitly.
# The name of the Series returned by `value_counts` differs between
# pandas versions ('label' before 2.0, 'count' from 2.0 on), so renaming
# a column called 'label' only works on the older versions.
cluster_grpd = (df_tracks
                .groupby('fairmot_cluster')['label']
                .value_counts()
                .sort_values(ascending=False)
                .reset_index(name='label_count')
                .groupby('fairmot_cluster')
                )

CPU times: user 9.09 ms, sys: 1.91 ms, total: 11 ms
Wall time: 15 ms


In [20]:
%%time
# Groups are ordered by descending count, so the first row of each MOT id
# holds its most frequent label together with that label's count.
cluster_top = cluster_grpd.first()
cluster2player = cluster_top['label'].to_dict()
cluster2player_count = cluster_top['label_count'].to_dict()
# Total number of labelled helmets seen for each MOT id
cluster2player_total_count = cluster_grpd['label_count'].sum().to_dict()

CPU times: user 7.52 ms, sys: 1.15 ms, total: 8.67 ms
Wall time: 7 ms


In [21]:
%%time

# Broadcast the per-MOT-id statistics onto every helmet row of that MOT id.
cluster_ids = df_tracks['fairmot_cluster']
df_tracks['label_fairmot'] = cluster_ids.map(cluster2player)
df_tracks['label_count_fairmot'] = cluster_ids.map(cluster2player_count)
df_tracks['label_cluster_count_fairmot'] = cluster_ids.map(cluster2player_total_count)

# Fraction of the MOT id's helmets that carry its most frequent label
df_tracks['label_perct_fairmot'] = df_tracks['label_count_fairmot'].div(
    df_tracks['label_cluster_count_fairmot'])

CPU times: user 7.41 ms, sys: 937 µs, total: 8.35 ms
Wall time: 7.68 ms


# Dealing with duplicated and missing `label_fairmot`

At this point, there are 2 problems:
1. There are helmets missing a `label_fairmot`, though they all have a `label`.
2. For a `video_frame`, there may be duplicated `label_fairmot`s.

In [22]:
def remove_duplicated_label_fairmot(df_tracks, keep_by='label_perct_fairmot'):
    '''
    Keep one helmet per (`video_frame`, `label_fairmot`) pair: the one
    ranked highest by column `keep_by`.  The input is not modified; the
    result is ordered by descending `keep_by`.
    '''
    ranked = df_tracks.sort_values(keep_by, ascending=False)
    return ranked.drop_duplicates(subset=['video_frame', 'label_fairmot'],
                                  keep='first')

In [23]:
%%time

# This removes duplicates resulting from fairmot postprocessing
df_tracks = remove_duplicated_label_fairmot(df_tracks)

CPU times: user 11 ms, sys: 1.12 ms, total: 12.1 ms
Wall time: 12 ms


In [24]:
# Notice there are many helmets for which fairmot postprocessing
# has not provided a label (player number)
df_hmap.shape, df_tracks.shape

((8916, 8), (6828, 18))

In [25]:
# Use the helmet mapping label wherever fairmot postprocessing has not
# provided one.  These backup labels get a tiny `label_perct_fairmot`
# (1e-5), so they lose against any FairMOT-derived label the next time
# duplicates are removed.

cols_merge = ['video_frame', 'left', 'width', 'top', 'height', 'label', 'video', 'frame']
# Outer merge brings back the helmet mapping rows dropped so far
df_tracks = pd.merge(df_hmap, df_tracks, 
                     left_on=cols_merge, 
                     right_on=cols_merge,    
                     how='outer')

# Assign the filled columns back.  `df[col].fillna(..., inplace=True)`
# works on a column selection (chained assignment) and does not update
# `df_tracks` when pandas Copy-on-Write is active.
df_tracks['label_fairmot'] = df_tracks['label_fairmot'].fillna(df_tracks['label'])
df_tracks['label_perct_fairmot'] = df_tracks['label_perct_fairmot'].fillna(1e-5)

In [26]:
# Now every helmet has a post-processed label
df_tracks.shape

(8916, 18)

In [27]:
# Again, there may still be numerous duplicated labels.
# Remove them once again.
df_tracks = remove_duplicated_label_fairmot(df_tracks)

In [28]:
# Notice that some helmets are now missing a post-processed label
# again, but fewer are missing than before.
df_tracks.shape

(7861, 18)

In [29]:
df_tracks.duplicated(subset=['video_frame', 'label_fairmot']).sum()

0

# Fill missing labels with unused NGS labels

In [32]:
tracking = pd.read_csv(f'{BASE_DIR}/train_player_tracking.csv')
tracking = add_track_features(tracking)

In [37]:
# The video name has three '_'-separated parts; the first two are parsed
# as integers and used below as `gameKey` and `playID`.
game, play, view = video.split('_')
game = int(game)
play = int(play)

# Tracking rows for this game and play only
df_ngs = tracking.query('gameKey==@game and playID==@play')
# NOTE(review): the disabled lines below would narrow `df_ngs` to the
# `est_frame` nearest to `frame`; `frame` is not set in this cell, so it
# would be whatever value the frame-merging loop left behind.
# est_frame = find_nearest(df_ngs['est_frame'].values, frame)
# df_ngs = df_ngs.query('est_frame==@est_frame')

In [36]:
df_tracks

Unnamed: 0,video_frame,left,width,top,height,label,video,frame,fairmot_cluster,frame_fairmot,top_fairmot,width_fairmot,height_fairmot,dtop,label_fairmot,label_count_fairmot,label_cluster_count_fairmot,label_perct_fairmot
7544,57783_003374_Sideline_447,472,32,403,33,V54,57783_003374_Sideline,447,249.0,447.0,401.0,30.0,27.0,2.0,V54,8.0,8.0,1.00000
1743,57783_003374_Sideline_172,586,12,267,13,V57,57783_003374_Sideline,172,84.0,172.0,260.0,8.0,7.0,7.0,V57,7.0,7.0,1.00000
3428,57783_003374_Sideline_242,868,16,341,14,H25,57783_003374_Sideline,242,209.0,242.0,341.0,14.0,13.0,0.0,H25,1.0,1.0,1.00000
3462,57783_003374_Sideline_243,830,18,303,14,H78,57783_003374_Sideline,243,213.0,243.0,237.0,14.0,14.0,66.0,H78,1.0,1.0,1.00000
3471,57783_003374_Sideline_244,882,16,342,12,H25,57783_003374_Sideline,244,217.0,244.0,341.0,14.0,12.0,1.0,H25,1.0,1.0,1.00000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2207,57783_003374_Sideline_192,530,12,326,12,V38,57783_003374_Sideline,192,,,,,,,V38,,,0.00001
2213,57783_003374_Sideline_192,612,13,360,12,H25,57783_003374_Sideline,192,,,,,,,H25,,,0.00001
6318,57783_003374_Sideline_385,640,37,329,36,V55,57783_003374_Sideline,385,,,,,,,V55,,,0.00001
7358,57783_003374_Sideline_438,693,36,370,46,V90,57783_003374_Sideline,438,,,,,,,V90,,,0.00001


# Drop helmet mapping labels and finalise columns for submission

In [None]:
%%time
# Work on a copy so `df_tracks` keeps both label columns
ss = df_tracks.copy()

# Every helmet must have a post-processed label before submission
assert not ss['label_fairmot'].isna().any()

# Column layout expected by the competition
columns = pd.read_csv(
    '/kaggle/input/nfl-health-and-safety-helmet-assignment/sample_submission.csv').columns

# Replace the helmet mapping label with the post-processed one, then
# keep only the submission columns, in submission order.
ss = (ss
      .drop(columns='label')
      .rename(columns={'label_fairmot': 'label'})
      [columns])

In [None]:
ss.head()

# Package dev

In [None]:
def load_demo_txt(pth_fmot, video_width=1280, video_height=720,
                  fmot_video_width=1920, fmot_video_height=1080):
    '''
    Load FairMOT inference output txt for a video.
    
    Args:
        pth_fmot (str): Path to txt file output by FairMOT's demo.py
        video_width (int): Actual video width.  Default: 1280
        video_height (int): Actual video height.  Default: 720
        fmot_video_width (int): Width of the frame the FairMOT bboxes
            are expressed in.  Default: 1920
        fmot_video_height (int): Height of the frame the FairMOT bboxes
            are expressed in.  Default: 1080
        
    Returns:
        df_fmot (pd.DataFrame): MOT tracks of helmets. Each row is a 
            helmet in some frame, with an MOT tracking id under column
            'fairmot_cluster'.  Bounding boxes are given by integer
            columns 'left', 'top', 'width' and 'height', rescaled to
            the actual video's dimensions.
    '''
    # Only the first six columns of the txt file are used
    columns = ['frame', 'id', 'x1', 'y1', 'w', 'h']
    df_fmot = pd.read_csv(pth_fmot, header=None, usecols=range(len(columns)),
                          names=columns)

    # Rescale output bboxes to match video's dimensions
    x_scale = video_width / fmot_video_width
    y_scale = video_height / fmot_video_height
    df_fmot[['x1', 'w']] = x_scale * df_fmot[['x1', 'w']]
    df_fmot[['y1', 'h']] = y_scale * df_fmot[['y1', 'h']]

    # Rename columns to be like for DeepSORT
    df_fmot = df_fmot.rename(columns={'id': 'fairmot_cluster',
                                      'x1': 'left',
                                      'y1': 'top',
                                      'w': 'width',
                                      'h': 'height'})

    # Bbox coordinates are stored as integers (fractions are truncated)
    bbox_cols = ['left', 'top', 'width', 'height']
    df_fmot = df_fmot.astype({c: int for c in bbox_cols})

    return df_fmot

In [None]:
def merge_hmap_fmot_bbox(hmap, fmot, drop_dupe_id=False):
    '''
    Merge helmet mapping bboxes and MOT bboxes for a single
    video frame.  Neither input frame is modified.
    
    Args:
        hmap (pd.DataFrame): Each row a helmet, with columns such
            as 'video_frame', 'label', 'left', 'top', and 'width', etc.
        fmot (pd.DataFrame): Each row a helmet, with columns such
            as 'frame', 'fairmot_cluster', 'left', 'top', and 'width', etc.
        drop_dupe_id (bool): If True, for each repeated MOT id
            (`fairmot_cluster`) only the helmet whose 'top' matches
            the MOT box most closely is kept.  Otherwise, repeated
            MOT ids are kept.
            
    Returns:
        merged (pd.DataFrame): Each row a helmet with player number
            and MOT id, as well as bounding box properties.  Columns
            coming from `fmot` that clash with `hmap` columns carry
            the suffix '_fairmot'.
    '''
    # `merge_asof` requires both sides sorted on the key.  Sort into new
    # frames rather than in place so the caller's data is left untouched.
    hmap_sorted = hmap.sort_values(['left'], axis=0)
    fmot_sorted = fmot.sort_values(['left'], axis=0)

    # Each helmet mapping box takes the MOT box with the nearest 'left'
    merged = pd.merge_asof(hmap_sorted, fmot_sorted, on='left',
                           direction='nearest', suffixes=('', '_fairmot'))

    if drop_dupe_id:
        # For duplicated fairmot_cluster, choose the best matched top.
        # Note this removes excess baseline helmets
        merged['dtop'] = (merged['top'] - merged['top_fairmot']).abs()
        merged = merged.sort_values('dtop')
        merged = merged.groupby('fairmot_cluster').first().reset_index()
    
    return merged

In [None]:
def assign_player_to_track(df):
    '''
    Assign a player number to each MOT id occurrence in a 
    video.
    
    Args:
        df (pd.DataFrame): Each row a helmet, with 'label'
            the player number according to helmet mapping and
            'fairmot_cluster' the MOT id according to FairMOT.
    Returns:
        df (pd.DataFrame): The input frame itself (columns are added
            in place), with these extra columns:
            'label_fairmot': the most frequent helmet mapping player
                number in the helmet's MOT track.
            'label_count_fairmot': the number of occurrences of that
                most frequent player number in the MOT track.
            'label_cluster_count_fairmot': the total number of labelled
                helmets in the MOT track.
            'label_perct_fairmot': 'label_count_fairmot' divided by
                'label_cluster_count_fairmot'.
    '''
    # One row per (MOT id, label) pair with its number of occurrences,
    # most frequent pairs first.  `reset_index(name=...)` names the count
    # column explicitly because the name of the Series returned by
    # `value_counts` differs between pandas versions ('label' before 2.0,
    # 'count' from 2.0 on).  A stable sort keeps the order of tied counts
    # reproducible.
    counts = (df
              .groupby('fairmot_cluster')['label']
              .value_counts()
              .sort_values(ascending=False, kind='mergesort')
              .reset_index(name='label_count')
              )
    grpd = counts.groupby('fairmot_cluster')

    # First row of each group is the MOT id's most frequent label
    top = grpd.first()
    cluster2player = top['label'].to_dict()
    cluster2player_count = top['label_count'].to_dict()
    cluster2player_total_count = grpd['label_count'].sum().to_dict()

    df['label_fairmot'] = df['fairmot_cluster'].map(cluster2player)
    df['label_count_fairmot'] = df['fairmot_cluster'].map(cluster2player_count)
    df['label_cluster_count_fairmot'] = df['fairmot_cluster'].map(cluster2player_total_count)
    df['label_perct_fairmot'] = df['label_count_fairmot'] / df['label_cluster_count_fairmot']

    return df

In [None]:
def remove_duplicated_label_fairmot(df_tracks, keep_by='label_perct_fairmot'):
    '''
    For duplicated labels in the same video frame, keep one and discard the rest.
    
    Args:
        df_tracks (pd.DataFrame): Each row a helmet in some video frame. 
            Not modified.
        keep_by (str): Column used to rank duplicates; the row with the
            largest value is kept.  Options are:
            'label_perct_fairmot': Keep the label that takes up the largest
                percentage in its MOT cluster.
            'label_count_fairmot': Keep the label that has the largest
                count.
            Default: 'label_perct_fairmot'
            
    Returns:
        df (pd.DataFrame): Like input, but in each video frame, the labels
            are unique.  Rows are ordered by descending `keep_by`; when
            duplicates tie on `keep_by`, the one that comes first in
            `df_tracks` is kept.
    '''
    # A stable sort makes the survivor among tied duplicates well defined
    # (input order); the default quicksort gives no such guarantee.
    df = df_tracks.sort_values(keep_by, ascending=False, kind='mergesort')
    is_duped = df.duplicated(subset=['video_frame', 'label_fairmot'], keep='first')
    return df[~is_duped]

In [None]:
def fmot_postprocess_hmap(df_hmap, pth_fmot, 
                          pth_video=None,
                          drop_dupe_id=True,
                          keep_by='label_perct_fairmot',
                          hmap_fills_missing=True):
    '''
    Postprocess helmet mapping results using FairMOT tracks,
     for a video.
     
    Args:
        df_hmap (pd.DataFrame): Helmet mapping results for a video.
        pth_fmot (str): File path FairMOT results for a video, a txt file.
        pth_video (str, None): File path to the video.  If supplied,
            the video is loaded and width and height are extracted,
            to be used to rescale the FairMOT bboxes.  Otherwise the
            default dimensions of `load_demo_txt` are used.
            
        drop_dupe_id (bool): If `True`, drop helmet in a video frame if
            it's labelled with a duplicated FairMOT id.
            
        keep_by (str): Options are:
            'label_perct_fairmot': Keep the label that takes up the largest
                percentage in its MOT cluster.
            'label_count_fairmot': Keep the label that has the largest
                count.
            Default: 'label_perct_fairmot'            
            
        hmap_fills_missing (bool): If `True`, fill missing FairMOT labels
            with helmet mapping labels.  Default: True
            
    Returns: 
        df_tracks (pd.DataFrame): Helmets assigned with player numbers
            that take into account both helmet mapping and FairMOT,
            for a video.  In each video frame, all labels are unique, but
            there may be fewer helmets than there are in helmet mapping
            results.

    Raises:
        IOError: If `pth_video` is given but the video cannot be opened.
    '''
    if pth_video is not None:
        cap = cv2.VideoCapture(pth_video)
        try:
            # An unopened capture reports 0 for width/height, which would
            # silently rescale every FairMOT bbox to zero.
            if not cap.isOpened():
                raise IOError(f'Could not open video: {pth_video}')
            width  = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
            height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        finally:
            # Always free the video handle
            cap.release()
        df_fmot = load_demo_txt(pth_fmot, width, height)
    else:
        df_fmot = load_demo_txt(pth_fmot)
    
    # Merge helmet mapping and FairMOT boxes one video frame at a time
    frames = df_hmap['frame'].unique()
    per_frame_tracks = []
    for frame in frames:
        # Copies, so the merge step cannot alter `df_hmap` or `df_fmot`
        hmap = df_hmap[df_hmap['frame'] == frame].copy()
        fmot = df_fmot[df_fmot['frame'] == frame].copy()
        
        merged = merge_hmap_fmot_bbox(hmap, fmot, drop_dupe_id)
        per_frame_tracks.append(merged)
    
    df_tracks = pd.concat(per_frame_tracks, axis=0)
    
    df_tracks = assign_player_to_track(df_tracks)
    
    # This removes duplicates resulting from fairmot postprocessing
    df_tracks = remove_duplicated_label_fairmot(df_tracks, keep_by)
    
    if hmap_fills_missing:
        # Use helmet mapping label where fairmot postprocessing
        # has not provided one.  These backup labels get a tiny
        # `keep_by` value (1e-5) so that they lose against any
        # FairMOT-derived label when duplicates are removed again.
        cols_merge = ['video_frame', 
                      'left', 'width', 'top', 'height', 
                      'label', 'video', 'frame']
        df_tracks = pd.merge(df_hmap, df_tracks, 
                             left_on=cols_merge, 
                             right_on=cols_merge,    
                             how='outer')

        # Assign the filled columns back: `df[col].fillna(..., inplace=True)`
        # is chained assignment and does not update `df_tracks` when
        # pandas Copy-on-Write is active.
        df_tracks['label_fairmot'] = df_tracks['label_fairmot'].fillna(df_tracks['label'])
        df_tracks[keep_by] = df_tracks[keep_by].fillna(1e-5)

        # Again, there may still be some duplicated labels.  Remove them once again.
        df_tracks = remove_duplicated_label_fairmot(df_tracks, keep_by)

    assert df_tracks.duplicated(subset=['video_frame', 'label_fairmot']).sum() == 0
    
    return df_tracks

In [None]:
# Load helmet mapping results
ss_hmap = pd.read_csv(pth_ss_hmap)
video_frame = ss_hmap['video_frame'].str.split('_')
ss_hmap['video'] = video_frame.str[:3].str.join('_')
ss_hmap['frame'] = video_frame.str[3].astype(int)

In [None]:
! ls {dir_demo}

In [None]:
%%time

video = '57783_003374_Sideline'

# Helmet mapping rows for this video only
df_hmap = ss_hmap.query('video==@video')

# FairMOT results txt and the video file for the same video
pth_fmot = os.path.join(dir_demo, video, 'results.txt')
pth_video = os.path.join(dir_video, video + '.mp4')

# Run the packaged pipeline; the result is kept under a separate name so
# it can be compared with `df_tracks` in the cells that follow.
df_tracks_ff = fmot_postprocess_hmap(df_hmap, pth_fmot, pth_video)

In [None]:
df_tracks.sort_values(['video_frame', 'left', 'top']).tail()

In [None]:
df_tracks_ff.sort_values(['video_frame', 'left', 'top']).tail()