This notebook uses CenterNet to detect the helmet as well as the player's orientation and the gap between the helmet and the sensor.

The following notebooks are used here:
- https://www.kaggle.com/go5kuramubon/merge-label-and-tracking-data
- https://www.kaggle.com/bamps53/create-coco-format-annotations-train-val
- https://www.kaggle.com/robikscube/nfl-helmet-assignment-getting-started-guide
- https://www.kaggle.com/its7171/nfl-baseline-simple-helmet-mapping

In [None]:
debug=True   # debug mode: later cells keep only frames with frame % 10 == 1 (about 10% of the data) and train a single fold

In [None]:
import os
# Install the extra packages from local files under ../input/nfllibs
# (pqdm is imported by a later cell; the others are presumably needed by CenterNet - TODO confirm).
!pip install ../input/nfllibs/bounded_pool_executor-0.0.3-py3-none-any.whl
!pip install ../input/nfllibs/pqdm-0.1.0-py2.py3-none-any.whl
!pip install ../input/nfllibs/progress-1.6/progress-1.6
!pip install ../input/nfllibs/pycocotools-2.0.2/dist/pycocotools-2.0.2.tar
!pip install ../input/nfllibs/filterpy-1.4.5/filterpy-1.4.5

In [None]:
from PIL import Image
from matplotlib import pyplot as plt
from multiprocessing import Pool, cpu_count
from pqdm.processes import pqdm
from scipy.spatial import distance
from tqdm.auto import tqdm
import cv2
import glob
import itertools
import json
import numpy as np
import os
import pandas as pd
import random
import torch
tqdm.pandas()

# Prepare data

In [None]:
# based on https://www.kaggle.com/go5kuramubon/merge-label-and-tracking-data
# Directory the extracted video frames (JPEG files) are written to.
SAVE_DIR = '../train_images'
# Root of the competition dataset; the label and tracking CSVs are read from here.
BASE_DIR = '../input/nfl-health-and-safety-helmet-assignment'
def add_track_features(tracks, fps=59.94, snap_frame=10):
    """
    Add column features helpful for syncing tracking data with video frames.

    Works on a copy of ``tracks`` and adds:
      - ``game_play``: ``"<gameKey>_<playID zero-padded to 6 digits>"``
      - ``snap``: timestamp of the first ``ball_snap`` event of the play
      - ``isSnap``: True on rows whose ``time`` equals ``snap``
      - ``team``: ``"Home"`` / ``"Away"`` from the first letter of ``player``
      - ``snap_offset``: seconds elapsed since the snap (negative before it)
      - ``est_frame``: estimated video frame,
        ``round(snap_offset * fps + snap_frame)``

    Parameters
    ----------
    tracks : pd.DataFrame
        Needs the columns ``gameKey``, ``playID``, ``player``, ``time`` and
        ``event``.
    fps : float
        Video frame rate used to convert seconds into frames.
    snap_frame : int
        Video frame at which the snap is assumed to happen.

    Returns
    -------
    pd.DataFrame
        Copy of ``tracks`` with the columns above added.

    Notes
    -----
    A play without any ``ball_snap`` row gets a missing ``snap`` and the final
    integer cast of ``est_frame`` then fails.
    """
    tracks = tracks.copy()
    tracks["game_play"] = (
        tracks["gameKey"].astype("str")
        + "_"
        + tracks["playID"].astype("str").str.zfill(6)
    )
    tracks["time"] = pd.to_datetime(tracks["time"])
    snap_dict = (
        tracks.query('event == "ball_snap"')
        .groupby("game_play")["time"]
        .first()
        .to_dict()
    )
    tracks["snap"] = tracks["game_play"].map(snap_dict)
    tracks["isSnap"] = tracks["snap"] == tracks["time"]
    tracks["team"] = tracks["player"].str[0].replace("H", "Home").replace("V", "Away")
    # total_seconds() yields float seconds on every pandas version, whereas
    # .astype("timedelta64[ms]") / 1_000 stays a timedelta on pandas >= 2 and
    # breaks the frame arithmetic below.
    tracks["snap_offset"] = (tracks["time"] - tracks["snap"]).dt.total_seconds()
    # Estimated video frame
    tracks["est_frame"] = (
        ((tracks["snap_offset"] * fps) + snap_frame).round().astype("int")
    )
    return tracks

def add_cols(df):
    """
    Derive ``frame``, ``playID``, ``view`` (and ``video`` if absent) from the
    underscore-separated ``video_frame`` column.

    The frame is modified in place and also returned.
    """
    parts = df['video_frame'].str.split('_')
    df['frame'] = parts.str[-1].astype(int)
    df['playID'] = parts.str[1].astype(int)
    df['view'] = parts.str[2]
    if 'video' not in df.columns:
        # first three fields joined back together name the source video file
        df['video'] = parts.str[:3].str.join('_') + '.mp4'
    return df

def merge_label_and_tracking(tracking_df, label_df):
    """
    Attach to every helmet label the tracking row of the same player whose
    ``est_frame`` is nearest to the label's ``frame``.

    Labels are processed per (gameKey, playID, view, label) group; labels
    ``H00`` and ``V00`` are skipped (the original author notes these are
    sideline players). The result is sorted by ``video_frame`` and ``label``
    with a fresh index.
    """
    tracking_by_player = tracking_df.set_index(["gameKey", "playID", "player"])
    grouped_labels = label_df.groupby(["gameKey", "playID", "view", "label"])

    merged_parts = []
    for (game_key, play_id, _view, player), player_labels in tqdm(grouped_labels):
        if player in ("H00", "V00"):
            continue
        player_tracking = tracking_by_player.loc[(game_key, play_id, player)]
        # merge_asof needs the left key sorted; match frame to the closest est_frame
        merged_parts.append(
            pd.merge_asof(
                player_labels.sort_values("frame"),
                player_tracking,
                left_on="frame",
                right_on="est_frame",
                direction='nearest',
            )
        )

    all_merged_df = pd.concat(merged_parts)
    return all_merged_df.sort_values(["video_frame", "label"], ignore_index=True)

def compute_overlap(boxes, query_box):
    """
    Intersection-over-union of each row of ``boxes`` with ``query_box``.

    Boxes are given as (XMin, YMin, XMax, YMax). Returns a float64 array with
    one entry per row of ``boxes``; rows that do not overlap the query stay 0.
    """
    n_boxes = boxes.shape[0]
    overlaps = np.zeros((n_boxes), dtype=np.float64)
    q_x1, q_y1, q_x2, q_y2 = query_box[0], query_box[1], query_box[2], query_box[3]
    query_area = (q_x2 - q_x1) * (q_y2 - q_y1)

    for i in range(n_boxes):
        x1, y1, x2, y2 = boxes[i, 0], boxes[i, 1], boxes[i, 2], boxes[i, 3]
        inter_w = min(x2, q_x2) - max(x1, q_x1)
        if not inter_w > 0:
            continue
        inter_h = min(y2, q_y2) - max(y1, q_y1)
        if not inter_h > 0:
            continue
        union_area = np.float64(
            (x2 - x1) * (y2 - y1) + query_area - inter_w * inter_h
        )
        overlaps[i] = inter_w * inter_h / union_area
    return overlaps

def add_xy(df):
    """
    Add corner columns ``x1``, ``x2``, ``y1`` and ``y2`` (needed for IoU).

    Pixel coordinates have their origin at the top-left corner, so a box
    extends to the right (width) and downwards (height) from (left, top).
    The frame is modified in place and also returned.
    """
    for origin, extent, lo_col, hi_col in (
        ("left", "width", "x1", "x2"),
        ("top", "height", "y1", "y2"),
    ):
        df[lo_col] = df[origin]
        df[hi_col] = df[origin] + df[extent]
    return df

def set_counts_columns(df, tgt, dummy):
    """
    Add a ``<tgt>_counts`` column holding, for each row, the number of
    non-null ``dummy`` values that share the row's ``tgt`` value.

    Returns the (in-place modified) frame and a two-column lookup frame
    ``[tgt, <tgt>_counts]``.
    """
    counts_col = f'{tgt}_counts'
    per_key = df.groupby(tgt)[dummy].count()
    mapping_df = per_key.reset_index(name=counts_col)
    df[counts_col] = df[tgt].map(per_key.to_dict())
    return df, mapping_df


# Load the helmet labels and the player tracking data, then join them so that
# every labelled helmet carries the tracking row nearest to its frame.
labels = pd.read_csv(f'{BASE_DIR}/train_labels.csv')
tracking = pd.read_csv(f'{BASE_DIR}/train_player_tracking.csv')
tracking = add_track_features(tracking)
labels = add_cols(labels)
labels = merge_label_and_tracking(tracking, labels)
# numeric team id derived from the first letter of the label (H -> 0, V -> 1)
labels['team'] = labels['label'].str[0].map({'H':0, 'V':1})
labels = add_xy(labels)
# .copy() so that set_counts_columns adds its column to an independent frame
# rather than to a slice of the unfiltered one (avoids SettingWithCopyWarning).
labels = labels[~(labels.frame == 0)].copy()
# The lookup frame is deliberately NOT bound to the name `mapping_df`: a function
# of that name is defined further down and re-running this cell would replace it.
labels, video_frame_counts = set_counts_columns(labels, 'video_frame', 'left')
labels = labels.reset_index()
tracking = tracking.reset_index()

In [None]:
# 4-fold CV: cv_video[i] is the set of video file names assigned to fold i.
# Both camera views (Endzone and Sideline) of a play are always listed in the same fold.
cv_video = [
{'57584_000336_Endzone.mp4', '57584_000336_Sideline.mp4', '57584_002674_Endzone.mp4', '57584_002674_Sideline.mp4', '57594_000923_Endzone.mp4', '57594_000923_Sideline.mp4', '57682_002630_Endzone.mp4', '57682_002630_Sideline.mp4', '57684_001985_Endzone.mp4', '57684_001985_Sideline.mp4', '57787_003413_Endzone.mp4', '57787_003413_Sideline.mp4', '57905_002404_Endzone.mp4', '57905_002404_Sideline.mp4', '57906_000718_Endzone.mp4', '57906_000718_Sideline.mp4', '57907_003615_Endzone.mp4', '57907_003615_Sideline.mp4', '57910_001164_Endzone.mp4', '57910_001164_Sideline.mp4', '57913_000218_Endzone.mp4', '57913_000218_Sideline.mp4', '57915_003093_Endzone.mp4', '57915_003093_Sideline.mp4', '58048_000086_Endzone.mp4', '58048_000086_Sideline.mp4', '58098_001193_Endzone.mp4', '58098_001193_Sideline.mp4'},
{'57676_003572_Endzone.mp4', '57676_003572_Sideline.mp4', '57775_000933_Endzone.mp4', '57775_000933_Sideline.mp4', '57778_004244_Endzone.mp4', '57778_004244_Sideline.mp4', '57781_000252_Endzone.mp4', '57781_000252_Sideline.mp4', '57783_003374_Endzone.mp4', '57783_003374_Sideline.mp4', '57911_000147_Endzone.mp4', '57911_000147_Sideline.mp4', '57911_002492_Endzone.mp4', '57911_002492_Sideline.mp4', '57912_001325_Endzone.mp4', '57912_001325_Sideline.mp4', '57992_000301_Endzone.mp4', '57992_000301_Sideline.mp4', '57992_000350_Endzone.mp4', '57992_000350_Sideline.mp4', '57993_000475_Endzone.mp4', '57993_000475_Sideline.mp4', '58093_001923_Endzone.mp4', '58093_001923_Sideline.mp4', '58094_000423_Endzone.mp4', '58094_000423_Sideline.mp4', '58094_002819_Endzone.mp4', '58094_002819_Sideline.mp4', '58102_002798_Endzone.mp4', '58102_002798_Sideline.mp4', '58104_000352_Endzone.mp4', '58104_000352_Sideline.mp4'},
{'57596_002686_Endzone.mp4', '57596_002686_Sideline.mp4', '57679_003316_Endzone.mp4', '57679_003316_Sideline.mp4', '57686_002546_Endzone.mp4', '57686_002546_Sideline.mp4', '57700_001264_Endzone.mp4', '57700_001264_Sideline.mp4', '57782_000600_Endzone.mp4', '57782_000600_Sideline.mp4', '57785_002026_Endzone.mp4', '57785_002026_Sideline.mp4', '57790_002792_Endzone.mp4', '57790_002792_Sideline.mp4', '57790_002839_Endzone.mp4', '57790_002839_Sideline.mp4', '57904_001367_Endzone.mp4', '57904_001367_Sideline.mp4', '57997_003691_Endzone.mp4', '57997_003691_Sideline.mp4', '57998_002181_Endzone.mp4', '57998_002181_Sideline.mp4', '58005_001254_Endzone.mp4', '58005_001254_Sideline.mp4', '58005_001612_Endzone.mp4', '58005_001612_Sideline.mp4', '58107_004362_Endzone.mp4', '58107_004362_Sideline.mp4'},
{'57583_000082_Endzone.mp4', '57583_000082_Sideline.mp4', '57586_000540_Endzone.mp4', '57586_000540_Sideline.mp4', '57586_001934_Endzone.mp4', '57586_001934_Sideline.mp4', '57586_004152_Endzone.mp4', '57586_004152_Sideline.mp4', '57597_000658_Endzone.mp4', '57597_000658_Sideline.mp4', '57597_001242_Endzone.mp4', '57597_001242_Sideline.mp4', '57680_002206_Endzone.mp4', '57680_002206_Sideline.mp4', '57680_003470_Endzone.mp4', '57680_003470_Sideline.mp4', '57784_001741_Endzone.mp4', '57784_001741_Sideline.mp4', '57786_003085_Endzone.mp4', '57786_003085_Sideline.mp4', '57788_000781_Endzone.mp4', '57788_000781_Sideline.mp4', '57995_000109_Endzone.mp4', '57995_000109_Sideline.mp4', '58000_001306_Endzone.mp4', '58000_001306_Sideline.mp4', '58095_004022_Endzone.mp4', '58095_004022_Sideline.mp4', '58103_003494_Endzone.mp4', '58103_003494_Sideline.mp4', '58106_002918_Endzone.mp4', '58106_002918_Sideline.mp4'}
]
# Tag every label row with the index of the fold whose video set contains its video.
for f, fold_videos in enumerate(cv_video):
    in_fold = labels['video'].isin(fold_videos)
    labels.loc[in_fold, 'fold'] = f
# rows were filled through .loc, so cast the column to integers afterwards
labels['fold'] = labels['fold'].astype(int)

In [None]:
if debug:
    # debug mode: keep frames 1, 11, 21, ... only
    keep_frame = labels['frame'] % 10 == 1
    labels = labels[keep_frame]
labels.shape

# find best params
Search for the values of the rotation angle, the trapezoidal correction, and the Home/Visitor side that result in the smallest gap between the players' tracking coordinates and the helmet positions.

In [None]:
# based on https://www.kaggle.com/its7171/nfl-baseline-simple-helmet-mapping
def norm_arr_1dim(a):
    """
    Shift ``a`` so its minimum is 0 and scale it so its maximum is 1.

    Returns ``(normalised, scale)`` where ``scale`` is the range of the input.
    When the range is 0 the shifted (all-zero) array is returned unscaled and
    a message is printed.
    """
    shifted = a - a.min()
    span = shifted.max()
    if span != 0:
        return shifted / span, span
    print('max_a is 0')
    return shifted, span

def norm_arr(a):
    """
    Min-max normalise the first two columns of ``a`` independently.

    ``a`` is overwritten in place. Returns ``(a, scale0, scale1)`` with the
    original range of column 0 and column 1.
    """
    scales = []
    for col in (0, 1):
        a[:, col], col_scale = norm_arr_1dim(a[:, col])
        scales.append(col_scale)
    return a, scales[0], scales[1]

def dist(a1, a2):
    """
    Sum of absolute coordinate differences (L1 distance) between two point
    sets, using column 0 as x and column 1 as y, rows paired by position.
    """
    x_gap = np.abs(a1[:, 0] - a2[:, 0]).sum()
    y_gap = np.abs(a1[:, 1] - a2[:, 1]).sum()
    return x_gap + y_gap

def rotate_arr(u, t, pt, w, h, aspect_ratio=(1,1), debug=False):
    """
    Apply a trapezoidal (perspective) correction, an aspect-ratio scaling and
    a rotation to a set of 2-D points.

    Parameters
    ----------
    u : array of shape (N, 2), one point per row.
    t : rotation angle handed to cv2.getRotationMatrix2D
        (degrees per the OpenCV documentation).
    pt : trapezoid ratio. For pt <= 1 the w x h rectangle is mapped onto a
        trapezoid whose y = h edge is inset by int(w*(1-pt)/2) on each side;
        for pt > 1 the ratio is inverted and the mapping direction is reversed.
    w, h : size of the reference rectangle used to build both transforms.
    aspect_ratio : (x, y) scale pair; normalised so the larger entry becomes 1.
    debug : print the intermediate matrices when True.

    Returns
    -------
    Array of shape (N, 2) with the transformed points.
    """
    # Normalise the aspect ratio so that the larger component is exactly 1.
    xscale, yscale = aspect_ratio[0], aspect_ratio[1]
    if xscale > yscale:
        yscale /= xscale
        xscale = 1
    else:
        xscale /= yscale
        yscale = 1
    aspect_ratio = (xscale, yscale)

    rot_center_x = w*xscale/2
    # NOTE(review): the y centre is derived from w, not h - possibly a typo; confirm
    # before changing, since it shifts the returned coordinates.
    rot_center_y = w*yscale/2
    R_rot = cv2.getRotationMatrix2D((rot_center_x, rot_center_y), t, 1)
    
    # Build the perspective matrix from the four rectangle / trapezoid corners.
    if pt > 1:
        pt = 1 / pt
        src_pts = np.array([[0, 0], [0+int(w*(1-pt)/2), h], [w-int(w*(1-pt)/2), h], [w, 0]], dtype=np.float32)
        dst_pts = np.array([[0, 0], [0, h], [w, h], [w, 0]], dtype=np.float32);
    else:
        src_pts = np.array([[0, 0], [0, h], [w, h], [w, 0]], dtype=np.float32)
        dst_pts = np.array([[0, 0], [0+int(w*(1-pt)/2), h], [w-int(w*(1-pt)/2), h], [w, 0]], dtype=np.float32)
    R_pt = cv2.getPerspectiveTransform(src_pts, dst_pts)

    if debug:
        # note: `pt` printed here is the inverted value when the caller passed pt > 1
        print('R_rot', R_rot, t)
        print('R_pt', R_pt, pt, w, h, [0+w*pt, 0], [w-w*pt, 0])
        print('src_pts',src_pts)
        print('dst_pts',dst_pts)
    
    # Work with points as columns (2 x N) from here on.
    u = u.T
    # Trapezoidal correction: homogeneous coordinates, 3x3 transform, perspective divide
    u = np.vstack([u,np.ones(u.shape[1])])
    u = np.dot(R_pt, u)
    u = u[:2,:]/u[2,:]
    # aspect ratio: scale x and y by the normalised factors
    u = u*np.expand_dims(aspect_ratio, axis=1)
    # rotate: apply the 2x3 affine rotation matrix to homogeneous coordinates
    u = np.vstack([u,np.ones(u.shape[1])])
    u = np.dot(R_rot, u)
    return  u.T

def dist_rot(helmets):
    """
    Grid-search the rotation angle and trapezoid ratio that best align the
    tracking coordinates with the helmet centres of one video frame.

    For every angle in ``range(-DIG_MAX, DIG_MAX+1, DIG_STEP)`` and every
    trapezoid ratio in ``np.arange(1.0, 1.8, 0.1)`` the tracking points
    (``gt_x``, ``gt_y``) are rotated, rescaled to the bounding extent of the
    helmet centres, trapezoid-corrected and min-max normalised; the candidate
    with the smallest L1 distance to the normalised helmet centres
    (``center_x``, ``center_y``) wins.

    Parameters
    ----------
    helmets : pd.DataFrame
        Rows of a single frame with the columns ``center_x``, ``center_y``,
        ``gt_x`` and ``gt_y``. The frame is modified in place.

    Returns
    -------
    (min_dist, helmets)
        ``helmets`` gains ``dist0``, ``rot0``, ``pt0``, ``a1scl0``, ``a1scl1``,
        ``x_rot0``, ``y_rot0``, ``x_org0`` and ``y_org0``.

    Raises
    ------
    ValueError
        If no candidate produced a comparable distance (e.g. every distance
        is NaN because the points have zero extent). The previous
        implementation failed with an UnboundLocalError in this situation.
    """
    a2_org = helmets[['center_x','center_y']].values.astype(float)  # helmet box centres
    # The extent must be taken before norm_arr, which rescales a2_org in place.
    a2_min = np.min(a2_org, axis=0)
    a2_max = np.max(a2_org, axis=0)
    a2_len = a2_max - a2_min
    a2, _, _ = norm_arr(a2_org)
    a1 = helmets[['gt_x','gt_y']].values.astype(float)  # tracking coordinates

    min_dist = 1000000
    best = None  # (rotation, trapezoid ratio, x scale, y scale, normalised points)
    for dig in range(-DIG_MAX,DIG_MAX+1,DIG_STEP):
        # The rotation and the rescale to the helmet extent do not depend on the
        # trapezoid ratio, so compute them once per angle.
        a1_rot = rotate_arr(a1, dig, 1, 1280, 720).copy()
        a1_min = np.min(a1_rot, axis=0)
        a1_max = np.max(a1_rot, axis=0)
        a1_len = a1_max - a1_min
        a1_rot_rescale = (a1_rot-a1_min)*a2_len/a1_len + a2_min
        for x_pt in np.arange(1.0, 1.8, 0.1):
            a1_rot2 = rotate_arr(a1_rot_rescale, 0, x_pt, 1280, 720).copy()
            a1_norm, a1scl0, a1scl1 = norm_arr(a1_rot2)
            this_dist = dist(a1_norm, a2)
            if min_dist > this_dist:
                min_dist = this_dist
                best = (dig, x_pt, a1scl0, a1scl1, a1_norm.copy())

    if best is None:
        raise ValueError(
            'dist_rot: no valid rotation/trapezoid candidate found '
            f'for a frame with {len(helmets)} helmet(s)'
        )
    min_rot, min_pt, min_scl0, min_scl1, min_a1_rot = best

    helmets['dist0'] = min_dist
    helmets['rot0'] = min_rot
    helmets['pt0'] = min_pt
    helmets['a1scl0'] = min_scl0
    helmets['a1scl1'] = min_scl1
    helmets['x_rot0'] = min_a1_rot[:,0]
    helmets['y_rot0'] = min_a1_rot[:,1]
    helmets['x_org0'] = a1[:,0]
    helmets['y_org0'] = a1[:,1]
    return min_dist, helmets

def mapping_df(args):
    """
    Find the best tracking-to-image alignment for one video frame.

    Two hypotheses about which side the camera looks from are evaluated with
    ``dist_rot`` (flagged ``Home`` = True / False); each one mirrors or swaps
    the tracking ``x`` / ``y`` columns into ``gt_x`` / ``gt_y`` depending on
    the view (Endzone or Sideline).

    Parameters
    ----------
    args : tuple
        ``(video_frame, df)`` as produced by ``labels.groupby('video_frame')``;
        ``video_frame`` has the form ``<gameKey>_<playID>_<view>_<frame>``.
        ``df`` is not modified.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with ``center_x``, ``center_y``, ``Endzone``, ``Home``,
        ``gt_x``, ``gt_y`` and the columns added by ``dist_rot`` for the
        hypothesis with the smaller distance (ties go to ``Home`` = True).
    """
    video_frame, df = args
    # only the camera view is needed; the unpacking still checks the 4-field format
    _, _, view, _ = video_frame.split('_')

    # Work on copies so the caller's group frame is left untouched.
    df_h = df.copy()
    df_h['center_x'] = (df_h['left']+df_h['width']/2).astype(int)
    df_h['center_y'] = (df_h['top']+df_h['height']/2).astype(int)
    df_a = df_h.copy()

    if view == 'Endzone':
        # Endzone Home
        df_h['Endzone'] = True
        df_h['Home'] = True
        df_h['gt_x'] = 53.3 - df_h['y']
        df_h['gt_y'] = 120  - df_h['x']
        # Endzone Visitor
        df_a['Endzone'] = True
        df_a['Home'] = False
        df_a['gt_x'] = df_a['y']
        df_a['gt_y'] = df_a['x']
    else:
        # Sideline Home
        df_h['Endzone'] = False
        df_h['Home'] = True
        df_h['gt_x'] = df_h['x']
        df_h['gt_y'] = 53.3 - df_h['y']
        # Sideline Visitor
        df_a['Endzone'] = False
        df_a['Home'] = False
        df_a['gt_x'] = 120  - df_a['x']
        df_a['gt_y'] = df_a['y']

    min_dist_a, df_a = dist_rot(df_a)
    min_dist_h, df_h = dist_rot(df_h)
    if min_dist_a < min_dist_h:
        tgt_df = df_a
    else:
        tgt_df = df_h
    return tgt_df

In [None]:
# How the per-frame search is executed:
#   'pqdm' -> pqdm process pool, 'pool' -> multiprocessing.Pool, anything else -> serial loop
multi_thread_type = 'pqdm'
# Rotation search grid used by dist_rot: -DIG_MAX .. +DIG_MAX in steps of DIG_STEP.
DIG_STEP = 1 
DIG_MAX = 80
labels = labels[labels.frame != 0]

# one (video_frame, rows) work item per frame
df_list = list(labels.groupby('video_frame'))

if multi_thread_type == 'pqdm':
    submission_df_list = pqdm(df_list, mapping_df, n_jobs=cpu_count())
elif multi_thread_type == 'pool':
    p = Pool(processes=cpu_count())
    submission_df_list = list(tqdm(p.imap(mapping_df, df_list), total=len(df_list)))
    p.close()
else:
    submission_df_list = [mapping_df(args) for args in tqdm(df_list)]

labels =  pd.concat(submission_df_list)

In [None]:
# distribution of the best alignment distance per helmet row
labels['dist0'].hist(bins=100)

In [None]:
# distribution of the selected rotation angle per helmet row
labels['rot0'].hist(bins=100)

# fix data

In [None]:
# Some players are oriented the other way, so we'll fix that.
# Adjust the angle according to (Endzone or Sideline) and (Home or Visitor)

def fix_bad_data(labels, tgt_col = 'o'):
    """
    Correct angle column ``tgt_col`` and express it relative to the camera.

    1. A player whose angle in frame 1 differs by 100 degrees or more from the
       mean of their team (same ``video_frame``) is treated as reversed; every
       row with the same ``gamePlay_label`` is turned by 180 degrees.
    2. A fixed offset is subtracted depending on the ``Endzone`` and ``Home``
       flags, and the result is wrapped into [-180, 180].

    The frame is modified in place (helper columns ``HorV``,
    ``gamePlay_label``, ``team_o_mean``, ``diff_o_vs_team``,
    ``same_direction_vs_team`` and the result ``<tgt_col>_fixed`` are added)
    and also returned.
    """
    new_tgt_col = tgt_col + '_fixed'

    labels['HorV'] = labels['label'].str[0]
    # NOTE(review): rsplit('_') without a limit keeps only the first field of
    # `video` (the game key), so this id is per game + label, not per play - confirm intent.
    labels['gamePlay_label'] = labels['video'].str.rsplit('_').str[0] + '_' + labels['label']

    # angular distance of every player to the mean of their team, folded into [0, 180]
    labels["team_o_mean"] = labels.groupby(["video_frame","HorV"])[tgt_col].transform("mean")
    labels["diff_o_vs_team"] = np.abs(labels[tgt_col] - labels["team_o_mean"])
    beyond_half_turn = labels["diff_o_vs_team"]>180
    labels.loc[beyond_half_turn, "diff_o_vs_team"] = np.abs(labels.loc[beyond_half_turn, "diff_o_vs_team"] - 360)
    labels["same_direction_vs_team"] = labels["diff_o_vs_team"] < 100

    first_frame_outlier = (labels['frame']==1)&(labels.same_direction_vs_team==False)
    bad_data = labels[first_frame_outlier]['gamePlay_label'].unique()

    labels[new_tgt_col] = labels[tgt_col]
    labels.loc[labels["gamePlay_label"].isin(bad_data), new_tgt_col] -= 180
    labels.loc[labels[new_tgt_col] < 0, new_tgt_col] += 360

    # degrees subtracted per (Endzone, Home) combination
    view_offsets = {
        (True, True): 180,
        (True, False): 0,
        (False, True): 90,
        (False, False): 270,
    }
    for (is_endzone, is_home), offset in view_offsets.items():
        selected = (labels['Endzone']==is_endzone)&(labels['Home']==is_home)
        labels.loc[selected, new_tgt_col] = labels.loc[selected, new_tgt_col]-offset

    # wrap into [-180, 180]
    labels.loc[labels[new_tgt_col]<-180, new_tgt_col] += 360
    labels.loc[labels[new_tgt_col]>180, new_tgt_col] -= 360

    return labels
# Correct the orientation column 'o'; the result is stored in the new column 'o_fixed'.
labels = fix_bad_data(labels, tgt_col = 'o')

# calculate the gap

In [None]:
# calculate the difference between the helmet and the sensor
# how many times the helmet size is off after rotation
def calc_xy_diff(df):
    """
    Offset between the aligned tracking position and the helmet centre for
    every row of one video frame, in units of the mean helmet size.

    The tracking points (``gt_x``, ``gt_y``) are rotated by the frame's
    ``rot0``, fitted to the bounding extent of the helmet centres,
    trapezoid-corrected with ``pt0`` and fitted again; the helmet centres are
    then subtracted. x offsets are divided by the mean ``width`` and y offsets
    by the mean ``height``.

    Returns an (N, 2) array of (x, y) offsets.
    """
    rotation = df.rot0.values[0]
    trapezoid = df.pt0.values[0]

    helmet_xy = df[['center_x','center_y']].values.astype(float)
    helmet_min = np.min(helmet_xy, axis=0)
    helmet_max = np.max(helmet_xy, axis=0)
    helmet_len = helmet_max - helmet_min

    def fit_to_helmets(points):
        # stretch the bounding box of `points` onto that of the helmet centres
        lo = np.min(points, axis=0)
        hi = np.max(points, axis=0)
        span = hi - lo
        return (points-lo)*helmet_len/span + helmet_min

    sensor_xy = df[['gt_x','gt_y']].values.astype(float)
    rotated = fit_to_helmets(rotate_arr(sensor_xy, rotation, 1, 1280, 720).copy())
    corrected = fit_to_helmets(rotate_arr(rotated, 0, trapezoid, 1280, 720).copy())

    xy_diff = corrected - helmet_xy
    # normalize using mean helmet size
    xy_diff[:,0] = xy_diff[:,0] /  df['width'].mean()
    xy_diff[:,1] = xy_diff[:,1] /  df['height'].mean()

    return xy_diff


# Compute the helmet-vs-sensor offset frame by frame and stitch the frames back together.
result = []
for video_frame, df in tqdm(labels.groupby('video_frame')):
    xy_diff = calc_xy_diff(df)
    df = df.assign(xdiff=xy_diff[:,0], ydiff=xy_diff[:,1])
    result.append(df)
labels = pd.concat(result)

# normalization

In [None]:
# alpha / beta: corrected orientation and movement direction relative to the
# frame's rotation angle rot0, wrapped into [-180, 180].
for angle_col, source_col in (('alpha', 'o_fixed'), ('beta', 'dir')):
    labels[angle_col] = labels[source_col] - labels['rot0']
    labels.loc[labels[angle_col]<-180, angle_col] += 360
    labels.loc[labels[angle_col]>180, angle_col] -= 360

In [None]:
# Per-column clipping bounds ('min' / 'max') and the shift ('mean') and divisor ('var')
# applied afterwards. Columns without an entry are only plotted.
norm_dict = {
    'xdiff': {'mean': 0, 'var': 1.0, 'max': 5.0, 'min': -5.0},
    'ydiff': {'mean': 0, 'var': 1.0, 'max': 5.0, 'min': -5.0},
    's': {'mean': 1.0, 'var': 2.0, 'max': 10.0, 'min': -1.0},
    'a': {'mean': 1.0, 'var': 2.0, 'max': 10.0, 'min': -1.0},
    'dis': {'mean': 0.1, 'var': 0.2, 'max': 1.0, 'min': -1.0},
}
for lbl in ['xdiff', 'ydiff','s','a','dis','alpha','beta']:
    print(lbl)
    # histogram before normalisation
    labels[lbl].hist(bins=100)
    plt.show()
    stats = norm_dict.get(lbl)
    if stats is not None:
        upper, lower = stats['max'], stats['min']
        labels.loc[labels[lbl]>upper, lbl] = upper
        labels.loc[labels[lbl]<lower, lbl] = lower
        labels[lbl] -= stats['mean']
        labels[lbl] /= stats['var']
    # histogram after normalisation (unchanged for columns without an entry)
    labels[lbl].hist(bins=100)
    plt.show()

# make coco format files

In [None]:
# based on https://www.kaggle.com/bamps53/create-coco-format-annotations-train-val

class NumpyEncoder(json.JSONEncoder):
    """
    JSON encoder that also understands numpy scalars and arrays.

    Based on
    https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """
    def default(self, obj):
        """Convert numpy values to plain Python; defer everything else to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

class COCOConverter:
    """
    Convert the competition label table to a COCO-format annotation file.

    Parameters
    ----------
    df : pd.DataFrame
        One row per helmet box with at least ``video``, ``frame``, ``left``,
        ``top``, ``width`` and ``height`` (plus ``impactType`` when
        ``type_agnostic`` is False, and ``gameKey``, ``playID``, ``view`` for
        ``save``). The frame is modified in place, so pass a copy if the
        original must stay untouched.
    image_height, image_width : int
        Size written into every image record.
    type_agnostic : bool
        True: a single "Helmet" category. False: one category per impact type.
    """
    def __init__(
        self,
        df: pd.DataFrame, 
        image_height: int = 720, 
        image_width: int = 1280, 
        type_agnostic: bool = True):
        
        self.image_height = image_height
        self.image_width = image_width
        self.type_agnostic = type_agnostic
        if self.type_agnostic:
            self.categories = [{"id": 1, "name": "Helmet"}]
        else:
            self.categories = [
                {"id": 1, "name": "impact_None",},
                {"id": 2, "name": "impact_Helmet"},
                {"id": 3, "name": "impact_Shoulder",},
                {"id": 4, "name": "impact_Body"},
                {"id": 5, "name": "impact_Ground",},
                {"id": 6, "name": "impact_Hand"},
            ]         
        self.df = self._initialize(df)

    def _get_file_name(self, row: pd.Series):
        """Image file name of a row: ``<video without '.mp4'>_frame<4-digit frame>.jpg``."""
        base_name = row.video[:-4]
        file_name = f'{base_name}_frame{row.frame:04}.jpg'
        return file_name

    def _get_bbox(self, row: pd.Series):
        """COCO bounding box of a row: ``[left, top, width, height]``."""
        return [row.left, row.top, row.width, row.height]

    def _initialize(self, df: pd.DataFrame):
        """Add the COCO annotation fields (category, file name, area, bbox, iscrowd) to ``df``."""
        # set category id
        if self.type_agnostic:
            df['impactType'] = 'Helmet'
            df['category_id'] = 1
        else:
            df['category_id'] = df['impactType'].map(
                {
                    'None': 1,
                    'Helmet': 2,
                    'Shoulder': 3,
                    'Body': 4,
                    'Ground': 5,
                    'Hand': 6
                }
            )
        # some preprocesses
        df['file_name'] = df[['video', 'frame']].progress_apply(self._get_file_name, axis=1)
        df['area'] = df['width'] * df['height']
        df['bbox'] = df[['left', 'top', 'width', 'height']].progress_apply(self._get_bbox, axis=1)
        df['iscrowd'] = 0
        return df
        

    def save(self, save_path):
        """
        Save as coco json format.
        But also has many supplemental items like gameKey or view.

        Every column of the label table is written into the annotation
        records; image records are the distinct (video, frame) combinations.
        """
        df = self.df.copy()
        image_df = df[['gameKey', 'playID', 'view', 'video', 'frame', 'file_name']].drop_duplicates()
        image_df['height'] = self.image_height
        image_df['width'] = self.image_width
        
        # add image id to images. Note that it's called just "id".
        image_df['id'] = range(1, len(image_df) + 1)
    
        # add image id to annotations.
        df['image_id'] = df[['file_name']].merge(image_df[['file_name', 'id']])['id'].values
        df['id'] = range(1, len(df) + 1)

        print('start dumping...')
        coco_annotations = dict()
        coco_annotations['categories'] = self.categories
        coco_annotations['images'] = [dict(row) for _, row in image_df.iterrows()]
        coco_annotations['annotations'] = [dict(row) for _, row in df.iterrows()]
        # context manager: the file is flushed and closed even if encoding fails
        with open(save_path, 'w') as json_file:
            json.dump(coco_annotations, json_file, indent=4, cls=NumpyEncoder)

In [None]:
!mkdir -p ../work
df = labels[['video_frame','gameKey','playID','view','video','frame','label','left','width','top','height','impactType','isDefinitiveImpact','isSidelinePlayer','alpha','beta','xdiff','ydiff','x', 'y', 's', 'a', 'dis', 'o', 'dir', 'fold']]
PH='ph5'
train_coco = COCOConverter(df.copy(), type_agnostic=True)
train_coco.save(f'../work/coco_train_full_{PH}.json')

for fold in range(4):
    train_df = df[df['fold']!=fold].reset_index(drop=True).copy()
    valid_df = df[df['fold']==fold].reset_index(drop=True).copy()
    print('number of train annotations:', len(train_df))
    print('number of valid annotations:', len(valid_df))
    train_coco = COCOConverter(train_df, type_agnostic=True)
    train_coco.save(f'../work/coco_train_fold{fold}_{PH}.json')
    valid_coco = COCOConverter(valid_df, type_agnostic=True)
    valid_coco.save(f'../work/coco_valid_fold{fold}_{PH}.json')

# make image files

In [None]:
# https://www.kaggle.com/bamps53/create-image-dataset
def split_to_images(video_path):
    """
    Write the frames of one video as JPEG files into ``SAVE_DIR``.

    Files are named ``<video name>_frame<4-digit frame number>.jpg`` with frame
    numbers starting at 1. In debug mode only frames 1, 11, 21, ... are
    written.

    Parameters
    ----------
    video_path : str
        Path of the video file to split.
    """
    video_name = os.path.basename(video_path).split('.')[0]
    cam = cv2.VideoCapture(video_path)
    try:
        frame_count = 1 # To make it consistent with train_labels.csv
        while True:
            successed, img = cam.read()
            if not successed:
                break
            if not debug or (frame_count % 10 == 1):
                save_name = f'{SAVE_DIR}/{video_name}_frame{frame_count:04}.jpg'
                cv2.imwrite(save_name, img)
            frame_count += 1
    finally:
        # always give the capture handle back, even if reading or writing fails
        cam.release()

# Split every training video into image files, one worker process per CPU.
os.makedirs(SAVE_DIR, exist_ok=True)
video_paths = sorted(glob.glob('../input/nfl-health-and-safety-helmet-assignment/train/*'))
num_cpu = cpu_count()
pool = Pool(num_cpu)
# the results carry no data; iterating only drives the progress bar
for _ in tqdm(pool.imap_unordered(split_to_images, video_paths), total=len(video_paths)):
    pass
pool.close()
pool.terminate()

# Prepare CenterNet

In [None]:
# Unpack the bundled CenterNet sources into /kaggle/centernet.
!mkdir -p /kaggle/centernet
%cd /kaggle/centernet

!tar xfz ../input/nfllibs/centernet.tgz
# Build the DCNv2 extension (build output is discarded).
%cd src/lib/models/networks/DCNv2
!python3 setup.py build develop > /dev/null 2>&1
# Build the code under src/lib/external (build output is discarded).
%cd /kaggle/centernet/src/lib/external
!make > /dev/null 2>&1
%cd /kaggle/working

# CenterNet

In [None]:
%cd /kaggle/centernet/src

# Number of CV folds to train; debug mode trains fold 0 only.
n_fold = 4
if debug:
    n_fold = 1

# Train one model per fold on the matching train / valid annotation files.
for fold in range(n_fold):
    !python main.py ctdet --exp_id nfl_ph5_fold{fold} --batch_size 4 --lr 0.156e-4  --gpus 0 --split_train ../../work/coco_train_fold{fold}_ph5.json --split_val ../../work/coco_valid_fold{fold}_ph5.json --save_all --num_epochs 5 --val_intervals 1
# using full data, so validation can not be trusted for full model.
!python main.py ctdet --exp_id nfl_ph5_full        --batch_size 4 --lr 0.156e-4  --gpus 0 --split_train ../../work/coco_train_full_ph5.json --split_val        ../../work/coco_valid_fold0_ph5.json       --save_all --num_epochs 5 --val_intervals 1

# Run each fold model (epoch-5 weights) on its validation split,
# and the full model on the full training annotations.
for fold in range(n_fold):
    !python test.py ctdet --exp_id nfl_ph5_fold{fold} --keep_res --load_model ../exp/ctdet/nfl_ph5_fold{fold}/model_5.pth --flip_test --split_test ../../work/coco_valid_fold{fold}_ph5.json --K 50
!python test.py ctdet --exp_id nfl_ph5_full        --keep_res --load_model ../exp/ctdet/nfl_ph5_full/model_5.pth        --flip_test --split_test ../../work/coco_train_full_ph5.json --K 50
%cd /kaggle/working

In [None]:
# Show the epoch-5 weight file of the model trained on the full data set.
!ls -Rl /kaggle/centernet/exp/ctdet/nfl_ph5_full/model_5.pth

# visualization

In [None]:
def add_bb_to_image(image, boxes, txt=None, color=(0, 255, 0)):
    """
    Draw boxes (and optional captions) onto ``image`` in place.

    Each row of ``boxes`` is either (left, top, width, height) or a 2-element
    point; a point is drawn as a 10x10 square shifted by -5 pixels in x and y.
    ``txt``, if given, supplies one caption per box, placed at the box origin.
    """
    for idx, box in enumerate(boxes.astype(int)):
        if len(box) == 2:
            # bare point: centre a 10x10 marker on it
            box = box - 5
            w = h = 10
        else:
            w, h = box[2], box[3]
        origin = (box[0], box[1])
        cv2.rectangle(image, origin, (box[0]+w,  box[1]+h), color, 2)
        if txt is None:
            continue
        cv2.putText(
            image,
            f"{txt[idx]}",
            origin,
            cv2.FONT_HERSHEY_SIMPLEX,
            0.7,
            color,
            thickness=1,
        )
        
def add_arrow_dgree(image, boxes, alpha, len_arrow = 30, color=(0, 255, 0)):
    """
    Draw one arrow per box onto ``image`` in place.

    Every arrow starts at the first two values of a box row and points
    ``len_arrow`` pixels in the direction ``alpha`` (degrees; x component is
    cos, y component is sin).
    """
    angle_rad = np.deg2rad(alpha)
    unit_vectors = np.array([np.cos(angle_rad), np.sin(angle_rad)]).T
    starts = boxes[:,:2]
    tips = starts + len_arrow*unit_vectors
    for start, tip in zip(starts.astype(int), tips.astype(int)):
        cv2.arrowedLine(image, (start[0], start[1]), (tip[0],  tip[1]), color, 2)

# --- ground truth: label boxes with the computed alpha (degrees) as arrows ---
print('GT label:')
video_frame = '57584_000336_Endzone_101'
video_part, frame_part = video_frame.rsplit('_',1)
video = video_part.replace('.mp4','')
frame = int(frame_part)
img = cv2.cvtColor(
    cv2.imread(f"../train_images/{video}_frame{frame:04}.jpg"),
    cv2.COLOR_BGR2RGB,
)
df = labels[labels.video_frame==video_frame].copy().reset_index(drop=True)
boxes = df[['left','top','width','height']].values
arrow_arr = df['alpha']
add_bb_to_image(img, boxes, color=(0, 255, 0))
add_arrow_dgree(img, boxes, arrow_arr, color=(255, 64, 64))
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(img)
plt.show()

# --- prediction: fold-0 detections with conf > 0.3; alpha is multiplied by 180/pi ---
print('predicted:')
img = cv2.cvtColor(
    cv2.imread(f"../train_images/{video}_frame{frame:04}.jpg"),
    cv2.COLOR_BGR2RGB,
)
helmets = pd.read_csv('/kaggle/centernet/exp/ctdet/nfl_ph5_fold0/coco_valid_fold0_ph5.json.csv')
df = helmets[helmets.video_frame==video_frame].copy().reset_index(drop=True)
df = df[df.conf>0.3]
boxes = df[['left','top','width','height']].values
arrow_arr = df['alpha']*180/np.pi
add_bb_to_image(img, boxes, color=(0, 255, 0))
add_arrow_dgree(img, boxes, arrow_arr, color=(255, 64, 64))
fig, ax = plt.subplots(1, 1, figsize=(10, 10))
ax.imshow(img)
plt.show()