# NFL save frame images for training

# import libraries

In [1]:
# general
import os
import gc
import pickle
import glob
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import cv2
import matplotlib.pyplot as plt
import time
import math

# loss metrics
from sklearn.metrics import matthews_corrcoef

# warningの表示方法の設定
import warnings
warnings.filterwarnings("ignore")

# Set Configurations

In [2]:
kaggle = False
class CFG:
    if kaggle:
        BASE_DIR = "/kaggle/input/nfl-player-contact-detection"
        OUTPUT_DIR = "/kaggle/working"
    else:
        BASE_DIR = "/workspace/input"
        OUTPUT_DIR = "/workspace/input"
    TRAIN_HELMET_CSV = os.path.join(BASE_DIR, "train_baseline_helmets.csv")
    TRAIN_TRACKING_CSV = os.path.join(BASE_DIR, "train_player_tracking.csv")
    TRAIN_VIDEO_META_CSV = os.path.join(BASE_DIR, "train_video_metadata.csv")
    TRAIN_LABEL_CSV = os.path.join(BASE_DIR, "train_labels.csv")

    # data config    
    # TRACKING_COLS = ["game_play", "nfl_player_id", "step", "x_position", "y_position", "datetime"]
    TRACKING_COLS = ["game_play", "nfl_player_id", "step", "x_position", "y_position", "datetime",
                     "speed","distance","direction","orientation","acceleration","sa"]

# Read data

In [3]:
videometa_df = pd.read_csv(CFG.TRAIN_VIDEO_META_CSV, parse_dates=["start_time", "end_time", "snap_time"])
target_df = pd.read_csv(CFG.TRAIN_LABEL_CSV, parse_dates=["datetime"])
print(len(target_df))
# get snap frame
videometa_df = videometa_df.query('view=="Endzone"') # どっちでもいいけどとりあえずEndzone
videometa_df = videometa_df[["game_play", "start_time"]]

# add frame to target_df
target_df = pd.merge(target_df, videometa_df, on="game_play", how="left")
target_df["elapsed_time2seconds"] =  target_df["datetime"] - target_df["start_time"]
target_df["elapsed_seconds"] = target_df["elapsed_time2seconds"].dt.total_seconds()
target_df["frame"] = round(target_df["elapsed_seconds"]*59.94)
# target_df["frame"] = round(target_df["snap_frame"] + target_df["step"]*0.1*59.94)
target_df["frame"] = target_df["frame"].map(int)

# set merge key "game_play"_"frame"_"player_id"
target_df["game_frame"] = target_df['game_play'].str.cat(target_df['frame'].astype(str), sep='_')
target_df["game_frame_player_1"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_1'].astype(str), sep='_')
target_df["game_frame_player_2"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_2'].astype(str), sep='_')

4721618


# Concat Tracking csv

In [4]:
tracking_df = pd.read_csv(CFG.TRAIN_TRACKING_CSV, parse_dates=["datetime"])
tracking_df = tracking_df[CFG.TRACKING_COLS]
tracking_df = pd.merge(tracking_df, videometa_df, on="game_play", how="left")
tracking_df["elapsed_time2seconds"] =  tracking_df["datetime"] - tracking_df["start_time"]
tracking_df["elapsed_seconds"] = tracking_df["elapsed_time2seconds"].dt.total_seconds()
tracking_df["frame"] = round(tracking_df["elapsed_seconds"]*59.94)
tracking_df["frame"] = tracking_df["frame"].map(int)
tracking_df["game_frame"] = tracking_df['game_play'].str.cat(tracking_df['frame'].astype(str), sep='_')
tracking_df["game_frame_player"] = tracking_df['game_frame'].str.cat(tracking_df['nfl_player_id'].astype(str), sep='_')
tracking_df = tracking_df.drop(["game_frame", "game_play", "frame", "step", "nfl_player_id", 
                                "elapsed_time2seconds", "elapsed_seconds", "start_time", "datetime"], axis=1)
display(tracking_df)

Unnamed: 0,x_position,y_position,speed,distance,direction,orientation,acceleration,sa,game_frame_player
0,61.59,42.60,1.11,0.11,320.33,263.93,0.71,-0.64,58580_001136_-350_44830
1,59.48,26.81,0.23,0.01,346.84,247.16,1.29,0.90,58580_001136_-350_47800
2,72.19,31.46,0.61,0.06,11.77,247.69,0.63,-0.33,58580_001136_-350_52444
3,57.37,22.12,0.37,0.04,127.85,63.63,0.69,0.62,58580_001136_-350_46206
4,63.25,27.50,0.51,0.05,183.62,253.71,0.31,0.31,58580_001136_-350_52663
...,...,...,...,...,...,...,...,...,...
1353048,72.28,51.80,1.59,0.17,345.36,342.68,0.49,-0.41,58575_003081_1138_48476
1353049,74.76,54.50,0.98,0.10,342.39,354.46,0.63,-0.24,58575_003081_1138_44887
1353050,74.54,55.75,1.12,0.11,352.79,349.20,0.63,0.46,58575_003081_1138_44174
1353051,80.44,48.77,1.93,0.20,340.78,346.51,0.53,-0.51,58575_003081_1138_45217


In [5]:
shift_cols = ["x_position","y_position"]
shift_nums = range(-6,6,1)
for col in shift_cols:
    for num in shift_nums:
        tracking_df[f"{col}_shift{num}"] = tracking_df[col].shift(num)
tracking_df = tracking_df.fillna(0)

In [6]:
print(len(target_df))
for player_id in [1,2]:
    print(player_id)
    tracking_player = tracking_df.copy()
    tracking_player.rename(columns={"game_frame_player":f"game_frame_player_{player_id}"}, inplace=True)
    rename_cols = [col for col in tracking_player.columns if col != f"game_frame_player_{player_id}"]
    tracking_player = tracking_player.rename(columns={rename_col: f"{rename_col}_{player_id}" for rename_col in rename_cols})
    target_df = pd.merge(target_df, tracking_player, on=f"game_frame_player_{player_id}", how="left")
print(target_df.columns)
print(len(target_df))
target_df = target_df.drop(["start_time","elapsed_time2seconds", "elapsed_seconds"], axis=1)

4721618
1
2
Index(['contact_id', 'game_play', 'datetime', 'step', 'nfl_player_id_1',
       'nfl_player_id_2', 'contact', 'start_time', 'elapsed_time2seconds',
       'elapsed_seconds', 'frame', 'game_frame', 'game_frame_player_1',
       'game_frame_player_2', 'x_position_1', 'y_position_1', 'speed_1',
       'distance_1', 'direction_1', 'orientation_1', 'acceleration_1', 'sa_1',
       'x_position_shift-6_1', 'x_position_shift-5_1', 'x_position_shift-4_1',
       'x_position_shift-3_1', 'x_position_shift-2_1', 'x_position_shift-1_1',
       'x_position_shift0_1', 'x_position_shift1_1', 'x_position_shift2_1',
       'x_position_shift3_1', 'x_position_shift4_1', 'x_position_shift5_1',
       'y_position_shift-6_1', 'y_position_shift-5_1', 'y_position_shift-4_1',
       'y_position_shift-3_1', 'y_position_shift-2_1', 'y_position_shift-1_1',
       'y_position_shift0_1', 'y_position_shift1_1', 'y_position_shift2_1',
       'y_position_shift3_1', 'y_position_shift4_1', 'y_position_shift5_

In [7]:
target_df

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,frame,game_frame,game_frame_player_1,...,y_position_shift-4_2,y_position_shift-3_2,y_position_shift-2_2,y_position_shift-1_2,y_position_shift0_2,y_position_shift1_2,y_position_shift2_2,y_position_shift3_2,y_position_shift4_2,y_position_shift5_2
0,58168_003392_0_38590_43854,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,43854,0,298,58168_003392_298,58168_003392_298_38590,...,15.59,20.08,23.63,38.73,16.79,22.78,38.86,18.62,18.08,28.89
1,58168_003392_0_38590_41257,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,41257,0,298,58168_003392_298,58168_003392_298_38590,...,28.50,26.55,17.02,23.63,15.59,20.08,23.63,38.73,16.79,22.78
2,58168_003392_0_38590_41944,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,41944,0,298,58168_003392_298,58168_003392_298_38590,...,23.43,19.88,23.89,30.61,22.85,28.06,28.92,15.66,23.69,22.90
3,58168_003392_0_38590_42386,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,42386,0,298,58168_003392_298,58168_003392_298_38590,...,26.58,26.73,23.43,19.88,23.89,30.61,22.85,28.06,28.92,15.66
4,58168_003392_0_38590_47944,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,47944,0,298,58168_003392_298,58168_003392_298_38590,...,25.28,21.77,28.29,17.07,26.58,26.73,23.43,19.88,23.89,30.61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4721613,58582_003121_91_48220_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,48220,G,0,840,58582_003121_840,58582_003121_840_48220,...,,,,,,,,,,
4721614,58582_003121_91_47906_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47906,G,0,840,58582_003121_840,58582_003121_840_47906,...,,,,,,,,,,
4721615,58582_003121_91_38557_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,38557,G,0,840,58582_003121_840,58582_003121_840_38557,...,,,,,,,,,,
4721616,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,840,58582_003121_840,58582_003121_840_47872,...,,,,,,,,,,


In [9]:
# calculate players distance
target_df["players_dis"] = np.sqrt((target_df["x_position_1"] - target_df["x_position_2"])**2 
                                   + (target_df["y_position_1"] - target_df["y_position_2"])**2)

# player_2が"G"のところの距離がNanになっているが0にする
target_df = target_df.fillna(0)

# EDAの結果から10で切っておけば接触の見落としはなさそう(3ぐらいでもほぼ見落としはない)
distance_thr = 2
short_distance_target = target_df.query('players_dis <= @distance_thr')
long_distance_target = target_df.query('players_dis > @distance_thr')

In [10]:
longtarget_filename = os.path.join(CFG.OUTPUT_DIR, f"long_distance_{distance_thr}_target.csv")
shorttarget_filename = os.path.join(CFG.OUTPUT_DIR, f"short_distance_{distance_thr}_target.csv")

long_distance_target.to_csv(longtarget_filename, index=False)
short_distance_target.to_csv(shorttarget_filename, index=False)
target_df = short_distance_target
del long_distance_target, short_distance_target

In [11]:
helmet_df = pd.read_csv(CFG.TRAIN_HELMET_CSV)

In [12]:
# set merge key "game_play"_"frame"_"player_id"
helmet_df["game_frame"] = helmet_df['game_play'].str.cat(helmet_df['frame'].astype(str), sep='_')
helmet_df["game_frame_player"] = helmet_df['game_frame'].str.cat(helmet_df['nfl_player_id'].astype(str), sep='_')

In [13]:
# merge target df & helmet_df
player_views = [[1, "Endzone"],[2, "Endzone"], [1, "Sideline"],[2, "Sideline"]]
print(len(target_df))
for player_id, view in player_views:
    print(player_id, view)
    helmet_view = helmet_df.query('view==@view')
    helmet_view = helmet_view[["game_frame_player", "left", "width", "top", "height"]]
    helmet_view.rename(columns={"game_frame_player":f"game_frame_player_{player_id}"}, inplace=True)
    rename_cols = helmet_view.columns[1:]
    helmet_view = helmet_view.rename(columns={rename_col: f"{view[0]}_{rename_col}_{player_id}" for rename_col in rename_cols})
    target_df = pd.merge(target_df, helmet_view, on=f"game_frame_player_{player_id}", how="left")
    print(len(target_df))

660553
1 Endzone
660553
2 Endzone
660553
1 Sideline
660553
2 Sideline
660553


In [14]:
target_df = target_df.fillna(0)
target_fillna_filename = os.path.join(CFG.OUTPUT_DIR, f"target_fillna0_shift_{distance_thr}.csv")
target_df.to_csv(target_fillna_filename, index=False) #short distance helmet_pos追加してfillnaしたやつ