# NFL save frame images for training

# import libraries

In [1]:
# general
import os
import gc
import pickle
import glob
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import cv2
import matplotlib.pyplot as plt
import time
import math

# loss metrics
from sklearn.metrics import matthews_corrcoef

# warningの表示方法の設定
import warnings
warnings.filterwarnings("ignore")

# Set Configurations

In [2]:
kaggle = False
class CFG:
    if kaggle:
        BASE_DIR = "/kaggle/input/nfl-player-contact-detection"
        OUTPUT_DIR = "/kaggle/working"
    else:
        BASE_DIR = "/workspace/input"
        OUTPUT_DIR = "/workspace/input"
    TRAIN_HELMET_CSV = os.path.join(BASE_DIR, "train_baseline_helmets.csv")
    TRAIN_TRACKING_CSV = os.path.join(BASE_DIR, "train_player_tracking.csv")
    TRAIN_VIDEO_META_CSV = os.path.join(BASE_DIR, "train_video_metadata.csv")
    TRAIN_LABEL_CSV = os.path.join(BASE_DIR, "train_labels.csv")

    # data config    
    TRACKING_COLS = ["game_play", "nfl_player_id", "step", "x_position", "y_position"]

# utils

In [3]:
def get_snap_frame(row):
    elaped_time_start2snap = row.snap_time - row.start_time
    elaped_seconds = elaped_time_start2snap.seconds
    snap_frame = elaped_seconds*59.95
    return round(snap_frame)

In [4]:
def cal_distance(row):
    return math.sqrt((row.x_position_1 - row.x_position_2)**2 + (row.y_position_1 - row.y_position_2)**2)

# Read data

In [5]:
videometa_df = pd.read_csv(CFG.TRAIN_VIDEO_META_CSV, parse_dates=["start_time", "end_time", "snap_time"])
target_df = pd.read_csv(CFG.TRAIN_LABEL_CSV, parse_dates=["datetime"])
print(len(target_df))

4721618


In [6]:
# get snap frame
videometa_df["snap_frame"] = videometa_df.apply(get_snap_frame, axis=1)
videometa_df = videometa_df.query('view=="Endzone"') # どっちでもいいけどとりあえずEndzone
videometa_df = videometa_df[["game_play", "snap_frame"]]
videometa_df["snap_frame"] = videometa_df["snap_frame"].map(int)
videometa_df["snap_frame"].value_counts()

300    237
240      3
Name: snap_frame, dtype: int64

In [7]:
# add snap frame to target_df
target_df = pd.merge(target_df, videometa_df, on="game_play", how="left")
# target_df["frame"] = target_df["snap_frame"] + target_df["step"]
target_df["frame"] = target_df["snap_frame"] + round(target_df["step"]*0.1*59.95) # edit convert
target_df["frame"] = target_df["frame"].map(int)
# set merge key "game_play"_"frame"_"player_id"
target_df["game_frame"] = target_df['game_play'].str.cat(target_df['frame'].astype(str), sep='_')
target_df["game_frame_player_1"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_1'].astype(str), sep='_')
target_df["game_frame_player_2"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_2'].astype(str), sep='_')

# Concat Tracking csv

In [8]:
tracking_df = pd.read_csv(CFG.TRAIN_TRACKING_CSV, parse_dates=["datetime"])

In [9]:
tracking_df = tracking_df[CFG.TRACKING_COLS]
tracking_df = pd.merge(tracking_df, videometa_df, on="game_play", how="left")
tracking_df["frame"] = tracking_df["snap_frame"] + tracking_df["step"]
tracking_df["game_frame"] = tracking_df['game_play'].str.cat(tracking_df['frame'].astype(str), sep='_')
tracking_df["game_frame_player"] = tracking_df['game_frame'].str.cat(tracking_df['nfl_player_id'].astype(str), sep='_')
tracking_df = tracking_df.drop(["game_frame", "game_play", "frame", "step", "nfl_player_id"], axis=1)

In [10]:
print(len(target_df))
for player_id in [1,2]:
    print(player_id)
    tracking_player = tracking_df.copy()
    tracking_player.rename(columns={"game_frame_player":f"game_frame_player_{player_id}"}, inplace=True)
    rename_cols = [col for col in tracking_player.columns if col != f"game_frame_player_{player_id}"]
    tracking_player = tracking_player.rename(columns={rename_col: f"{rename_col}_{player_id}" for rename_col in rename_cols})
    target_df = pd.merge(target_df, tracking_player, on=f"game_frame_player_{player_id}", how="left")
print(target_df.columns)
print(len(target_df))

4721618
1
2
Index(['contact_id', 'game_play', 'datetime', 'step', 'nfl_player_id_1',
       'nfl_player_id_2', 'contact', 'snap_frame', 'frame', 'game_frame',
       'game_frame_player_1', 'game_frame_player_2', 'x_position_1',
       'y_position_1', 'snap_frame_1', 'x_position_2', 'y_position_2',
       'snap_frame_2'],
      dtype='object')
4721618


In [11]:
target_df["players_dis"] = target_df.apply(cal_distance, axis=1)

In [12]:
# player_2が"G"のところの距離がNanになっているが0にする
target_df = target_df.fillna(0)

# EDAの結果から10で切っておけば接触の見落としはなさそう(3ぐらいでもほぼ見落としはない)
distance_thr = 3
short_distance_target = target_df.query('players_dis <= @distance_thr')
long_distance_target = target_df.query('players_dis > @distance_thr')

In [13]:
longtarget_filename = os.path.join(CFG.OUTPUT_DIR, f"long_distance_{distance_thr}_target.csv")
shorttarget_filename = os.path.join(CFG.OUTPUT_DIR, f"short_distance_{distance_thr}_target.csv")

long_distance_target.to_csv(longtarget_filename, index=False)
short_distance_target.to_csv(shorttarget_filename, index=False)
target_df = short_distance_target
del long_distance_target, short_distance_target

In [14]:
helmet_df = pd.read_csv(CFG.TRAIN_HELMET_CSV)

In [15]:
# set merge key "game_play"_"frame"_"player_id"
helmet_df["game_frame"] = helmet_df['game_play'].str.cat(helmet_df['frame'].astype(str), sep='_')
helmet_df["game_frame_player"] = helmet_df['game_frame'].str.cat(helmet_df['nfl_player_id'].astype(str), sep='_')

In [16]:
# merge target df & helmet_df
player_views = [[1, "Endzone"],[2, "Endzone"], [1, "Sideline"],[2, "Sideline"]]
print(len(target_df))
for player_id, view in player_views:
    print(player_id, view)
    helmet_view = helmet_df.query('view==@view')
    helmet_view = helmet_view[["game_frame_player", "left", "width", "top", "height"]]
    helmet_view.rename(columns={"game_frame_player":f"game_frame_player_{player_id}"}, inplace=True)
    rename_cols = helmet_view.columns[1:]
    helmet_view = helmet_view.rename(columns={rename_col: f"{view[0]}_{rename_col}_{player_id}" for rename_col in rename_cols})
    target_df = pd.merge(target_df, helmet_view, on=f"game_frame_player_{player_id}", how="left")
    print(len(target_df))

3781682
1 Endzone
3781682
2 Endzone
3781682
1 Sideline
3781682
2 Sideline
3781682


In [17]:
target_df = target_df.fillna(0)
target_fillna_filename = os.path.join(CFG.OUTPUT_DIR, "target_fillna0_{distance_thr}.csv")
target_df.to_csv(target_fillna_filename, index=False) #short distance helmet_pos追加してfillnaしたやつ