# NFL Baseline
- create target_df (keep player pairs whose distance in tracking_df is below threshold=3)
https://www.kaggle.com/code/stgkrtua/nfl-creatatraindataset-targetdf
- create dataset: save the video frames listed in target_df
https://www.kaggle.com/code/stgkrtua/nfl-createdataset-saveframes
- check saved images
https://www.kaggle.com/code/stgkrtua/nfl-checkdataset-plotsavedimage

# import libraries

In [None]:
# general
import os
import gc
import pickle
import glob
import random
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import cv2
import matplotlib.pyplot as plt
import time
import math

import sys
sys.path.append('/kaggle/input/timm-pytorch-image-models/pytorch-image-models-master')
import timm


# deep learning
from torch.utils.data import Dataset, DataLoader
from torch.optim import SGD, Adam, AdamW
from torch.optim.lr_scheduler import CosineAnnealingLR, CosineAnnealingWarmRestarts, ReduceLROnPlateau
import torch
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import albumentations as A
from albumentations.pytorch import ToTensorV2

from sklearn.model_selection import GroupKFold

# loss metrics
from sklearn.metrics import matthews_corrcoef, confusion_matrix, roc_auc_score

import cudf

import mlflow
# import wandb
# configure how warnings are displayed
import warnings
warnings.filterwarnings("ignore")

# Set Configurations

In [None]:
# Experiment configuration shared by the whole notebook.
CFG = {
    "kaggle": False,
    "DEBUG": False,

    # model config
    "model_name": "swin_s3_tiny_224",
    "out_features": 20,
    "inp_channels": 3 * 2,
    "num_img_feature": 5,
    "pretrained": True,

    # window length used for the rolling-sum features
    "roll_sum_window_size": 10,

    # tabular features: positions, per-player kinematics for player 1 and 2,
    # their absolute differences (diff-feature), then the two pair-level columns
    "features": [
        "x_position_1", "y_position_1", "x_position_2", "y_position_2",
        *(
            f"{quantity}{suffix}"
            for suffix in ("_1", "_2", "_diff")
            for quantity in ("speed", "distance", "direction",
                             "orientation", "acceleration", "sa")
        ),
        "players_dis", "is_ground",
    ],

    # shifted position columns (offsets -6 .. 5), one list per axis and player:
    # track_features_x_1, track_features_y_1, track_features_x_2, track_features_y_2
    **{
        f"track_features_{axis}_{player}": [
            f"{axis}_position_shift{offset}_{player}" for offset in range(-6, 6)
        ]
        for player in (1, 2)
        for axis in ("x", "y")
    },

    # learning config
    "n_epoch": 5,
    "n_folds": 3,
    "train_folds": [0, 1, 2],
    "lr": 1e-4,
    "T_max": 10,
    "min_lr": 1e-8,
    "weight_decay": 1e-6,

    # etc
    "print_freq": 1000,
    "random_seed": 21,

    # data config
    "img_size": (224, 224),
    "batch_size": 32,
    "num_workers": 2,
    # keep colour only inside a region of (helmet size x this factor); black out the rest
    "masksize_helmet_ratio": 4,
    "TRAIN_VIDEO_NUM": 100,
    "VALID_VIDEO_NUM": 10,
    "sample_num": -1,

    "EXP_CATEGORY": "exps_cudf",
    "EXP_NAME": "expC001_swins3tiny",
}

# Debug mode: log under a throw-away experiment name and shrink the run
# (fewer epochs, a row sample, fewer folds).
if CFG["DEBUG"]:
    CFG["EXP_CATEGORY"] = "DEBUG"
    CFG["EXP_NAME"] = "DEBUG"
    CFG["n_epoch"] = 2
    CFG["sample_num"] = 1000
    CFG["batch_size"] = 32
    # This line used to read `CFG["train_folds"] : [0,1]`, which Python parses
    # as a bare annotation and therefore never assigned anything.
    CFG["train_folds"] = [0, 1]


# Root directories for the inputs and for the experiment outputs.
CFG["INPUT_DIR"] = "/workspace/input"
CFG["OUTPUT_DIR"] = "/workspace/output"

# Entries located directly under INPUT_DIR (config key -> name inside INPUT_DIR).
CFG.update({
    cfg_key: os.path.join(CFG["INPUT_DIR"], entry_name)
    for cfg_key, entry_name in (
        ("TRAIN_HELMET_CSV", "train_baseline_helmets.csv"),
        ("TRAIN_TRACKING_CSV", "train_player_tracking.csv"),
        ("TRAIN_VIDEO_META_CSV", "train_video_metadata.csv"),
        ("TRAIN_LABEL_CSV", "train_labels.csv"),
        ("SAVED_CONTACT_CSV", "Saved_contact_frames.csv"),
        ("CONTACT_IMG_DIR", "contact_images"),
    )
})

# Model directory: OUTPUT_DIR/<EXP_NAME>/model
CFG["MODEL_DIR"] = os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"], "model")

# Directory creation is currently disabled:
# if not CFG["kaggle"] and not CFG["DEBUG"]:
#     os.mkdir(os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"]))
#     os.mkdir(CFG["MODEL_DIR"])


# Dataset Functions

In [None]:
def add_feature_cols(df_, FEATURE_COLS, remove_col_list):
    """Register the columns of ``df_`` as feature columns.

    Every column name of ``df_`` that is not listed in ``remove_col_list`` is
    appended, in column order, to ``FEATURE_COLS``.

    Args:
        df_: object exposing an iterable ``columns`` attribute.
        FEATURE_COLS: list of feature names; extended in place.
        remove_col_list: column names to leave out.

    Returns:
        The same ``FEATURE_COLS`` list object.
    """
    for column in df_.columns:
        if column in remove_col_list:
            continue
        FEATURE_COLS.append(column)
    return FEATURE_COLS

## target df func

In [None]:
def create_trackmerged_ftr(target_df, FEATURE_COLS,
                           diff_cols = ['x_position', 'y_position', 'speed', 'distance',
                                        'direction', 'orientation', 'acceleration', 'sa']):
    """Add pairwise tracking features for player 1 vs player 2.

    Columns written to ``target_df`` (in this order):
      * ``players_dis``: Euclidean distance between the two current positions.
      * ``players_distance_sum``: the same distance summed over the shifted
        position columns (offsets -6 .. 5).
      * ``{x,y}_move_distance_{1,2}``: sum of absolute differences between
        consecutive shifted positions, per axis and per player.
      * ``{col}_diff`` for every ``col`` in ``diff_cols``: absolute difference
        between the ``_1`` and ``_2`` columns.

    ``players_dis`` and ``players_distance_sum`` have their NaNs replaced by 0
    (rows where the second "player" is G come out as NaN).

    Args:
        target_df: frame holding the ``*_1`` / ``*_2`` tracking columns;
            modified in place.
        FEATURE_COLS: list of feature names; extended in place.
        diff_cols: base names of the columns to difference.

    Returns:
        ``(target_df, FEATURE_COLS)``.
    """
    offsets = list(range(-6, 6, 1))

    def _pair_distance(x_a, y_a, x_b, y_b):
        # Euclidean distance between two (x, y) column pairs.
        delta_x = target_df[x_a] - target_df[x_b]
        delta_y = target_df[y_a] - target_df[y_b]
        return np.sqrt(delta_x**2 + delta_y**2)

    # distance between the two players at the current step
    # (NaN for G rows -> 0)
    target_df["players_dis"] = _pair_distance(
        "x_position_1", "y_position_1", "x_position_2", "y_position_2"
    )
    target_df["players_dis"] = target_df["players_dis"].fillna(0)
    FEATURE_COLS.append("players_dis")

    # distance between the two players summed over every shifted position
    # (NaN for G rows -> 0)
    target_df["players_distance_sum"] = sum(
        _pair_distance(
            f"x_position_shift{offset}_1", f"y_position_shift{offset}_1",
            f"x_position_shift{offset}_2", f"y_position_shift{offset}_2",
        )
        for offset in offsets
    )
    target_df["players_distance_sum"] = target_df["players_distance_sum"].fillna(0)
    FEATURE_COLS.append("players_distance_sum")

    # per-axis path length of each player across consecutive shifted positions
    consecutive = list(zip(offsets[:-1], offsets[1:]))
    for axis in ("x", "y"):
        new_names = []
        for player in (1, 2):
            travelled = sum(
                (target_df[f"{axis}_position_shift{here}_{player}"]
                 - target_df[f"{axis}_position_shift{there}_{player}"]).abs()
                for here, there in consecutive
            )
            name = f"{axis}_move_distance_{player}"
            target_df[name] = travelled
            new_names.append(name)
        FEATURE_COLS.extend(new_names)

    # absolute player-1 vs player-2 difference at the current step
    for base in diff_cols:
        diff_name = f"{base}_diff"
        target_df[diff_name] = (target_df[f"{base}_1"] - target_df[f"{base}_2"]).abs()
        FEATURE_COLS.append(diff_name)

    return target_df, FEATURE_COLS

In [None]:
def create_roll_ftr(target_df, FEATURE_COLS,
                    roll_cols = ['players_dis', 'x_position_diff', 'y_position_diff', 
                                  'speed_diff', 'distance_diff','direction_diff',
                                  'orientation_diff', 'acceleration_diff', 'sa_diff'],
                    window_size=None):
    """Add rolling-sum features computed per (game_play, player_1, player_2).

    For every column in ``roll_cols`` a ``<col>_rollsum`` column is added,
    holding the rolling sum over ``window_size`` consecutive rows of the same
    player pair (rows are taken in their current order). Windows that are not
    yet full produce NaN, which is replaced by -999.

    Args:
        target_df: frame with ``game_play``, ``nfl_player_id_1``,
            ``nfl_player_id_2`` and the ``roll_cols`` columns.
        FEATURE_COLS: list of feature names; extended in place.
        roll_cols: columns to roll.
        window_size: rolling window length; defaults to
            ``CFG["roll_sum_window_size"]`` when None.

    Returns:
        ``(target_df, FEATURE_COLS)`` where ``target_df`` is a new frame with
        the ``*_rollsum`` columns attached.
    """
    if window_size is None:
        window_size = CFG["roll_sum_window_size"]

    print("original length", len(target_df))
    key_cols = ["game_play", "nfl_player_id_1", "nfl_player_id_2"]
    roll_df = target_df[roll_cols+key_cols].copy()
    roll_df["key"] = roll_df["game_play"] + "_" + roll_df["nfl_player_id_1"].astype("str") + "_" + roll_df["nfl_player_id_2"].astype("str")

    group_roll_df = roll_df.groupby("key").rolling(window_size)[roll_cols].sum().fillna(-999).sort_index()
    rollsum_names = {col: col+"_rollsum" for col in roll_cols}
    group_roll_df = group_roll_df.rename(columns=rollsum_names)
    FEATURE_COLS.extend(rollsum_names.values())

    if not torch.cuda.is_available():
        # pandas indexes the grouped rolling result by (key, original row
        # label). Drop the key level so the column-wise concat aligns on the
        # original row labels instead of producing a mixed, doubled index.
        group_roll_df = group_roll_df.reset_index(level=0, drop=True)
        target_df = pd.concat([target_df, group_roll_df], axis=1).sort_index()
    else:
        # NOTE(review): cuDF path left as it was; confirm that the index of the
        # grouped rolling result aligns with target_df here as well.
        target_df = cudf.concat([target_df, group_roll_df], axis=1).sort_index()
    print("after length", len(target_df))
    return target_df, FEATURE_COLS

In [None]:
def create_helmetmerged_ftr(target_df, FEATURE_COLS):
    """Add helmet-box centre columns and the per-view helmet distance.

    For each view prefix (``E`` for Endzone, ``S`` for Sideline) and each
    player (1, 2) the box centre is computed as ``left + width // 2``
    (``*_Wcenter_*``) and ``top + height // 2`` (``*_Hcenter_*``). The
    Euclidean distance between the two players' centres is stored as
    ``{E,S}_helmet_dis`` with NaN replaced by 0, and registered as a feature.
    Finally the raw box columns have their NaNs replaced by 0.

    Args:
        target_df: frame holding ``{E,S}_{left,width,top,height}_{1,2}``;
            modified in place.
        FEATURE_COLS: list of feature names; extended in place.

    Returns:
        ``(target_df, FEATURE_COLS)``.
    """
    centre_specs = (("Wcenter", "left", "width"), ("Hcenter", "top", "height"))

    for view_name in ("Endzone", "Sideline"):
        prefix = view_name[0]

        # helmet box centres of both players in this view
        for player in ("1", "2"):
            for centre, origin, extent in centre_specs:
                target_df[f"{prefix}_{centre}_{player}"] = (
                    target_df[f"{prefix}_{origin}_{player}"]
                    + (target_df[f"{prefix}_{extent}_{player}"] // 2)
                )

        # distance between the two centres; NaN (e.g. G rows) becomes 0
        delta_w = target_df[f"{prefix}_Wcenter_1"] - target_df[f"{prefix}_Wcenter_2"]
        delta_h = target_df[f"{prefix}_Hcenter_1"] - target_df[f"{prefix}_Hcenter_2"]
        distance_col = f"{prefix}_helmet_dis"
        target_df[distance_col] = np.sqrt(delta_w**2 + delta_h**2)
        target_df[distance_col] = target_df[distance_col].fillna(0)
        FEATURE_COLS.append(distance_col)

    # fill the raw box columns only after the distances were derived from them
    helmet_cols = [
        f"{prefix}_{field}_{player}"
        for prefix in ("E", "S")
        for player in ("1", "2")
        for field in ("left", "width", "top", "height")
    ]
    target_df[helmet_cols] = target_df[helmet_cols].fillna(0)

    return target_df, FEATURE_COLS

In [None]:
def get_categorical_ftr(target_df, FEATURE_COLS):
    """Add the ``is_ground`` / ``is_helmet`` flags and encode ids and teams.

    * ``is_ground``: 1 where ``nfl_player_id_2`` equals ``"G"``, else 0.
    * ``players_dis``: forced to 0 on ``is_ground`` rows.
    * ``nfl_player_id_2``: ``"G"`` is replaced by 99999 and the column is cast
      to int64.
    * ``is_helmet``: 0 only when all four of ``E_width_1``, ``E_width_2``,
      ``S_width_1`` and ``S_width_2`` are 0, else 1.
    * ``team_1`` / ``team_2``: 1 for ``"home"``, 0 otherwise.

    Args:
        target_df: frame to modify in place.
        FEATURE_COLS: list of feature names; ``is_ground`` and ``is_helmet``
            are appended in place.

    Returns:
        ``(target_df, FEATURE_COLS)``.
    """
    target_df["is_ground"] = (target_df["nfl_player_id_2"] == "G").astype(np.int64)
    # Assign the masked column back explicitly. The former chained
    # `target_df["players_dis"].mask(..., inplace=True)` may act on a temporary
    # Series and leave target_df untouched (e.g. under pandas copy-on-write).
    target_df["players_dis"] = target_df["players_dis"].mask(target_df["is_ground"] == 1, 0)

    # when inference this is after cnn pred
    target_df["nfl_player_id_2"] = target_df["nfl_player_id_2"].replace("G", "99999").astype(np.int64)
    target_df["is_helmet"] = 1 - ((target_df["E_width_1"]==0) & (target_df["E_width_2"]==0)
                                  & (target_df["S_width_1"]==0) & (target_df["S_width_2"]==0)).astype(np.int64)

    # set team
    target_df["team_1"] = (target_df["team_1"] == "home").astype(np.int64)
    target_df["team_2"] = (target_df["team_2"] == "home").astype(np.int64)

    FEATURE_COLS.extend(["is_ground", "is_helmet"])
    return target_df, FEATURE_COLS

## tracking df func

In [None]:
def get_tracking_shift(tracking_df, shift_cols=["x_position","y_position"], shift_nums=range(-6,6,1)):
    """Add time-shifted copies of ``shift_cols`` for every player of every play.

    Rows are sorted by (game_play, nfl_player_id, frame key) and, for each
    ``col`` in ``shift_cols`` and ``num`` in ``shift_nums``, a column
    ``{col}_shift{num}`` is added holding ``col`` shifted by ``num`` rows
    *within the same game_play/player*. Rows without a neighbour at that
    offset get a missing value, so values never leak from one player's
    sequence into another's.

    Args:
        tracking_df: frame with ``game_play``, ``nfl_player_id``, ``step``
            and the ``shift_cols`` columns. The helper key columns are added
            to this object before it is re-sorted into a new frame.
        shift_cols: columns to shift.
        shift_nums: row offsets passed to ``shift``.

    Returns:
        ``(tracking_df, SHIFT_COLS)``: the sorted frame (helper key columns
        removed) and the list of the created column names.
    """
    # get shift key
    tracking_df["game_player"] = tracking_df["game_play"].str.cat(tracking_df['nfl_player_id'].astype(str), sep='_')
    # Frame number derived from step, offset by 5000 so that every value is
    # non-negative, which makes the zero-padded string key easy to sort.
    tracking_df["frame_add"] = (tracking_df['step']/10*59.94+5*59.94).astype('int')+5000
    tracking_df["frame_key"] = tracking_df["frame_add"].astype(str).str.zfill(5)
    tracking_df["shift_key"] = tracking_df["game_player"].str.cat(tracking_df["frame_key"].astype(str), sep='_')
    tracking_df = tracking_df.sort_values("shift_key").reset_index(drop=True)

    SHIFT_COLS = []
    for col in shift_cols:
        # Shift inside each player's own sequence; a plain Series.shift would
        # pull in rows of the neighbouring player at the sequence boundaries.
        col_by_player = tracking_df.groupby("game_player")[col]
        for num in shift_nums:
            colname = f"{col}_shift{num}"
            tracking_df[colname] = col_by_player.shift(num)
            SHIFT_COLS.append(colname)
    tracking_df = tracking_df.drop(["game_player", "frame_add","frame_key", "shift_key"], axis=1)
    return tracking_df, SHIFT_COLS

In [None]:
def target_merge_tracking(target_df, tracking_df, FEATURE_COLS, SHIFT_COLS,
                          TRACKING_COLS = ["game_play", "nfl_player_id", "step", 
                                           "x_position", "y_position", "datetime",
                                           "speed","distance","direction","orientation",
                                           "acceleration","sa", "team", "jersey_number"] ):
    """Left-join the tracking rows of player 1 and player 2 onto ``target_df``.

    Both frames get a ``<game_play>_<step>_<player id>`` key. The tracking
    frame is merged twice, once per player, with every tracking column
    suffixed ``_1`` or ``_2``. A ``frame`` column (taken from player 1) is
    added; the merge keys, per-player frame columns and per-player datetime
    columns are dropped.

    Args:
        target_df: pair table with ``game_play``, ``step``,
            ``nfl_player_id_1`` and ``nfl_player_id_2``; helper key columns
            are added to it in place before merging.
        tracking_df: tracking table containing ``TRACKING_COLS`` and
            ``SHIFT_COLS``.
        FEATURE_COLS: list of feature names; extended in place.
        SHIFT_COLS: names of the shifted columns to carry over.
        TRACKING_COLS: base tracking columns to carry over (not modified).

    Returns:
        ``(target_df, FEATURE_COLS)``.
    """
    print("original length", len(target_df))
    # set merge-key (game_step_player_1, 2) to merge tracking_df
    target_df["game_step"] = target_df['game_play'].str.cat(target_df['step'].astype(str), sep='_')
    target_df["game_step_player_1"] = target_df['game_step'].str.cat(target_df['nfl_player_id_1'].astype(str), sep='_')
    target_df["game_step_player_2"] = target_df['game_step'].str.cat(target_df['nfl_player_id_2'].astype(str), sep='_')

    # Select the columns through a fresh list. Extending TRACKING_COLS in place
    # would grow the shared default list on every call and request duplicated
    # columns from the second call on.
    tracking_cols = list(TRACKING_COLS) + list(SHIFT_COLS)
    tracking_df = tracking_df[tracking_cols]
    # frame number derived from step
    # NOTE(review): 59.94 is presumably the video frame rate - confirm.
    tracking_df["frame"] = (tracking_df['step']/10*59.94+5*59.94).astype('int')+1
    tracking_df["game_step"] = tracking_df['game_play'].str.cat(tracking_df['step'].astype(str), sep='_')
    tracking_df["game_step_player"] = tracking_df['game_step'].str.cat(tracking_df['nfl_player_id'].astype(str), sep='_')
    tracking_df = tracking_df.drop(["game_step", "game_play", "step", "nfl_player_id"], axis=1)
    
    for player_id in [1,2]:
        tracking_player = tracking_df.copy()
        tracking_player.rename(columns={"game_step_player":f"game_step_player_{player_id}"}, inplace=True)
        # suffix every non-key column with the player slot
        rename_cols = [col for col in tracking_player.columns if col != f"game_step_player_{player_id}"]
        tracking_player = tracking_player.rename(columns={rename_col: f"{rename_col}_{player_id}" for rename_col in rename_cols})
        target_df = target_df.merge(tracking_player, on=[f"game_step_player_{player_id}"], how="left")
        # add features col (everything except the key, frame and datetime)
        FEATURE_COLS = add_feature_cols(tracking_player, FEATURE_COLS,
                                        [f"game_step_player_{player_id}", f"frame_{player_id}", f"datetime_{player_id}"])
    target_df["frame"] = target_df["frame_1"]
    FEATURE_COLS.append("frame")
    
    target_df = target_df.drop(["frame_1", "frame_2", "game_step_player_1", "game_step_player_2",
                                "datetime_1", "datetime_2"], axis=1)
    print(len(target_df.columns))
    print("original length", len(target_df))
    return target_df, FEATURE_COLS

## helmet df func

In [None]:
def target_merge_helmet(target_df, helmet_df, FEATURE_COLS):
    """Left-join helmet boxes of both players and both views onto ``target_df``.

    Both frames get a ``<game_play>_<frame>_<player id>`` key. For each
    combination of player slot (1, 2) and view (Endzone, Sideline) the
    ``left``/``width``/``top``/``height`` columns of the matching helmet rows
    are merged in as ``{E,S}_{left,width,top,height}_{1,2}`` and registered as
    features.

    Args:
        target_df: pair table with ``game_play``, ``frame``,
            ``nfl_player_id_1`` and ``nfl_player_id_2``; key columns are added
            to it in place before merging.
        helmet_df: helmet table with ``game_play``, ``frame``,
            ``nfl_player_id``, ``view`` and the four box columns; key columns
            are added to it in place.
        FEATURE_COLS: list of feature names; extended in place.

    Returns:
        ``(target_df, FEATURE_COLS)``.
    """
    print("original length", len(target_df))
    # set merge-key (game_frame_player_1,2) to merge helmet_df
    # Player ids are cast to str before concatenation, as in
    # target_merge_tracking: str.cat needs string operands and
    # nfl_player_id_1 may be numeric.
    target_df["game_frame"] = target_df['game_play'].str.cat(target_df['frame'].astype(str), sep='_')
    target_df["game_frame_player_1"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_1'].astype(str), sep='_')
    target_df["game_frame_player_2"] = target_df['game_frame'].str.cat(target_df['nfl_player_id_2'].astype(str), sep='_')
    # set merge key
    helmet_df["game_frame"] = helmet_df['game_play'].str.cat(helmet_df['frame'].astype(str), sep='_')
    helmet_df["game_frame_player"] = helmet_df['game_frame'].str.cat(helmet_df['nfl_player_id'].astype(str), sep='_')
    
    # merge target df & helmet_df
    box_cols = ["left", "width", "top", "height"]
    player_views = [[1, "Endzone"],[2, "Endzone"], [1, "Sideline"],[2, "Sideline"]]
    for player_id, view in player_views:
        merge_key = f"game_frame_player_{player_id}"
        helmet_view = helmet_df[helmet_df["view"]==view]
        helmet_view = helmet_view[["game_frame_player"] + box_cols]
        # rename the key to this player slot and tag the box columns with view + slot
        column_names = {"game_frame_player": merge_key}
        column_names.update({box_col: f"{view[0]}_{box_col}_{player_id}" for box_col in box_cols})
        helmet_view = helmet_view.rename(columns=column_names)
        # DataFrame.merge is the same call for pandas and cuDF frames
        target_df = target_df.merge(helmet_view, on=[merge_key], how="left")
        # add features col
        FEATURE_COLS = add_feature_cols(helmet_view, FEATURE_COLS, [merge_key])

    print(len(target_df.columns))
    print("original length", len(target_df))
    return target_df, FEATURE_COLS

---

# Load Target

In [11]:
# Load the contact labels: cuDF when a CUDA device is available, pandas otherwise.
target_df = (cudf if torch.cuda.is_available() else pd).read_csv(
    CFG["TRAIN_LABEL_CSV"], parse_dates=["datetime"]
)
# Feature list, seeded with the identifier columns present in the label file.
FEATURE_COLS = ["nfl_player_id_1", "nfl_player_id_2", "step"]
display(target_df)

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact
0,58168_003392_0_38590_43854,58168_003392,2020-09-11 03:01:48.100,0,38590,43854,0
1,58168_003392_0_38590_41257,58168_003392,2020-09-11 03:01:48.100,0,38590,41257,0
2,58168_003392_0_38590_41944,58168_003392,2020-09-11 03:01:48.100,0,38590,41944,0
3,58168_003392_0_38590_42386,58168_003392,2020-09-11 03:01:48.100,0,38590,42386,0
4,58168_003392_0_38590_47944,58168_003392,2020-09-11 03:01:48.100,0,38590,47944,0
...,...,...,...,...,...,...,...
4721613,58582_003121_91_48220_G,58582_003121,2021-10-12 02:42:29.100,91,48220,G,0
4721614,58582_003121_91_47906_G,58582_003121,2021-10-12 02:42:29.100,91,47906,G,0
4721615,58582_003121_91_38557_G,58582_003121,2021-10-12 02:42:29.100,91,38557,G,0
4721616,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100,91,47872,G,0


# Merge tracking_df

In [12]:
# Load the player tracking table: cuDF when a CUDA device is available, pandas otherwise.
tracking_df = (cudf if torch.cuda.is_available() else pd).read_csv(
    CFG["TRAIN_TRACKING_CSV"], parse_dates=["datetime"]
)

In [13]:
# Add the shifted position columns to the tracking table, then join the
# tracking data of both players onto the target pairs.
tracking_df, SHIFT_COLS = get_tracking_shift(tracking_df)
target_df, FEATURE_COLS = target_merge_tracking(target_df, tracking_df, FEATURE_COLS, SHIFT_COLS)
# Drop the reference to the tracking table once it has been merged.
del tracking_df

original length 4721618
77
original length 4721618


In [14]:
# Pairwise tracking features first, then their rolling sums per player pair.
# (The first assignment used to target the misspelled name `arget_df`; it only
# worked because create_trackmerged_ftr also modifies target_df in place.)
target_df, FEATURE_COLS = create_trackmerged_ftr(target_df, FEATURE_COLS)
target_df, FEATURE_COLS = create_roll_ftr(target_df, FEATURE_COLS)

original length 4721618
after length 4721618


In [15]:
# Keep only the rows whose players_dis is at most 2 and renumber the index.
target_df = target_df[target_df["players_dis"] <= 2].reset_index(drop=True)
print(len(target_df))

660560


# Merge helmet df

In [16]:
# Load the baseline helmet boxes: cuDF when a CUDA device is available, pandas otherwise.
helmet_df = (cudf if torch.cuda.is_available() else pd).read_csv(CFG["TRAIN_HELMET_CSV"])

# Join the helmet boxes of both players and both views, then release the table.
target_df, FEATURE_COLS = target_merge_helmet(target_df, helmet_df, FEATURE_COLS)
del helmet_df

original length 660560
119
original length 660560


In [17]:
# Helmet-centre distance features first, then the categorical flags
# (get_categorical_ftr reads the helmet width columns filled by the first call).
target_df, FEATURE_COLS = create_helmetmerged_ftr(target_df, FEATURE_COLS)
target_df, FEATURE_COLS = get_categorical_ftr(target_df, FEATURE_COLS)
print(len(target_df))

660560


# Reduce Data

In [18]:
# Per view: 1 when at least one of the two players has a helmet box in that
# view (a width of 0 marks a missing box), 0 when both widths are 0.
target_df["is_E_helmet"] = 1 -  ((target_df["E_width_1"]==0) & (target_df["E_width_2"]==0)).astype(int)
target_df["is_S_helmet"] = 1 -  ((target_df["S_width_1"]==0) & (target_df["S_width_2"]==0)).astype(int)
# Parenthesise the whole conjunction before casting; previously .astype(int)
# bound only to the right-hand comparison.
target_df["both_helmet"] = ((target_df["is_E_helmet"]==1) & (target_df["is_S_helmet"]==1)).astype(int)
# Keep only rows with a helmet box in both views.
target_df = target_df[target_df["both_helmet"]==1].reset_index(drop=True)
# target_df.to_csv("target_cudf.csv", index=False)
target_df = target_df.fillna(0)
# Only a cuDF frame needs converting; on the CPU path target_df is already a
# pandas DataFrame, which has no to_pandas().
if hasattr(target_df, "to_pandas"):
    target_df = target_df.to_pandas()

# Keep only the contacts listed in the saved-contact-frames file.
Saved_contact_frames = pd.read_csv(CFG["SAVED_CONTACT_CSV"])
saved_contact_ids = list(Saved_contact_frames["contact_id"].values)
target_df = target_df[target_df["contact_id"].isin(saved_contact_ids)]

# Debug runs work on a random sample of sample_num rows.
if CFG["DEBUG"]:
    target_df = target_df.sample(CFG["sample_num"]).reset_index(drop=True)
print(len(target_df))
print(len(target_df["game_play"].unique()))

457965
240


In [19]:
# Write the prepared table to OUTPUT_DIR/<EXP_NAME>/input_cudf_expc001.csv.
df_filename = os.path.join(CFG["OUTPUT_DIR"], CFG["EXP_NAME"] ,"input_cudf_expc001.csv")
# Nothing else in this section creates the experiment directory, so make sure
# it exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(df_filename), exist_ok=True)
target_df.to_csv(df_filename, index=False)