# NFL Baseline
- create target_df (distance in tracking_df is lower than threshold=3)
https://www.kaggle.com/code/stgkrtua/nfl-creatatraindataset-targetdf
- create dataset save frames in target_df
https://www.kaggle.com/code/stgkrtua/nfl-createdataset-saveframes
- check saved images
https://www.kaggle.com/code/stgkrtua/nfl-checkdataset-plotsavedimage

# import libraries

In [1]:
# general
import os
import gc
import pickle
import glob
import random
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import time
import math
from tqdm.notebook import tqdm

import sys
# warningの表示方法の設定
import warnings
warnings.filterwarnings("ignore")

# Set Configurations

In [2]:
CFG = {
    "DEBUG" : False,
    "img_size" : (224, 224),
    "masksize_helmet_ratio" : 4,
    "sample_num" : -1,
}

if CFG["DEBUG"]:
    CFG["sample_num"] = 10

CFG["INPUT_DIR"] = "/workspace/input"
CFG["OUTPUT_DIR"] = "/workspace/output"
CFG["TRAIN_HELMET_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_baseline_helmets.csv")
CFG["TRAIN_TRACKING_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_player_tracking.csv")
CFG["TRAIN_VIDEO_META_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_video_metadata.csv")
CFG["TRAIN_LABEL_CSV"] = os.path.join(CFG["INPUT_DIR"], "train_labels.csv")
CFG["TARGET_CSV"] = os.path.join(CFG["INPUT_DIR"], "target_fillna0_shift_2.csv")
CFG["TRAIN_E_IMG_DIR"] = os.path.join(CFG["INPUT_DIR"], "train_frames")
CFG["TRAIN_S_IMG_DIR"] = CFG["TRAIN_E_IMG_DIR"]
CFG["CONTACT_IMG_DIR"] = os.path.join(CFG["INPUT_DIR"], "contact_images_nomask")

## Dataset Utils

In [3]:
def set_inimg_window(crop_pos, mask_size, img_size=(720, 1280)):#crop_pos = [left, top, right, bot]
    if mask_size[1] >= img_size[0]:
        top, bot = 0, img_size[1]
    else:
        top=(crop_pos[1] + crop_pos[3])//2 - mask_size[1]//2
        bot=(crop_pos[1] + crop_pos[3])//2 + mask_size[1]//2
        if top < 0:
            bot = bot - top
            top = 0
        elif bot > img_size[0]:
            top = top - (bot-img_size[0])
            bot = img_size[0]

    if mask_size[0] >= img_size[1]:
        left, right = 0, img_size[1]
    else:
        left = (crop_pos[0] + crop_pos[2])//2 - mask_size[0]//2
        right = (crop_pos[0] + crop_pos[2])//2 + mask_size[0]//2
        if left < 0:
            right = right - left
            left = 0
        elif right > img_size[1]:
            left = left - (right - img_size[1])
            right = img_size[1]
    crop_area = np.array([left, top, right, bot]).astype(np.int)
    return crop_area

In [4]:
def get_crop_area(p1_helmet, p2_helmet, input_size=(720, 1280)):#helmet[left, width, top, height]
    if (p1_helmet[1]==0 and p1_helmet[3]==0) and (p2_helmet[1]==0 and p2_helmet[3]==0):
        crop_area = [0, 0, input_size[1], input_size[0]]
        # print("bose player's helmet is not detected.")
        return crop_area
    elif (p2_helmet[1]==0 and p2_helmet[3]==0) and (p1_helmet[1] != 0 and p1_helmet[3]!=0):
        # print("p1 detected.")
        crop_x_center, crop_y_center = p1_helmet[0] + (p1_helmet[1])//2, p1_helmet[2] + (p1_helmet[3])//2
        helmet_base_size = (p1_helmet[1] + p1_helmet[3])*0.5*CFG["masksize_helmet_ratio"]*4
        output_size = [helmet_base_size, helmet_base_size]
    elif (p1_helmet[1]==0 and p1_helmet[3]==0) and (p2_helmet[1]!=0 and p2_helmet[3]!=0):
        # print("p2 detected.")
        crop_x_center, crop_y_center = p2_helmet[0] + (p2_helmet[1])//2, p2_helmet[2] + (p2_helmet[3])//2
        helmet_base_size = (p2_helmet[1] + p2_helmet[3])*0.5*CFG["masksize_helmet_ratio"]*4
        output_size = [helmet_base_size, helmet_base_size]
    else:
    #     print("p1 and p2 detected.")
        p1_x_center, p1_y_center = p1_helmet[0] + (p1_helmet[1])//2, p1_helmet[2] + (p1_helmet[3])//2
        p2_x_center, p2_y_center = p2_helmet[0] + (p2_helmet[1])//2, p2_helmet[2] + (p2_helmet[3])//2
        crop_x_center, crop_y_center = (p1_x_center + p2_x_center)//2, (p1_y_center + p2_y_center)//2
        helmet_base_size = (abs(p1_x_center - p2_x_center) + abs(p1_y_center - p2_y_center))*0.5 \
                            + ((p1_helmet[1] + p2_helmet[1])*0.5 + (p1_helmet[3] + p2_helmet[3])*0.5)*0.5*CFG["masksize_helmet_ratio"]*2
        output_size = [helmet_base_size, helmet_base_size]
    
    # print("crop center", crop_x_center, crop_y_center)
    crop_left = crop_x_center - output_size[1]//2
    crop_top = crop_y_center - output_size[0]//2
    crop_right = crop_x_center + output_size[1]//2
    crop_bot = crop_y_center + output_size[0]//2
    crop_area = [crop_left, crop_top, crop_right, crop_bot]
    crop_area = set_inimg_window(crop_area, output_size)
    return crop_area

# Load Data

In [5]:
target_df = pd.read_csv(CFG["TARGET_CSV"])
if CFG["DEBUG"]:
    target_df = target_df.sample(CFG["sample_num"]).reset_index(drop=True)
    CFG["CONTACT_IMG_DIR"] = os.path.join(CFG["INPUT_DIR"], "contact_images_DEBUG")

print(len(target_df))
display(target_df["contact"].value_counts())

660553


0    596241
1     64312
Name: contact, dtype: int64

In [6]:
target_df["frame"] =  (target_df['step']/10*59.94+5*59.94).astype('int')+1
display(target_df)

Unnamed: 0,contact_id,game_play,datetime,step,nfl_player_id_1,nfl_player_id_2,contact,frame,game_frame,game_frame_player_1,...,E_top_2,E_height_2,S_left_1,S_width_1,S_top_1,S_height_1,S_left_2,S_width_2,S_top_2,S_height_2
0,58168_003392_0_38590_44822,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,44822,0,300,58168_003392_298,58168_003392_298_38590,...,254.0,33.0,468.0,13.0,372.0,18.0,427.0,14.0,384.0,19.0
1,58168_003392_0_38590_39947,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,39947,0,300,58168_003392_298,58168_003392_298_38590,...,281.0,34.0,468.0,13.0,372.0,18.0,473.0,15.0,342.0,18.0
2,58168_003392_0_38590_42565,58168_003392,2020-09-11 03:01:48.100000+00:00,0,38590,42565,0,300,58168_003392_298,58168_003392_298_38590,...,313.0,19.0,468.0,13.0,372.0,18.0,478.0,15.0,400.0,18.0
3,58168_003392_0_41944_42565,58168_003392,2020-09-11 03:01:48.100000+00:00,0,41944,42565,0,300,58168_003392_298,58168_003392_298_41944,...,313.0,19.0,511.0,13.0,415.0,15.0,478.0,15.0,400.0,18.0
4,58168_003392_0_37211_46445,58168_003392,2020-09-11 03:01:48.100000+00:00,0,37211,46445,0,300,58168_003392_298,58168_003392_298_37211,...,291.0,33.0,374.0,15.0,512.0,17.0,421.0,15.0,499.0,18.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
660548,58582_003121_91_48220_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,48220,G,0,846,58582_003121_840,58582_003121_840_48220,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
660549,58582_003121_91_47906_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47906,G,0,846,58582_003121_840,58582_003121_840_47906,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
660550,58582_003121_91_38557_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,38557,G,0,846,58582_003121_840,58582_003121_840_38557,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
660551,58582_003121_91_47872_G,58582_003121,2021-10-12 02:42:29.100000+00:00,91,47872,G,0,846,58582_003121_840,58582_003121_840_47872,...,0.0,0.0,742.0,20.0,242.0,27.0,0.0,0.0,0.0,0.0


In [7]:
saved_list = []

for view in ["Endzone", "Sideline"]:
    file_id_tmp = None
    start_time = time.time()
    for idx in range(len(target_df)):
        print("\r {}/{}".format(idx, len(target_df)), end="")
        target_info = target_df.iloc[idx]
        # read frame image
        game_play = target_info.game_play
        frame = target_info.frame
        contact_id = target_info.contact_id
        file_id = f"{game_play}_{view}_{frame:04}.jpg"
        if file_id != file_id_tmp:
            filename = os.path.join(CFG["TRAIN_E_IMG_DIR"], file_id)
            img = cv2.imread(filename)
        if not img is None:
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            view_tmp = view
            file_id_tmp = file_id
        else:
            print(f"\t\t {file_id} does not exists.")
            continue
        mask_img = img.copy()        
        # player highlight mask
        player1 = target_info.nfl_player_id_1
        player2 = target_info.nfl_player_id_2
        if view == "Endzone":
            p1_helmet = np.array([target_info.E_left_1, target_info.E_width_1,
                                target_info.E_top_1, target_info.E_height_1]).astype(np.int)
            p2_helmet = np.array([target_info.E_left_2, target_info.E_width_2,
                                target_info.E_top_2, target_info.E_height_2]).astype(np.int)
        elif view == "Sideline":
            p1_helmet = np.array([target_info.S_left_1, target_info.S_width_1,
                                target_info.S_top_1, target_info.S_height_1]).astype(np.int)
            p2_helmet = np.array([target_info.S_left_2, target_info.S_width_2,
                                target_info.S_top_2, target_info.S_height_2]).astype(np.int)
        
        # crop players area
        crop_area = get_crop_area(p1_helmet, p2_helmet)# crop_area=[left, top, right, bot]

        mask_img = mask_img[crop_area[1]:crop_area[3], crop_area[0]:crop_area[2], :]
        mask_img = cv2.resize(mask_img, dsize=CFG["img_size"])
        
        contact_fileid = f"{contact_id}_{view}.jpg"
        contact_filename = os.path.join(CFG["CONTACT_IMG_DIR"], contact_fileid)
        cv2.imwrite(contact_filename, mask_img)
        saved_list.append([contact_id, view])
        if (idx+1)%10000 == 0:
            elapsed_time = (time.time() - start_time)/60
            print(f"\t\t idx = {idx}, elapsed time = {elapsed_time:.3f} min")

elapsed_time = (time.time() - start_time)/60
print(f"\t\t idx = {idx}, elapsed time = {elapsed_time:.3f} min")
        


 2521/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2522/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2523/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2524/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2525/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2526/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2527/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2528/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2529/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2530/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2531/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2532/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2533/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2534/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2535/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2536/660553		 58168_003392_Endzone_0714.jpg does not exists.
 2537/66

In [8]:
len(saved_list)

1310373

In [9]:
csvname = os.path.join(CFG["INPUT_DIR"], "Saved_contact_frames.csv")
pd.DataFrame(saved_list, columns=["contact_id", "view"]).to_csv(csvname, index=False)