In [None]:
from IPython.display import display, HTML
# Inject a CSS rule that forces `.container` elements to 100% width so the notebook can use the full window.
# NOTE(review): `.container` looks like the classic Notebook layout class — confirm it has an effect in the UI in use.
display(HTML("<style>.container { width:100% !important; }</style>"))

In [None]:
import os
import json
import pickle
from typing import Optional

import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
import shap
from sklearn.model_selection import GroupKFold, StratifiedGroupKFold, PredefinedSplit
from sklearn.metrics import matthews_corrcoef, roc_auc_score
from sklearn.preprocessing import LabelEncoder  
from dataclasses import dataclass
from tqdm import tqdm
from sklearn.metrics import matthews_corrcoef, confusion_matrix

# Raise matplotlib's `animation.embed_limit` setting to a huge value so it effectively never applies.
matplotlib.rcParams['animation.embed_limit'] = 2**128
import matplotlib.animation as animation
from matplotlib import rc
# Set the `html` option of the `animation` rc group to 'jshtml'.
rc('animation', html='jshtml')
from utils.general import LabelEncoders, LGBMSerializer, reduce_dtype
from utils.nfl import merge_tracking, distance, cast_player_id
from utils.metrics import binarize_pred, summarize_per_play_mcc
from utils.visualize import EDAConfig, plot_animation
from utils.visualize_helmet import save_separate_videos, save_concat_video

In [None]:
TRAINED_DATA_PATH = "./"

# Artefacts stored under TRAINED_DATA_PATH: the serialized LGBM bundle ("lgb"),
# the `oof.npy` scores (presumably out-of-fold predictions) and the training matrix.
serializer = LGBMSerializer.from_file(os.path.join(TRAINED_DATA_PATH, "lgb"))
oof = np.load(os.path.join(TRAINED_DATA_PATH, "oof.npy"))
x_train = np.load(os.path.join(TRAINED_DATA_PATH, "X_train.npy"))

# Competition CSVs: player ids are cast first, then the tracking frame has its dtypes reduced.
tracking = (
    pd.read_csv("../input/nfl-player-contact-detection/train_player_tracking.csv")
    .pipe(cast_player_id)
    .pipe(reduce_dtype)
)
labels = pd.read_csv("../input/nfl-player-contact-detection/train_labels.csv").pipe(cast_player_id)

# Attach both players' x/y tracking positions to the label rows, then measure the pair distance.
labels = merge_tracking(labels, tracking, ["x_position", "y_position"])
labels["distance"] = distance(
    labels["x_position_1"],
    labels["y_position_1"],
    labels["x_position_2"],
    labels["y_position_2"],
)

# A row is a "hard sample" when the pair distance is at most 3 or the second id is -1 (ground, "G").
is_hard_sample = np.logical_or(
    labels["distance"] <= 3,
    labels["nfl_player_id_2"] == -1,
)

# The saved OOF array must line up one-to-one with the hard-sample rows.
assert is_hard_sample.sum() == len(oof)

# Ground flag restricted to the hard-sample rows, as a plain array for binarize_pred.
is_ground = labels.loc[is_hard_sample, "nfl_player_id_2"].eq(-1).values
y_pred = binarize_pred(
    oof,
    serializer.threshold_1,
    serializer.threshold_2,
    is_ground,
)

# Write raw scores and binarized predictions back onto the hard-sample rows only.
labels.loc[is_hard_sample, "oof"] = oof
labels.loc[is_hard_sample, "y_pred"] = y_pred

# Absolute gap between the label and the score; rows without a score are treated as a score of 0.
labels["error"] = labels["contact"].sub(labels["oof"].fillna(0.0)).abs()

# Load the baseline helmet CSV *before* building the summary so that the table
# below is the final expression of the cell. Jupyter only renders a cell's last
# expression, so the ranking was silently discarded while the read_csv
# assignment came after it.
helmet = pd.read_csv('../input/nfl-player-contact-detection/train_baseline_helmets.csv')

# Per-play MCC summary; show the 20 lowest-MCC plays that have at least one contact.
per_play_mcc_df = summarize_per_play_mcc(labels)
per_play_mcc_df[per_play_mcc_df["number_of_contacts"] > 0].sort_values(by="mcc").head(20)

In [None]:
def get_id_by_index(labels, index):
    """Return the play and player pair stored at one row label of ``labels``.

    Args:
        labels: DataFrame exposing ``game_play``, ``nfl_player_id_1`` and
            ``nfl_player_id_2`` columns.
        index: Row label handed to ``labels.loc``.

    Returns:
        Tuple ``(game_play, nfl_player_id_1, nfl_player_id_2)`` for that row.
    """
    record = labels.loc[index]
    id_fields = ("game_play", "nfl_player_id_1", "nfl_player_id_2")
    return tuple(getattr(record, field) for field in id_fields)

# GAME_PLAY = "58529_001853"  # if None, one is picked at random
# PLAYER_ID_1 = None  # given as an int; if not given, picked at random from GAME_PLAY
# PLAYER_ID_2 = None  # given as an int (G: -1); if not given, picked at random from GAME_PLAY
CONTACT_TO_G = False  # whether to pick a contact against G when player_id_2 is not given

# Select directly by row index
# GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2 = get_id_by_index(labels, 1089722)
# print(GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2)

# Rank (play, player 1, player 2) groups by mean error, worst first, and take the top one
pair_keys = ['game_play', 'nfl_player_id_1', 'nfl_player_id_2']
samples = (
    labels.groupby(pair_keys)['error']
    .mean()
    .reset_index()
    .sort_values('error', ascending=False)
)
GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2, error_mean = samples.iloc[0].values
print(GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2, error_mean)

In [None]:
# Fallback when no play was chosen: draw one random row that is a positive contact with id -1 (G).
if GAME_PLAY is None:
    ground_contact_mask = (labels["contact"] == 1) & (labels["nfl_player_id_2"] == -1)
    sample = labels[ground_contact_mask].sample(1).iloc[0]
    GAME_PLAY = sample.game_play

# Bundle the model, data frames and the selected play / player pair for the visualisation helpers.
config = EDAConfig(
    serializer,
    tracking,
    labels,
    GAME_PLAY,
    x_train,
    is_hard_sample,
    player_id_1=PLAYER_ID_1,
    player_id_2=PLAYER_ID_2,
    to_g=CONTACT_TO_G,
)

# plot_animation(config)

In [None]:
# Alternative (disabled): produce a single concatenated video instead of separate ones.
# concat_path = save_concat_video(labels, helmet, GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2)
# Save separate videos for the selected play / player pair and keep the two returned paths.
# NOTE(review): `end_path` / `side_path` presumably correspond to the Endzone and Sideline views — confirm in utils.visualize_helmet.
end_path, side_path = save_separate_videos(labels, helmet, GAME_PLAY, PLAYER_ID_1, PLAYER_ID_2)

In [None]:
from IPython.display import Video

def play_video(video_path: str, frac: float = 1.25):
    """Display a video file in the notebook output at a scaled 1280x720 size.

    Args:
        video_path: Path of the video file handed to ``IPython.display.Video``.
        frac: Scaling factor applied to the 1280x720 base display size.
            Defaults to 1.25, the value that was previously hard-coded.
    """
    base_width, base_height = 1280, 720
    display(
        Video(
            data=video_path,
            embed=True,
            height=int(base_height * frac),
            width=int(base_width * frac),
        )
    )
    
# Show the video stored at `side_path` inline.
play_video(side_path)