In [1]:
import pickle

from tqdm import tqdm

import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import h5py

import processor
import xgb_model
import utils

In [2]:
# Directory handed to the data processor.
# NOTE(review): absolute, machine-specific path — adjust when running elsewhere.
data_dir = "/media/data/files/nfl_bdb_2025/data"
# Week numbers to evaluate; the loading cell below reads the "week_<n>" group
# of the cache file for each entry.
test_weeks = [8, 9]
# Processor instance; this notebook only uses its `cache_file_fname` attribute
# to locate the HDF5 cache.
proc = processor.SeparationDataProcessor(data_dir)

In [3]:
# Load the pickled model object that the prediction cell calls `.predict` on.
# NOTE(review): presumably an XGBoost model given the file name — confirm.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load model files from a trusted source.
with open("xgb_model_all_all_weeks.pkl", "rb") as f:
    bst = pickle.load(f)

In [None]:
# Build the evaluation set from the processor's HDF5 cache.
#   X: one DataFrame per play, holding the rows flagged wasTargettedReceiver
#      for every valid frame of that play, merged with the per-play tables.
#   y: the matching `separation_arr` entry for each play (same order as X).
cache_fname = proc.cache_file_fname
with h5py.File(cache_fname, "r") as all_weeks_f:
    X = []
    y = []
    # Loop-invariant, so fetched once instead of once per play.
    # NOTE(review): assumes get_position_cols() returns the same value on
    # every call — confirm.
    pos_cols = xgb_model.get_position_cols()
    for week_num in test_weeks:
        week_grp = all_weeks_f[f"week_{week_num}"]

        # Column names are stored once per week group; decode them once here
        # rather than per play / per frame.
        # NOTE(review): assumes xgb_model.decode is side-effect free and
        # returns an iterable of column names — confirm.
        play_players_cols = list(xgb_model.decode(week_grp["play_players_cols"]))
        play_overall_cols = list(xgb_model.decode(week_grp["play_overall_cols"]))
        meta_cols = list(xgb_model.decode(week_grp["meta_cols"]))
        seq_cols = list(xgb_model.decode(week_grp["seq_cols"]))

        n = week_grp["seq_arr"].shape[0]
        for idx in tqdm(range(n), desc=f"week {week_num}"):
            # Number of frames to read: one past the last non-zero mask entry.
            seq_mask = week_grp["seq_mask"][idx, :, 0, 0].astype(int)
            idxs = np.where(seq_mask)[0]
            if idxs.size == 0:
                # Fail with context (and before touching y, so X and y stay
                # the same length) instead of NumPy's zero-size reduction error.
                raise ValueError(
                    f"week_{week_num} play index {idx}: seq_mask marks no valid frames"
                )
            seq_len = idxs.max() + 1

            y.append(week_grp["separation_arr"][idx])

            # Per-play tables that get joined onto every frame below.
            play_players_df = pd.DataFrame(
                week_grp["play_players_arr"][idx, :, :],
                columns=play_players_cols,
            )
            play_players_df["nflId"] = play_players_df["nflId"].astype(int)
            play_overall_df = pd.DataFrame(
                week_grp["play_overall_arr"][idx, :, :],
                columns=play_overall_cols,
            )
            play_overall_df["nflId"] = play_overall_df["nflId"].astype(int)
            meta_df = pd.DataFrame(
                week_grp["meta_arr"][idx, :].reshape(1, -1),
                columns=meta_cols,
            )

            pos_arr = week_grp["seq_arr"][idx, :seq_len, :, :]
            pos_df = []
            for seq_idx in range(pos_arr.shape[0]):
                seq_df = pd.DataFrame(pos_arr[seq_idx, :, :], columns=seq_cols)
                # Remove rows flagged club_football == 1.0 before joining on nflId.
                # NOTE(review): presumably the ball's own tracking row — confirm.
                seq_df = seq_df.drop(seq_df[seq_df["club_football"] == 1.0].index)
                seq_df["nflId"] = seq_df["nflId"].astype(int)

                seq_df = seq_df.merge(play_players_df, how="outer", on="nflId")
                seq_df = seq_df.merge(play_overall_df, how="outer", on=["gameId", "playId", "nflId"])

                # Index of the largest value among the position columns,
                # used only as the sort key.
                seq_df["position_ord"] = np.argmax(seq_df[pos_cols].to_numpy(), axis=1)
                seq_df = seq_df.sort_values(by="position_ord")
                # Keep only the rows flagged as the targeted receiver.
                seq_df = seq_df[seq_df["wasTargettedReceiver"] == 1.0]
                seq_df = seq_df.merge(meta_df, how="outer", on=["gameId", "playId"])

                pos_df.append(seq_df)
            X.append(pd.concat(pos_df, axis=0))

 43%|█████████████████████████████████████████████████████████████████████████▍                                                                                                  | 337/789 [01:48<03:02,  2.47it/s]

In [None]:
# For each flag column, locate the plays in X whose column sums above zero.
# Each result has the same form as single-argument np.where: a 1-tuple
# holding an array of positions into X.
in_motion_idxs, shift_idxs, motion_idxs = [
    np.nonzero([play_df[flag_col].sum() > 0 for play_df in X])
    for flag_col in ("inMotionAtBallSnap", "shiftSinceLineset", "motionSinceLineset")
]

In [None]:
# Run the loaded model on every play's feature frame.
# pred_arr[i] is whatever bst.predict returns for X[i] (same order as X and y).
# The feature column list does not depend on the play, so fetch it once.
# NOTE(review): assumes utils.get_target_feature_cols() returns the same
# value on every call — confirm.
feature_cols = utils.get_target_feature_cols()
pred_arr = []
for play_df in tqdm(X):
    seq_df = play_df[feature_cols]
    pred_sep = bst.predict(seq_df)
    pred_arr.append(pred_sep)

In [None]:
# Line plot of the model output stored at position 1451 of pred_arr.
fig, ax = plt.subplots()
ax.plot(pred_arr[1451])
plt.show()

In [None]:
# Display the positions (into X) of plays whose "motionSinceLineset" column
# sums above zero, as computed in the selection cell above.
motion_idxs

In [None]:
# Line plot of the model output stored at position 1029 of pred_arr.
fig, ax = plt.subplots()
ax.plot(pred_arr[1029])
plt.show()