<a href="https://colab.research.google.com/github/jskaza/nfl-big-data-bowl-2023/blob/master/model_training_multiclass.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [230]:
import os
import datetime
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import pandas as pd
import tensorflow as tf
import math
from google.colab import drive
from sklearn.linear_model import LogisticRegression
drive.mount("/content/drive")
%load_ext tensorboard

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
df = pd.read_json("/content/drive/MyDrive/nfl-big-data-bowl-2023/data/dataset_week1.json")
df.head()

Unnamed: 0,nfl_id,opponent_x,opponent_y,teammate_x,teammate_y,ball_x,ball_y,game_id,play_id,frame_id,...,pff_sack,pff_position_lined_up,quarter,down,yards_to_go,absolute_yardline_number,personnel_o,defenders_in_box,offense_formation,score_delta
0,41263,"[-2.61, -1.29, 0.1, 1.73, 3]","[0.45, 0.15, -0.54, -0.37, 0.18]","[-1.99, 1.26, 2.75, 8.71]","[-2.12, -1.79, -2.14, -2.34]",0.0,0.0,2021090900,97,6,...,0,LEO,1,3,2,43,"1 RB, 1 TE, 3 WR",6.0,SHOTGUN,0
1,42403,"[3, 1.73, 0.1, -1.29, -2.61]","[0.18, -0.37, -0.54, 0.15, 0.45]","[2.75, 1.26, -1.99, -5.03]","[-2.14, -1.79, -2.12, -1.74]",0.0,0.0,2021090900,97,6,...,0,ROLB,1,3,2,43,"1 RB, 1 TE, 3 WR",6.0,SHOTGUN,0
2,44955,"[1.73, 0.1, 3, -1.29, -2.61]","[-0.37, -0.54, 0.18, 0.15, 0.45]","[2.75, -1.99, -5.03, 8.71]","[-2.14, -2.12, -1.74, -2.34]",0.0,0.0,2021090900,97,6,...,0,DRT,1,3,2,43,"1 RB, 1 TE, 3 WR",6.0,SHOTGUN,0
3,53441,"[-1.29, 0.1, -2.61, 1.73, 3]","[0.15, -0.54, 0.45, -0.37, 0.18]","[-5.03, 1.26, 2.75, 8.71]","[-1.74, -1.79, -2.14, -2.34]",0.0,0.0,2021090900,97,6,...,0,LILB,1,3,2,43,"1 RB, 1 TE, 3 WR",6.0,SHOTGUN,0
4,53504,"[1.73, 3, 0.1, -1.29, -2.61]","[-0.37, 0.18, -0.54, 0.15, 0.45]","[1.26, -1.99, 8.71, -5.03]","[-1.79, -2.12, -2.34, -1.74]",0.0,0.0,2021090900,97,6,...,0,RE,1,3,2,43,"1 RB, 1 TE, 3 WR",6.0,SHOTGUN,0


In [231]:
def make_features(df: pd.DataFrame, feats: list, outcomes: list):
    X, y  = [], []
    grouped_df = df.groupby(["nfl_id","game_id","play_id"])
    for group_name, group_df in grouped_df:
      X.append(group_df[feats].to_numpy())
      y.append(group_df[outcomes].to_numpy()[0])
    
    X = tf.keras.utils.pad_sequences(X ,dtype="float", padding="pre", value= -99)
    y = tf.keras.utils.pad_sequences(y ,dtype="float", padding="pre", value= -99)
    return X, y

In [232]:
feats = ["x","y","ball_x","ball_y"]
outcomes = ["pff_hurry"]
X, y = make_features(df, feats, outcomes)

split = 0.8
num_train = round(split * X.shape[0])

X_train = X[:num_train]
y_train = y[:num_train]

X_test = X[num_train:]
y_test = y[num_train:]


In [225]:
# set random seed for reproducibility

tf.random.set_seed(42)
np.random.seed(42)
NUM_EPOCHS = 50

# create the model
model = tf.keras.Sequential()
model.add(tf.keras.layers.Masking(mask_value=-99.,
                                  input_shape= X.shape[1:]))
model.add(tf.keras.layers.LSTM(32, input_shape = X.shape[1:]))
model.add(tf.keras.layers.Dense(y.shape[1], activation="sigmoid"))

# compile and fit the model
model.compile(loss="binary_crossentropy", optimizer="adam", metrics = [tf.keras.metrics.AUC(), tf.keras.metrics.Precision(), tf.keras.metrics.Recall(),
                                                                       tf.keras.metrics.TruePositives(), tf.keras.metrics.TrueNegatives(), tf.keras.metrics.FalsePositives(),
                                                                       tf.keras.metrics.FalseNegatives()])

logdir = os.path.join("/content/drive/MyDrive/nfl-big-data-bowl-2023/logs", datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
tensorboard_callback = tf.keras.callbacks.TensorBoard(logdir, histogram_freq=1)

model.fit(X_train, y_train, epochs=NUM_EPOCHS, verbose=0, callbacks=[tensorboard_callback])
model.evaluate(X_test, y_test)



[0.13787873089313507,
 0.9370567798614502,
 0.5348837375640869,
 0.38983049988746643,
 23.0,
 874.0,
 20.0,
 36.0]

In [233]:
X2, y2  = [], []
grouped_df = df.groupby(["nfl_id","game_id","play_id"])
for group_name, group_df in grouped_df:
  X2.append(group_df[["x","y"]].to_numpy()[0])
  y2.append(group_df[outcomes].to_numpy()[0])
X2 = np.array(X2)
y2 = np.array(y2)
split = 0.8
num_train = round(split * X2.shape[0])

X2_train = np.stack(X2[:num_train])
y2_train = np.stack(y2[:num_train])

X2_test = np.stack(X2[num_train:])
y2_test = np.stack(y2[num_train:])

mod = LogisticRegression()
mod.fit(X_train, y_train)
probs = mod.predict_proba(X_test)[:, 1]

ValueError: ignored

In [None]:
%tensorboard --logdir /content/drive/MyDrive/nfl-big-data-bowl-2023/logs