In [1]:
import os
import pandas as pd
import tensorflow as tf
import numpy as np
import polars as pl

env = "local"

In [2]:
if env == "local":
    os.chdir("/Users/samuel/Documents/GitHub/QB-GPT/")
else:
    from google.colab import drive
    drive.mount('/content/gdrive')
    os.chdir("/content/gdrive/MyDrive/NFL_Challenge/NFL-GPT/NFL data")

In [3]:
os.listdir()

['data_models',
 '.DS_Store',
 'app',
 'LICENSE',
 'models',
 'README.md',
 '.gitignore',
 'L2_plot.png',
 '.gitattributes',
 'data_preprocessing',
 'index',
 '.git',
 'notebooks']

In [4]:
training_data = tf.data.Dataset.load("data_models/Helenos/train_data_tfp")
testing_data = tf.data.Dataset.load("data_models/Helenos/test_data_tfp")

train_length = [i for i,_ in enumerate(training_data)][-1] + 1
test_length = [i for i,_ in enumerate(testing_data)][-1] + 1

print("Train length is : ", str(train_length))
print("Test length is : ", str(test_length))

batch_size = 32

training_data = training_data.shuffle(train_length).batch(batch_size)
testing_data = testing_data.shuffle(test_length).batch(batch_size)

Train length is :  141673
Test length is :  60718


In [5]:
from models.modeling.QBGPT.models import QBGPT, LargeQBGPT

moves_to_pred = 10876
input_size = 10878
starts_size = 1033
scrimmage_size = 100
positions_id = 29

temp_ids = 52
off_def_size = 2
token_type_size = 3
play_type_size = 9

model_tiny = QBGPT(input_vocab_size = input_size,
                    positional_vocab_size = temp_ids,
                    position_vocab_size=positions_id,
                    start_vocab_size=starts_size,
                    scrimmage_vocab_size=scrimmage_size,
                    offdef_vocab_size = off_def_size,
                    type_vocab_size = token_type_size,
                    playtype_vocab_size = play_type_size,
                    embedding_dim = 64,
                    hidden_dim = 64,
                    to_pred_size = moves_to_pred)

model_tiny.load_weights("models/modeling/QBGPT/weights/model_tiny/QBGPT")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2c7ca1590>

In [6]:
from models.modeling.StratFormer.models import StratEncoder

tiny_encoder = StratEncoder(num_spec_token= 1,
                            hidden_dim=64,
                            team_vocab_size=32,
                            player_vocab_size=7229,
                            season_vocab_size= 7,
                            down_vocab_size= 5,
                            base_encoder=model_tiny.Encoder)

tiny_encoder.load_weights("models/modeling/StratFormer/weights/stratformer_tiny/StratFormer/")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2c7337150>

In [15]:
import tensorflow_probability as tfp

tfpl = tfp.layers
tfd = tfp.distributions

class Helenos(tf.keras.Model):
    def __init__(self, Encoder):
        super(Helenos, self).__init__()
        
        self.Encoder = Encoder
        self.DenseVar2 = tf.keras.layers.Dense(3, activation = "relu")
        self.Pred = tfpl.DistributionLambda(lambda t : tfd.ExponentiallyModifiedGaussian(loc = t[:,0],
                                                                                         scale = tf.math.softplus(t[:,1]),
                                                                                         rate = tf.math.softplus(t[:,2])))
        
    def call(self, x):
        encoded_off = self.Encoder(x["off"])
        encoded_off = encoded_off[:,0,:]
        encoded_def = self.Encoder(x["def"])
        encoded_def = encoded_def[:,0,:]
        encoded = encoded_off-encoded_def
        dis = self.DenseVar2(encoded)
        pred = self.Pred(dis)
        return pred

In [16]:
Helenos = Helenos(tiny_encoder)
Helenos.Encoder.BaseEncoder.trainable = False

In [17]:
negloglik = lambda y, rv_y: -rv_y.log_prob(y)
Helenos.compile(loss=negloglik, optimizer=tf.keras.optimizers.Adam(1e-4))



In [18]:
Helenos.fit(training_data, validation_data = testing_data, epochs=2)

Epoch 1/2
Epoch 2/2


<keras.src.callbacks.History at 0x2cca8dcd0>

In [22]:
preds = []
trues = []

for b in testing_data:
    preds.append(Helenos.predict(b[0]))
    trues.append(b[1])



In [23]:
trues = np.array([np.array(v)[1].flatten() for v in trues]).flatten()
preds = np.array([np.array(v)[1].flatten() for v in preds]).flatten()

In [25]:
msle = tf.keras.metrics.MeanSquaredLogarithmicError()
msle(trues, preds)

<tf.Tensor: shape=(), dtype=float32, numpy=2.3955703>

In [26]:
mse = tf.keras.metrics.MeanSquaredError()
mse(trues, preds)

<tf.Tensor: shape=(), dtype=float32, numpy=129.64987>

In [27]:
mape = tf.keras.metrics.MeanAbsolutePercentageError()
mape(trues, preds)

<tf.Tensor: shape=(), dtype=float32, numpy=2026782000.0>

In [28]:
mae = tf.keras.metrics.MeanAbsoluteError()
mae(trues, preds)

<tf.Tensor: shape=(), dtype=float32, numpy=8.500327>