In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf


from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
os.chdir("/content/gdrive/MyDrive/NFL_Challenge/NFL-GPT/NFL data")
os.listdir()

['.DS_Store',
 'Contact Detection',
 'Punt Prediction',
 'Analytics',
 'Impact Detection',
 'data bowl 2021',
 'data bowl 2023',
 'data bowl 2022',
 'data bowl 2020',
 'asonty',
 'Highlights_NGS_2019',
 'Highlights_NGS_Prime',
 'final_df.parquet',
 'tokens.json',
 'mapped_df.parquet',
 'train_test_split.csv',
 'class_weights.parquet',
 'checkpoint',
 'models (1)',
 'models',
 'test_tokens_NFL_GPT',
 'train_tokens_NFL_GPT',
 'training_history.csv']

In [3]:
class CustomSparseCategoricalAccuracy(tf.keras.metrics.Metric):
    def __init__(self, name='custom_sparse_categorical_accuracy', **kwargs):
        super(CustomSparseCategoricalAccuracy, self).__init__(name=name, **kwargs)
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        mask = tf.not_equal(y_true, -100)  # Create a mask for valid labels
        valid_labels = tf.boolean_mask(y_true, mask)
        valid_labels = tf.cast(valid_labels, dtype=tf.int32)

        preds = tf.argmax(y_pred, axis = -1)
        valid_preds = tf.boolean_mask(preds, mask)
        valid_preds = tf.cast(valid_preds, dtype=tf.int32)

        correct = tf.equal(valid_labels, valid_preds)

        accuracy = tf.reduce_mean(tf.cast(correct, dtype=tf.float32))

        self.total.assign_add(accuracy)
        self.count.assign_add(1.0)

    def result(self):
        return self.total / self.count if self.count > 0 else 0.0

    def reset_state(self):
        self.total.assign(0.0)
        self.count.assign(0.0)

class CustomSparseCategoricalCrossentropy(tf.keras.losses.Loss):
    def __init__(self, from_logits=False, class_weights=None):
        super(CustomSparseCategoricalCrossentropy, self).__init__()
        self.from_logits = from_logits
        self.class_weights = class_weights

    def call(self, y_true, y_pred):
        mask = tf.not_equal(y_true, -100)  # Create a mask for valid labels
        valid_labels = tf.boolean_mask(y_true, mask)
        valid_logits = tf.boolean_mask(y_pred, mask)

        # Apply class weights if provided
        if self.class_weights is not None:
            # Create a tensor of weights using tf.gather
            weights = tf.gather(tf.constant(list(self.class_weights.values()), dtype=tf.float32), tf.cast(valid_labels, tf.int32))
            weighted_loss = tf.keras.losses.sparse_categorical_crossentropy(valid_labels, valid_logits, from_logits=self.from_logits)
            weighted_loss = weighted_loss * weights
            loss = tf.reduce_mean(weighted_loss)
        else:
            loss = tf.keras.losses.sparse_categorical_crossentropy(valid_labels, valid_logits, from_logits=self.from_logits)

        return loss

In [4]:
class PlayTypeEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(PlayTypeEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["PlayType"])
    return embed

class OffDefEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(OffDefEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["OffDef"])
    return embed

class TypeEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(TypeEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["token_type_ids"])
    return embed

class SidefEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(SidefEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["side_ids"])
    return embed

class PositionalEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(PositionalEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["pos_ids"])
    return embed

class InputEncoder(tf.keras.Model):
  def __init__(self, vocab_size : int, embedding_dim : int):
        super(InputEncoder, self).__init__()

        self.Embedding = tf.keras.layers.Embedding(input_dim = vocab_size,
                                                   output_dim = embedding_dim)

  def call(self, x):
    embed = self.Embedding(x["input_ids"])
    return embed

class Embedding(tf.keras.Model):
  def __init__(self,
               input_vocab_size : int,
               positional_vocab_size : int,
               offdef_vocab_size : int,
               side_vocab_size : int,
               type_vocab_size : int,
               playtype_vocab_size : int,
               embedding_dim : int):
        super(Embedding, self).__init__()

        self.InputEmbedding = InputEncoder(vocab_size=input_vocab_size,
                                           embedding_dim=embedding_dim)
        self.PositionalEmbedding = PositionalEncoder(vocab_size=positional_vocab_size,
                                                     embedding_dim=embedding_dim)
        self.OffDefEmbedding = OffDefEncoder(vocab_size=offdef_vocab_size,
                                             embedding_dim=embedding_dim)
        self.TypeEmbedding = OffDefEncoder(vocab_size=type_vocab_size,
                                             embedding_dim=embedding_dim)
        self.SideEmbedding = SidefEncoder(vocab_size=side_vocab_size,
                                             embedding_dim=embedding_dim)
        self.PlayTypeEmbedding = PlayTypeEncoder(vocab_size=playtype_vocab_size,
                                                 embedding_dim=embedding_dim)
        self.Add = tf.keras.layers.Add()

        self.Dense = tf.keras.layers.Dense(embedding_dim)

  def call(self, x):
    input_embed = self.InputEmbedding(x)
    positional_embed = self.PositionalEmbedding(x)
    side_embed = self.SideEmbedding(x)
    type_embed = self.TypeEmbedding(x)
    offdef_embed = self.OffDefEmbedding(x)
    playtype_embed = self.PlayTypeEmbedding(x)

    embed = self.Add([input_embed, side_embed, type_embed, positional_embed, offdef_embed, playtype_embed])
    embed = self.Dense(embed)

    return embed

from typing import List, Optional, Union

def shape_list(tensor: Union[tf.Tensor, np.ndarray]) -> List[int]:
    """
    Deal with dynamic shape in tensorflow cleanly.

    Args:
        tensor (`tf.Tensor` or `np.ndarray`): The tensor we want the shape of.

    Returns:
        `List[int]`: The shape of the tensor as a list.
    """
    if isinstance(tensor, np.ndarray):
        return list(tensor.shape)

    dynamic = tf.shape(tensor)

    if tensor.shape == tf.TensorShape(None):
        return dynamic

    static = tensor.shape.as_list()

    return [dynamic[i] if s is None else s for i, s in enumerate(static)]

class AttentionBlock(tf.keras.Model):
  def __init__(self,
               num_heads : int,
               hidden_dim : int,
               output_dim : int):
        super(AttentionBlock, self).__init__()

        self.num_attention_heads = num_heads
        self.attention_head_size = hidden_dim
        self.total_dim = num_heads * hidden_dim
        self.output_dim = output_dim

        self.Query = tf.keras.layers.Dense(self.total_dim, name = "Query")
        self.Key = tf.keras.layers.Dense(self.total_dim, name = "Key")
        self.Value = tf.keras.layers.Dense(self.total_dim, name = "Value")


        self.Dense = tf.keras.layers.Dense(output_dim, name = "Dense", activation = "relu")
        self.Add = tf.keras.layers.Add(name = "Add")
        self.Drop = tf.keras.layers.Dropout(rate = 0.1)
        self.Norm = tf.keras.layers.BatchNormalization(name = "Norm")

  def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
        # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
        tensor = tf.reshape(tensor=tensor, shape=(batch_size, -1, self.num_attention_heads, self.attention_head_size))

        # Transpose the tensor from [batch_size, seq_length, num_attention_heads, attention_head_size] to [batch_size, num_attention_heads, seq_length, attention_head_size]
        return tf.transpose(tensor, perm=[0, 2, 1, 3])

  def create_causal_masks(self, temp_ids):
      # Use broadcasting to create the 2D comparison tensor
      causal_mask = temp_ids[:, :, tf.newaxis] >= temp_ids[:, tf.newaxis, :]
      causal_mask = (tf.cast(causal_mask, dtype=tf.float32) - 1) * 10000
      reshaped_tensor = tf.expand_dims(causal_mask, axis=1)
      duplicated_tensor = tf.tile(reshaped_tensor, multiples=[1, 3, 1, 1])
      return duplicated_tensor

  def create_attention_mask(self, attn_mask):
    attn_mask = (tf.cast(attn_mask, dtype=tf.float32) -1) * 10000
    reshaped_tensor = tf.expand_dims(attn_mask, axis=1)
    reshaped_tensor = tf.expand_dims(reshaped_tensor, axis=1)
    duplicated_tensor = tf.tile(reshaped_tensor, multiples=[1, 3, 1, 1])
    return duplicated_tensor

  def compute_scaled_attn_scores(self, query, key):
    attention_scores = tf.matmul(query, key, transpose_b=True)  # Transpose the second sequence

    # If you want scaled dot-product attention, divide by the square root of the embedding dimension
    embedding_dim = query.shape[-1]
    scaled_attention_scores = attention_scores / tf.math.sqrt(tf.cast(embedding_dim, dtype=tf.float32))

    return scaled_attention_scores

  def compute_attention_weigths(self, query, key, temp_ids, masks):

    attn_masks = self.create_attention_mask(masks)
    causal_masks = self.create_causal_masks(temp_ids)
    scaled_attn_scores = self.compute_scaled_attn_scores(query, key)

    attn_scores = scaled_attn_scores - attn_masks - causal_masks
    return tf.nn.softmax(attn_scores, axis = -1)

  def get_preds_and_attention(self,
           embeddings,
           temporal_ids,
           attention_masks):

    query = self.Query(embeddings)
    key = self.Key(embeddings)
    value = self.Value(embeddings)

    attention_weights = self.compute_attention_weigths(query, key, temporal_ids, attention_masks)

    attention_scores = tf.matmul(attention_weights, value)
    attention_scores = self.Dense(attention_scores)

    output = self.Add([attention_scores, embeddings])
    output = self.Drop(output)
    output = self.Norm(output)
    return output, attention_weights

  def call(self,
           hidden_states : tf.Tensor,
           temporal_ids,
           attention_masks):

    batch_size = shape_list(hidden_states)[0]

    query = self.Query(hidden_states)
    queries = self.transpose_for_scores(query, batch_size)

    key = self.Key(hidden_states)
    keys = self.transpose_for_scores(key, batch_size)

    value = self.Value(hidden_states)
    values = self.transpose_for_scores(value, batch_size)

    attention_weights = self.compute_attention_weigths(queries, keys, temporal_ids, attention_masks)

    attention_scores = tf.matmul(attention_weights, values)
    attention_scores = tf.transpose(attention_scores, perm=[0, 2, 1, 3])
    attention_scores = tf.reshape(tensor=attention_scores, shape=(batch_size, -1, self.total_dim))

    attention_scores = self.Dense(attention_scores)

    output = self.Add([attention_scores, hidden_states])
    output = self.Drop(output)
    output = self.Norm(output)
    return output

class NFLgpt(tf.keras.Model):
  def __init__(self,
               input_vocab_size : int,
               to_pred_size : int,
               positional_vocab_size : int,
               side_vocab_size : int,
               offdef_vocab_size : int,
               type_vocab_size : int,
               playtype_vocab_size : int,
               embedding_dim : int,
               hidden_dim : int):
        super(NFLgpt, self).__init__()

        self.Embedding = Embedding(input_vocab_size = input_vocab_size,
                                   positional_vocab_size = positional_vocab_size,
                                   side_vocab_size = side_vocab_size,
                                   type_vocab_size = type_vocab_size,
                                   offdef_vocab_size = offdef_vocab_size,
                                   playtype_vocab_size = playtype_vocab_size,
                                   embedding_dim = embedding_dim)

        self.Attention1 = AttentionBlock(num_heads = 3,
                                         hidden_dim = hidden_dim,
                                         output_dim = embedding_dim)

        self.Attention2 = AttentionBlock(num_heads = 3,
                                         hidden_dim = hidden_dim,
                                         output_dim = embedding_dim)

        self.Attention3 = AttentionBlock(num_heads = 3,
                                         hidden_dim = hidden_dim,
                                         output_dim = embedding_dim)


        self.DenseHead = tf.keras.layers.Dense(512, activation = "gelu")

        self.NPPHead = tf.keras.layers.Dense(to_pred_size, activation = "softmax")

  def call(self,
           x):

    embed = self.Embedding(x)
    h1 = self.Attention1(embed, x["pos_ids"], x["attention_mask"])
    h2 = self.Attention2(h1, x["pos_ids"], x["attention_mask"])
    h3 = self.Attention3(h2, x["pos_ids"], x["attention_mask"])

    logits = self.DenseHead(h3)
    pred = self.NPPHead(logits)

    return pred

In [5]:
moves_id = 11164
starts_id = 1983
scrimmage_id = 99
positions_id = 28
temp_ids = 50

model = NFLgpt(input_vocab_size = 13276,
               to_pred_size = moves_id+1,
               positional_vocab_size = temp_ids+2,
               offdef_vocab_size = 2,
               side_vocab_size = 2,
               type_vocab_size = 4,
               playtype_vocab_size = 9,
               embedding_dim = 256,
               hidden_dim = 256)

model.load_weights("models/NFLGPT")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x78fa80e9b2e0>

In [6]:
testing_data = tf.data.Dataset.load("test_tokens_NFL_GPT")
test_length = [i for i,_ in enumerate(testing_data)][-1] + 1

batch_size = 32

testing_data = testing_data.shuffle(test_length).batch(batch_size)

In [8]:
class_weights = pd.read_parquet("class_weights.parquet")
step_range = [(0, 10), (10, 100), (100, 1000), (1000, 10000), (10000, 50000), (50000, 100000), (100000, 300000), (300000, 500000), (500000, 1000000), (1000000, 10000000)]

def insert_weights(df, w):
  df["weights"] = [w for i in range(df.shape[0])]
  return df


from collections import Counter
weights = dict(Counter(class_weights["Zone_ID"].to_numpy()))
weights_df = pd.DataFrame(np.array([[k, v] for k,v in weights.items()]), columns = ["Class", "Count"])

weights_dict = {i : weights_df[(weights_df['Count'] > step_range[i][0]) & (weights_df['Count'] <= step_range[i][1])].reset_index(drop = True) for i in range(len(step_range))}
w_dict = {0 : 1,
          1 : 0.9,
          2 : 0.8,
          3 : 0.7,
          4 : 0.6,
          5 : 0.5,
          6 : 0.4,
          7 : 0.3,
          8 : 0.2,
          9 : 0.05,}

weights_dict = {k:insert_weights(v, w_dict[k]) for k,v in weights_dict.items()}

weights_df = pd.concat(list(weights_dict.values())).reset_index(drop = True)

weights_inv = {v[0] : v[2] for v in weights_df.values}

In [9]:
loss = CustomSparseCategoricalCrossentropy(from_logits=False, class_weights=weights_inv)
acc = CustomSparseCategoricalAccuracy()

In [10]:
i = 0

preds = []
trues = []

for x, y in testing_data:
  if i < 5:
    print(type(x))
    print(type(y))
    print(y.shape)
    print(" ")

    batch_predictions = model.predict(x)  # Set training=False for inference
    preds.append(batch_predictions)
    trues.append(y)
    print(i)
    print(loss(y, batch_predictions))
    acc.update_state(y, batch_predictions)
    print(acc.result())
    print(" ")
    print(" ")

    i+=1

<class 'dict'>
<class 'tensorflow.python.framework.ops.EagerTensor'>
(32, 256)
 
0
tf.Tensor(1.2605363, shape=(), dtype=float32)
tf.Tensor(0.5343066, shape=(), dtype=float32)
 
 
<class 'dict'>
<class 'tensorflow.python.framework.ops.EagerTensor'>
(32, 256)
 
1
tf.Tensor(1.2072968, shape=(), dtype=float32)
tf.Tensor(0.54498875, shape=(), dtype=float32)
 
 
<class 'dict'>
<class 'tensorflow.python.framework.ops.EagerTensor'>
(32, 256)
 
2
tf.Tensor(1.4188111, shape=(), dtype=float32)
tf.Tensor(0.51909417, shape=(), dtype=float32)
 
 
<class 'dict'>
<class 'tensorflow.python.framework.ops.EagerTensor'>
(32, 256)
 
3
tf.Tensor(1.1242297, shape=(), dtype=float32)
tf.Tensor(0.53489095, shape=(), dtype=float32)
 
 
<class 'dict'>
<class 'tensorflow.python.framework.ops.EagerTensor'>
(32, 256)
 
4
tf.Tensor(1.1538084, shape=(), dtype=float32)
tf.Tensor(0.5479027, shape=(), dtype=float32)
 
 
