In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf


from google.colab import drive
# Mount Google Drive at /content/gdrive (Colab-only helper imported above).
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
# Make the project data folder on the mounted Drive the working directory;
# the relative paths used by later cells are resolved against it.
# NOTE(review): absolute, user-specific path - consider a configurable DATA_DIR.
os.chdir("/content/gdrive/MyDrive/NFL_Challenge/NFL-GPT/NFL data")
# Display the folder contents as the cell output.
os.listdir()

['.DS_Store',
 'Contact Detection',
 'Punt Prediction',
 'Analytics',
 'Impact Detection',
 'data bowl 2021',
 'data bowl 2023',
 'data bowl 2022',
 'data bowl 2020',
 'asonty',
 'Highlights_NGS_2019',
 'Highlights_NGS_Prime',
 'final_df.parquet',
 'tokens.json',
 'mapped_df.parquet',
 'train_test_split.csv',
 'class_weights.parquet',
 'train_play_prediction_categ',
 'test_play_prediction_categ',
 'train_play_prediction_binary',
 'test_play_prediction_binary',
 'models',
 'index',
 'training_history_model_large.csv',
 'training_history_large_model.csv',
 'test_tokens_NFL_GPT',
 'train_tokens_NFL_GPT',
 'training_history_model_small.csv',
 'training_history_model_medium.csv']

In [3]:
# Load the train and test token datasets from their saved directories
# (paths are relative to the working directory set above).
training_data = tf.data.Dataset.load("train_tokens_NFL_GPT")
testing_data = tf.data.Dataset.load("test_tokens_NFL_GPT")

In [4]:
# Count the elements by streaming through each dataset once. Unlike building a
# list of every index and reading its last entry, this keeps memory constant
# and yields 0 (instead of raising IndexError) for an empty dataset.
train_length = sum(1 for _ in training_data)
test_length = sum(1 for _ in testing_data)

In [5]:
# Report how many elements each split contains (print stringifies the counts).
print("Train length is : ", train_length)
print("Test length is : ", test_length)

Train length is :  205851
Test length is :  51463


In [6]:
# Number of sequences per training/validation batch.
batch_size = 32

# Shuffle with a buffer as large as the whole split, then batch.
# NOTE(review): both names are rebound to their batched versions, so re-running
# this cell batches already-batched data; re-run the load cell first.
# NOTE(review): no random seed is set, so the shuffle order differs between runs.
training_data = training_data.shuffle(train_length).batch(batch_size)
testing_data = testing_data.shuffle(test_length).batch(batch_size)

## Model classes

In [7]:
class PlayTypeEncoder(tf.keras.Model):
    """Embedding lookup for the `"PlayType"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["PlayType"]`."""
        return self.Embedding(x["PlayType"])

class PositionEncoder(tf.keras.Model):
    """Embedding lookup for the `"position_ids"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["position_ids"]`."""
        return self.Embedding(x["position_ids"])

class ScrimmageEncoder(tf.keras.Model):
    """Embedding lookup for the `"scrim_ids"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["scrim_ids"]`."""
        return self.Embedding(x["scrim_ids"])

class StartEncoder(tf.keras.Model):
    """Embedding lookup for the `"start_ids"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["start_ids"]`."""
        return self.Embedding(x["start_ids"])

class OffDefEncoder(tf.keras.Model):
    """Embedding lookup for the `"OffDef"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["OffDef"]`."""
        return self.Embedding(x["OffDef"])

class TypeEncoder(tf.keras.Model):
    """Embedding lookup for the `"token_type_ids"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["token_type_ids"]`."""
        return self.Embedding(x["token_type_ids"])

class PositionalEncoder(tf.keras.Model):
    """Embedding lookup for the `"pos_ids"` entry of the feature mapping.

    `"pos_ids"` is the same entry the encoders pass to the attention blocks as
    temporal ids; it is distinct from `"position_ids"`.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["pos_ids"]`."""
        return self.Embedding(x["pos_ids"])

class InputEncoder(tf.keras.Model):
    """Embedding lookup for the `"input_ids"` entry of the feature mapping.

    Args:
        vocab_size (`int`): Number of distinct ids the embedding table holds.
        embedding_dim (`int`): Size of each embedding vector.
    """

    def __init__(self, vocab_size: int, embedding_dim: int):
        super().__init__()
        self.Embedding = tf.keras.layers.Embedding(input_dim=vocab_size,
                                                   output_dim=embedding_dim)

    def call(self, x):
        """Return the embeddings of `x["input_ids"]`."""
        return self.Embedding(x["input_ids"])

class Embedding(tf.keras.Model):
    """Combine the eight per-feature embeddings into one representation.

    Each feature of the input mapping is embedded by its own encoder, the eight
    embeddings are summed element-wise and the sum is passed through a linear
    dense layer with `embedding_dim` units.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width shared by every embedding and the projection.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int):
        super().__init__()

        # One encoder per input feature, all sharing the same output width.
        self.InputEmbedding = InputEncoder(vocab_size=input_vocab_size, embedding_dim=embedding_dim)
        self.PositionalEmbedding = PositionalEncoder(vocab_size=positional_vocab_size, embedding_dim=embedding_dim)
        self.PositionEmbedding = PositionEncoder(vocab_size=position_vocab_size, embedding_dim=embedding_dim)
        self.ScrimEmbedding = ScrimmageEncoder(vocab_size=scrimmage_vocab_size, embedding_dim=embedding_dim)
        self.StartEmbedding = StartEncoder(vocab_size=start_vocab_size, embedding_dim=embedding_dim)
        self.OffDefEmbedding = OffDefEncoder(vocab_size=offdef_vocab_size, embedding_dim=embedding_dim)
        self.TypeEmbedding = TypeEncoder(vocab_size=type_vocab_size, embedding_dim=embedding_dim)
        self.PlayTypeEmbedding = PlayTypeEncoder(vocab_size=playtype_vocab_size, embedding_dim=embedding_dim)

        self.Add = tf.keras.layers.Add()
        self.Dense = tf.keras.layers.Dense(embedding_dim)

    def call(self, x):
        """Embed every feature of `x`, sum the embeddings and project the sum."""
        # The order below is both the call order and the order of the summed list.
        feature_encoders = (self.InputEmbedding,
                            self.PositionalEmbedding,
                            self.PositionEmbedding,
                            self.ScrimEmbedding,
                            self.StartEmbedding,
                            self.TypeEmbedding,
                            self.OffDefEmbedding,
                            self.PlayTypeEmbedding)
        per_feature = [encoder(x) for encoder in feature_encoders]

        summed = self.Add(per_feature)
        return self.Dense(summed)

In [8]:
from typing import List, Optional, Union

def shape_list(tensor: Union[tf.Tensor, np.ndarray]) -> List[int]:
    """
    Return the shape of `tensor`, preferring statically known dimensions.

    Args:
        tensor (`tf.Tensor` or `np.ndarray`): The tensor we want the shape of.

    Returns:
        `List[int]`: One entry per axis. For a NumPy array this is its plain shape.
        For a TensorFlow tensor each entry is the static size when it is known and
        the matching element of `tf.shape(tensor)` otherwise. When even the rank is
        unknown, `tf.shape(tensor)` itself is returned.
    """
    if isinstance(tensor, np.ndarray):
        return list(tensor.shape)

    runtime_shape = tf.shape(tensor)

    # Unknown rank: there is no static information to merge in.
    if tensor.shape == tf.TensorShape(None):
        return runtime_shape

    merged = []
    for axis, known_size in enumerate(tensor.shape.as_list()):
        merged.append(runtime_shape[axis] if known_size is None else known_size)
    return merged

In [9]:
class AttentionBlock(tf.keras.Model):
    """Masked multi-head self-attention with residual add, dropout and batch norm.

    Args:
        num_heads (`int`): Number of attention heads.
        hidden_dim (`int`): Size of each head; the query/key/value projections
            have `num_heads * hidden_dim` units.
        output_dim (`int`): Units of the ReLU output projection. Its result is
            summed with the block input, so it is expected to match the input width.
    """

    def __init__(self,
                 num_heads : int,
                 hidden_dim : int,
                 output_dim : int):
        super(AttentionBlock, self).__init__()

        self.num_attention_heads = num_heads
        self.attention_head_size = hidden_dim
        self.total_dim = num_heads * hidden_dim
        self.output_dim = output_dim

        self.Query = tf.keras.layers.Dense(self.total_dim, name = "Query")
        self.Key = tf.keras.layers.Dense(self.total_dim, name = "Key")
        self.Value = tf.keras.layers.Dense(self.total_dim, name = "Value")

        self.Dense = tf.keras.layers.Dense(output_dim, name = "Dense", activation = "relu")
        self.Add = tf.keras.layers.Add(name = "Add")
        self.Drop = tf.keras.layers.Dropout(rate = 0.1)
        self.Norm = tf.keras.layers.BatchNormalization(name = "Norm")

    def transpose_for_scores(self, tensor: tf.Tensor, batch_size: int) -> tf.Tensor:
        """Split the last axis into heads and move the head axis before the sequence axis."""
        # Reshape from [batch_size, seq_length, all_head_size] to [batch_size, seq_length, num_attention_heads, attention_head_size]
        tensor = tf.reshape(tensor=tensor, shape=(batch_size, -1, self.num_attention_heads, self.attention_head_size))

        # Transpose the tensor from [batch_size, seq_length, num_attention_heads, attention_head_size] to [batch_size, num_attention_heads, seq_length, attention_head_size]
        return tf.transpose(tensor, perm=[0, 2, 1, 3])

    def create_causal_masks(self, temp_ids):
        """Build the additive causal mask from temporal ids.

        Entry `[b, h, i, j]` is 0 when `temp_ids[b, i] >= temp_ids[b, j]` (the key
        is not later than the query) and -10000 otherwise. The mask is repeated
        once per attention head.
        """
        # Use broadcasting to create the 2D comparison tensor
        allowed = temp_ids[:, :, tf.newaxis] >= temp_ids[:, tf.newaxis, :]
        causal_mask = (tf.cast(allowed, dtype=tf.float32) - 1) * 10000
        per_head = tf.expand_dims(causal_mask, axis=1)
        # Tile by the configured head count (was hard-coded to 3).
        return tf.tile(per_head, multiples=[1, self.num_attention_heads, 1, 1])

    def create_attention_mask(self, attn_mask):
        """Build the additive key mask: 0 where `attn_mask` is 1, -10000 where it is 0.

        Two singleton axes are inserted after the batch axis and the result is
        repeated once per attention head.
        """
        # assumes attn_mask is (batch, seq) with 1 = attend, 0 = ignore - TODO confirm
        attn_mask = (tf.cast(attn_mask, dtype=tf.float32) - 1) * 10000
        expanded = tf.expand_dims(attn_mask, axis=1)
        expanded = tf.expand_dims(expanded, axis=1)
        # Tile by the configured head count (was hard-coded to 3).
        return tf.tile(expanded, multiples=[1, self.num_attention_heads, 1, 1])

    def compute_scaled_attn_scores(self, query, key):
        """Return `query @ key^T` divided by the square root of the last query dimension."""
        attention_scores = tf.matmul(query, key, transpose_b=True)  # Transpose the second sequence

        # Scaled dot-product attention: divide by sqrt of the per-head size.
        head_dim = query.shape[-1]
        return attention_scores / tf.math.sqrt(tf.cast(head_dim, dtype=tf.float32))

    def compute_attention_weigths(self, query, key, temp_ids, masks):
        """Return the softmax attention weights after applying both masks."""
        attn_masks = self.create_attention_mask(masks)
        causal_masks = self.create_causal_masks(temp_ids)
        scaled_attn_scores = self.compute_scaled_attn_scores(query, key)

        # Both masks hold 0 for allowed and -10000 for blocked positions, so they
        # must be ADDED. Subtracting them (the previous behaviour) raised blocked
        # positions by +10000 and made the softmax attend almost only to them.
        # NOTE(review): weights trained with the old sign were fitted to the
        # inverted mask and should be retrained.
        attn_scores = scaled_attn_scores + attn_masks + causal_masks
        return tf.nn.softmax(attn_scores, axis = -1)

    def _attend(self, hidden_states, temporal_ids, attention_masks):
        """Shared forward pass; returns `(block_output, attention_weights)`."""
        batch_size = shape_list(hidden_states)[0]

        queries = self.transpose_for_scores(self.Query(hidden_states), batch_size)
        keys = self.transpose_for_scores(self.Key(hidden_states), batch_size)
        values = self.transpose_for_scores(self.Value(hidden_states), batch_size)

        attention_weights = self.compute_attention_weigths(queries, keys, temporal_ids, attention_masks)

        # Weighted sum of values, then merge the heads back into one axis.
        context = tf.matmul(attention_weights, values)
        context = tf.transpose(context, perm=[0, 2, 1, 3])
        context = tf.reshape(tensor=context, shape=(batch_size, -1, self.total_dim))

        projected = self.Dense(context)

        output = self.Add([projected, hidden_states])
        output = self.Drop(output)
        output = self.Norm(output)
        return output, attention_weights

    def get_preds_and_attention(self,
                                embeddings,
                                temporal_ids,
                                attention_masks):
        """Run the block and also return the per-head attention weights.

        Uses the same multi-head computation as `call`. The previous version
        skipped the head split, so its 3-D scores could not be combined with
        the 4-D per-head masks.

        Returns:
            A tuple `(output, attention_weights)`.
        """
        return self._attend(embeddings, temporal_ids, attention_masks)

    def call(self,
             hidden_states : tf.Tensor,
             temporal_ids,
             attention_masks):
        """Apply masked multi-head self-attention to `hidden_states`.

        Args:
            hidden_states (`tf.Tensor`): Block input; also used as the residual.
            temporal_ids: Ids compared pairwise to build the causal mask.
            attention_masks: Mask turned into an additive key mask.

        Returns:
            `tf.Tensor`: The normalised block output.
        """
        output, _ = self._attend(hidden_states, temporal_ids, attention_masks)
        return output


In [23]:
class Encoder(tf.keras.Model):
    """Feature embedding, one 3-head attention block and a ReLU dense head.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the embeddings, attention output and head.
        hidden_dim (`int`): Per-head size inside the attention block.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int):
        super().__init__()

        embedding_kwargs = dict(input_vocab_size=input_vocab_size,
                                positional_vocab_size=positional_vocab_size,
                                position_vocab_size=position_vocab_size,
                                scrimmage_vocab_size=scrimmage_vocab_size,
                                start_vocab_size=start_vocab_size,
                                type_vocab_size=type_vocab_size,
                                offdef_vocab_size=offdef_vocab_size,
                                playtype_vocab_size=playtype_vocab_size,
                                embedding_dim=embedding_dim)
        self.Embedding = Embedding(**embedding_kwargs)

        self.Attention1 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)

        self.DenseHead = tf.keras.layers.Dense(embedding_dim, activation="relu")

    def call(self, x):
        """Encode the feature mapping `x`.

        `x["pos_ids"]` supplies the temporal ids and `x["attention_mask"]` the
        attention mask of the attention block.
        """
        hidden = self.Embedding(x)
        hidden = self.Attention1(hidden, x["pos_ids"], x["attention_mask"])
        return self.DenseHead(hidden)

class EncoderL(tf.keras.Model):
    """Feature embedding, two stacked 3-head attention blocks and a ReLU dense head.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the embeddings, attention outputs and head.
        hidden_dim (`int`): Per-head size inside each attention block.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int):
        super().__init__()

        embedding_kwargs = dict(input_vocab_size=input_vocab_size,
                                positional_vocab_size=positional_vocab_size,
                                position_vocab_size=position_vocab_size,
                                scrimmage_vocab_size=scrimmage_vocab_size,
                                start_vocab_size=start_vocab_size,
                                type_vocab_size=type_vocab_size,
                                offdef_vocab_size=offdef_vocab_size,
                                playtype_vocab_size=playtype_vocab_size,
                                embedding_dim=embedding_dim)
        self.Embedding = Embedding(**embedding_kwargs)

        self.Attention1 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)
        self.Attention2 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)

        self.DenseHead = tf.keras.layers.Dense(embedding_dim, activation="relu")

    def call(self, x):
        """Encode the feature mapping `x` through both attention blocks in order.

        `x["pos_ids"]` supplies the temporal ids and `x["attention_mask"]` the
        attention mask of every block.
        """
        hidden = self.Embedding(x)
        for block in (self.Attention1, self.Attention2):
            hidden = block(hidden, x["pos_ids"], x["attention_mask"])
        return self.DenseHead(hidden)

class EncoderXL(tf.keras.Model):
    """Feature embedding, three stacked 3-head attention blocks and a ReLU dense head.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the embeddings, attention outputs and head.
        hidden_dim (`int`): Per-head size inside each attention block.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int):
        super().__init__()

        embedding_kwargs = dict(input_vocab_size=input_vocab_size,
                                positional_vocab_size=positional_vocab_size,
                                position_vocab_size=position_vocab_size,
                                scrimmage_vocab_size=scrimmage_vocab_size,
                                start_vocab_size=start_vocab_size,
                                type_vocab_size=type_vocab_size,
                                offdef_vocab_size=offdef_vocab_size,
                                playtype_vocab_size=playtype_vocab_size,
                                embedding_dim=embedding_dim)
        self.Embedding = Embedding(**embedding_kwargs)

        self.Attention1 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)
        self.Attention2 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)
        self.Attention3 = AttentionBlock(num_heads=3, hidden_dim=hidden_dim, output_dim=embedding_dim)

        self.DenseHead = tf.keras.layers.Dense(embedding_dim, activation="relu")

    def call(self, x):
        """Encode the feature mapping `x` through the three attention blocks in order.

        `x["pos_ids"]` supplies the temporal ids and `x["attention_mask"]` the
        attention mask of every block.
        """
        hidden = self.Embedding(x)
        for block in (self.Attention1, self.Attention2, self.Attention3):
            hidden = block(hidden, x["pos_ids"], x["attention_mask"])
        return self.DenseHead(hidden)

In [24]:
class QBGPT(tf.keras.Model):
    """Single-attention-block model: `Encoder` followed by a linear logits layer.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the encoder representation.
        hidden_dim (`int`): Per-head size inside the attention block.
        to_pred_size (`int`): Number of output logits.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int,
                 to_pred_size: int):
        super().__init__()

        encoder_kwargs = dict(input_vocab_size=input_vocab_size,
                              positional_vocab_size=positional_vocab_size,
                              position_vocab_size=position_vocab_size,
                              scrimmage_vocab_size=scrimmage_vocab_size,
                              start_vocab_size=start_vocab_size,
                              type_vocab_size=type_vocab_size,
                              offdef_vocab_size=offdef_vocab_size,
                              playtype_vocab_size=playtype_vocab_size,
                              embedding_dim=embedding_dim,
                              hidden_dim=hidden_dim)
        self.Encoder = Encoder(**encoder_kwargs)

        # No activation: the layer emits raw logits.
        self.Logits = tf.keras.layers.Dense(to_pred_size)

    def call(self, x):
        """Return the logits for the feature mapping `x`."""
        return self.Logits(self.Encoder(x))

class LargeQBGPT(tf.keras.Model):
    """Two-attention-block model: `EncoderL` followed by a linear logits layer.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the encoder representation.
        hidden_dim (`int`): Per-head size inside each attention block.
        to_pred_size (`int`): Number of output logits.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int,
                 to_pred_size: int):
        super().__init__()

        encoder_kwargs = dict(input_vocab_size=input_vocab_size,
                              positional_vocab_size=positional_vocab_size,
                              position_vocab_size=position_vocab_size,
                              scrimmage_vocab_size=scrimmage_vocab_size,
                              start_vocab_size=start_vocab_size,
                              type_vocab_size=type_vocab_size,
                              offdef_vocab_size=offdef_vocab_size,
                              playtype_vocab_size=playtype_vocab_size,
                              embedding_dim=embedding_dim,
                              hidden_dim=hidden_dim)
        self.Encoder = EncoderL(**encoder_kwargs)

        # No activation: the layer emits raw logits.
        self.Logits = tf.keras.layers.Dense(to_pred_size)

    def call(self, x):
        """Return the logits for the feature mapping `x`."""
        return self.Logits(self.Encoder(x))

class XLargeQBGPT(tf.keras.Model):
    """Three-attention-block model: `EncoderXL` followed by a linear logits layer.

    Args:
        input_vocab_size (`int`): Vocabulary size for `"input_ids"`.
        positional_vocab_size (`int`): Vocabulary size for `"pos_ids"`.
        position_vocab_size (`int`): Vocabulary size for `"position_ids"`.
        scrimmage_vocab_size (`int`): Vocabulary size for `"scrim_ids"`.
        start_vocab_size (`int`): Vocabulary size for `"start_ids"`.
        offdef_vocab_size (`int`): Vocabulary size for `"OffDef"`.
        type_vocab_size (`int`): Vocabulary size for `"token_type_ids"`.
        playtype_vocab_size (`int`): Vocabulary size for `"PlayType"`.
        embedding_dim (`int`): Width of the encoder representation.
        hidden_dim (`int`): Per-head size inside each attention block.
        to_pred_size (`int`): Number of output logits.
    """

    def __init__(self,
                 input_vocab_size: int,
                 positional_vocab_size: int,
                 position_vocab_size: int,
                 scrimmage_vocab_size: int,
                 start_vocab_size: int,
                 offdef_vocab_size: int,
                 type_vocab_size: int,
                 playtype_vocab_size: int,
                 embedding_dim: int,
                 hidden_dim: int,
                 to_pred_size: int):
        super().__init__()

        encoder_kwargs = dict(input_vocab_size=input_vocab_size,
                              positional_vocab_size=positional_vocab_size,
                              position_vocab_size=position_vocab_size,
                              scrimmage_vocab_size=scrimmage_vocab_size,
                              start_vocab_size=start_vocab_size,
                              type_vocab_size=type_vocab_size,
                              offdef_vocab_size=offdef_vocab_size,
                              playtype_vocab_size=playtype_vocab_size,
                              embedding_dim=embedding_dim,
                              hidden_dim=hidden_dim)
        self.Encoder = EncoderXL(**encoder_kwargs)

        # No activation: the layer emits raw logits.
        self.Logits = tf.keras.layers.Dense(to_pred_size)

    def call(self, x):
        """Return the logits for the feature mapping `x`."""
        return self.Logits(self.Encoder(x))

In [25]:
class CustomSparseCategoricalCrossentropy(tf.keras.losses.Loss):
    """Sparse categorical cross-entropy that skips labels equal to -100.

    Args:
        from_logits (`bool`): Whether `y_pred` holds raw logits (True) or
            probabilities (False).
        class_weights (`dict`, optional): Mapping `class id -> weight`. Each
            valid position's loss is multiplied by the weight of its true class.
            Classes missing from the mapping get weight 1.0.
    """

    def __init__(self, from_logits=False, class_weights=None):
        super(CustomSparseCategoricalCrossentropy, self).__init__()
        self.from_logits = from_logits
        self.class_weights = class_weights
        # Dense lookup table indexed by class id, built once.
        self._weight_table = self._build_weight_table(class_weights)

    @staticmethod
    def _build_weight_table(class_weights):
        """Turn `{class id: weight}` into a 1-D float32 tensor indexed by class id."""
        if not class_weights:
            return None

        # Index by the KEY of each entry. The previous code gathered from
        # `list(class_weights.values())`, i.e. by insertion position, which
        # only matches the class id when the dict happens to be ordered 0..N-1.
        class_ids = np.asarray([int(key) for key in class_weights], dtype=np.int64)
        table = np.ones(int(class_ids.max()) + 1, dtype=np.float32)
        table[class_ids] = np.asarray(list(class_weights.values()), dtype=np.float32)
        return tf.constant(table)

    def call(self, y_true, y_pred):
        """Compute the loss over the positions whose label is not -100.

        Returns:
            With class weights: the scalar mean of the weighted per-position
            losses (0 when no position is valid). Without class weights: the
            unreduced per-position losses.
        """
        mask = tf.not_equal(y_true, -100)  # Create a mask for valid labels

        valid_labels = tf.boolean_mask(y_true, mask)
        valid_preds = tf.boolean_mask(y_pred, mask)

        # Let Keras handle logits directly instead of applying softmax first;
        # this is mathematically equivalent and numerically more stable.
        per_position_loss = tf.keras.losses.sparse_categorical_crossentropy(
            valid_labels, valid_preds, from_logits=bool(self.from_logits))

        if self._weight_table is None:
            return per_position_loss

        label_ids = tf.cast(valid_labels, tf.int32)
        table_size = tf.shape(self._weight_table)[0]
        # Labels beyond the table (classes unseen when the weights were built)
        # fall back to weight 1.0 instead of an out-of-range gather.
        weights = tf.where(label_ids < table_size,
                           tf.gather(self._weight_table, tf.minimum(label_ids, table_size - 1)),
                           tf.ones_like(label_ids, dtype=tf.float32))
        weighted_loss = per_position_loss * tf.cast(weights, per_position_loss.dtype)

        # Mean that yields 0 instead of NaN when every label in the batch is masked.
        return tf.math.divide_no_nan(tf.reduce_sum(weighted_loss),
                                     tf.cast(tf.size(weighted_loss), weighted_loss.dtype))

class CustomSparseCategoricalAccuracy(tf.keras.metrics.Metric):
    """Top-1 accuracy over the positions whose label is not -100.

    The metric accumulates the number of correct predictions (`total`) and the
    number of valid labels (`count`) across batches, so every valid position
    carries the same weight. The previous version averaged per-batch accuracies,
    which over-weighted batches with few valid labels and turned into NaN as
    soon as one batch had none.
    """

    def __init__(self, name='custom_sparse_categorical_accuracy', **kwargs):
        super(CustomSparseCategoricalAccuracy, self).__init__(name=name, **kwargs)
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate hits for one batch; `sample_weight` is accepted but ignored."""
        mask = tf.not_equal(y_true, -100)  # Create a mask for valid labels
        # argmax returns int64; cast the labels so the comparison dtypes agree.
        valid_labels = tf.cast(tf.boolean_mask(y_true, mask), tf.int64)

        # softmax is monotonic, so argmax on the raw scores gives the same class.
        predicted = tf.argmax(y_pred, axis=-1)
        valid_preds = tf.boolean_mask(predicted, mask)

        hits = tf.cast(tf.equal(valid_labels, valid_preds), dtype=tf.float32)

        self.total.assign_add(tf.reduce_sum(hits))
        self.count.assign_add(tf.cast(tf.size(hits), dtype=tf.float32))

    def result(self):
        """Return correct / valid, or 0 when no valid label has been seen."""
        return tf.math.divide_no_nan(self.total, self.count)

    def reset_state(self):
        """Clear both accumulators."""
        self.total.assign(0.0)
        self.count.assign(0.0)

class CustomTopKAccuracy(tf.keras.metrics.Metric):
    """Top-k accuracy over the positions whose label is not -100.

    A position counts as correct when its label is among the `k` highest-scoring
    classes. The metric accumulates the number of correct positions (`total`)
    and the number of valid labels (`count`) across batches. The previous
    version averaged per-batch accuracies, which over-weighted batches with few
    valid labels and turned into NaN as soon as one batch had none.

    Args:
        k (`int`): Number of top classes considered.
    """

    def __init__(self, k=3, name='custom_top_k_accuracy', **kwargs):
        super(CustomTopKAccuracy, self).__init__(name=name, **kwargs)
        self.k = k
        self.total = self.add_weight(name='total', initializer='zeros')
        self.count = self.add_weight(name='count', initializer='zeros')

    def update_state(self, y_true, y_pred, sample_weight=None):
        """Accumulate hits for one batch; `sample_weight` is accepted but ignored."""
        mask = tf.not_equal(y_true, -100)  # Create a mask for valid labels
        valid_labels = tf.cast(tf.boolean_mask(y_true, mask), dtype=tf.int32)

        # softmax preserves the ranking, so top_k can run on the raw scores.
        _, top_k_indices = tf.nn.top_k(y_pred, k=self.k)
        valid_preds = tf.cast(tf.boolean_mask(top_k_indices, mask), dtype=tf.int32)

        # Broadcast each label against its k candidates; top-k indices are
        # distinct, so at most one candidate per position can match.
        in_top_k = tf.reduce_any(tf.equal(valid_labels[:, tf.newaxis], valid_preds), axis=-1)
        hits = tf.cast(in_top_k, dtype=tf.float32)

        self.total.assign_add(tf.reduce_sum(hits))
        self.count.assign_add(tf.cast(tf.size(hits), dtype=tf.float32))

    def result(self):
        """Return correct / valid, or 0 when no valid label has been seen."""
        return tf.math.divide_no_nan(self.total, self.count)

    def reset_state(self):
        """Clear both accumulators."""
        self.total.assign(0.0)
        self.count.assign(0.0)

In [26]:
# Table whose "Zone_ID" column is counted in a later cell to derive per-class loss weights.
class_weights = pd.read_parquet("class_weights.parquet")

In [27]:
# (lower, upper) occurrence-count buckets; a class belongs to bucket i when
# lower < count <= upper. The bucket index selects the loss weight further down.
step_range = [(0, 10), (10, 100), (100, 1000), (1000, 10000), (10000, 50000), (50000, 100000), (100000, 300000), (300000, 500000), (500000, 1000000), (1000000, 10000000)]

In [28]:
def insert_weights(df, w):
    """Add a `"weights"` column holding `w` in every row and return the frame.

    The frame is modified in place; the returned object is the same `df`.
    """
    n_rows = len(df)
    df["weights"] = [w] * n_rows
    return df

In [29]:
from collections import Counter
# Count how often each Zone_ID value occurs.
weights = dict(Counter(class_weights["Zone_ID"].to_numpy()))
# One row per class: [class id, occurrence count].
weights_df = pd.DataFrame(np.array([[k, v] for k,v in weights.items()]), columns = ["Class", "Count"])

# Split the classes into one frame per count bucket (lower < Count <= upper).
weights_dict = {i : weights_df[(weights_df['Count'] > step_range[i][0]) & (weights_df['Count'] <= step_range[i][1])].reset_index(drop = True) for i in range(len(step_range))}
# Loss weight per bucket index: the more frequent the class, the smaller the weight.
w_dict = {0 : 1,
          1 : 0.9,
          2 : 0.8,
          3 : 0.7,
          4 : 0.6,
          5 : 0.5,
          6 : 0.4,
          7 : 0.3,
          8 : 0.2,
          9 : 0.05,}

# Attach the bucket weight to every row of each bucket frame.
weights_dict = {k:insert_weights(v, w_dict[k]) for k,v in weights_dict.items()}

# Stack the buckets back into a single frame; rows are ordered by bucket, not by class id.
# NOTE(review): `weights_df` is rebound here, so re-running only this line's cell
# relies on the rebuilt frame above.
weights_df = pd.concat(list(weights_dict.values())).reset_index(drop = True)

In [30]:
# Map column 0 (the class id) to column 2 (its loss weight) of every row.
weights_inv = {row[0]: row[2] for row in weights_df.to_numpy()}

In [31]:
def scheduler(epoch, lr):
    """Piecewise-constant learning-rate schedule.

    Args:
        epoch: Zero-based epoch index.
        lr: Current learning rate; ignored, the schedule depends on `epoch` only.

    Returns:
        The learning rate to use for `epoch`.
    """
    # (exclusive upper epoch bound, learning rate), checked in ascending order.
    stages = ((1, 3e-3),
              (2, 2e-3),
              (3, 1e-3),
              (5, 5e-4),
              (7, 1e-4))
    for upper_bound, rate in stages:
        if epoch < upper_bound:
            return rate
    # Epoch 7 and later.
    return 5e-5


# Keras callback that sets the learning rate returned by `scheduler` at each epoch.
schedule = tf.keras.callbacks.LearningRateScheduler(scheduler)

In [32]:
# Number of output classes and vocabulary sizes of the input features.
moves_to_pred = 11164
input_size = 11166
starts_size = 1985
scrimmage_size = 100
positions_id = 29
temp_ids = 52

# Arguments shared by every model variant; only the class and the width differ.
_shared_model_kwargs = dict(input_vocab_size=input_size,
                            positional_vocab_size=temp_ids,
                            position_vocab_size=positions_id,
                            start_vocab_size=starts_size,
                            scrimmage_vocab_size=scrimmage_size,
                            offdef_vocab_size=2,
                            type_vocab_size=2,
                            playtype_vocab_size=9,
                            to_pred_size=moves_to_pred)

# Single-attention-block models at three widths. Mind the naming:
# `model_large` is the WIDE single-block QBGPT, whereas `large_model` below is
# the two-block LargeQBGPT.
model_large = QBGPT(embedding_dim=512, hidden_dim=512, **_shared_model_kwargs)

model_medium = QBGPT(embedding_dim=256, hidden_dim=256, **_shared_model_kwargs)

model_small = QBGPT(embedding_dim=128, hidden_dim=128, **_shared_model_kwargs)

# Deeper variants at width 256: two and three attention blocks respectively.
large_model = LargeQBGPT(embedding_dim=256, hidden_dim=256, **_shared_model_kwargs)

xlarge_model = XLargeQBGPT(embedding_dim=256, hidden_dim=256, **_shared_model_kwargs)

In [None]:
# Masked cross-entropy on raw logits, weighted per class with `weights_inv`.
custom_loss = CustomSparseCategoricalCrossentropy(from_logits=True, class_weights=weights_inv)

# Track masked top-1, top-3 and top-5 accuracy. Adam is created with its default
# learning rate; the `schedule` callback passed to fit() sets the rate per epoch.
model_small.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss=custom_loss,
                    metrics=[CustomSparseCategoricalAccuracy(),
                             CustomTopKAccuracy(k=3, name='custom_top_3_accuracy'),
                             CustomTopKAccuracy(k=5, name='custom_top_5_accuracy')])

# Train for 9 epochs, evaluating on the test split after each one.
history_small = model_small.fit(training_data, validation_data = testing_data, epochs=9, callbacks = [schedule])

# Persist the per-epoch metrics as a ';'-separated CSV in the working directory.
pd.DataFrame(history_small.history).to_csv("training_history_model_small.csv", index = False, sep = ";")

# Save the trained weights under models/model_small with the prefix "QBGPT".
model_small.save_weights("models/model_small/QBGPT")

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [None]:
# Print the layer and parameter-count summary of model_small.
model_small.summary()

Model: "qbgpt_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_2 (Encoder)         multiple                  1939584   
                                                                 
 dense_8 (Dense)             multiple                  1440156   
                                                                 
Total params: 3379740 (12.89 MB)
Trainable params: 3379484 (12.89 MB)
Non-trainable params: 256 (1.00 KB)
_________________________________________________________________


In [None]:
# Masked cross-entropy on raw logits, weighted per class with `weights_inv`.
custom_loss = CustomSparseCategoricalCrossentropy(from_logits=True, class_weights=weights_inv)

# Track masked top-1, top-3 and top-5 accuracy. Adam is created with its default
# learning rate; the `schedule` callback passed to fit() sets the rate per epoch.
model_medium.compile(optimizer=tf.keras.optimizers.Adam(),
                     loss=custom_loss,
                     metrics=[CustomSparseCategoricalAccuracy(),
                              CustomTopKAccuracy(k=3, name='custom_top_3_accuracy'),
                              CustomTopKAccuracy(k=5, name='custom_top_5_accuracy')])

# Train for 9 epochs, evaluating on the test split after each one.
history_medium = model_medium.fit(training_data, validation_data = testing_data, epochs=9, callbacks = [schedule])

# Persist the per-epoch metrics as a ';'-separated CSV in the working directory.
pd.DataFrame(history_medium.history).to_csv("training_history_model_medium.csv", index = False, sep = ";")

# Save the trained weights under models/model_medium with the prefix "QBGPT".
model_medium.save_weights("models/model_medium/QBGPT")

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [None]:
# Print the layer and parameter-count summary of model_medium.
model_medium.summary()

Model: "qbgpt_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_4 (Encoder)         multiple                  4337920   
                                                                 
 dense_20 (Dense)            multiple                  2869148   
                                                                 
Total params: 7207068 (27.49 MB)
Trainable params: 7206556 (27.49 MB)
Non-trainable params: 512 (2.00 KB)
_________________________________________________________________


In [20]:
# Masked cross-entropy on raw logits, weighted per class with `weights_inv`.
custom_loss = CustomSparseCategoricalCrossentropy(from_logits=True, class_weights=weights_inv)

# Track masked top-1, top-3 and top-5 accuracy. Adam is created with its default
# learning rate; the `schedule` callback passed to fit() sets the rate per epoch.
model_large.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss=custom_loss,
                    metrics=[CustomSparseCategoricalAccuracy(),
                             CustomTopKAccuracy(k=3, name='custom_top_3_accuracy'),
                             CustomTopKAccuracy(k=5, name='custom_top_5_accuracy')])

# Train for 9 epochs, evaluating on the test split after each one.
history_large = model_large.fit(training_data, validation_data = testing_data, epochs=9, callbacks = [schedule])

# Persist the per-epoch metrics as a ';'-separated CSV in the working directory.
pd.DataFrame(history_large.history).to_csv("training_history_model_large.csv", index = False, sep = ";")

# Save the trained weights under models/model_large with the prefix "QBGPT".
model_large.save_weights("models/model_large/QBGPT")

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [21]:
# Print the layer and parameter-count summary of model_large (the wide single-block QBGPT).
model_large.summary()

Model: "qbgpt"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder (Encoder)           multiple                  10510848  
                                                                 
 dense_2 (Dense)             multiple                  5727132   
                                                                 
Total params: 16237980 (61.94 MB)
Trainable params: 16236956 (61.94 MB)
Non-trainable params: 1024 (4.00 KB)
_________________________________________________________________


In [33]:
# Masked cross-entropy on raw logits, weighted per class with `weights_inv`.
custom_loss = CustomSparseCategoricalCrossentropy(from_logits=True, class_weights=weights_inv)

# Track masked top-1, top-3 and top-5 accuracy. Adam is created with its default
# learning rate; the `schedule` callback passed to fit() sets the rate per epoch.
large_model.compile(optimizer=tf.keras.optimizers.Adam(),
                    loss=custom_loss,
                    metrics=[CustomSparseCategoricalAccuracy(),
                             CustomTopKAccuracy(k=3, name='custom_top_3_accuracy'),
                             CustomTopKAccuracy(k=5, name='custom_top_5_accuracy')])

# Train the two-block LargeQBGPT for 9 epochs, evaluating on the test split after each one.
# NOTE(review): `history_large` is also the name assigned by the model_large
# training cell, so this rebinds it; only the CSV files keep both histories apart.
history_large = large_model.fit(training_data, validation_data = testing_data, epochs=9, callbacks = [schedule])

# Persist the per-epoch metrics as a ';'-separated CSV in the working directory.
pd.DataFrame(history_large.history).to_csv("training_history_large_model.csv", index = False, sep = ";")

# Save the trained weights under models/large_model with the prefix "QBGPT".
large_model.save_weights("models/large_model/QBGPT")

Epoch 1/9
Epoch 2/9
Epoch 3/9
Epoch 4/9
Epoch 5/9
Epoch 6/9
Epoch 7/9
Epoch 8/9
Epoch 9/9


In [34]:
# Print the layer and parameter-count summary of large_model (the two-block LargeQBGPT).
large_model.summary()

Model: "large_qbgpt_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 encoder_l_1 (EncoderL)      multiple                  5127936   
                                                                 
 dense_26 (Dense)            multiple                  2869148   
                                                                 
Total params: 7997084 (30.51 MB)
Trainable params: 7996060 (30.50 MB)
Non-trainable params: 1024 (4.00 KB)
_________________________________________________________________
