In [1]:
import os
import pandas as pd
import numpy as np
import tensorflow as tf

# Execution environment switch: "local" works from the local repository checkout;
# any other value takes the Colab path (clone the repository, mount Google Drive).
env = "local"

In [2]:
if env != "local":
  !git clone https://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@github.com/samchaineau/QB-GPT.git
  import sys
  sys.path.append("/content/QB-GPT/")

In [3]:
# Pick the project root for the current environment and make it the working
# directory, so the relative data / weights / index paths used below resolve.
if env != "local":
    # Colab: the project lives on the mounted Google Drive.
    from google.colab import drive
    drive.mount('/content/gdrive')
    project_root = "/content/gdrive/MyDrive/NFL_Challenge/QB-GPT/"
else:
    project_root = "/Users/samuel/Documents/GitHub/QB-GPT/"
os.chdir(project_root)

In [4]:
testing_data = tf.data.Dataset.load("data_models/QBGPT/test_tokens_NFL_GPT")

# Ask tf.data for the number of examples instead of materialising a Python list
# holding every index. cardinality() returns a negative sentinel when the size
# is not statically known; only then fall back to a streaming count.
test_length = int(testing_data.cardinality())
if test_length < 0:
    test_length = sum(1 for _ in testing_data)

batch_size = 32

# The shuffle buffer covers the whole dataset, i.e. a full shuffle before batching.
testing_data = testing_data.shuffle(test_length).batch(batch_size)

In [5]:
from models.modeling.QBGPT.models import QBGPT, LargeQBGPT, XLargeQBGPT
from models.modeling.QBGPT.losses_and_metrics import CustomSparseCategoricalAccuracy, CustomTopKAccuracy, CustomSparseCategoricalCrossentropy

# Vocabulary / output sizes handed to the model constructors below.
moves_to_pred = 10876
input_size = 10878
starts_size = 1033
scrimmage_size = 100
positions_id = 29
temp_ids = 52

off_def_size = 2
token_type_size = 3
play_type_size = 9

# Keyword arguments shared by every model size. Only the model class and the
# embedding / hidden width differ between the variants, so they are the only
# things spelled out per model (previously the full argument list was
# copy-pasted four times).
shared_model_kwargs = dict(input_vocab_size = input_size,
                           positional_vocab_size = temp_ids,
                           position_vocab_size = positions_id,
                           start_vocab_size = starts_size,
                           scrimmage_vocab_size = scrimmage_size,
                           offdef_vocab_size = off_def_size,
                           type_vocab_size = token_type_size,
                           playtype_vocab_size = play_type_size,
                           to_pred_size = moves_to_pred)

# Construction order (large, medium, small, tiny) is kept as in the original cell.
model_large = LargeQBGPT(embedding_dim = 256, hidden_dim = 256, **shared_model_kwargs)

model_medium = QBGPT(embedding_dim = 256, hidden_dim = 256, **shared_model_kwargs)

model_small = QBGPT(embedding_dim = 128, hidden_dim = 128, **shared_model_kwargs)

model_tiny = QBGPT(embedding_dim = 64, hidden_dim = 64, **shared_model_kwargs)

In [6]:
# Weights path for each model size, in loading order.
weight_sources = [
    (model_tiny, "models/modeling/QBGPT/weights/model_tiny/QBGPT"),
    (model_small, "models/modeling/QBGPT/weights/model_small/QBGPT"),
    (model_medium, "models/modeling/QBGPT/weights/model_medium/QBGPT"),
    (model_large, "models/modeling/QBGPT/weights/model_large/QBGPT"),
]
load_statuses = [model.load_weights(path) for model, path in weight_sources]

# The status of the last load remains the cell's displayed value.
load_statuses[-1]

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x294630fd0>

## Evaluation protocol

In [7]:
# One accumulator per model size for the per-batch predicted token ids.
tiny_prediction, small_prediction, medium_prediction, large_prediction = [], [], [], []

# Accumulators for the labels and for the input features kept alongside them.
trues, time, playtype, positions = [], [], [], []

In [8]:
import gc
from tqdm import tqdm

# Run the tiny model over every test batch, keeping the arg-max token id along
# the last axis together with the labels and the input features that are used
# for the breakdowns further down.
for batch in tqdm(testing_data, desc="Processing", total=len(testing_data), ncols=100):
  features = batch[0]

  predictions = tf.argmax(model_tiny(features), axis = -1)
  _ = gc.collect()
  tiny_prediction.append(predictions)

  trues.append(batch[1])
  for store, feature_name in ((time, "pos_ids"), (playtype, "PlayType"), (positions, "position_ids")):
    store.append(features[feature_name])

Processing: 100%|███████████████████████████████████████████████| 1609/1609 [04:30<00:00,  5.95it/s]


In [94]:
import polars as pl

class tokenizer:
    """Lookup between integer token IDs and the values they stand for.

    One ``{ID: [values...]}`` table is built per vocabulary from a parquet
    index file. The tables are reachable through ``self.index`` under the keys
    "moves", "plays", "positions", "scrimmage", "starts" and "time"; these are
    the accepted values of the ``type`` argument of ``decode`` / ``encode``.
    """

    def __init__(self, 
                 moves_index : str,
                 play_index : str,
                 positions_index  : str,
                 scrimmage_index : str,
                 starts_index : str,
                 time_index : str):
        """Read the six parquet index files and build their lookup tables.

        Every argument is a path passed to ``pl.read_parquet``; the expected
        column layout is described in ``convert_index_to_dict``.
        """
        self.moves_index = self.convert_index_to_dict(pl.read_parquet(moves_index))
        self.play_index = self.convert_index_to_dict(pl.read_parquet(play_index))
        self.positions_index = self.convert_index_to_dict(pl.read_parquet(positions_index))
        self.scrimmage_index = self.convert_index_to_dict(pl.read_parquet(scrimmage_index))
        self.starts_index = self.convert_index_to_dict(pl.read_parquet(starts_index))
        self.time_index = self.convert_index_to_dict(pl.read_parquet(time_index))

        self.index = {"moves" : self.moves_index,
                      "plays" : self.play_index,
                      "positions" : self.positions_index,
                      "scrimmage" : self.scrimmage_index,
                      "starts" : self.starts_index,
                      "time" : self.time_index}

    # The annotation is quoted so that defining the class does not require
    # polars to be imported at definition time.
    def convert_index_to_dict(self, df : "pl.DataFrame"):
        """Turn an index frame into ``{ID: [value_1, value_2, ...]}``.

        The frame must have exactly one column whose name contains "ID"
        (checked with an ``assert``). A column named "Cat", if present, is
        ignored; every other column is a value column, kept in frame order.
        If an ID appears several times, the last row wins.
        """
        id_cols = [c for c in df.columns if "ID" in c]
        assert len(id_cols) == 1
        id_col = id_cols[0]

        val_cols = [c for c in df.columns if c != id_col and c != "Cat"]

        # Selecting only the needed columns already leaves "Cat" out, so no
        # explicit drop (which fails when the column is absent) is required.
        columns = df.select([id_col] + val_cols).to_dict(as_series=False)

        return {token_id : [columns[c][row] for c in val_cols]
                for row, token_id in enumerate(columns[id_col])}

    def base_decode(self,
                    inputs : list,
                    index : dict):
        """Map each ID in ``inputs`` to its values; unknown IDs become "[PAD]"."""
        return [index.get(v, "[PAD]") for v in inputs]

    def decode(self,
               inputs : list,
               type : str):
        """Decode ``inputs`` with the table registered under ``type``.

        Raises ``KeyError`` if ``type`` is not a key of ``self.index``.
        """
        return self.base_decode(inputs, index = self.index[type])

    @staticmethod
    def find_id_by_values(input_dict : dict, 
                          target_list : list):
        """Return the first key of ``input_dict`` whose values equal ``target_list``.

        The comparison is element-wise and order-sensitive, so ``[1, 2]`` and
        ``[2, 1]`` are different entries. Returns ``None`` when nothing matches.

        This is a static method: it was previously declared without ``self``
        while being called as ``self.find_id_by_values(...)``, which raised a
        ``TypeError`` on every ``encode`` call.
        """
        target = list(target_list)
        for key, values in input_dict.items():
            if list(values) == target:
                return key
        return None

    def base_encode(self,
                inputs : list,
                index : dict):
        """Map each value list in ``inputs`` back to its ID (``None`` if unknown)."""
        return [self.find_id_by_values(index, v) for v in inputs]

    def encode(self,
               inputs : list,
               type : str):
        """Encode ``inputs`` with the table registered under ``type``.

        Raises ``KeyError`` if ``type`` is not a key of ``self.index``.
        """
        return self.base_encode(inputs, index = self.index[type])

# Parquet index file backing each vocabulary handled by the tokenizer,
# keyed by the constructor argument it is passed as.
index_files = {"moves_index" : "index/moves_index.parquet",
               "play_index" : "index/plays_index.parquet",
               "positions_index" : "index/positions_index.parquet",
               "scrimmage_index" : "index/scrimmage_index.parquet",
               "starts_index" : "index/starts_index.parquet",
               "time_index" : "index/time_index.parquet"}

QBGPT_tokenizer = tokenizer(**index_files)

def rmse(a : np.array, b : np.array):
    """Row-wise Euclidean distance between two equally shaped 2-D arrays.

    Note that, despite the name, the squared differences are summed along
    axis 1 and not averaged: the result for each row is
    sqrt(sum_j (a[i, j] - b[i, j])**2), one value per row.
    """
    delta = a - b
    squared_distance = np.sum(np.square(delta), axis = 1)
    return np.sqrt(squared_distance)
    
def model_rmse(df : pl.DataFrame, tokenizer):
    """Per-row distance between the decoded "label" and "pred" tokens of `df`.

    Both columns are decoded with the tokenizer's "moves" vocabulary and the
    two resulting arrays are compared with `rmse`, giving one value per row.
    """
    def decoded_moves(column_name):
        # Pull the column out as a plain list of ids, then decode it.
        token_ids = df.select(column_name).to_series().to_list()
        return np.array(tokenizer.decode(token_ids, type = "moves"))

    return rmse(decoded_moves("label"), decoded_moves("pred"))

In [99]:
# Stack the per-batch accumulators and flatten each into one long column.
tiny_columns = {"label" : np.vstack(trues).flatten(),
                "pred" : np.vstack(tiny_prediction).flatten(),
                "time" : np.vstack(time).flatten(),
                "playtype" : np.vstack(playtype).flatten(),
                "positions" : np.vstack(positions).flatten()}

# Rows labelled -100 are removed (presumably ignored / padding positions —
# TODO confirm); "Correct" is 1.0 on an exact token match and 0.0 otherwise.
is_exact_match = (pl.col("label") == pl.col("pred")).cast(pl.Float32).alias("Correct")
tiny_scored_df = (pl.DataFrame(tiny_columns).
                  filter(pl.col("label") != -100).
                  with_columns(is_exact_match))

# Distance between the decoded label move and the decoded predicted move.
tiny_rmse = model_rmse(tiny_scored_df, tokenizer=QBGPT_tokenizer)

tiny_eval_df = tiny_scored_df.with_columns(pl.Series(tiny_rmse).alias("RMSE"))

#small_eval_df = (pl.DataFrame({"label" : np.reshape(np.vstack(trues), (26349056)),
#                             "pred" : np.reshape(np.vstack(small_prediction), (26349056)),
#                             "time" : np.reshape(np.vstack(time), (26349056)),
#                             "playtype" : np.reshape(np.vstack(playtype), (26349056)),
#                             "positions" : np.reshape(np.vstack(positions), (26349056)),}).
#                filter(pl.col("label") != -100).
#                with_columns((pl.col("label") == pl.col("pred")).cast(pl.Float32).alias("Correct")))

#medium_eval_df = (pl.DataFrame({"label" : np.reshape(np.vstack(trues), (26349056)),
#                             "pred" : np.reshape(np.vstack(medium_prediction), (26349056)),
#                             "time" : np.reshape(np.vstack(time), (26349056)),
#                             "playtype" : np.reshape(np.vstack(playtype), (26349056)),
#                             "positions" : np.reshape(np.vstack(positions), (26349056)),}).
#                filter(pl.col("label") != -100).
#                with_columns((pl.col("label") == pl.col("pred")).cast(pl.Float32).alias("Correct")))

#large_eval_df = (pl.DataFrame({"label" : np.reshape(np.vstack(trues), (26349056)),
#                             "pred" : np.reshape(np.vstack(large_prediction), (26349056)),
#                             "time" : np.reshape(np.vstack(time), (26349056)),
#                             "playtype" : np.reshape(np.vstack(playtype), (26349056)),
#                             "positions" : np.reshape(np.vstack(positions), (26349056)),}).
#                filter(pl.col("label") != -100).
#                with_columns((pl.col("label") == pl.col("pred")).cast(pl.Float32).alias("Correct")))

In [100]:
# Evaluation frame per model name, used to build the comparison tables.
# NOTE(review): "small_model" currently points at the tiny model's frame, so
# both columns of every comparison table are identical; replace it with the
# small model's own evaluation frame once that has been computed.
eval_df_dict = dict(tiny_model = tiny_eval_df,
                    small_model = tiny_eval_df)

In [109]:
# Show the positions index to relate position IDs to their labels.
positions_index_path = "index/positions_index.parquet"
pl.read_parquet(positions_index_path)

position,position_ID,Cat
str,i64,str
"""C""",0,"""Pos"""
"""CB""",1,"""Pos"""
"""DB""",2,"""Pos"""
"""DE""",3,"""Pos"""
"""DL""",4,"""Pos"""
"""DT""",5,"""Pos"""
"""FB""",6,"""Pos"""
"""FS""",7,"""Pos"""
"""G""",8,"""Pos"""
"""HB""",9,"""Pos"""


In [108]:
# Inspect the evaluated rows belonging to position ID 28.
is_position_28 = pl.col("positions") == 28
tiny_eval_df.filter(is_position_28)

label,pred,time,playtype,positions,Correct,RMSE
i64,i64,i64,i64,i64,f32,f64
4748,4747,11,2,28,0.0,1.0
4665,4664,12,2,28,0.0,1.0
4584,4583,13,2,28,0.0,1.0
4498,4498,14,2,28,1.0,0.0
4412,4412,15,2,28,1.0,0.0
4331,4331,16,2,28,1.0,0.0
4249,4248,17,2,28,0.0,1.0
4166,4166,18,2,28,1.0,0.0
4084,4083,19,2,28,0.0,1.0
4002,4002,20,2,28,1.0,0.0


In [103]:
def average_per_cat(metric : str, cat : str, model_name : str, eval_df : pl.DataFrame):
    """Mean of `metric` for each distinct value of `cat`, tagged with the model name.

    Returns a frame sorted by `cat` holding the `cat` column, the group mean
    of `metric`, and a constant "Model" column set to `model_name`.
    """
    per_category_mean = eval_df.select(cat, metric).group_by(cat).mean()
    model_tag = pl.lit(model_name).alias("Model")
    return per_category_mean.sort(cat).with_columns(model_tag)
    
def model_comparison_per_cat(metric : str, cat : str, eval_dict : dict):
    """Side-by-side table of the per-category mean of `metric`, one column per model.

    `eval_dict` maps a model name to its evaluation frame. The per-model
    averages are stacked and pivoted so that each row is a value of `cat`
    and each column is a model name.
    """
    per_model_frames = []
    for model_name, model_eval_df in eval_dict.items():
        per_model_frames.append(average_per_cat(metric, cat, model_name, model_eval_df))

    stacked = pl.concat(per_model_frames)
    return stacked.pivot(values= metric, columns="Model", index=cat)

In [104]:
cat_to_evaluate = ["playtype", "time", "positions"]

# One comparison table per category: exact-match accuracy ("Correct") and
# decoded-move distance ("RMSE").
ac_model_comparisons = {}
rmse_model_comparisons = {}
for category in cat_to_evaluate:
    ac_model_comparisons[category] = model_comparison_per_cat("Correct", category, eval_dict=eval_df_dict)
    rmse_model_comparisons[category] = model_comparison_per_cat("RMSE", category, eval_dict=eval_df_dict)

In [106]:
# Display the per-position comparison of the "RMSE" metric across models.
positions_rmse_table = rmse_model_comparisons["positions"]
positions_rmse_table

positions,tiny_model,small_model
i64,f64,f64
0,0.23248,0.23248
1,0.56131,0.56131
2,0.573626,0.573626
3,0.39805,0.39805
4,0.291743,0.291743
5,0.292566,0.292566
6,0.534664,0.534664
7,0.546626,0.546626
8,0.235299,0.235299
9,0.541283,0.541283
