In [1]:
import os
import pandas as pd
import polars as pl
import numpy as np
import tensorflow as tf

# Execution-environment switch read by the next cell: "local" selects the
# local checkout path, any other value selects the Google Colab branch.
env = "local"

In [2]:
# Move into the project root so that the relative paths used throughout the
# notebook ("data_models/...", "indexv2/...", "app/...") resolve.
if env == "local":
    # The checkout location can be overridden with the QBGPT_ROOT environment
    # variable; the original path stays as the default so existing setups keep
    # working unchanged.
    os.chdir(os.environ.get("QBGPT_ROOT", "/Users/samuel/Documents/GitHub/QB-GPT/"))
else:
    # Any non-"local" value is treated as Google Colab: mount Drive first,
    # then work from the project folder stored there.
    from google.colab import drive
    drive.mount('/content/gdrive')
    os.chdir("/content/gdrive/MyDrive/NFL_Challenge/NFL-GPT/NFL data")

In [3]:
# Sanity check: show the entries of the current working directory.
os.listdir()

['data_models',
 'test.gif',
 '.DS_Store',
 'app',
 'LICENSE',
 'test_1.gif',
 'models',
 'README.md',
 'test_1_true.gif',
 '.gitignore',
 '.gitattributes',
 'indexv2',
 'data_preprocessing',
 'index',
 '.git',
 'generation.gif',
 'notebooks']

In [4]:
# Load a previously saved tf.data.Dataset from disk.
# NOTE(review): judging by the path this is the tokenised test/evaluation
# split — confirm against the preprocessing notebooks.
testing_data = tf.data.Dataset.load("data_models/QBGPT/test_tokens_NFL_GPT_v2_eval")

In [5]:
from app.tools import tokenizer, generator

In [6]:
# Build the project tokenizer from the parquet index files stored under
# "indexv2/", using a window size of 20.
qb_tok = tokenizer(
    window_size=20,
    moves_index="indexv2/moves_index.parquet",
    play_index="indexv2/plays_index.parquet",
    positions_index="indexv2/positions_index.parquet",
    scrimmage_index="indexv2/scrimmage_index.parquet",
    starts_index="indexv2/starts_index.parquet",
    time_index="indexv2/time_index.parquet",
)

In [7]:
from models.modeling.QBGPT.models import QBGPT, LargeQBGPT

# Sizes handed to the QBGPT constructor below (see the keyword each one is
# passed as).
# NOTE(review): these presumably have to match the configuration the
# checkpoint was trained with — confirm before changing any of them.
# NOTE(review): LargeQBGPT is imported alongside QBGPT but is not used in any
# cell visible here.
moves_to_pred = 11170
input_size = 11172
starts_size = 1954
scrimmage_size = 100
positions_id = 29

temp_ids = 52
off_def_size = 2
token_type_size = 3
play_type_size = 9

# Instantiate the "medium" model: 256-dimensional embeddings and hidden
# layers, 3 attention heads, diagonal masks disabled.
model_medium = QBGPT(input_vocab_size = input_size,
                    positional_vocab_size = temp_ids,
                    position_vocab_size=positions_id,
                    start_vocab_size=starts_size,
                    scrimmage_vocab_size=scrimmage_size,
                    offdef_vocab_size = off_def_size,
                    type_vocab_size = token_type_size,
                    playtype_vocab_size = play_type_size,
                    embedding_dim = 256,
                    hidden_dim = 256,
                    num_heads = 3,
                    diag_masks = False,
                    to_pred_size = moves_to_pred)

# Restore the weights from the checkpoint prefix; the CheckpointLoadStatus
# object echoed as cell output is the return value of this call.
model_medium.load_weights("models/modeling/QBGPT/weights/model_mediumv2/QBGPT")

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x2af0aed10>

In [8]:
# Wrap the loaded model and the tokenizer in the project's `generator` helper.
# NOTE(review): `temp` and `n_select` look like a sampling temperature and a
# top-k style cut-off — confirm against app.tools.generator.
gen_test = generator(model=model_medium,
                     tokenizer=qb_tok,
                     temp = 2.5,
                     n_select = 10)

In [9]:
# Keep only the first component of every dataset element, as a plain list.
reference = [element[0] for element in testing_data]

In [10]:
def get_unique(arr):
    """Return the smallest distinct value contained in ``arr``.

    ``np.unique`` flattens its input and returns the distinct values in
    ascending order, so the first element is the minimum. When ``arr`` holds
    a single repeated value, that value is returned.
    """
    distinct_values = np.unique(arr)
    return distinct_values[0]

In [11]:
# One summary row per element of `reference`: identifiers, the Traj and
# PlayType labels, the element's position in `reference` ("index") and its
# smallest pos_id. Rows are then kept only when PlayType != 6, Traj == 0 and
# min_pos == 0.
df_reference = (
    pd.DataFrame(
        [
            {
                "gameId": get_unique(play["gameId"]),
                "playId": get_unique(play["playId"]),
                "Traj": get_unique(play["Traj"]),
                "PlayType": get_unique(play["PlayType"]),
                "index": idx,
                "min_pos": play["pos_ids"].numpy().min(),
            }
            for idx, play in enumerate(reference)
        ]
    )
    .loc[lambda frame: frame["PlayType"] != 6]
    .loc[lambda frame: frame["Traj"] == 0]
    .loc[lambda frame: frame["min_pos"] == 0]
    .reset_index(drop=True)
)

In [12]:
# Number of rows to sample from each group. The value is 60 because that is
# what the sampling function actually drew before; the previous `n = 5` was
# declared but never used.
n = 60


def sample_n(group, n=n, random_state=None):
    """Randomly sample up to ``n`` rows from one group.

    Parameters
    ----------
    group : pandas.DataFrame
        The rows of a single group (as passed by ``groupby(...).apply``).
    n : int, optional
        Number of rows to draw. Defaults to the module-level ``n`` (60).
    random_state : int or numpy.random.Generator, optional
        Forwarded to ``DataFrame.sample``; pass a fixed value to make the
        draw reproducible. ``None`` keeps the draw unseeded.

    Returns
    -------
    pandas.DataFrame
        ``min(n, len(group))`` rows of ``group``. Capping at the group size
        stops small groups from raising ``ValueError``.
    """
    return group.sample(min(n, len(group)), random_state=random_state)

# Sample within every PlayType group, renumber the rows, keep only play
# types 7, 4, 2 and 1, renumber again and drop the "min_pos" column.
sampled_df = (
    df_reference
    .groupby('PlayType', group_keys=False)
    .apply(sample_n)
    .reset_index(drop=True)
    .loc[lambda frame: frame["PlayType"].isin([7, 4, 2, 1])]
    .reset_index(drop=True)
    .drop("min_pos", axis="columns")
)

In [13]:
def filter_var(d):
    """Convert a mapping of tensors into plain nested lists.

    Every value must expose ``.numpy()``; each is converted with
    ``.numpy().tolist()``. The keys "gameId", "playId", "nflId" and "Traj"
    are left out of the result.
    """
    excluded_keys = ("gameId", "playId", "nflId", "Traj")
    kept = {}
    for key, tensor in d.items():
        if key in excluded_keys:
            continue
        kept[key] = tensor.numpy().tolist()
    return kept

# Map each sampled row number to the JSON-friendly version of the play it
# points at in `reference`.
new_ref_tf = {
    new_idx: filter_var(reference[ref_idx])
    for new_idx, ref_idx in enumerate(sampled_df["index"])
}

# From here on "index" refers to the keys of `new_ref_tf` rather than to
# positions in `reference`.
# NOTE(review): because the column is overwritten, re-running this cell on its
# own would look up the wrong plays — re-run from the sampling cell instead.
sampled_df["index"] = list(range(len(sampled_df)))

In [14]:
import nfl_data_py as nfl
# Seasons 2017 through 2022 inclusive.
years_to_get = list(range(2017, 2023))
# Fetch the play-by-play data with nfl_data_py (returned as a pandas frame)
# and convert it to polars.
season_data = pl.from_pandas(
    nfl.import_pbp_data(years_to_get)
)

2017 done.
2018 done.
2019 done.
2020 done.
2021 done.
2022 done.
Downcasting floats.


In [15]:
# Lookup table of home/away teams per play: distinct rows only, identifiers
# renamed to gameId/playId and cast to Int32, then converted to pandas.
joining = (
    season_data
    .select("old_game_id", "play_id", "home_team", "away_team")
    .unique()
    .rename({"old_game_id" : "gameId", "play_id" : "playId"})
    .with_columns([
        pl.col("gameId").cast(pl.Int32),
        pl.col("playId").cast(pl.Int32),
    ])
    .to_pandas()
)

In [16]:
# Attach the home/away team columns; the left join keeps every sampled row
# even when no matching play is found.
sampled_df = sampled_df.merge(joining, how="left", on=["gameId", "playId"])

In [17]:
# Write the sampled reference table as JSON into the app folder.
sampled_df.to_json("app/ref_df.json")

In [18]:
import json 

# Serialise the reference plays (plain lists, see filter_var) to JSON.
# Note that JSON object keys are always strings, so the integer keys of
# new_ref_tf are written out as "0", "1", ...
with open('data_models/QBGPT/ref.json', 'w') as fp:
    json.dump(new_ref_tf, fp)

In [19]:
# Read the file back; at this point the keys are strings and the values are
# nested lists.
with open('data_models/QBGPT/ref.json', 'r') as fp:
    data = json.load(fp)

In [20]:
def convert_numpy(d):
    """Return a new dict with the same keys as ``d`` and every value wrapped
    in ``np.array``."""
    arrays = {}
    for key, values in d.items():
        arrays[key] = np.array(values)
    return arrays

In [67]:
# Undo the JSON round trip: integer keys again, numpy arrays as values.
data = {
    int(play_key): convert_numpy(play_fields)
    for play_key, play_fields in data.items()
}

In [69]:
# Pick one stored play (key 87) as the example ("essai" is French for "trial").
essai_l = data[87]

# Truncate the play with the tokenizer, generate from the truncated input and
# decode the generated tokens.
# NOTE(review): the arguments 1 and 25 are presumably a time cut-off and a
# number of generation steps — confirm against app.tools.
trial_d = qb_tok.truncate_to_time_t(essai_l, 1)
generated = gen_test.generate_sequence(trial_d, 25)
decoded = gen_test.tokenizer.decode_sequence(generated)

# Tabular form of the generated play, used by the plotting cells below.
step1 = gen_test.prepare_for_plot(decoded)
plot = pd.DataFrame(step1)

# Same decoding/plot preparation applied to the untruncated play, to serve as
# the ground-truth comparison.
decoded_true = gen_test.tokenizer.decode_sequence(essai_l)
step1_true = gen_test.prepare_for_plot(decoded_true)
plot_true = pd.DataFrame(step1_true)

In [70]:
import plotly.express as px
import plotly.io as pio
import matplotlib.pyplot as plt
from PIL import Image
import plotly.graph_objects as go

In [71]:
def get_plot(df, n_frames, name):
    """Build an animated Plotly figure of player trajectories.

    One line trace is created per distinct value of ``df["ids"]`` and one
    animation frame per distinct non-zero value of ``df["pos_ids"]``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``ids``, ``pos_ids``, ``position_ids``,
        ``input_ids_x`` and ``input_ids_y``.
        NOTE(review): each player's rows are assumed to be ordered by
        ``pos_ids`` with the ``pos_ids == 0`` row first — confirm against
        ``prepare_for_plot``.
    n_frames : int or float
        Despite its name, this value is forwarded as the per-frame
        ``duration`` of the "Play" button animation (milliseconds in
        Plotly's animation API), not as a frame count.
    name : str
        Prefix of the figure title (``f"{name} play"``).

    Returns
    -------
    plotly.graph_objects.Figure
        The configured figure, with its frames attached.
    """
    fig = go.Figure(
        layout=go.Layout(
            updatemenus=[dict(type="buttons", direction="right", x=0.9, y=1.16), ],
            xaxis=dict(range=[0, 120],
                       autorange=False, tickwidth=2,
                       title_text="X"),
            yaxis=dict(range=[0, 60],
                       autorange=False,
                       title_text="Y")
        ))

    # Number of leading rows of each player drawn before the animation starts.
    n_initial = 1
    # One (initially empty) list of traces per non-zero time step.
    frames = {pos_id: [] for pos_id in df["pos_ids"].unique() if pos_id != 0}

    for player_id in df["ids"].unique():
        spec = df[df["ids"] == player_id].reset_index(drop=True)
        label = spec.position_ids.unique()[0]
        fig.add_trace(
            go.Scatter(x=spec.input_ids_x[:n_initial],
                       y=spec.input_ids_y[:n_initial],
                       name=label,
                       text=label,
                       visible=True,
                       line=dict(color="#f47738", dash="solid")))

        for k in range(n_initial, spec.shape[0]):
            current_frame = spec["pos_ids"][k]
            # Slice up to and including row k: the frame for a time step must
            # contain the position reached at that step. Slicing with [:k]
            # made the animation lag one step behind and never drew the last
            # point of a trajectory.
            frames[current_frame].append(
                go.Scatter(x=spec.input_ids_x[:k + 1], y=spec.input_ids_y[:k + 1]))

    # Animation: one go.Frame per time step, in the order the steps first
    # appear in df["pos_ids"].
    fig.update(frames=[go.Frame(data=traces) for traces in frames.values()])

    fig.update_xaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=10)
    fig.update_yaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=1)
    fig.update_layout(yaxis_tickformat=',')
    fig.update_layout(legend=dict(x=0, y=1.1), legend_orientation="h")

    # Title, fonts and the "Play" button that starts the animation.
    fig.update_layout(title=f"{name} play",
                      xaxis_title="X",
                      yaxis_title="Y",
                      legend_title="Legend Title",
                      showlegend=False,
                      font=dict(
                          family="Arial",
                          size=14
                      ),
                      hovermode="x",
                      updatemenus=[
                          dict(
                              buttons=list(
                                  [
                                      dict(label="Play",
                                           method="animate",
                                           args=[None, {"frame": {"duration": n_frames}}])
                                  ]
                              ),
                              type="buttons",
                              direction="right",
                              pad={"r": 50, "t": 50},
                              showactive=False,
                              x=0.5,
                              yanchor="top")
                      ])

    fig.update_layout(template='plotly_dark')

    fig.update_layout(width=1200, height=600)
    return fig

In [76]:
# Animated figure for the generated play. The second argument (26) is used by
# get_plot as the per-frame duration of the "Play" animation.
fig_gen = get_plot(plot, 26, "Generated")

In [43]:
# Same figure for the ground-truth play; being the cell's last expression it
# is displayed directly rather than stored.
get_plot(plot_true, 26, "True")

In [83]:
# NOTE(review): leftover scratch from developing get_plot. It needs a
# module-level dict named `frames`, which no earlier visible cell defines, so
# it is unlikely to survive Restart & Run All — confirm and remove.
frames = list(frames.values())

In [84]:
# NOTE(review): this cell repeats, at top level, the frame/layout steps that
# get_plot already performs internally. It depends on module-level `frames`
# and `fig` objects that no earlier visible cell (in file order) defines, so
# it looks like leftover scratch work that will not survive Restart & Run
# All — confirm and remove.
frames = [go.Frame(data = v) for v in frames]
        

# Animation
fig_gen.update(frames=frames)

fig.update_xaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=10)
fig.update_yaxes(ticks="outside", tickwidth=2, tickcolor='white', ticklen=1)
fig.update_layout(yaxis_tickformat=',')
fig.update_layout(legend=dict(x=0, y=1.1), legend_orientation="h")

# Buttons
fig.update_layout(title="Generated play",
                  xaxis_title="X",
                  yaxis_title="Y",
                  legend_title="Legend Title",
                  showlegend=False,
                  font=dict(
                      family="Arial",
                      size=14
                  ),
                  hovermode="x",
                  updatemenus=[
                        dict(
                            buttons=list([
                                dict(label="Play",
                                     method="animate",
                                     args=[None, {"frame": {"duration": 50}}]),
                            ]))
                        ]
                    )

fig.update_layout(width=1200, height=600) 

In [77]:
# Render the generated-play animation.
fig_gen.show()

In [22]:
# Plotly Express version of the generated-play animation: one animation frame
# per pos_ids value, colour by OffDef, one symbol per player id.
fig = px.line(
    plot,
    x="input_ids_x",
    y="input_ids_y",
    animation_frame="pos_ids",
    color="OffDef",
    symbol="ids",
    text="position_ids",
    title="Player Trajectories Over Time",
    line_shape="linear",
    range_x=[0, 140],  # fixed X axis range
    range_y=[0, 60],   # fixed Y axis range
    render_mode="svg",
)

# Marker size is applied only to traces whose mode is exactly 'lines'.
fig.update_traces(marker=dict(size=10), selector=dict(mode='lines'))

fig.update_layout(width=800, height=600)

In [80]:
import io
import PIL


# Render every animation frame of fig_gen to a PNG image held in memory.
frames = []
for s, fr in enumerate(fig_gen.frames):
    # Replace the figure's traces with the ones stored in this frame.
    fig_gen.update(data=fr.data)
    # (fig_gen has no slider to advance, so that step is skipped here.)
    # Snapshot the current state of the figure as an image.
    png_bytes = fig_gen.to_image(format="png")
    frames.append(PIL.Image.open(io.BytesIO(png_bytes)))

# Assemble the snapshots into an endlessly looping GIF, 500 ms per image.
frames[0].save(
    "generation.gif",
    save_all=True,
    append_images=frames[1:],
    optimize=True,
    duration=500,
    loop=0,
)

In [354]:
# Plotly Express animation of the ground-truth play: one animation frame per
# pos_ids value, colour by OffDef, one symbol per player id.
fig = px.line(
    plot_true,
    x="input_ids_x",
    y="input_ids_y",
    animation_frame="pos_ids",
    color="OffDef",
    symbol="ids",
    text="position_ids",
    title="Player Trajectories Over Time",
    range_x=[0, 140],     # fixed X axis range
    range_y=[0, 60],      # fixed Y axis range
    line_shape="linear",  # straight segments between points
    render_mode="svg",
)

# Marker size is applied only to traces whose mode is exactly 'lines'.
fig.update_traces(marker=dict(size=10), selector=dict(mode='lines'))
fig.update_layout(width=800, height=600)

In [356]:
# Render every animation frame of `fig` to a PNG image held in memory.
frames = []
for s, fr in enumerate(fig.frames):
    # Replace the figure's traces with the ones stored in this frame.
    fig.update(data=fr.data)
    # Advance the animation slider so the image shows the matching step.
    fig.layout.sliders[0].update(active=s)
    # Snapshot the current state of the figure as an image.
    png_bytes = fig.to_image(format="png")
    frames.append(PIL.Image.open(io.BytesIO(png_bytes)))

# Assemble the snapshots into an endlessly looping GIF, 500 ms per image.
frames[0].save(
    "test_1_true.gif",
    save_all=True,
    append_images=frames[1:],
    optimize=True,
    duration=500,
    loop=0,
)

In [38]:
import plotly.express as px

# Plotly Express animation of the generated play: one animation frame per
# pos_ids value, colour by OffDef, one symbol per player id.
fig = px.line(
    plot,
    x="input_ids_x",
    y="input_ids_y",
    animation_frame="pos_ids",
    color="OffDef",
    symbol="ids",
    text="position_ids",
    title="Player Trajectories Over Time",
    range_x=[0, 140],     # fixed X axis range
    range_y=[0, 60],      # fixed Y axis range
    line_shape="linear",  # straight segments between points
    render_mode="svg",
)

# Marker size is applied only to traces whose mode is exactly 'lines'.
fig.update_traces(marker=dict(size=10), selector=dict(mode='lines'))
fig.update_layout(width=800, height=600)

In [308]:
# Animated scatter of player positions, one animation frame per pos_ids value.
# NOTE(review): `essai` is not defined in any cell visible here (only
# `essai_l` is), so this cell presumably relies on leftover kernel state and
# would fail on a fresh run — confirm and either define it or remove the cell.
fig = px.scatter(essai, x="input_ids_x", y="input_ids_y", animation_frame="pos_ids", color="OffDef", symbol="position_ids",
                 text="position_ids", title="Player Trajectories Over Time")

# Customize the appearance of the plot
fig.update_traces(marker=dict(size=10), selector=dict(mode='markers+lines'))
fig.update_layout(width=800, height=600) 
fig.update_xaxes(range=[0, 120])
fig.update_yaxes(range=[0, 60])

fig.show()

In [303]:
import plotly.express as px

# Toy data for trying out an animated Plotly scatter plot (placeholder values).
# NOTE(review): this rebinds `data`, which earlier cells use for the dict of
# reference plays — re-run those cells before using `data` as a dict again.
data = pd.DataFrame(
    dict(
        ID=[1, 2, 3, 4, 5, 6, 7, 8],
        OffDef=["Off", "Off", "Off", "Off", "Def", "Def", "Def", "Def"],
        X=[1, 3, 5, 7, 2, 4, 6, 8],
        Y=[2, 4, 6, 8, 1, 3, 5, 7],
        positions=["A", "B", "C", "D", "X", "Y", "Z", "W"],
        time=[0, 1, 2, 3, 0, 1, 2, 3],  # animation step of each row
    )
)

# Animated scatter: one frame per `time` value, colour and symbol by OffDef.
fig = px.scatter(
    data,
    x="X",
    y="Y",
    animation_frame="time",
    color="OffDef",
    symbol="OffDef",
    text="positions",
    title="Player Trajectories Over Time",
)

# Marker size is applied only to traces whose mode is exactly 'markers+lines'.
fig.update_traces(marker=dict(size=10), selector=dict(mode='markers+lines'))

fig.show()