In [1]:
import pandas as pd
import numpy as np
import math
import json
import time
import pickle
import sys

import torch
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F



In [2]:
sys.path.append('../')

In [3]:
torch.manual_seed(1)

<torch._C.Generator at 0x1315685b0>

In [4]:

from dataset import SeqDataset

In [5]:
DATA_DIR = "seq_filtered_sorted_data"

if torch.backends.mps.is_available():
    DEVICE = torch.device("mps")
else:
    DEVICE = torch.device('cpu')
print(f"Using device = {DEVICE}")

TRAIN_SPLIT = 0.6

Using device = mps


In [6]:
with open(f'../cleaned_data/{DATA_DIR}/game_play_id.json') as f:
        list_IDS = json.load(f)
# break ids into train-val-test sets
val_percent = int(len(list_IDS)* (TRAIN_SPLIT+((1-TRAIN_SPLIT)/2)) )
train_IDS, val_IDS, test_IDS = np.split(list_IDS, [ int(len(list_IDS)*TRAIN_SPLIT), val_percent ])

# params = {'batch_size': 256,
#         'shuffle': True,
#         'num_workers': 1}

# # Generators
# training_set = SeqDataset(train_IDS, data_dir=DATA_DIR)
# training_generator = torch.utils.data.DataLoader(training_set, **params)

val_params = {'batch_size': 2,
        'shuffle': True,
        'num_workers': 1}
validation_set = SeqDataset(val_IDS, data_dir=DATA_DIR)
validation_generator = torch.utils.data.DataLoader(validation_set, **val_params)

In [7]:
# seq : shape (seq_length, 24, 9)
def get_closest_defender(seq):
    ball_and_opps = seq[:,0:12, 3:5]
    print(f"ball_and_opps shape = {ball_and_opps.shape}")
    dists = torch.cdist(ball_and_opps[:,0:1,:], ball_and_opps, p=2.0)
    print(f"did th edist")
    dists[:,0,0] = torch.inf 
    print(f'did the inf')
    closest_defender = dists.argmin(dim=2).squeeze(-1)
    return closest_defender

In [8]:
def process_batch(device, batch, labels, lengths, player_ids):

    local_batch = batch.to(device)
    index_labels = torch.argmax(labels, -1).to(device) # (batch_size)
    local_lengths = lengths.to(device)    # [batch_size, 23, embed_dim]


    batch_seq_loss = 0  # avg avg seq loss (e.g, on expected sequence, average loss)
    val_metrics_dict = {"quarter_output_pred":0,
                   "halfway_output_pred":0,
                   "three_quarter_output_pred":0,
                   "final_output_pred":0,
                   "correct_tackler_identified_w_highest_prob_anytime":0,
                   "correct_tackler_had_highest_average_prob":0,
                   "correct_tackler_average_prob":0}
    
    # for each full sequence
    for i in range(0, local_batch.shape[0]):

        single_target = index_labels[i, local_lengths[i]-1].to(device)                                 # ([]), the correct class
        batch = local_batch[i, :local_lengths[i]-1, :].reshape(-1,24,9).to(device)    # (seq_length, 24, 9)
        print(f"batch shape = {batch.shape}")
        closest_def = get_closest_defender(batch).to(device)

        print(f"closest_def shape = {closest_def.shape}")
    
        correct_classify = (closest_def[:local_lengths[0]] == single_target).float()

        print(f"correct_classify shape = {correct_classify.shape}")

        batch_seq_loss += F.nll_loss(torch.log(closest_def), index_labels[i, :local_lengths[i]-1], reduction='mean')

        # shape ([]), 1 or 0 if classified correctly at that point in time
        val_metrics_dict['quarter_output_pred'] += correct_classify[batch.shape[0]//4].item()
        val_metrics_dict['halfway_output_pred'] += correct_classify[batch.shape[0]//2].item()
        val_metrics_dict['three_quarter_output_pred'] += correct_classify[(batch.shape[0]//4)*3].item()
        val_metrics_dict['final_output_pred'] += correct_classify[-1].item()

    return batch_seq_loss, val_metrics_dict
    

In [9]:
print(f"Starting training...")

total_start_time = time.time()

avg_val_loss = 0
val_loss_hist = []
val_metrics_dict = {"quarter_output_pred":0,
                "halfway_output_pred":0,
                "three_quarter_output_pred":0,
                "final_output_pred":0,
                "correct_tackler_identified_w_highest_prob_anytime":0,
                "correct_tackler_had_highest_average_prob":0,
                "correct_tackler_average_prob":0}

gen = iter(validation_generator)
num_val_batches = len(gen)

print(f"num val batches = {num_val_batches}")

for batch_index, (local_batch, local_labels, local_lengths, local_player_ids, local_ids) in enumerate(validation_generator):

    print(f"afetr gen")
    # val_metrics[0] = val loss
    batch_seq_loss, val_batch_metrics_dict = process_batch(DEVICE, local_batch, local_labels, local_lengths, local_player_ids)
    print(f"Por")
    val_loss_hist.append(batch_seq_loss.item())

    avg_val_loss += batch_seq_loss.item()
    val_metrics_dict['quarter_output_pred'] += val_batch_metrics_dict['quarter_output_pred']
    val_metrics_dict['halfway_output_pred'] += val_batch_metrics_dict['halfway_output_pred']
    val_metrics_dict['three_quarter_output_pred'] += val_batch_metrics_dict['three_quarter_output_pred']
    val_metrics_dict['final_output_pred'] += val_batch_metrics_dict['final_output_pred']
    val_metrics_dict['correct_tackler_identified_w_highest_prob_anytime'] += val_batch_metrics_dict['correct_tackler_identified_w_highest_prob_anytime']
    val_metrics_dict['correct_tackler_had_highest_average_prob'] += val_batch_metrics_dict['correct_tackler_had_highest_average_prob']
    val_metrics_dict['correct_tackler_average_prob'] += val_batch_metrics_dict['correct_tackler_average_prob']

avg_val_loss /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['quarter_output_pred'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['halfway_output_pred'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['three_quarter_output_pred'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['final_output_pred'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['correct_tackler_identified_w_highest_prob_anytime'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['correct_tackler_had_highest_average_prob'] /= (num_val_batches*validation_generator.batch_size)
val_metrics_dict['correct_tackler_average_prob'] /= (num_val_batches*validation_generator.batch_size)


print(f"val_loss={avg_val_loss}")
print(f"val metrics dict = ")
print(f"{list(val_metrics_dict.keys())}")
print(f"{np.array(list(val_metrics_dict.values())).round(3)}")
print(f"#######################")
    
total_end_time = time.time()
print(f"Finished training 1 epochs in {round((total_end_time - total_start_time)/60, 3)} min")

Starting training...


num val batches = 994


: 

In [14]:
local_batch, local_labels, local_lengths, local_player_ids, local_ids = next(iter(validation_generator))
index_labels = torch.argmax(local_labels, -1)
i = 0

In [15]:
single_target = index_labels[i, local_lengths[i]-1]                                 # ([]), the correct class
batch = local_batch[i, :local_lengths[i]-1, :].reshape(-1,24,9)    # (seq_length, 24, 9)


In [26]:
ball_and_opps = batch[:,0:12, 3:5]
dists = torch.cdist(ball_and_opps[:,0:1,:], ball_and_opps, p=2.0)
dists[:,0,0] = torch.inf 
closest_defender = dists.argmin(dim=2).squeeze(-1)

In [31]:
dists.argmin(dim=2).squeeze(-1).shape

torch.Size([22])

In [18]:
closest_def[0]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [None]:
ball_and_opps = seq[:,0:12, 3:5]
dists = torch.cdist(ball_and_opps[:,0:1,:], ball_and_opps, p=2.0)    # (seq_length, 12, 12)
dists[:,0,0] = torch.inf                                      # set distance from ball to ball to inf
closest_defender = dists.argmin(dim=1)               # (seq_length)

In [9]:
local_batch, local_labels, local_lengths, local_player_ids, local_ids = next(iter(validation_generator))

In [10]:
index_labels = torch.argmax(local_labels, -1)
single_target = index_labels[0, local_lengths[0]-1]

In [11]:
single_target

tensor(10)

In [19]:
# ball and oponents
ball_and_opps = local_batch[0].reshape(-1,24,9)[:local_lengths[0],0:12, 3:5]

In [20]:
ball_and_opps.shape

torch.Size([29, 12, 2])

In [37]:
dists = torch.cdist(ball_and_opps[:,0:1,:], ball_and_opps, p=2.0)
dists[:,0,0] = torch.inf  # set distance from ball to ball to inf

In [41]:
dists[0]

tensor([[    inf,  9.0762, 13.6751,  8.2470,  8.9987,  6.9035, 10.9977, 21.8351,
         29.8741, 22.1047, 14.6777,  8.5594]])

In [44]:
dists[:,0,:].argmin(dim=1)

tensor([ 5,  5,  5,  5,  5, 10, 10, 10, 10, 10,  5,  5,  5,  5,  5,  5, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10,  5,  5])

In [51]:
closest_def = dists[:,0,:].argmin(dim=1)

In [52]:
single_target

NameError: name 'single_target' is not defined

In [35]:
dists = torch.cdist(ball_and_opps, ball_and_opps, p=2.0)
dists[:,0,0] = torch.inf  # set distance from ball to ball to inf
closest_defender = dists[0].argmin()

In [36]:
dists[0,0] = torch.inf

In [38]:
dists[0].argmin()

tensor(11)