In [1]:
import os

# TF_CPP_MIN_LOG_LEVEL controls TensorFlow's native (C++) log verbosity. It has
# to be in the environment before TensorFlow is imported to reliably take
# effect, so it is set ahead of the remaining imports.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

import copy
import glob
import math
import sys
import time
import warnings
from datetime import datetime

import tensorflow as tf
from tensorflow.python.ops.rnn import _transpose_batch_time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# customized ftns 
from helpers import *
from utilities import *
from model import *
from train import train_all_single_policies
# ---------------------------------------------------------
# Notebook-wide settings.
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Reload imported modules before each cell executes, so edits to the local
# helper modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2
# Silence all Python warnings from this point on. This runs after the imports
# above, so warnings raised while importing are still shown.
warnings.filterwarnings('ignore')
# warnings.filterwarnings(action='once')
# ---------------------------------------------------------
# Paths, relative to the notebook's working directory, and the data loader.
main_dir = '../'
game_dir = os.path.join(main_dir, 'data/')
models_path = './models/'
Data = LoadData(main_dir, game_dir)

  from ._conv import register_converters as _register_converters


#### Load raw data

In [2]:
# %%time
# Load a single game by id and put its event records into a DataFrame.
game_id = '0021500463'
game_data = Data.load_game(game_id)
events_df = pd.DataFrame(data=game_data['events'])
print('raw events shape:', events_df.shape)
# Last expression of the cell: preview of the first three events.
events_df.head(3)

raw events shape: (231, 8)


Unnamed: 0,end_time_left,home,moments,orig_events,playbyplay,quarter,start_time_left,visitor
0,702.31,"{'abbreviation': 'CHI', 'players': [{'playerid...","[[1, 1451351428029, 708.28, 12.78, None, [[-1,...",[0],GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,1,708.28,"{'abbreviation': 'TOR', 'players': [{'playerid..."
1,686.28,"{'abbreviation': 'CHI', 'players': [{'playerid...","[[1, 1451351428029, 708.28, 12.78, None, [[-1,...",[1],GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,1,708.28,"{'abbreviation': 'TOR', 'players': [{'playerid..."
2,668.42,"{'abbreviation': 'CHI', 'players': [{'playerid...","[[1, 1451351444029, 692.25, 12.21, None, [[-1,...","[2, 3]",GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,1,692.25,"{'abbreviation': 'TOR', 'players': [{'playerid..."


#### Get some supplementary data

In [3]:
# Player id -> role/position lookup (built by id_position; presumably keyed by
# player id -- confirm in helpers), followed by a duplicate check on it.
id_role = id_position(events_df)
check_game_roles_duplicates(id_role)

# It's possible that F has a similar role to G-F or F-G; we create empty slots to ensure meta order.
# Identify defending and offending runs (this is included in process_moments).
# The raw table and the lookup dict get separate names so `court_index` is
# only ever the {game_id: court_position} mapping.
court_index_df = Data.load_csv('./meta_data/court_index.csv')
court_index = dict(zip(court_index_df.game_id, court_index_df.court_position))

# Home and visitor team ids, read from the first event row.
first_event = events_df.loc[0]
homeid = first_event.home['teamid']
awayid = first_event.visitor['teamid']

#### Pre-process
Filter events, subsample frames, add velocity, reorder moments, re-arrange team order,
split sequences at the shot clock, and filter out events with too few moments.

In [4]:
# Event-type codes we want to keep: 1 Make, 2 Miss, 4 Rebound, 6 Personal Foul, 7 Violation.
# `use_event` is not referenced again in this cell; the filtering itself is
# driven by `discard_event`.
use_event = [1, 2, 4, 6, 7]
discard_event = [3, 5, 8, 9, 10, 12, 13, 18]
events = filter_event_type(events_df, discard_event)
print('After filtering events has shape:', events.shape)

# Break sequences up at the 24-second shot clock point (or at irregular cases,
# e.g. possibly out of bounds) and extract the per-event player and ball data.
subsample_factor = 0
single_game, single_game_balls = get_game_data_ra(
    events,
    court_index,
    game_id,
    event_threshold=10,
    subsample_factor=subsample_factor,
)
print('Final number of events:', len(single_game))

# Time step between consecutive frames: 1/25 sec per frame (25 frames/sec),
# scaled by the subsample factor unless that factor is 0.
fs_base = 1./25
if subsample_factor != 0:
    fs = fs_base * subsample_factor
else:
    fs = fs_base

# Player velocity (get_velocity, mode=1), one array per event.
single_game = [get_velocity(event_frames, fs, mode=1) for event_frames in single_game]
n_events = len(single_game)

# Ball velocity (get_velocity, mode=0) appended to the ball positions. The last
# position row is dropped before joining -- presumably because get_velocity
# returns one row fewer than its input; confirm in helpers.
single_game_balls = [
    np.concatenate([ball_frames[:-1, :], get_velocity(ball_frames, fs, mode=0)], axis=1)
    for ball_frames in single_game_balls
]

After filtering events has shape: (134, 8)
Final number of events: 165


In [5]:
# Shape of the first event's player array (the get_velocity mode=1 output).
single_game[0].shape

(37, 40)

In [6]:
# Shape of the first event's ball array (positions joined with the get_velocity mode=0 output).
single_game_balls[0].shape

(37, 6)

#### Role assignment and reorder moment

In [7]:
# Layout of the per-event player arrays: defenders first, then the offense,
# with n_ind consecutive columns per player.
n_defend = 5   # number of defending players
n_offend = 5   # number of offending players
n_ind = 4      # columns per player (presumably x, y and their velocities -- confirm in get_velocity)

# `event_lengths_repeat` is built with n_defend copies and is reused for the
# offense in the training cell, which is only valid for equally sized teams.
assert n_defend == n_offend, 'event_lengths_repeat assumes equally sized teams'

# Number of frames in each event.
event_lengths = np.array([len(event) for event in single_game])
# One copy of the lengths per player, matching the player-by-player stacking
# of the flattened arrays below.
event_lengths_repeat = np.tile(event_lengths, n_defend)

# All frames of all events stacked along the time axis.
all_moments = np.concatenate(single_game, axis=0)
# Guard against hidden state: a later cell rebinds `single_game` to a merged
# layout with a different column count, and this cell must not run on that.
assert all_moments.shape[1] == n_ind * (n_defend + n_offend), (
    'unexpected column count %d; re-run the pre-processing cell first' % all_moments.shape[1]
)
# Independent copy, kept under its original name in case other code uses it.
all_moments_vel = all_moments.copy()  # vel

# Defender columns come first ...
n_defend_cols = n_ind * n_defend
all_defend_moments = all_moments[:, :n_defend_cols]
# ... and the offense starts right after them, so the offset is the defender
# column count (not the offense count).
all_offend_moments = all_moments[:, n_defend_cols:]

# Flattened: each player's n_ind-column block stacked vertically, player after player.
all_defend_moments_ = np.concatenate(
    [all_defend_moments[:, start:start + n_ind] for start in range(0, n_ind * n_defend, n_ind)],
    axis=0,
)
all_offend_moments_ = np.concatenate(
    [all_offend_moments[:, start:start + n_ind] for start in range(0, n_ind * n_offend, n_ind)],
    axis=0,
)

In [8]:
# Role-assignment (HMM) settings. n_comp and n_mix are handed to RA.train_hmm
# in the training cell (n_comp is presumably the number of hidden states/roles
# -- confirm in RoleAssignment).
n_comp, n_mix = 7, None
RA = RoleAssignment(n_iter=50, verbose=True)

In [9]:
# Fit one HMM per team on the flattened per-player frames.
# Statement order is kept exactly as it was: RA may hold state between calls.
defend_state_sequence_, defend_means, defend_covs, _ = RA.train_hmm(
    all_defend_moments_,
    event_lengths_repeat,
    n_comp,
    n_mix,
)
offend_state_sequence_, offend_means, offend_covs, _ = RA.train_hmm(
    all_offend_moments_,
    event_lengths_repeat,
    n_comp,
    n_mix,
)

# Derive the per-event role ordering for each team from the fitted means.
_, defend_roles = RA.assign_roles(
    all_defend_moments_,
    all_defend_moments,
    defend_means,
    event_lengths,
)
_, offend_roles = RA.assign_roles(
    all_offend_moments_,
    all_offend_moments,
    offend_means,
    event_lengths,
)

         1    -1526457.8325             +nan
         2    -1268924.5854     +257533.2471
         3    -1200828.2524      +68096.3330
         4    -1172055.6233      +28772.6291
         5    -1168202.8601       +3852.7632
         6    -1166918.4365       +1284.4236
         7    -1166079.8787        +838.5578
         8    -1165555.5242        +524.3545
         9    -1165269.0194        +286.5048
        10    -1165082.1474        +186.8720
        11    -1164971.9147        +110.2327
        12    -1164914.3671         +57.5477
        13    -1164891.1977         +23.1694
        14    -1164881.8610          +9.3367
        15    -1164877.5057          +4.3553
        16    -1164875.3154          +2.1902
        17    -1164874.1176          +1.1978
        18    -1164873.3956          +0.7220
        19    -1164872.9145          +0.4811
        20    -1164872.5633          +0.3512
        21    -1164872.2874          +0.2759
        22    -1164872.0588          +0.2287
        23

In [10]:
# Split every event into its defender and offense columns and pass each half,
# with the matching role assignment, to order_moment_ra.
# The split point is derived from n_ind and n_defend rather than a literal 5,
# so it stays in step with the data-prep cell.
# NOTE: this reads the per-player layout of `single_game`; a later cell rebinds
# `single_game` to the merged layout, so re-run the pre-processing first if
# this cell has to be executed again.
n_defend_cols = n_ind * n_defend
defend_pos_vel = order_moment_ra([event[:, :n_defend_cols] for event in single_game], defend_roles)
offend_pos_vel = order_moment_ra([event[:, n_defend_cols:] for event in single_game], offend_roles)

In [11]:
# Re-check of the first event's ball array shape.
single_game_balls[0].shape

(37, 6)

In [12]:
# Shape of the first event's defender array returned by order_moment_ra.
defend_pos_vel[0].shape

(37, 28)

In [13]:
# Peek at the values of the first event's defender array returned by order_moment_ra.
defend_pos_vel[0]

array([[11.15334, 21.35529, -4.02675, ..., 11.67492, -0.2985 ,  0.495  ],
       [10.99227, 21.31452, -2.59725, ..., 11.69472, -0.29875,  0.7705 ],
       [10.88838, 21.3161 , -2.02775, ..., 11.72554, -0.30525,  1.0805 ],
       ...,
       [ 8.37199, 23.28826, -5.046  , ..., 17.59439,  1.261  ,  0.8865 ],
       [ 8.17015, 23.48841, -4.9075 , ..., 17.62985,  0.03675,  0.445  ],
       [ 7.97385, 23.71319, -4.1805 , ..., 17.64765, -0.16425,  0.76475]])

In [14]:
# For every event, join the role-ordered defender columns, the role-ordered
# offense columns and the ball columns side by side.
# NOTE: this rebinds `single_game` to the merged layout; the earlier cells that
# slice `single_game` per player expect the layout from before this cell.
single_game = [
    np.concatenate(
        [defend_pos_vel[event_idx], offend_pos_vel[event_idx], single_game_balls[event_idx]],
        axis=1,
    )
    for event_idx in range(n_events)
]
# Variant without the ball columns:
# single_game = [np.concatenate([defend_pos_vel[i], offend_pos_vel[i]], axis=1) for i in range(n_events)]

In [15]:
# Shape of the first event after joining defender, offense and ball columns.
single_game[0].shape

(37, 62)

In [16]:
# For every event, place the defender and offense role assignments side by side.
all_roles = [
    np.concatenate([defend_roles[event_idx], offend_roles[event_idx]], axis=1)
    for event_idx in range(len(single_game))
]

In [17]:
# Shape of the first event's combined (defender + offense) role array.
all_roles[0].shape

(37, 10)

In [18]:
# Number of events that have a role array.
len(all_roles)

165

In [19]:
# Event count recorded during pre-processing, for comparison with len(all_roles).
n_events

165

#### Show the plot, for the sake of comparison with processed moment later on

In [20]:
# Plot = PlotGame(game_id, main_dir, game_dir)
# # for i in range(plotn): 
# Plot.load_moment2img(game_data, event_number=0, moment_number=0, return_img=True)

In [21]:
# # manual plot check
# plot_check(single_game, plt_ind=0)

#### Create label, train and test set

In [22]:

# # pad short sequence and chunk long sequence with overlaps
# train, target = get_sequences(single_game, sequence_length, overlap)

In [23]:
# # create train and test set
# p = 0.8 # train percentage
# divider = int(len(train)*p)
# train_game, test_game = train[:divider], train[divider:]
# train_target, test_target = target[:divider], target[divider:]

In [25]:
# len(train_game), len(test_game)

In [26]:
# train_game[0].shape

#### Build the graph and start training all single policies

In [27]:
# Training settings, passed positionally to train_all_single_policies together
# with the merged per-event data and the model output directory.
# sequence_length and overlap are presumably measured in frames -- confirm in train.
batch_size = 32
sequence_length, overlap = 30, 15
train_all_single_policies(
    single_game,
    batch_size,
    sequence_length,
    overlap,
    models_path,
)

Wroking on policy 0
Epoch 0    | loss: 97.98    | time took: 2.22s | validation loss: 75.73   
Total time took: 0.01hrs
Epoch 0    | loss: 42.63    | time took: 2.13s | validation loss: 53.95   
Total time took: 0.01hrs
Done saving model for 0 

Wroking on policy 1
Epoch 0    | loss: 157.68   | time took: 2.32s | validation loss: 45.69   
Total time took: 0.01hrs
Epoch 0    | loss: 40.76    | time took: 2.11s | validation loss: 33.61   
Total time took: 0.01hrs
Done saving model for 1 

Wroking on policy 2
Epoch 0    | loss: 11.03    | time took: 2.20s | validation loss: 3.05    
Total time took: 0.01hrs
Epoch 0    | loss: 1.83     | time took: 2.04s | validation loss: 1.91    
Total time took: 0.01hrs
Done saving model for 2 

Wroking on policy 3
Epoch 0    | loss: 11.24    | time took: 2.08s | validation loss: 2.91    
Total time took: 0.01hrs
Epoch 0    | loss: 1.65     | time took: 2.01s | validation loss: 1.65    
Total time took: 0.01hrs
Done saving model for 3 

Wroking on polic

### To do:

    - 1) Regularize the LSTM
    - 2) Figure out why there are blanks in the testing
    - 3) Consider collecting the samples that are left out when creating batches
    - 4) Related to 3): seq_len = 3 may create null batches

    - Split the data into defending and offending, since the model for, e.g., the forward role should be quite different when defending versus offending. Remove particular events, such as free throws.
    
    - We can use the shot clock as an indicator of when the offending and defending switches.
    
    - The cameras operate at 25 frames per second, so in order to learn realistic motions we either subsample the 25 frames or extend the horizon to, for example, 50 frames or even longer (this might be too computationally heavy, and the model would probably drift a lot).
    
    - At the moment, if we don't have defending and offending separated, we at least need to break the sequences at the 24-second shot clock, since it usually signals a change in game state. (Note: the shot clock is sometimes None.)
    
    - Add tensorboard visualization. Add validation performance (maybe, it would take longer). 
      tensorboard --logdir=./train_logs
      
    - Start thinking about 1) joint training 2) Hidden structure 3) Smooth learning
    - From each sample to the next there is not much change, so subsample them.

### Questions

    * After a team scores and goes back to get ready for defense, is the trajectory back pretty much random?
    * Do players swap roles during a play, e.g. a forward swapping to a guard? Is the forward role a lot different from the guard role these days (i.e. can you differentiate a player playing forward from one playing guard by watching the game)? If yes, then the hidden structure learning/sequencing is necessary.