In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import glob, os, sys, warnings
from scipy.stats import multivariate_normal
from mpl_toolkits.mplot3d import Axes3D

# custom modules
from utilities import *
from helpers import *
# ---------------------------------------------------------
%matplotlib inline
%load_ext autoreload
%autoreload 2

warnings.filterwarnings('ignore')
# warnings.filterwarnings(action='once')
# ---------------------------------------------------------
# directories
# Both paths are relative, so they resolve against the kernel's current working directory.
main_dir = '../'
game_dir = main_dir+'data/'
# LoadData is not defined in this notebook; it arrives through one of the star-imports
# above (utilities or helpers). It is constructed from the two directory strings.
Data = LoadData(main_dir, game_dir)

#### Load raw event data

In [2]:
# %%time
# Load one game by id and put its 'events' entry into a DataFrame (one row per raw event).
game_id = '0021500463'
game_data = Data.load_game(game_id)
events_df = pd.DataFrame(game_data['events'])
print('raw events shape:', events_df.shape)
# Show the first two rows as a quick look at the available columns.
events_df.head(2)

raw events shape: (231, 8)


Unnamed: 0,end_time_left,home,moments,orig_events,playbyplay,quarter,start_time_left,visitor
0,702.31,"{'abbreviation': 'CHI', 'players': [{'playerid...","[[1, 1451351428029, 708.28, 12.78, None, [[-1,...",[0],GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,1,708.28,"{'abbreviation': 'TOR', 'players': [{'playerid..."
1,686.28,"{'abbreviation': 'CHI', 'players': [{'playerid...","[[1, 1451351428029, 708.28, 12.78, None, [[-1,...",[1],GAME_ID EVENTNUM EVENTMSGTYPE EVENTMS...,1,708.28,"{'abbreviation': 'TOR', 'players': [{'playerid..."


#### Load some meta info

In [3]:
# Load the court-position table and turn it into a lookup keyed by game id.
# Per the original note, this is what identifies defending and attacking runs, and the
# same step is also included in process_moments.
court_index = Data.load_csv('./meta_data/court_index.csv')
# The name is rebound here: table -> dict mapping game_id to court_position.
court_index = dict(zip(court_index.game_id, court_index.court_position))

# home and visitor team ids, read from the team records of the first event row
homeid = events_df.loc[0].home['teamid']
awayid = events_df.loc[0].visitor['teamid']

#### Pre-process 
Steps: filter events by type, subsample frames, add velocities, reorder moments, re-arrange the team order,
split sequences at the shot clock, and filter out events with too few moments.

In [64]:
# Event types kept for the analysis:
#   1: Make, 2: Miss, 4: Rebound, 6: Personal Foul, 7: Violation
# Everything listed in discard_event is dropped from the raw events.
# NOTE: use_event is informational only; the filter below is driven by discard_event.
use_event = [1, 2, 4, 6, 7]
discard_event = [3, 5, 8, 9, 10, 12, 13, 18]
events = filter_event_type(events_df, discard_event)
print('After filtering events has shape:', events.shape)

# Split the sequences at the 24-second shot-clock point (or at irregular cases,
# possibly out-of-bounds plays) and build the per-event game data.
subsample_factor = 0
single_game = get_game_data_ra(
    events,
    court_index,
    game_id,
    event_threshold=10,
    subsample_factor=subsample_factor,
)
print('Final number of events:', len(single_game))

# Replace each event by its velocity-augmented version; 1/25 is the time step handed
# to get_velocity and the trailing 1 selects the mode that keeps positions as well.
single_game = [get_velocity(event_moments, 1/25, 1) for event_moments in single_game]

After filtering events has shape: (134, 8)
Final number of events: 165


In [8]:
single_game[0][0]

array([11.15334, 21.35529, 16.79035, 20.55978,  9.12233, 39.32051,
       21.15543, 32.71616,  8.40459, 11.67492, 24.18381, 44.21187,
        1.06327,  2.54971,  7.28146, 48.40417, 18.1243 , 14.25539,
       10.82794, 26.70275])

In [10]:
# Probe of get_velocity with last argument 0: in the recorded output the first frame
# holds 20 values, and those same values show up as the velocity entries of the
# mode-1 result in the next cell (i.e. velocities only, no positions).
get_velocity(single_game[0], 1/25, 0)[0]

array([-4.02675, -1.01925, -7.8315 , -3.79475, -2.0525 , -0.543  ,
        3.15725,  4.36975, -0.2985 ,  0.495  ,  1.89025, -2.18575,
        1.41575,  0.64225,  1.26825, -2.84325, -1.3705 , -1.2295 ,
       -8.579  , -0.1235 ])

In [11]:
# Probe of get_velocity with last argument 1: in the recorded output the first frame
# holds 40 values, where each pair of position values is followed by the pair of
# velocity values returned by the mode-0 call (4 values per player).
get_velocity(single_game[0], 1/25, 1)[0]

array([11.15334, 21.35529, -4.02675, -1.01925, 16.79035, 20.55978,
       -7.8315 , -3.79475,  9.12233, 39.32051, -2.0525 , -0.543  ,
       21.15543, 32.71616,  3.15725,  4.36975,  8.40459, 11.67492,
       -0.2985 ,  0.495  , 24.18381, 44.21187,  1.89025, -2.18575,
        1.06327,  2.54971,  1.41575,  0.64225,  7.28146, 48.40417,
        1.26825, -2.84325, 18.1243 , 14.25539, -1.3705 , -1.2295 ,
       10.82794, 26.70275, -8.579  , -0.1235 ])

In [15]:
single_game[0].shape

(38, 20)

In [20]:
single_game[0][0]

array([11.15334, 21.35529, -4.02675, -1.01925, 16.79035, 20.55978,
       -7.8315 , -3.79475,  9.12233, 39.32051, -2.0525 , -0.543  ,
       21.15543, 32.71616,  3.15725,  4.36975,  8.40459, 11.67492,
       -0.2985 ,  0.495  , 24.18381, 44.21187,  1.89025, -2.18575,
        1.06327,  2.54971,  1.41575,  0.64225,  7.28146, 48.40417,
        1.26825, -2.84325, 18.1243 , 14.25539, -1.3705 , -1.2295 ,
       10.82794, 26.70275, -8.579  , -0.1235 ])

In [24]:
single_game[0][1]

array([ 1.099227e+01,  2.131452e+01, -2.597250e+00,  3.950000e-02,
        1.647709e+01,  2.040799e+01, -6.561750e+00, -5.241750e+00,
        9.040230e+00,  3.929879e+01, -4.157000e+00, -5.932500e-01,
        2.128172e+01,  3.289095e+01,  2.541750e+00,  3.547500e+00,
        8.392650e+00,  1.169472e+01, -2.987500e-01,  7.705000e-01,
        2.425942e+01,  4.412444e+01,  1.333250e+00, -3.526250e+00,
        1.119900e+00,  2.575400e+00,  9.470000e-01,  1.336000e+00,
        7.332190e+00,  4.829044e+01,  1.075750e+00, -1.535750e+00,
        1.806948e+01,  1.420621e+01, -1.269500e+00, -1.860000e+00,
        1.048478e+01,  2.669781e+01, -7.926750e+00, -4.097500e-01])

#### Prepare data set for training HMM

In [17]:
n_defend = 5
n_offend = 5
# values stored per player in every frame (position pair + velocity pair in the
# velocity-augmented frames shown above)
n_ind = 4
# number of frames in each event
event_lengths = np.array([len(event_moments) for event_moments in single_game])
# Repeat the event lengths once per defender so they line up with the player-stacked
# arrays built below. NOTE: the same array is reused for the offensive team later on,
# which is only valid while n_offend == n_defend.
event_lengths_repeat = np.tile(event_lengths, n_defend)
# all frames of all events, stacked along the time axis
all_moments = np.concatenate(single_game, axis=0)
# Defenders occupy the first n_ind*n_defend columns; the offensive players follow them.
# The offensive block therefore starts at n_ind*n_defend (not n_ind*n_offend) and is
# n_ind*n_offend columns wide.
all_defend_moments = all_moments[:, :n_ind*n_defend]
all_offend_moments = all_moments[:, n_ind*n_defend:n_ind*(n_defend + n_offend)]

# Stack the per-player column groups on top of each other: the result has n_ind columns
# and (number of players) x (number of frames) rows, player 0's frames first.
all_defend_moments_ = np.concatenate(np.split(all_defend_moments, n_defend, axis=1), axis=0)
all_offend_moments_ = np.concatenate(np.split(all_offend_moments, n_offend, axis=1), axis=0)

In [18]:
all_defend_moments_[:2]

array([[11.15334, 21.35529, -4.02675, -1.01925],
       [10.99227, 21.31452, -2.59725,  0.0395 ]])

In [19]:
all_offend_moments_[:2]

array([[24.18381, 44.21187,  1.89025, -2.18575],
       [24.25942, 44.12444,  1.33325, -3.52625]])

#### Create HMM model

In [25]:
# n_comp and n_mix are forwarded to RA.train_hmm below; n_comp is also reused later in
# the notebook as the number of role slots (slot arrays, EmissionVis).
n_comp = 7
n_mix = None  # presumably "no mixture emissions" - TODO confirm against train_hmm
# RoleAssignment is provided by one of the star-imported project modules.
RA = RoleAssignment()

In [26]:
# Fit one model per team on the player-stacked frames. Each call returns four values;
# the first three are kept (state sequence, means, covariances) and the fourth is discarded.
# event_lengths_repeat was built with n_defend and is reused for the offensive call,
# which relies on both teams having the same number of players.
# NOTE(review): no random seed is set anywhere visible, so the fit may not be
# reproducible across runs - confirm how train_hmm initialises the model.
defend_state_sequence_, defend_means, defend_covs, _ = RA.train_hmm(all_defend_moments_, event_lengths_repeat, n_comp, n_mix)
offend_state_sequence_, offend_means, offend_covs, _= RA.train_hmm(all_offend_moments_, event_lengths_repeat, n_comp, n_mix)

         1    -1526457.8325             +nan
         2    -1268924.5854     +257533.2471
         3    -1200828.2524      +68096.3330
         4    -1172055.6233      +28772.6291
         5    -1168202.8601       +3852.7632
         6    -1166918.4365       +1284.4236
         7    -1166079.8787        +838.5578
         8    -1165555.5242        +524.3545
         9    -1165269.0194        +286.5048
        10    -1165082.1474        +186.8720
        11    -1164971.9147        +110.2327
        12    -1164914.3671         +57.5477
        13    -1164891.1977         +23.1694
        14    -1164881.8610          +9.3367
        15    -1164877.5057          +4.3553
        16    -1164875.3154          +2.1902
        17    -1164874.1176          +1.1978
        18    -1164873.3956          +0.7220
        19    -1164872.9145          +0.4811
        20    -1164872.5633          +0.3512
        21    -1164872.2874          +0.2759
        22    -1164872.0588          +0.2287
        23

In [27]:
# Assign a role to every player in every frame, separately for each team, using the
# fitted means. Only the second return value is kept. Judging by the shapes printed
# below, defend_roles is indexable per event and each entry is (frames, players).
# The un-repeated event_lengths is passed here (not event_lengths_repeat).
_, defend_roles = RA.assign_roles(all_defend_moments_, all_defend_moments, defend_means, event_lengths)
_, offend_roles = RA.assign_roles(all_offend_moments_, all_offend_moments, offend_means, event_lengths)

In [28]:
defend_roles[0].shape

(37, 5)

In [29]:
all_defend_moments_.shape

(104770, 4)

In [30]:
single_game[0].shape

(37, 40)

In [31]:
defend_roles[0].shape

(37, 5)

In [32]:
# defend_roles[0]

In [33]:
single_game[0][0]

array([11.15334, 21.35529, -4.02675, -1.01925, 16.79035, 20.55978,
       -7.8315 , -3.79475,  9.12233, 39.32051, -2.0525 , -0.543  ,
       21.15543, 32.71616,  3.15725,  4.36975,  8.40459, 11.67492,
       -0.2985 ,  0.495  , 24.18381, 44.21187,  1.89025, -2.18575,
        1.06327,  2.54971,  1.41575,  0.64225,  7.28146, 48.40417,
        1.26825, -2.84325, 18.1243 , 14.25539, -1.3705 , -1.2295 ,
       10.82794, 26.70275, -8.579  , -0.1235 ])

In [None]:
single_game[0][0][list(defend_roles[0][0])]

In [43]:
defend_roles[0].shape

(37, 5)

In [56]:
def unstack_role(role, n=5, n_ind=4):
    '''Expand per-player role ids into per-feature slot indices.

    Every player carries ``n_ind`` consecutive values in a frame. A player
    assigned to role ``r`` therefore owns the slots
    ``r*n_ind, r*n_ind + 1, ..., r*n_ind + n_ind - 1`` of the role-ordered
    frame. This function returns, for each frame, those slot indices laid
    out in the original player order.

    Parameters
    ----------
    role : array-like of shape (n_frames, n)
        Role id of each of the ``n`` players in every frame.
    n : int, default 5
        Number of players per frame; must equal ``role.shape[1]``.
    n_ind : int, default 4
        Number of values stored per player.

    Returns
    -------
    numpy.ndarray of shape (n_frames, n * n_ind)
        A new array; ``role`` is not modified.

    Raises
    ------
    ValueError
        If ``role`` is not 2-D or its second dimension is not ``n``.
    '''
    role = np.asarray(role)
    if role.ndim != 2 or role.shape[1] != n:
        raise ValueError(
            'role must have shape (n_frames, %d), got %s' % (n, role.shape)
        )
    # first slot of each player's role, repeated once per feature of that player
    base = np.repeat(role * n_ind, n_ind, axis=1)
    # within-player feature offset 0..n_ind-1, repeated for every player
    # (the offsets depend on the feature count, not on the number of players)
    offsets = np.tile(np.arange(n_ind), n)
    return base + offsets


# One slot-index array per event, derived from that event's defensive role assignment.
droles = [unstack_role(event_roles, n_defend, n_ind) for event_roles in defend_roles]

# a = np.repeat(defend_roles[0]*2, [2]*n_defend, axis=1).copy() # 2 for x,y coordinates
# a[:, range(1, 2*n_defend, 2)] += 1
# a

In [None]:
a = np.zeros(14)
a

In [None]:
droles[0][0]

In [None]:
# Re-order the defensive part of every frame into role slots.
# Each role owns n_ind consecutive slots, so a role-ordered frame is n_ind*n_comp wide
# (the previous 2*n_comp dated from the 2-values-per-player layout and is too small for
# the slot indices produced by unstack_role). Roles nobody holds in a frame stay zero.
ro_single_game = []
for event_moments, event_slots in zip(single_game, droles):
    event_moments = np.asarray(event_moments)
    event_slots = np.asarray(event_slots)
    n_frames, n_cols = event_slots.shape
    ordered = np.zeros((n_frames, n_ind*n_comp))
    # Row index broadcast against the per-frame slot indices: value k of frame j is
    # written to slot event_slots[j, k]. Only the first n_cols (defensive) columns of
    # the frame are used, matching the width of the slot-index array.
    frame_idx = np.arange(n_frames)[:, None]
    ordered[frame_idx, event_slots] = event_moments[:, :n_cols]
    ro_single_game.append(ordered)

In [None]:
len(ro_single_game)

In [None]:
ro_single_game[0].shape

In [None]:
single_game[0][0].shape

In [None]:
defend_roles[0][0]

In [None]:
order_moment_ra(single_game, defend_roles)[0][0]

In [62]:
# Role-order the defensive columns (the first n_ind*n_defend of every frame) and show
# the first frame of the first event. n_defend replaces the hard-coded 5 so the slice
# stays in sync with the data-preparation cell.
order_moment_ra([event_moments[:, :n_ind*n_defend] for event_moments in single_game], defend_roles)[0][0]

array([11.15334, 21.35529, -4.02675, -1.01925,  9.12233, 39.32051,
       -2.0525 , -0.543  ,  0.     ,  0.     ,  0.     ,  0.     ,
        0.     ,  0.     ,  0.     ,  0.     , 21.15543, 32.71616,
        3.15725,  4.36975, 16.79035, 20.55978, -7.8315 , -3.79475,
        8.40459, 11.67492, -0.2985 ,  0.495  ])

In [None]:
# Role-order the offensive columns and show the first frame of the first event.
# The offensive players start right after the n_ind*n_defend defensive columns; the old
# offset of 10 was only correct when each player carried 2 values.
order_moment_ra([event_moments[:, n_ind*n_defend:] for event_moments in single_game], offend_roles)[0][0]

In [60]:
defend_roles[0][0]

array([0, 5, 1, 4, 6])

In [None]:
a = np.repeat(defend_roles[0]*2, [2]*n_defend, axis=1).copy() # 2 for x,y coordinates
a

#### Visualization

In [None]:
# Plot built from the defensive model's fitted means and covariances, one entry per
# component (n_comp). EmissionVis comes from the star-imported project modules.
EmissionVis(n_comp, defend_means, defend_covs).plot()

In [None]:
# Same plot for the offensive model's fitted means and covariances.
EmissionVis(n_comp, offend_means, offend_covs).plot()

---

### Next: try using the predefined positions as the indexing mechanism for imitation learning

Questions:

  - Is the latent structure model trained separately for each individual agent, or are all agents' sequences pooled together?