In [1]:
import tensorflow as tf
from tensorflow.python.ops.rnn import _transpose_batch_time
from model import sampling_rnn
import numpy as np
import pandas as pd
import copy

from helpers import get_sequences, iterate_minibatches, get_minibatches, check_game_roles_duplicates
    
import matplotlib.pyplot as plt

# import os
# os.environ["TF_CPP_MIN_LOG_LEVEL"]="3"

%matplotlib inline

from utilities import LoadData
from helpers import id_player, id_position, id_teams, get_player_trajectory, segment
from model import rnn_horizon
# ---------------------------------------------------------
%load_ext autoreload
%autoreload 2

# ---------------------------------------------------------
# directories
main_dir = '../'
game_dir = main_dir+'data/'
Data = LoadData(main_dir, game_dir)

  from ._conv import register_converters as _register_converters


In [2]:
# we will use this fixed order as the role order
roles = ['F', 'G', 'C-F', 'G-F', 'F-G', 'C', 'F-C']
role_order = {'F': 0, 'G':4, 'C-F':1, 'G-F':3, 'F-G':3, 'C':2, 'F-C':1}
# its possible that F has similar role as G-f or F-G, we create empty slots to ensure meta order

In [3]:
%%time
game_id = '0021500463'
game_data = Data.load_game(game_id)
events = pd.DataFrame(game_data['events'])

CPU times: user 11.1 s, sys: 196 ms, total: 11.3 s
Wall time: 11.1 s


### Let's just see the forward role model first, and ignore defending vs offending, use home vs visitor
### also ignore sequence length

In [4]:
id_role = id_position(events)
check_game_roles_duplicates(id_role)

0

In [5]:
homeid = events.loc[0].home['teamid']
awayid = events.loc[0].visitor['teamid']

In [6]:
events.loc[0].home['name']

'Chicago Bulls'

In [7]:
game_data['gamedate']

'2015-12-28'

In [8]:
len(events.loc[0,'moments'])

150

In [9]:
events['moments'].apply(lambda x: len(x)).values.min()

6

In [10]:
def order_moment(m, rm, ro, extreme=3):
    '''
        m: moments, rm: role model, ro: role order
        for the case of multiple players sharing the smae roles,
        (this can happen to even with hiddlen structure learning, 
         although it might be allevaited by using lienar assignment)
        so for now, we come up with an extrem case where same role are occupied by say, 3 players,
        then we still follow the meta order but create paddings 3 times.
    '''
    # reorder moments by role based mapping, where first col is player id
    role = [rm[int(i)][0] for i in m[:,0]]
    u_role = list(set(role))
    assert len(u_role) >= 2, 'it goes over extreme case'
    
    d1,d2 = m.shape
    try:
        assert d1 == 5, 'd1,d2 = {0:}, {1:}'.format(d1, d2)
    except:
        print('Warning:', d1, d2, end='\r')
    # initialize slots (5 meta positions)
    slots = np.zeros((extreme*5, d2))
    counter = {}
    for i in range(len(role)):
        role_i = role[i]
        if role_i not in counter.keys():
            counter[role_i] = 0
        else:
            # note: this could possibly be better if add linear assignment
            counter[role_i] += 1
        # filling in the slots
        slots[ro[role_i]*extreme+counter[role_i], :] = m[i, :]
#     return slots[:, 1:] # [, 1:] slice 1 since we don't need the player id anymore
    return slots[:, 1:] 

def one_hot_order(cat=None):
    from sklearn.preprocessing import OneHotEncoder
    main_dir = '../'
    game_dir = main_dir+'data/'
    Data = LoadData(main_dir, game_dir)
    cat = Data.load_csv('./meta_data/id_team.csv')
    # binary encode
    # ensure uniqueness
    assert sum(cat.team_id.duplicated()) == 0
    return dict(zip(cat.team_id, range(0, len(cat))))

def one_hot_encode(mapping, teams):
    nb_classes = len(mapping)
    targets = np.array([mapping[int(i)] for i in teams])
    one_hot_targets = np.eye(nb_classes)[targets]
    
    return one_hot_targets
    
def process_moments(moments, homeid, awayid):
    result = []
    for i in range(len(moments)):
        # ball position array
        dm = len(moments[i][5])
        ball_ind = -1
        player_ind = -1
        if dm == 11: # ball is present
            ball = np.array([moments[i][5][0][2:]])
            player_ind = 1
        elif dm == 10 and moments[i][5][0][:2] != [-1,-1]: # ball is not present
            ball = np.array([[-1, -1, -1]])
            player_ind = 0
        else:
            print('Warning!: There are less than 10 players! (skip)')
            continue
        # get player position data
        pp = np.array(moments[i][5][player_ind:])
        # home
        hpp = pp[pp[:, 0]==homeid, :]
        # visitor
        vpp = pp[pp[:, 0]==awayid, :]
           # add one hot encoding for the teams
        h_team = hpp[:, 0]
        v_team = vpp[:, 0]

        hpp = np.column_stack((hpp[:, 1:], one_hot_encode(one_hot_order(), h_team)))
        vpp = np.column_stack((vpp[:, 1:], one_hot_encode(one_hot_order(), v_team)))
        
        # reorder
        # [:,:-1] ignores the team_id and the last null element
        h = order_moment(hpp[:, :-1], id_role, role_order)
        v = order_moment(vpp[:, :-1], id_role, role_order)

        # combine home and visit
        hv = np.vstack((h,v))
        # stack on the ball position
        result.append(np.column_stack((hv, np.repeat(ball, hv.shape[0],0))))
    result = np.array(result) 
    return result.reshape(result.shape[0], -1)

In [11]:
# a = events.loc[0,'moments']
# # print(len(a), len(a)*10*2*5)
# e0 = process_moments(a)

In [12]:
events.loc[221, :].home['teamid']

1610612741

In [13]:
events.moments[221][29]

[4,
 1451358777223,
 63.0,
 None,
 None,
 [[-1, -1, 75.46226, 24.81421, 2.9923],
  [1610612741, 2200, 85.16542, 14.22962, 0.0],
  [1610612741, 201166, 66.30584, 17.15957, 0.0],
  [1610612741, 202710, 87.15369, 33.42979, 0.0],
  [1610612741, 203503, 82.85255, 33.22433, 0.0],
  [1610612761, 201960, 84.78216, 33.17034, 0.0],
  [1610612761, 200768, 74.49802, 25.36242, 0.0],
  [1610612761, 201942, 57.10716, 29.50134, 0.0],
  [1610612761, 203082, 64.77583, 19.00113, 0.0]]]

In [14]:
events.shape

(231, 8)

In [15]:
# e0.shape

In [16]:
%%time
homeid = events.loc[0].home['teamid']
awayid = events.loc[0].visitor['teamid']
single_game = []
len_th = 10
n = 0
n_short = 0
for k, v in enumerate(events.moments.values):
    print('>>>>>>>', k, end='\r')
    pm = process_moments(v,homeid, awayid)
    if pm.shape[0] >= len_th:
        single_game.append(pm)
        n += 1
    else:
        n_short += 1
print(n, n_short)
# dimensions extreme<3> x n_players<10> x (player_pos<2> + teamid_onehot<25> + ball<3>) = 900

230 1>> 230
CPU times: user 2min 34s, sys: 1.51 s, total: 2min 35s
Wall time: 2min 35s


In [17]:
single_game[0]

array([[16.79035, 20.55978,  0.     , ..., 18.38063, 14.07976,  8.56325],
       [16.47709, 20.40799,  0.     , ..., 18.29249, 14.14733,  8.97331],
       [16.21462, 20.19832,  0.     , ..., 18.17901, 14.27343,  9.20931],
       ...,
       [57.65112, 14.11472,  0.     , ..., 36.21928, 41.42113,  5.03485],
       [58.00482, 14.12736,  0.     , ..., 36.61226, 41.57427,  5.1757 ],
       [58.36874, 14.13119,  0.     , ..., 36.94039, 41.79393,  5.37554]])

In [18]:
# a,b = get_minibatches(signle_game[0], )

In [19]:
# lets first predict role F (ignored the extreme)
game_target = [np.roll(i[:, :2], -1, axis=0) for i in single_game]

In [20]:
D = 900
single_game[0].shape

(150, 900)

In [21]:
game_target[0].shape

(150, 2)

In [38]:
# single_game[0]
seq_len = 10
batch_size = 32

In [39]:
# # train x
# train_x = [get_sequences(i, seq_len, D) for i in single_game]
# train_x = np.concatenate(train_x, axis=0)
# # train y
# train_y = [get_sequences(i, seq_len, 2) for i in game_target]
# train_y = np.concatenate(train_y, axis=0)

# train_x_batches, train_y_batches = get_minibatches(train_x, train_y, batch_size) 
# train_x_batches.shape

In [40]:
# train_x_batches.shape

In [41]:
# train_y_batches.shape

In [48]:
%%time
tf.reset_default_graph()

# batch_size = 32
state_size = 100

# cell
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(state_size, forget_bias=1.)
# initial state
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)

# input 
h = tf.placeholder(tf.int32)
X = tf.placeholder(tf.float32, [batch_size, None, D], name = 'train_input')
Y = tf.placeholder(tf.float32, [batch_size, None, 2], name = 'train_label')

output, last_states = rnn_horizon(cell=lstm_cell, 
                                   initial_state=initial_state, 
                                   input_=X,
                                   batch_size=batch_size,
                                   seq_lengths=h)
# output as the prediction
# pred = tf.reshape(output, (batch_size, seq_len, 1))

print('output shape', output.shape)
pred = output
# pred = tf.reshape(output, (batch_size, h, 1))
# pred = tf.reshape(output, (batch_size, h, 1))

print('label shape:{0:} | output prediction shape: {1:}'.format(Y.shape, pred.shape))
# loss
loss = tf.losses.mean_squared_error(Y, pred)
# optimzier
opt = tf.train.AdamOptimizer().minimize(loss)

# session
sess = tf.Session()
# Initializing the variables
sess.run(tf.global_variables_initializer())

# iterate
printn = 1e2
horizon = 5
for k in range(1, horizon+1):
    print('Horizon {} ======'.format(k+1))
    # chunk it to each small window
    seq_len = k + 1
#     seq_len = 8
#     train_x = copy.deepcopy(get_sequences(train_original_x, seq_len, 1))

    # train x
    train_x = copy.deepcopy([get_sequences(i, seq_len, D) for i in single_game])
    train_x = copy.deepcopy(np.concatenate(train_x, axis=0))
    # train y
    train_y = copy.deepcopy([get_sequences(i, seq_len, 2) for i in game_target])
    train_y = copy.deepcopy(np.concatenate(train_y, axis=0))
    for i in range(1000):
        epoch_loss =0.
        for batch in iterate_minibatches(train_x, train_y, batch_size, shuffle=False):
            train_xi, train_yi = batch
            print(train_xi.shape, train_yi.shape)
            p, l, _ = sess.run([output, loss, opt], feed_dict={X: train_xi, Y: train_yi, h:seq_len})
            epoch_loss += l

        if i%printn ==0:
            print('Epoch {0:} | loss: {1:.5f}'.format(i, epoch_loss))
    
    
# # save model
# #Create a saver object which will save all the variables
# saver = tf.train.Saver()
# #save the graph
# saver.save(sess, save_path='./models/test_model')

>>>>>>>>>>>>>> Tensor("Placeholder:0", dtype=int32) <<<<<<<<<<<<<<


output shape (32, ?, 2)
label shape:(32, ?, 2) | output prediction shape: (32, ?, 2)
(32, 2, 900) (32, 2, 2)


InvalidArgumentError: Tried to read from index 2 but array size is: 2
	 [[Node: rnn/while/TensorArrayReadV3 = TensorArrayReadV3[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3/Enter, rnn/while/add, rnn/while/TensorArrayReadV3/Enter_1)]]

Caused by op 'rnn/while/TensorArrayReadV3', defined at:
  File "/home/sam/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/sam/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/sam/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/sam/anaconda3/lib/python3.6/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/sam/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2717, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2827, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2881, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-48-fcc02342d898>", line 1, in <module>
    get_ipython().run_cell_magic('time', '', "tf.reset_default_graph()\n\n# batch_size = 32\nstate_size = 100\n\n# cell\nlstm_cell = tf.nn.rnn_cell.BasicLSTMCell(state_size, forget_bias=1.)\n# initial state\ninitial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)\n\n# input \nh = tf.placeholder(tf.int32)\nX = tf.placeholder(tf.float32, [batch_size, None, D], name = 'train_input')\nY = tf.placeholder(tf.float32, [batch_size, None, 2], name = 'train_label')\n\noutput, last_states = rnn_horizon(cell=lstm_cell, \n                                   initial_state=initial_state, \n                                   input_=X,\n                                   batch_size=batch_size,\n                                   seq_lengths=h)\n# output as the prediction\n# pred = tf.reshape(output, (batch_size, seq_len, 1))\n\nprint('output shape', output.shape)\npred = output\n# pred = tf.reshape(output, (batch_size, h, 1))\n# pred = tf.reshape(output, (batch_size, h, 1))\n\nprint('label shape:{0:} | output prediction shape: {1:}'.format(Y.shape, pred.shape))\n# loss\nloss = tf.losses.mean_squared_error(Y, pred)\n# optimzier\nopt = tf.train.AdamOptimizer().minimize(loss)\n\n# session\nsess = tf.Session()\n# Initializing the variables\nsess.run(tf.global_variables_initializer())\n\n# iterate\nprintn = 1e2\nhorizon = 5\nfor k in range(1, horizon+1):\n    print('Horizon {} ======'.format(k+1))\n    # chunk it to each small window\n    seq_len = k + 1\n#     seq_len = 8\n#     train_x = copy.deepcopy(get_sequences(train_original_x, seq_len, 1))\n\n    # train x\n    train_x = copy.deepcopy([get_sequences(i, seq_len, D) for i in single_game])\n    train_x = copy.deepcopy(np.concatenate(train_x, axis=0))\n    # train y\n    train_y = copy.deepcopy([get_sequences(i, seq_len, 2) for i in game_target])\n    train_y = copy.deepcopy(np.concatenate(train_y, axis=0))\n    for i in range(1000):\n        epoch_loss =0.\n        for batch in iterate_minibatches(train_x, train_y, batch_size, shuffle=False):\n            train_xi, train_yi = batch\n            print(train_xi.shape, train_yi.shape)\n            p, l, _ = sess.run([output, loss, opt], feed_dict={X: train_xi, Y: train_yi, h:seq_len})\n            epoch_loss += l\n\n        if i%printn ==0:\n            print('Epoch {0:} | loss: {1:.5f}'.format(i, epoch_loss))\n    \n    \n# # save model\n# #Create a saver object which will save all the variables\n# saver = tf.train.Saver()\n# #save the graph\n# saver.save(sess, save_path='./models/test_model')")
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2115, in run_cell_magic
    result = fn(magic_arg_s, cell)
  File "<decorator-gen-59>", line 2, in time
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/magic.py", line 188, in <lambda>
    call = lambda f, *a, **k: f(*a, **k)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/IPython/core/magics/execution.py", line 1185, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 20, in <module>
  File "/home/sam/Desktop/raptors/code/model.py", line 148, in rnn_horizon
    outputs_ta, last_state, _ = tf.nn.raw_rnn(cell, loop_fn)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 1157, in raw_rnn
    swap_memory=swap_memory)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 3202, in while_loop
    result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2940, in BuildLoop
    pred, body, original_loop_vars, loop_vars, shape_invariants)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/control_flow_ops.py", line 2877, in _BuildLoop
    body_result = body(*packed_vars_for_body)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/rnn.py", line 1118, in body
    next_time, next_output, cell_state, loop_state)
  File "/home/sam/Desktop/raptors/code/model.py", line 126, in loop_fn
    input_original = inputs_ta.read(time)  # tensor of shape (None, input_dim)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py", line 58, in fn
    return method(self, *args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py", line 58, in fn
    return method(self, *args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/util/tf_should_use.py", line 58, in fn
    return method(self, *args, **kwargs)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 861, in read
    return self._implementation.read(index, name=name)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/tensor_array_ops.py", line 260, in read
    name=name)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 6419, in tensor_array_read_v3
    dtype=dtype, name=name)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3290, in create_op
    op_def=op_def)
  File "/home/sam/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1654, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

InvalidArgumentError (see above for traceback): Tried to read from index 2 but array size is: 2
	 [[Node: rnn/while/TensorArrayReadV3 = TensorArrayReadV3[dtype=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"](rnn/while/TensorArrayReadV3/Enter, rnn/while/add, rnn/while/TensorArrayReadV3/Enter_1)]]
