In [None]:
import tensorflow as tf
from tensorflow.python.ops.rnn import _transpose_batch_time
from model import sampling_rnn
import numpy as np
import pandas as pd
import copy

from helpers import get_sequences, iterate_minibatches, get_minibatches, check_game_roles_duplicates
    
import matplotlib.pyplot as plt

# import os
# os.environ["TF_CPP_MIN_LOG_LEVEL"]="3"

%matplotlib inline

from utilities import LoadData
from helpers import id_player, id_position, id_teams, get_player_trajectory, segment
from model import rnn_horizon
# ---------------------------------------------------------
%load_ext autoreload
%autoreload 2

# ---------------------------------------------------------
# directories
# Repository root relative to this notebook; the game files live in its data/ sub-folder.
main_dir = '../'
game_dir = '{}data/'.format(main_dir)
# Project loader used by the cells below to read games.
Data = LoadData(main_dir, game_dir)

In [None]:
# Position labels handled by this notebook.
roles = ['F', 'G', 'C-F', 'G-F', 'F-G', 'C', 'F-C']
# Fixed "meta" ordering: every label is assigned to one of five slot groups (0-4).
# Hybrid labels that describe similar roles share a group (C-F/F-C -> 1, G-F/F-G -> 3);
# groups that end up unused in a moment stay as empty padding so the order is stable.
role_order = {
    label: slot
    for slot, labels in enumerate([('F',), ('C-F', 'F-C'), ('C',), ('G-F', 'F-G'), ('G',)])
    for label in labels
}

In [None]:
%%time
# Identifier of the single game analysed in this notebook.
game_id = '0021500463'
game_data = Data.load_game(game_id)
# One row per event; later cells read its 'home', 'visitor' and 'moments' columns.
events = pd.DataFrame(game_data['events'])

### Start with the forward-role model only: ignore offense vs. defense and split by home vs. visitor instead.
### Sequence length is ignored for now as well.

In [None]:
# Player-id lookup built from the events; order_moment reads id_role[player_id][0] as the role label.
id_role = id_position(events)
# Project helper; presumably reports players listed under more than one role - confirm in helpers.
check_game_roles_duplicates(id_role)

In [None]:
# Team ids of both sides, read from the first event of the game.
homeid, awayid = (events.loc[0][side]['teamid'] for side in ('home', 'visitor'))

In [None]:
# Show the home team's name as stored on the first event.
events.loc[0].home['name']

In [None]:
# Show the game date stored in the loaded game record.
game_data['gamedate']

In [None]:
# Number of moments recorded for the first event.
len(events.loc[0,'moments'])

In [None]:
# Smallest number of moments found in any single event.
events['moments'].map(len).values.min()

In [None]:
def order_moment(m, rm, ro, extreme=3):
    '''
    Reorder the player rows of one moment into a fixed, role-based slot layout.

    Several players can share the same role (this can happen even with hidden
    structure learning, although linear assignment might alleviate it), so every
    one of the 5 meta positions gets `extreme` consecutive rows; rows that are
    not needed stay zero-filled padding.

    Parameters
    ----------
    m : 2-D array, one row per player; column 0 is the player id.
    rm : role model, indexed as rm[player_id][0] to obtain the role label.
    ro : role order, mapping a role label to its meta position (0-4).
    extreme : maximum number of players allowed in one meta position.

    Returns
    -------
    Array of shape (extreme * 5, m.shape[1] - 1): the reordered rows without the
    player-id column.

    Raises
    ------
    ValueError
        If more than `extreme` players fall into the same meta position.
    '''
    d1, d2 = m.shape
    if d1 != 5:
        # Not fatal: missing players simply leave more padding rows.
        print('Warning:', d1, d2, end='\r')

    # initialize slots (5 meta positions, `extreme` rows each)
    slots = np.zeros((extreme * 5, d2))
    # Players already placed per META POSITION. Counting per role label is not
    # enough: different labels can share a position (e.g. 'C-F' and 'F-C'), and
    # their players would otherwise be written to the same row.
    counter = {}
    for i, player_id in enumerate(m[:, 0]):
        meta = ro[rm[int(player_id)][0]]
        offset = counter.get(meta, 0)
        if offset >= extreme:
            # Writing further would spill into the next meta position's rows.
            raise ValueError(
                'more than {0:} players share meta position {1:}'.format(extreme, meta))
        # note: this could possibly be better with linear assignment
        slots[meta * extreme + offset, :] = m[i, :]
        counter[meta] = offset + 1
    # drop column 0: the player id is not needed once the rows are ordered
    return slots[:, 1:]

def one_hot_order(cat=None):
    '''
    Build the team-id -> one-hot column index mapping.

    Parameters
    ----------
    cat : optional table with a `team_id` column. When omitted, the table is
        loaded from './meta_data/id_team.csv' through the project loader.

    Returns
    -------
    dict mapping every team id to its row position in the table.

    Raises
    ------
    ValueError
        If a team id appears more than once.
    '''
    if cat is None:
        main_dir = '../'
        game_dir = main_dir+'data/'
        cat = LoadData(main_dir, game_dir).load_csv('./meta_data/id_team.csv')
    # ensure uniqueness, otherwise two rows would compete for the same key
    if cat.team_id.duplicated().any():
        raise ValueError('team_id values must be unique')
    return dict(zip(cat.team_id, range(len(cat))))

def one_hot_encode(mapping, teams):
    '''
    One-hot encode a sequence of team ids.

    Parameters
    ----------
    mapping : dict from team id (int) to column index, e.g. from one_hot_order().
    teams : iterable of team ids; each value is cast with int() before lookup.

    Returns
    -------
    Array of shape (len(teams), len(mapping)) with a single 1 per row.
    '''
    nb_classes = len(mapping)
    # Explicit integer dtype: an empty `teams` would otherwise produce a float
    # array, which numpy refuses to use as an index.
    targets = np.array([mapping[int(i)] for i in teams], dtype=int)
    return np.eye(nb_classes)[targets]
    
def process_moments(moments, homeid, awayid):
    '''
    Turn the raw moments of one event into a 2-D feature matrix.

    Each usable moment becomes one row: home players followed by visiting
    players, both in role order (see order_moment), every player row holding the
    position columns, the team one-hot encoding and the ball columns.

    Relies on the notebook globals `id_role` and `role_order`.

    Parameters
    ----------
    moments : sequence of raw moments; element [5] of a moment is the list of
        tracked entities (the ball first when present, then the players), each
        laid out as [team_id, player_id, ...coordinates].
    homeid, awayid : team ids used to split the player rows.

    Returns
    -------
    Array of shape (n_usable_moments, n_features); an empty (0, 0) array when no
    moment is usable.
    '''
    # Loaded on first use only: the mapping is identical for every moment, so it
    # must not be re-read from disk inside the loop.
    team_mapping = None
    frames = []
    for moment in moments:
        entities = moment[5]
        dm = len(entities)
        if dm == 11: # ball is present
            ball = np.array([entities[0][2:]])
            player_ind = 1
        elif dm == 10 and entities[0][:2] != [-1,-1]: # ball is not present
            ball = np.array([[-1, -1, -1]])
            player_ind = 0
        else:
            print('Warning!: There are less than 10 players! (skip)')
            continue
        if team_mapping is None:
            team_mapping = one_hot_order()

        # get player position data
        pp = np.array(entities[player_ind:])
        # home
        hpp = pp[pp[:, 0]==homeid, :]
        # visitor
        vpp = pp[pp[:, 0]==awayid, :]
        h_team = hpp[:, 0]
        v_team = vpp[:, 0]

        # [:, 1:-1] drops the team_id and the last null element BEFORE the one-hot
        # columns are appended; slicing afterwards would cut off the last team
        # class instead of the null column.
        hpp = np.column_stack((hpp[:, 1:-1], one_hot_encode(team_mapping, h_team)))
        vpp = np.column_stack((vpp[:, 1:-1], one_hot_encode(team_mapping, v_team)))

        # reorder by role
        h = order_moment(hpp, id_role, role_order)
        v = order_moment(vpp, id_role, role_order)

        # combine home and visit
        hv = np.vstack((h,v))
        # stack on the ball position (same ball columns on every player row)
        frames.append(np.column_stack((hv, np.repeat(ball, hv.shape[0],0))))

    if not frames:
        # reshape(0, -1) is undefined for an empty array; return an explicit
        # zero-row matrix so callers can keep testing .shape[0]
        return np.empty((0, 0))
    result = np.array(frames)
    return result.reshape(result.shape[0], -1)

In [None]:
# a = events.loc[0,'moments']
# # print(len(a), len(a)*10*2*5)
# e0 = process_moments(a)

In [None]:
# Spot-check the home team id stored on event 221.
events.loc[221, :].home['teamid']

In [None]:
# Inspect one raw moment: index 29 of event 221.
events.moments[221][29]

In [None]:
# Dimensions of the events table.
events.shape

In [None]:
# e0.shape

In [None]:
%%time
# Team ids are read from the first event and reused for every event of the game.
homeid, awayid = (events.loc[0][side]['teamid'] for side in ('home', 'visitor'))
len_th = 10       # minimum number of usable moments for an event to be kept
single_game = []  # processed feature matrices of the kept events
n = n_short = 0   # kept / discarded event counters
for event_idx, event_moments in enumerate(events.moments.values):
    print('>>>>>>>', event_idx, end='\r')
    features = process_moments(event_moments, homeid, awayid)
    if features.shape[0] < len_th:
        n_short += 1
        continue
    single_game.append(features)
    n += 1
print(n, n_short)
# dimensions extreme<3> x n_players<10> x (player_pos<2> + teamid_onehot<25> + ball<3>) = 900

In [None]:
# Display the processed feature matrix of the first kept event (one row per usable moment).
single_game[0]

In [None]:
# a,b = get_minibatches(single_game[0], )

In [None]:
# Start by predicting role F only (ignoring the extra "extreme" slots): the target is the
# first two feature columns of each event, shifted one time step ahead.
# NOTE(review): np.roll wraps around, so the last row's target is the event's first row.
game_target = [np.roll(event[:, :2], shift=-1, axis=0) for event in single_game]

In [None]:
# Feature width of one processed moment. Read from the data instead of hard-coding it so
# the placeholders built later always match the preprocessing output
# (900 = 30 player slots x 30 features with the current settings).
D = single_game[0].shape[1]
single_game[0].shape

In [None]:
# Shape of the first target array: same number of rows as single_game[0], 2 columns.
game_target[0].shape

In [None]:
# single_game[0]
# Window length for chunking sequences (the training cell reassigns seq_len inside its loop).
seq_len = 10
# Sequences per minibatch; also fixes the batch dimension of the placeholders and the LSTM zero state.
batch_size = 32

In [None]:
# # train x
# train_x = [get_sequences(i, seq_len, D) for i in single_game]
# train_x = np.concatenate(train_x, axis=0)
# # train y
# train_y = [get_sequences(i, seq_len, 2) for i in game_target]
# train_y = np.concatenate(train_y, axis=0)

# train_x_batches, train_y_batches = get_minibatches(train_x, train_y, batch_size) 
# train_x_batches.shape

In [None]:
# train_x_batches.shape

In [None]:
# train_y_batches.shape

In [None]:
%%time
# Start from an empty default graph so that re-running this cell does not stack duplicate ops.
tf.reset_default_graph()

# batch_size = 32
state_size = 512

# cell
lstm_cell = tf.nn.rnn_cell.BasicLSTMCell(state_size, forget_bias=1.)
# initial state
initial_state = lstm_cell.zero_state(batch_size, dtype=tf.float32)

# input
h = tf.placeholder(tf.int32)  # sequence length, handed to rnn_horizon as seq_lengths
X = tf.placeholder(tf.float32, [batch_size, None, D], name = 'train_input')
Y = tf.placeholder(tf.float32, [batch_size, None, 2], name = 'train_label')
print('=================0')
output, last_states = rnn_horizon(cell=lstm_cell, 
                                  initial_state=initial_state, 
                                  input_=X,
                                  batch_size=batch_size,
                                  seq_lengths=h)

print('output shape, last_states', output.shape)#, last_states.shape)
print('=================1')
# the network output is used directly as the prediction
pred = output

print('=================label shape:{0:} | output prediction shape: {1:}'.format(Y.shape, pred.shape))
# loss
loss = tf.losses.mean_squared_error(Y, pred)
print('=================2')
# optimizer
opt = tf.train.AdamOptimizer().minimize(loss)
print('=================3')
# session
sess = tf.Session()
print('=================4')
# Initializing the variables
sess.run(tf.global_variables_initializer())
print('=================5')
# iterate
printn = 1e2   # report the accumulated loss every `printn` epochs
horizon = 5
for k in range(1, horizon+1):
    print('Horizon {} ======'.format(k+1))
    # chunk it to each small window
#     seq_len = k + 1
    seq_len = 5

    # np.concatenate always allocates a new array, so no deepcopy is needed to
    # keep the training arrays independent of single_game / game_target.
    # train x
    train_x = np.concatenate([get_sequences(i, seq_len, D) for i in single_game], axis=0)
    print('train_x.shape:', train_x.shape)
    # train y
    train_y = np.concatenate([get_sequences(i, seq_len, 2) for i in game_target], axis=0)
    print('train_y.shape:', train_y.shape)
    for i in range(1000):
        epoch_loss =0.
        for train_xi, train_yi in iterate_minibatches(train_x, train_y, batch_size, shuffle=False):
            # Only the loss and the update op are fetched: the predictions were
            # never used, and fetching them copies a tensor out on every step.
            l, _ = sess.run([loss, opt], feed_dict={X: train_xi, Y: train_yi, h:seq_len})
            epoch_loss += l

        if i%printn ==0:
            # sum of the minibatch losses of this epoch (not averaged)
            print('Epoch {0:} | loss: {1:.5f}'.format(i, epoch_loss))
    
    
# # save model
# #Create a saver object which will save all the variables
# saver = tf.train.Saver()
# #save the graph
# saver.save(sess, save_path='./models/test_model')