In [91]:
import json
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(783)
from keras.models import Sequential
from keras.layers import Dense, LSTM, Dropout, BatchNormalization
from keras import regularizers
from keras import initializers

from keras.optimizers import RMSprop as rm
from keras.optimizers import Adagrad as ada
from keras.optimizers import SGD as sgd

with open('../data/general/matches_2017_2018_v1.json') as matches_json:
    matches = pd.read_json(matches_json)

data_height = 6000
data_width = 87
label_team_width = 3
label_player_width = 29

mean = 0
std = 0
min_values = 0
max_values = 0

# min-max scaling
def min_max_scaling(data):
    global min_values
    global max_values
    min_values = np.amin(data, axis=(0,1), keepdims=True)
    max_values = np.amax(data, axis=(0,1), keepdims=True)
    return (data-min_values)/(max_values-min_values)

# normalizing
def normalize(data):
    global mean
    global std
    mean = np.mean(data, axis=(0,1), keepdims=True)
    std = np.std(data, axis=(0,1), keepdims=True)
    return (data - mean)/std

Small data arrays (5 matches - 4 of them train, 1 of them validation)

In [92]:
first_small_match_id = 60561
small_match_count = 5
timestep = 5

x_small = np.ndarray(shape=(small_match_count, data_height, data_width), dtype=np.float)
y_team_small = np.ndarray(shape=(small_match_count, data_height, label_team_width), dtype=np.int)
y_player_small = np.ndarray(shape=(small_match_count, data_height, label_player_width), dtype=np.int)
x_pass_small = None
y_pass_small = None

for id in range(small_match_count):
    x_small[id] = np.load('../data/match_' + str(id + first_small_match_id) + '/x_data.npy')
    y_team_small[id] = np.load('../data/match_' + str(id + first_small_match_id) + '/y_team_data.npy')
    y_player_small[id] = np.load('../data/match_' + str(id + first_small_match_id) + '/y_player_data.npy')
    
    y_pass = np.load('../data/match_' + str(id + first_small_match_id) + '/y_pass_data.npy')
    y_time = np.load('../data/match_' + str(id + first_small_match_id) + '/y_pass_time.npy')
    x_pass = np.ndarray(shape=(y_pass.shape[0], timestep, data_width + label_team_width + label_player_width))
    
    for i, t in enumerate(y_time):
        x_pass[i] = np.concatenate((x_small[id][t-timestep:t], y_team_small[id][t-timestep:t], \
                                    y_player_small[id][t-timestep:t]), axis=1)
    if x_pass_small is None and y_pass_small is None:
        x_pass_small = x_pass
        y_pass_small = y_pass
    else:
        x_pass_small = np.concatenate((x_pass_small, x_pass), axis=0)
        y_pass_small = np.concatenate((y_pass_small, y_pass), axis=0)

In [93]:
x_small = x_small[:, 60:5460, :]
y_team_small = y_team_small[:, 60:5460, :]
y_player_small = y_player_small[:, 60:5460, :]

In [94]:
x_small = normalize(x_small)

with open('../model/mean_std.pkl', 'wb') as file:
    pickle.dump((mean, std), file, pickle.HIGHEST_PROTOCOL)

x_pass_small = normalize(x_pass_small)

with open('../model/mean_std_pass.pkl', 'wb') as file:
    pickle.dump((mean, std), file, pickle.HIGHEST_PROTOCOL)

In [None]:
x_small = min_max_scaling(x_small)

with open('../model/min_max.pkl', 'wb') as file:
    pickle.dump((min_values, max_values), file, pickle.HIGHEST_PROTOCOL)
    
x_pass_small = min_max_scaling(x_pass_small)

with open('../model/min_max_pass.pkl', 'wb') as file:
    pickle.dump((min_values, max_values), file, pickle.HIGHEST_PROTOCOL)

In [95]:
permutation = np.random.permutation(x_small.shape[0])
x_small = x_small[permutation]
y_team_small = y_team_small[permutation]
y_player_small = y_player_small[permutation]

permutation = np.random.permutation(x_pass_small.shape[0])
x_pass_small = x_pass_small[permutation]
y_pass_small = y_pass_small[permutation]

In [96]:
size_80 = int(x_small.shape[0]*0.8)

x_train_small = x_small[:size_80]
x_test_small = x_small[size_80:]

y_train_team_small = y_team_small[:size_80]
y_test_team_small = y_team_small[size_80:]

y_train_player_small = y_player_small[:size_80]
y_test_player_small = y_player_small[size_80:]

size_80 = int(x_pass_small.shape[0]*0.8)

x_train_pass_small = x_pass_small[:size_80]
x_test_pass_small = x_pass_small[size_80:]

y_train_pass_small = y_pass_small[:size_80]
y_test_pass_small = y_pass_small[size_80:]

All data arrays (304 match - 243 of them train, 61 of them validation)

In [81]:
timestep = 5

x_all = np.ndarray(shape=(len(matches['id']), data_height, data_width))
y_team_all = np.ndarray(shape=(len(matches['id']), data_height, label_team_width))
y_player_all = np.ndarray(shape=(len(matches['id']), data_height, label_player_width))
x_pass_all = None
y_pass_all = None

idx = 0
for id in matches['id']:
    x_all[idx] = np.load('../data/match_' + str(id) + '/x_data.npy')
    y_team_all[idx] = np.load('../data/match_' + str(id) + '/y_team_data.npy')
    y_player_all[idx] = np.load('../data/match_' + str(id) + '/y_player_data.npy')
    
    y_pass = np.load('../data/match_' + str(id) + '/y_player_data.npy')
    y_time = np.load('../data/match_' + str(id) + '/y_player_time.npy')
    x_pass = np.ndarray(shape=(y_pass.shape[0], timestep, data_width + label_team_width + label_player_width))
    
    for i, t in enumerate(y_time):
        x_pass[i] = np.concatenate((x_small[id][t-timestep:t], y_team_small[id][t-timestep:t], \
                                    y_player_small[id][t-timestep:t]), axis=1)
    if x_pass_all is None and y_pass_all is None:
        x_pass_all = x_pass
        y_pass_all = y_pass
    else:
        x_pass_all = np.concatenate((x_pass_all, x_pass), axis=0)
        y_pass_all = np.concatenate((y_pass_all, y_pass), axis=0)
    idx +=1

FileNotFoundError: [Errno 2] No such file or directory: '../data/match_60561/y_player_time.npy'

In [None]:
x_all = x_all[:, 60:5460, :]
y_team_all = y_team_all[:, 60:5460, :]
y_player_all = y_player_all[:, 60:5460, :]

In [None]:
x_all = normalize(x_all)

with open('../model/mean_std.pkl', 'wb') as file:
    pickle.dump((mean, std), file, pickle.HIGHEST_PROTOCOL)
    
x_pass_all = normalize(x_pass_all)

with open('../model/mean_std_pass.pkl', 'wb') as file:
    pickle.dump((mean, std), file, pickle.HIGHEST_PROTOCOL)

In [None]:
x_all = min_max_scaling(x_all)

with open('../model/min_max.pkl', 'wb') as file:
    pickle.dump((min_values, max_values), file, pickle.HIGHEST_PROTOCOL)

x_pass_all = min_max_scaling(x_pass_all)

with open('../model/min_max_pass.pkl', 'wb') as file:
    pickle.dump((min_values, max_values), file, pickle.HIGHEST_PROTOCOL)

In [None]:
permutation = np.random.permutation(x_all.shape[0])
x_all = x_all[permutation]
y_team_sall = y_team_all[permutation]
y_player_all = y_player_all[permutation]

permutation = np.random.permutation(x_pass_all.shape[0])
x_pass_all = x_pass_all[permutation]
y_pass_all = y_pass_all[permutation]

In [None]:
size_80 = int(x_all.shape[0]*0.8)

x_train_all = x_all[:size_80]
x_test_all = x_all[size_80:]

y_train_team_all = y_team_all[:size_80]
y_test_team_all = y_team_all[size_80:]

y_train_player_all = y_player_all[:size_80]
y_test_player_all = y_player_all[size_80:]

size_80 = int(x_pass_all.shape[0]*0.8)

x_train_pass_all = x_pass_all[:size_80]
x_test_pass_all = x_pass_all[size_80:]

y_train_pass_all = y_pass_all[:size_80]
y_test_pass_all = y_pass_all[size_80:]

Team ball possesion model

In [88]:
x = x_train_small
#x = x_train_all

y = y_train_team_small
#y = y_train_team_all

timestep = 5400

batch_size = 1
epochs = 100

input_dim = 87
output_dim = 3

model_team = Sequential()
model_team.add(LSTM(300, 
               input_shape=(timestep, input_dim), 
               return_sequences=True,
               kernel_initializer=initializers.he_normal(783)))
model_team.add(BatchNormalization())
model_team.add(Dense(200, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_team.add(BatchNormalization())
model_team.add(Dense(100, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_team.add(BatchNormalization())
model_team.add(Dense(50, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_team.add(BatchNormalization())
model_team.add(Dense(25, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_team.add(BatchNormalization())
model_team.add(Dense(output_dim, kernel_initializer=initializers.he_normal(783), activation='softmax'))

opt = rm()
#opt = ada()
#opt = sgd(lr=0.005, momentum=0.9, decay=1e-5, nesterov=True)

model_team.compile(loss='categorical_crossentropy',
                   optimizer=opt,
                   metrics=['accuracy'])

history_team = model_team.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=2, callbacks=None, 
                              validation_split=0.2, validation_data=None, shuffle=True, class_weight=None, 
                              sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)

Train on 3 samples, validate on 1 samples
Epoch 1/100


 - 20s - loss: 1.3111 - acc: 0.3936 - val_loss: 1.6596 - val_acc: 0.4207


Epoch 2/100


 - 18s - loss: 0.7815 - acc: 0.6769 - val_loss: 1.4312 - val_acc: 0.4415


Epoch 3/100


 - 16s - loss: 0.6097 - acc: 0.7723 - val_loss: 1.4049 - val_acc: 0.4559


Epoch 4/100


 - 16s - loss: 0.5214 - acc: 0.8188 - val_loss: 1.3835 - val_acc: 0.4406


Epoch 5/100


 - 17s - loss: 0.4706 - acc: 0.8422 - val_loss: 1.4183 - val_acc: 0.4569


Epoch 6/100


 - 17s - loss: 0.4433 - acc: 0.8470 - val_loss: 1.3815 - val_acc: 0.4313


Epoch 7/100


 - 18s - loss: 0.3909 - acc: 0.8726 - val_loss: 1.3655 - val_acc: 0.4456


Epoch 8/100


KeyboardInterrupt: 

In [None]:
x_test = x_test_small
#x_test = x_test_all

y_test = y_test_team_small
#y_test = y_test_team_all

score = model_team.evaluate(x_test, y_test)
print('team possesion test acc: ', score[1])

In [None]:
y_team_pred = model_team.predict(x_test)

In [61]:
# summarize history for accuracy
plt.plot(history_team.history['acc'])
plt.plot(history_team.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.savefig('../graphs/team_acc.png')
plt.show()

# summarize history for loss
plt.plot(history_team.history['loss'])
plt.plot(history_team.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig('../graphs/team_loss.png')
plt.show()

In [61]:
model_team.save('../model/team_model.h5')
model_team.save_weights('../model/team_model_weight.h5')
json_data = model_team.to_json()

with open('../model/team_json_data.txt', 'w') as outfile:
    json.dump(json_data, outfile)

Player ball possesion model

In [97]:
x = np.concatenate((x_train_small, y_train_team_small), axis=2)
#x = np.concatenate((x_train_all, y_train_team_all), axis=2)

y = y_train_player_small
#y = y_train_player_all

timestep = 5400

batch_size = 1
epochs = 100

input_dim = 90
output_dim = 29

model_player = Sequential()
model_player.add(LSTM(300, 
               input_shape=(timestep, input_dim), 
               return_sequences=True,
               kernel_initializer=initializers.he_normal(783)))
model_player.add(BatchNormalization())
model_player.add(Dense(200, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_player.add(BatchNormalization())
model_player.add(Dense(100, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_player.add(BatchNormalization())
model_player.add(Dense(50, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_player.add(BatchNormalization())
model_player.add(Dense(output_dim, kernel_initializer=initializers.he_normal(783), activation='softmax'))

opt = rm()
#opt = ada()
#opt = sgd(lr=0.005, momentum=0.9, decay=1e-5, nesterov=True)

model_player.compile(loss='categorical_crossentropy',
                     optimizer=opt,
                     metrics=['accuracy'])

history_player = model_player.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=2, callbacks=None, 
                                  validation_split=0.2, validation_data=None, shuffle=True, class_weight=None, 
                                  sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)

Train on 3 samples, validate on 1 samples
Epoch 1/100


 - 20s - loss: 3.8284 - acc: 0.1059 - val_loss: 3.8446 - val_acc: 0.1613


Epoch 2/100


 - 16s - loss: 2.7308 - acc: 0.3522 - val_loss: 3.6835 - val_acc: 0.1709


Epoch 3/100


 - 17s - loss: 2.2721 - acc: 0.4600 - val_loss: 3.5844 - val_acc: 0.1802


Epoch 4/100


 - 18s - loss: 2.0151 - acc: 0.5217 - val_loss: 3.4594 - val_acc: 0.2087


Epoch 5/100


 - 18s - loss: 1.8664 - acc: 0.5590 - val_loss: 3.5679 - val_acc: 0.1865


Epoch 6/100


 - 20s - loss: 1.7767 - acc: 0.5754 - val_loss: 3.2683 - val_acc: 0.2748


Epoch 7/100


 - 18s - loss: 1.6329 - acc: 0.6151 - val_loss: 3.4195 - val_acc: 0.2231


Epoch 8/100


In [None]:
x_test = np.concatenate((x_test_small, y_test_team_small), axis=2)
#x_test = np.concatenate((x_test_all, y_test_team_all), axis=2)

y_test = y_test_player_small
#y_test = y_test_player_all

score = model_player.evaluate(x_test, y_test)
print('team possesion test acc: ', score[1])

In [None]:
y_player_pred = model_player.predict(x_test)

In [85]:
# summarize history for accuracy
plt.plot(history_player.history['acc'])
plt.plot(history_player.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.savefig('../graphs/player_acc.png')
plt.show()

# summarize history for loss
plt.plot(history_player.history['loss'])
plt.plot(history_player.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig('../graphs/player_loss.png')
plt.show()

In [86]:
model_player.save('../model/player_model.h5')
model_player.save_weights('../model/player_model_weight.h5')
json_data = model_player.to_json()

with open('../model/player_json_data.txt', 'w') as outfile:
    json.dump(json_data, outfile)

Pass model

In [None]:
x = x_train_pass_small
#x = x_train_pass_all

y = y_train_pass_small
#y = y_train_pass_all

batch_size = 100
epochs = 100
timestep = 5

input_dim = 119
output_dim = 28

model_pass = Sequential()
model_pass.add(LSTM(300, 
               input_shape=(timestep, input_dim), 
               return_sequences=True,
               kernel_initializer=initializers.he_normal(783)))
model_pass.add(LSTM(600, 
               input_shape=(timestep, input_dim), 
               return_sequences=False,
               kernel_initializer=initializers.he_normal(783)))
model_pass.add(BatchNormalization())
model_pass.add(Dense(400, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_pass.add(BatchNormalization())
model_pass.add(Dense(200, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_pass.add(BatchNormalization())
model_pass.add(Dense(100, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_pass.add(BatchNormalization())
model_pass.add(Dense(50, kernel_initializer=initializers.he_normal(783), activation='relu'))
model_pass.add(BatchNormalization())
model_pass.add(Dense(output_dim, kernel_initializer=initializers.he_normal(783), activation='softmax'))

opt = rm()
#opt = ada()
#opt = sgd(lr=0.005, momentum=0.9, decay=1e-5, nesterov=True)

model_pass.compile(loss='categorical_crossentropy',
                   optimizer=opt,
                   metrics=['accuracy'])

history_pass = model_pass.fit(x=x, y=y, batch_size=batch_size, epochs=epochs, verbose=2, callbacks=None, 
                              validation_split=0.2, validation_data=None, shuffle=True, class_weight=None, 
                              sample_weight=None, initial_epoch=0, steps_per_epoch=None, validation_steps=None)

In [None]:
x_test = x_test_pass_small
#x_test = x_test_pass_all

y_test = y_test_pass_small
#y_test = y_test_pass_all

score = model_pass.evaluate(x_test, y_test)
print('team possesion test acc: ', score[1])

In [None]:
y_pass_pred = model_pass.predict(x_test)

In [93]:
# summarize history for accuracy
plt.plot(history_pass.history['acc'])
plt.plot(history_pass.history['val_acc'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='lower right')
plt.savefig('../graphs/pass_acc.png')
plt.show()

# summarize history for loss
plt.plot(history_pass.history['loss'])
plt.plot(history_pass.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.savefig('../graphs/pass_loss.png')
plt.show()

In [90]:
model_pass.save('../model/pass_model.h5')
model_pass.save_weights('../model/pass_model_weight.h5')
json_data = model_pass.to_json()

with open('../model/pass_json_data.txt', 'w') as outfile:
    json.dump(json_data, outfile)