In [1]:
import numpy as np
import pandas as pd
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

Using TensorFlow backend.


# Load data

The first cell loads the encoded formation images; they are split into home/away images later, in the training cell. Each image has shape 9x11x3 and is an input to the siamese net. The second cell loads the match data.

In [2]:
# Load the encoded formation array from disk. The training cell indexes it as
# formations[:, 0, :, :, :] (home) and formations[:, 1, :, :, :] (away).
formations = np.load("formations.npy")

In [22]:
# Load the resampled feature table and its labels.
# The raw DataFrame keeps its own name so that `X` is only ever the final
# ndarray and the column names stay available (via `X_raw` / `keep`).
X_raw = pd.read_csv("data/X_resampled_formation.csv")
y = pd.read_csv("data/y_resampled_formation.csv").values

# Columns to drop: every `<side>_player_X<k>` / `<side>_player_Y<k>` column for
# k = 1..11 on both sides, generated in the order home-X, home-Y, away-X, away-Y.
# NOTE(review): presumably these are player position coordinates that are
# already represented by the formation images -- confirm.
delete = [
    "{}_player_{}{}".format(side, axis, slot)
    for side in ("home", "away")
    for axis in ("X", "Y")
    for slot in range(1, 12)
]
# NOTE(review): this single away-only column is dropped as well; presumably it
# has no home counterpart and would make the two halves unequal -- confirm.
delete.append('away_player_1_attacking_work_rate_high')

# Fail loudly (and list every offender) if the CSV schema has drifted.
missing = [col for col in delete if col not in X_raw.columns]
if missing:
    raise ValueError("columns listed in `delete` are missing from X: {}".format(missing))

# Remaining column names, in their original order.
dropped = set(delete)
keep = [col for col in X_raw.columns if col not in dropped]

# Plain ndarray of the kept columns; later cells slice it positionally.
X = X_raw[keep].values

In [18]:
# Find the position of the 'home_player_11_defensive_work_rate_medium' column.
# `X` is already a plain ndarray at this point (converted with `.values` in the
# loading cell), so it has no `.columns`; the surviving column names live in
# `keep`, in the same order as the columns of `X`.
l = list(keep)
print(np.where(np.array(l) == 'home_player_11_defensive_work_rate_medium'))


(array([428]),)
home_buildUpPlaySpeed away_buildUpPlaySpeed
-
home_buildUpPlayDribbling away_buildUpPlayDribbling
-
home_buildUpPlayPassing away_buildUpPlayPassing
-
home_chanceCreationPassing away_chanceCreationPassing
-
home_chanceCreationCrossing away_chanceCreationCrossing
-
home_chanceCreationShooting away_chanceCreationShooting
-
home_defencePressure away_defencePressure
-
home_defenceAggression away_defenceAggression
-
home_defenceTeamWidth away_defenceTeamWidth
-
home_player_1_overall_rating away_player_1_overall_rating
-
home_player_1_potential away_player_1_potential
-
home_player_1_crossing away_player_1_crossing
-
home_player_1_finishing away_player_1_finishing
-
home_player_1_heading_accuracy away_player_1_heading_accuracy
-
home_player_1_short_passing away_player_1_short_passing
-
home_player_1_volleys away_player_1_volleys
-
home_player_1_dribbling away_player_1_dribbling
-
home_player_1_curve away_player_1_curve
-
home_player_1_free_kick_accuracy away_player_1_free_kick

# Construct Siamese Model

In [58]:
def get_siamese_model(formation_input_shape, attr_input_shape):
    """Build and compile the two-sided (home/away) match classifier.

    Each side supplies a formation tensor and an attribute vector. A single
    CNN encodes both formation inputs and a single MLP encodes both attribute
    inputs, so the weights are shared between home and away. The four 64-unit
    encodings are concatenated and passed to a final MLP that ends in a
    3-unit softmax.

    Args:
        formation_input_shape: shape of one formation input (no batch axis).
        attr_input_shape: shape of one attribute input (no batch axis).

    Returns:
        A compiled Keras ``Model`` whose inputs are, in order,
        [home formation, home attributes, away formation, away attributes]
        and whose output is the 3-way softmax. It is compiled with
        categorical cross-entropy, Adam (lr=0.00005) and an accuracy metric.
    """
    # Input placeholders: formations first, then attributes.
    formation_in_home = Input(formation_input_shape)
    formation_in_away = Input(formation_input_shape)
    attr_in_home = Input(attr_input_shape)
    attr_in_away = Input(attr_input_shape)

    # Shared formation encoder: conv -> pool -> flatten -> 64-unit dense.
    formation_encoder = Sequential([
        Conv2D(16, (2,2), activation='relu', input_shape=formation_input_shape),
        MaxPooling2D(),
        Flatten(),
        Dense(64, activation='sigmoid'),
    ])
    formation_code_home = formation_encoder(formation_in_home)
    formation_code_away = formation_encoder(formation_in_away)

    # Shared attribute encoder: 32 -> 32 -> 64 dense stack.
    attr_encoder = Sequential([
        Dense(32, activation='sigmoid', input_shape=attr_input_shape),
        Dense(32, activation='relu'),
        Dense(64, activation='tanh'),
    ])
    attr_code_home = attr_encoder(attr_in_home)
    attr_code_away = attr_encoder(attr_in_away)

    # Merge the four encodings: home (attributes, formation), then away.
    merged = Concatenate()(
        [attr_code_home, formation_code_home, attr_code_away, formation_code_away]
    )

    # Classification head; its input width is the four 64-unit encodings.
    classifier = Sequential([
        Dense(64, activation='tanh',input_shape=(64*4,)),
        Dense(32, activation='relu'),
        Dense(3,activation='softmax'),
    ])
    prediction = classifier(merged)

    # Wire inputs to the output and compile.
    siamese_net = Model(
        inputs=[formation_in_home, attr_in_home, formation_in_away, attr_in_away],
        outputs=prediction,
    )
    siamese_net.compile(
        loss="categorical_crossentropy",
        optimizer=Adam(lr = 0.00005),
        metrics=['accuracy'],
    )
    return siamese_net

# Train the Siamese Net

Train with k-fold cross validation. (Currently disabled: the cell below is entirely commented out.)

In [None]:
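# Disabled: stale k-fold experiment. It calls get_siamese_model with a single
# argument (the function now takes a formation shape and an attribute shape) and
# uses `images` / `label_data_formation`, which no cell shown here defines.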
# num_splits = 3
# skf = StratifiedKFold(n_splits=num_splits)
# for i, (train_index, val_index) in enumerate(skf.split(images, label_data_formation.argmax(1))):
    
#     # we need to split the images into home and away teams
#     images_A_train_kf, images_B_train_kf = images[train_index, 0, :, :, :], images[train_index, 1, :, :, :]
#     images_A_val_kf, images_B_val_kf= images[val_index, 0, :, :, :], images[val_index, 1, :, :, :]
    
#     y_train_kf, y_val_kf = label_data_formation[train_index], label_data_formation[val_index]
    
#     siamese_net = get_siamese_model(images_A_train_kf[0].shape)
#     siamese_net.fit(x=[images_A_train_kf, images_B_train_kf], y=y_train_kf, batch_size=16, epochs=1)
    
#     print(siamese_net.evaluate(x=[images_A_val_kf, images_B_val_kf], y=y_val_kf))

Train on the first 80% of the dataset and validate on the remaining 20%.

In [59]:
# Split the formation array into home/away images.
# Axis 1 of `formations` selects the side: index 0 = home, index 1 = away.
formations_home = formations[:, 0, :, :, :]
formations_away = formations[:, 1, :, :, :]
formation_away = formations_away  # original (misspelled) name kept as an alias

# Positional hold-out split: first 80% of the rows train, the rest validate.
n = formations.shape[0]
m = int(n * .8)

# Column where the away-team attributes start in `X`. The column-lookup cell
# reports 'home_player_11_defensive_work_rate_medium' at index 428.
# NOTE(review): presumably that is the last home column -- confirm.
n_home_attr = 429
attr_home = X[:, :n_home_attr]
attr_away = X[:, n_home_attr:]

# Catch misaligned inputs here rather than deep inside Keras.
assert X.shape[0] == n and y.shape[0] == n, \
    "formations, X and y must have the same number of rows"
assert attr_home.shape[1] == attr_away.shape[1] == X.shape[1] // 2, \
    "home and away attribute blocks must have the same width"

formations_home_train = formations_home[:m]
formations_away_train = formations_away[:m]
formations_home_test = formations_home[m:]
formations_away_test = formations_away[m:]

attr_home_train = attr_home[:m]
attr_away_train = attr_away[:m]
attr_home_test = attr_home[m:]
attr_away_test = attr_away[m:]

label_train = y[:m]
label_test = y[m:]

# The asserts above guarantee n_home_attr == X.shape[1] // 2, the width the
# model was previously built with.
siamese_net = get_siamese_model(formations_home_train.shape[1:], (n_home_attr,))
siamese_net.fit(x=[formations_home_train, attr_home_train, formations_away_train, attr_away_train],
                y=label_train,
                validation_data=([formations_home_test, attr_home_test, formations_away_test, attr_away_test], label_test),
                batch_size=2, epochs=10)

Train on 18633 samples, validate on 4659 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x1a47ca8908>

Get the "predictions" of the siamese net as inputs to train the Dense Net.

In [13]:
# Run the trained siamese net over every sample. The model is built with four
# inputs, in the order home formation, home attributes, away formation, away
# attributes, so all four must be supplied here.
# NOTE(review): this covers both the training rows and the held-out rows --
# confirm that is what the dense net downstream should see.
attr_width = X.shape[1] // 2  # per-side attribute width the model was built with
siamese_train_predictions = siamese_net.predict(x=[
    formations[:, 0, :, :, :], X[:, :attr_width],
    formations[:, 1, :, :, :], X[:, attr_width:],
])

In [24]:
# Accuracy of the siamese net: share of samples whose arg-max predicted class
# equals the arg-max of the label row.
# NOTE(review): `label_data_formation` is not defined in the cells shown here --
# confirm where it comes from and that it is row-aligned with the predictions.
pred = siamese_train_predictions.argmax(axis=1)
true = label_data_formation.argmax(axis=1)
print((pred == true).sum() / len(pred))


0.3697406834964795


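# Construct Dense Net with Output of Siamese Net

**TODO:** this section has no code cell. The cells below call `get_dense_model`, but its definition does not appear in this notebook.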

# Train Dense Net with Output of Siamese Net

Train with k-fold cross validation.

In [83]:
# Stratified 3-fold cross validation of the dense net.
# NOTE(review): `get_dense_model`, `data_formation` and `label_data_formation`
# are not defined in the cells shown here -- confirm where they come from.
# NOTE(review): the siamese predictions used as features were produced once,
# outside this loop; if the siamese net saw the validation rows during its own
# training, these fold scores may be optimistic -- confirm.
num_splits = 3
skf = StratifiedKFold(n_splits=num_splits)
for i, (train_index, val_index) in enumerate(
        skf.split(data_formation, label_data_formation.argmax(1))):

    # Training part of this fold: match features, siamese predictions, labels.
    data_formation_train_kf = data_formation[train_index]
    siamese_predictions_train_kf = siamese_train_predictions[train_index]
    y_train_kf = label_data_formation[train_index]

    # Validation part of this fold.
    data_formation_val_kf = data_formation[val_index]
    siamese_predictions_val_kf = siamese_train_predictions[val_index]
    y_val_kf = label_data_formation[val_index]

    # A fresh model per fold, so no weights carry over between folds.
    dense_net = get_dense_model(data_formation[0].shape, siamese_train_predictions[0].shape)
    dense_net.fit(
        x=[data_formation_train_kf, siamese_predictions_train_kf],
        y=y_train_kf,
        batch_size=16,
        epochs=1,
    )

    print(dense_net.evaluate(x=[data_formation_val_kf, siamese_predictions_val_kf], y=y_val_kf))

Epoch 1/1
1.0986455347905264
Epoch 1/1
1.0987146531473049
Epoch 1/1
1.0987583121830873


Train the dense net with the entire dataset.

In [81]:
# Fit a single dense net on every sample (one epoch, batches of 16), feeding it
# the match features together with the siamese net's predictions.
# NOTE(review): `get_dense_model`, `data_formation` and `label_data_formation`
# are not defined in the cells shown here -- confirm where they come from.
dense_net = get_dense_model(
    data_formation[0].shape,
    siamese_train_predictions[0].shape,
)
dense_net.fit(
    x=[data_formation, siamese_train_predictions],
    y=label_data_formation,
    batch_size=16,
    epochs=1,
)

Epoch 1/1


<keras.callbacks.History at 0x7fdbd8a3d748>