In [67]:
import numpy as np
import pandas as pd
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

# Load data

The first cell loads the encoded images and splits them into two (home/away). The images are of shape 9x11x3 and are inputs for the siamese net. The second cell loads the match data. 

In [68]:
images = np.load("data/formations.npy")

In [69]:
data_formation = pd.read_csv("data/X_resampled_formation.csv").values
label_data_formation = pd.read_csv("data/y_resampled_formation.csv").values

# Construct Siamese Model

In [70]:
def get_siamese_model(input_shape):
    """
        Model architecture
    """
    
    # Define the tensors for the two input images
    left_input = Input(input_shape)
    right_input = Input(input_shape)
    
    # Convolutional Neural Network
    model = Sequential()
    model.add(Conv2D(64, (2,2), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D())
    model.add(Flatten())
    model.add(Dense(128, activation='sigmoid'))
    
    # Generate the encodings (feature vectors) for the two images
    encoded_l = model(left_input)
    encoded_r = model(right_input)
    
    # Add a customized layer to compute the absolute difference between the encodings
    L1_layer = Lambda(lambda tensors:K.abs(tensors[0] - tensors[1]))
    L1_distance = L1_layer([encoded_l, encoded_r])
    
    # Add a dense layer with a softmax unit to generate the probabilities of home and away team winning
    prediction = Dense(3,activation='softmax')(L1_distance)
    
    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[left_input,right_input], outputs=prediction)
    optimizer = Adam(lr = 0.00006)
    siamese_net.compile(loss="binary_crossentropy",optimizer=optimizer)
    # return the model
    return siamese_net

# Train the Siamese Net

Train with k-fold cross validation. 

In [71]:
num_splits = 3
skf = StratifiedKFold(n_splits=num_splits)
for i, (train_index, val_index) in enumerate(skf.split(images, label_data_formation.argmax(1))):
    
    # we need to split the images into home and away teams
    images_A_train_kf, images_B_train_kf = images[train_index, 0, :, :, :], images[train_index, 1, :, :, :]
    images_A_val_kf, images_B_val_kf= images[val_index, 0, :, :, :], images[val_index, 1, :, :, :]
    
    y_train_kf, y_val_kf = label_data_formation[train_index], label_data_formation[val_index]
    
    siamese_net = get_siamese_model(images_A_train_kf[0].shape)
    siamese_net.fit(x=[images_A_train_kf, images_B_train_kf], y=y_train_kf, batch_size=16, epochs=1)
    
    print(siamese_net.evaluate(x=[images_A_val_kf, images_B_val_kf], y=y_val_kf))

Epoch 1/1
0.6363654960252033
Epoch 1/1
0.6363200212433927
Epoch 1/1
0.6360298367212385


Train with entire training dataset.

In [76]:
# split images into home/away teams
images_A = images[:, 0, :, :, :]
images_B = images[:, 1, :, :, :]

siamese_net = get_siamese_model(images_A[0].shape)
siamese_net.fit(x=[images_A, images_B], y=label_data_formation, batch_size=16, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x7fdbdbc3dac8>

Get the "predictions" of the siamese net as inputs to train the Dense Net.

In [77]:
siamese_train_predictions = siamese_net.predict(x=[images_A, images_B])

# Construct Dense Net with Output of Siamese Net

In [80]:
def get_dense_model(team_data_input_shape, siamese_model_output_shape):
    """
        Model architecture
    """
    
    input1 = keras.layers.Input(shape=team_data_input_shape)
    input2 = keras.layers.Input(shape=siamese_model_output_shape)
    concat_inputs = keras.layers.Concatenate()([input1, input2])
    
    out = Dense(units=256, activation='softmax')(concat_inputs)
    out = Dense(units=3, activation='softmax')(out)
    
    
    dense_net = keras.models.Model(inputs=[input1, input2], outputs=out)
    dense_net.compile(loss='categorical_crossentropy', optimizer='adam')
    return dense_net

# Train Dense Net with Output of Siamese Net

Train with k-fold cross validation.

In [83]:
num_splits = 3
skf = StratifiedKFold(n_splits=num_splits)
for i, (train_index, val_index) in enumerate(skf.split(data_formation, label_data_formation.argmax(1))):
    
    # we need to split the images into home and away teams
    data_formation_train_kf, siamese_predictions_train_kf = data_formation[train_index], siamese_train_predictions[train_index]
    data_formation_val_kf, siamese_predictions_val_kf= data_formation[val_index], siamese_train_predictions[val_index]
    
    y_train_kf, y_val_kf = label_data_formation[train_index], label_data_formation[val_index]
    
    dense_net = get_dense_model(data_formation[0].shape, siamese_train_predictions[0].shape)
    dense_net.fit(x=[data_formation_train_kf, siamese_predictions_train_kf], y=y_train_kf, batch_size=16, epochs=1)
    
    print(dense_net.evaluate(x=[data_formation_val_kf, siamese_predictions_val_kf], y=y_val_kf))

Epoch 1/1
1.0986455347905264
Epoch 1/1
1.0987146531473049
Epoch 1/1
1.0987583121830873


Train the dense net with the entire dataset.

In [81]:
dense_net = get_dense_model(data_formation[0].shape, siamese_train_predictions[0].shape)
dense_net.fit(x=[data_formation, siamese_train_predictions], y=label_data_formation, batch_size=16, epochs=1)

Epoch 1/1


<keras.callbacks.History at 0x7fdbd8a3d748>