In [24]:
import numpy as np
import pandas as pd
from keras import backend as K
from keras.layers import *
from keras.models import *
from keras.optimizers import *
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder

# Load data

The cell below loads the encoded formation images. Each image has shape 9x11x3 and is an input to the siamese net; the images are split into home and away halves later, in the training cell. The match data (reduced attributes and labels) is also loaded in the training cell.

In [25]:
# Encoded formation images. Later cells index this array as formations[:, 0] for
# the home team and formations[:, 1] for the away team.
formations = np.load(file="data/formations.npy")

# Construct Siamese Model

In [26]:
def get_siamese_model(formation_input_shape, reduced_attr_input_shape,
                      learning_rate=0.00005, num_classes=3):
    """
    Build and compile the siamese match-outcome classifier.

    The home and away formation images go through one shared CNN encoder
    (same weights for both), the reduced attribute vector goes through its own
    MLP encoder, and the three encodings are concatenated and fed to a final
    MLP that ends in a softmax over `num_classes` classes.

    Parameters
    ----------
    formation_input_shape : tuple
        Shape of a single formation image, without the batch axis.
    reduced_attr_input_shape : tuple
        Shape of a single reduced attribute vector, without the batch axis.
    learning_rate : float, optional
        Step size for the Adam optimizer.
    num_classes : int, optional
        Number of units in the softmax output layer.

    Returns
    -------
    A compiled Keras model taking
    `[attr_input, home_formation_input, away_formation_input]` and trained
    with categorical cross-entropy, reporting accuracy.
    """
    # Width of each of the three encodings that get concatenated below.
    encoding_dim = 64

    # Define the tensors for the two input images and the attribute vector
    home_formation_input = Input(shape=formation_input_shape)
    away_formation_input = Input(shape=formation_input_shape)
    attr_input = Input(shape=reduced_attr_input_shape)

    # Convolutional encoder; a single instance is applied to both images so the
    # home and away branches share their weights (the "siamese" part).
    cnn = Sequential()
    cnn.add(Conv2D(16, (2,2), activation='relu', input_shape=formation_input_shape))
    cnn.add(MaxPooling2D())
    cnn.add(Flatten())
    cnn.add(Dense(encoding_dim, activation='sigmoid'))

    # Generate the encodings (feature vectors) for the two images
    cnn_encoded_home = cnn(home_formation_input)
    cnn_encoded_away = cnn(away_formation_input)

    # attribute encoding MLP
    mlp0 = Sequential()
    mlp0.add(Dense(32, activation='sigmoid', input_shape=reduced_attr_input_shape))
    mlp0.add(Dense(32, activation='relu'))
    mlp0.add(Dense(encoding_dim, activation='tanh'))

    mlp_encoded_attr = mlp0(attr_input)

    # prediction generating MLP on top of [attributes | home | away]
    concat = Concatenate()
    encoded_input = concat([mlp_encoded_attr, cnn_encoded_home, cnn_encoded_away])
    mlp1 = Sequential()
    mlp1.add(Dense(64, activation='tanh', input_shape=(3 * encoding_dim,)))
    mlp1.add(Dense(32, activation='relu'))
    # Softmax head: one probability per outcome class
    mlp1.add(Dense(num_classes, activation='softmax'))

    prediction = mlp1(encoded_input)

    # Connect the inputs with the outputs
    siamese_net = Model(inputs=[attr_input, home_formation_input, away_formation_input], outputs=prediction)

    # Newer Keras releases name this argument `learning_rate`; older ones only
    # accept `lr` and raise TypeError on unknown keywords, so fall back to it.
    try:
        optimizer = Adam(learning_rate=learning_rate)
    except TypeError:
        optimizer = Adam(lr=learning_rate)

    siamese_net.compile(loss="categorical_crossentropy", optimizer=optimizer, metrics=['accuracy'])
    return siamese_net

# Train the Siamese Net

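The cell below uses a simple 80/20 hold-out split (the first 80% of the samples for training, the remaining 20% for validation) rather than k-fold cross validation, and repeats the run for each PCA feature count.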

In [31]:
# Hold-out split: the first 80% of the samples train the net, the rest validate it.
n = formations.shape[0]
m = int(n * .8)

# split images into home/away teams (index 0 / 1 along axis 1)
formations_home = formations[:, 0, :, :, :]
formations_away = formations[:, 1, :, :, :]

formations_home_train, formations_home_test = formations_home[:m], formations_home[m:]
formations_away_train, formations_away_test = formations_away[:m], formations_away[m:]

y = pd.read_csv("data/y_resampled_formation.csv").values
assert y.shape[0] == n, "labels and formations must have the same number of samples"

# The labels do not depend on the PCA size, so split them once, outside the loop.
label_train = y[:m]
label_test = y[m:]

# Per-epoch training/validation metrics, keyed by the number of PCA features.
histories = {}

for num_features in [5, 10, 25, 50, 100, 250, 500]:
    print("******************" + str(num_features) + "*********************")
    file_name_data_formation = "data/pca/" + str(num_features) + "/data_formation_" + str(num_features) + ".npy"

    X = np.load(file_name_data_formation)
    assert X.shape[0] == n, "attributes and formations must have the same number of samples"
    reduced_attr_train = X[:m]
    reduced_attr_test = X[m:]

    # Release the previous iteration's model before building a new one so that
    # graph/GPU state does not pile up across the seven runs.
    K.clear_session()
    siamese_net = get_siamese_model(formations_home.shape[1:], (X.shape[1],))

    history = siamese_net.fit(x=[reduced_attr_train, formations_home_train, formations_away_train],
                              y=label_train,
                              validation_data=([reduced_attr_test, formations_home_test, formations_away_test], label_test),
                              batch_size=8, epochs=20)
    histories[num_features] = history.history


******************5*********************


InternalError: GPU sync failed

Compute the siamese net's class-probability predictions; they serve as an additional input when training the dense net.

In [13]:
# The siamese net built in this notebook takes three inputs, in this order:
# reduced attributes, home formations, away formations.
# NOTE(review): `X` and `siamese_net` are the ones left over from the last
# iteration of the training loop — confirm that is the intended feature count.
siamese_train_predictions = siamese_net.predict(x=[X, formations[:, 0], formations[:, 1]])

In [24]:
# Accuracy of the siamese net: fraction of rows whose highest-scoring predicted
# class matches the highest-scoring label column.
pred = siamese_train_predictions.argmax(axis=1)
true = label_data_formation.argmax(axis=1)
n_correct = np.sum(pred == true)
print(n_correct / len(pred))


0.3697406834964795


# Construct Dense Net with Output of Siamese Net

# Train Dense Net with Output of Siamese Net

Train with k-fold cross validation.

In [83]:
num_splits = 3
skf = StratifiedKFold(n_splits=num_splits)

# Validation result of every fold, kept so the folds can be averaged afterwards.
fold_scores = []

# StratifiedKFold stratifies on class indices, so the per-class label columns
# are collapsed with argmax before splitting.
for fold, (train_index, val_index) in enumerate(skf.split(data_formation, label_data_formation.argmax(1)), start=1):

    # Slice both dense-net inputs (match data and siamese predictions) for this fold
    data_formation_train_kf, siamese_predictions_train_kf = data_formation[train_index], siamese_train_predictions[train_index]
    data_formation_val_kf, siamese_predictions_val_kf = data_formation[val_index], siamese_train_predictions[val_index]

    y_train_kf, y_val_kf = label_data_formation[train_index], label_data_formation[val_index]

    # A fresh model per fold so no weights leak from one fold into the next
    dense_net = get_dense_model(data_formation[0].shape, siamese_train_predictions[0].shape)
    dense_net.fit(x=[data_formation_train_kf, siamese_predictions_train_kf], y=y_train_kf, batch_size=16, epochs=1)

    fold_score = dense_net.evaluate(x=[data_formation_val_kf, siamese_predictions_val_kf], y=y_val_kf)
    fold_scores.append(fold_score)
    print("fold " + str(fold) + "/" + str(num_splits) + ":", fold_score)

# axis=0 averages across folds whether evaluate() returned a scalar or a list of metrics
print("mean over folds:", np.mean(fold_scores, axis=0))

Epoch 1/1
1.0986455347905264
Epoch 1/1
1.0987146531473049
Epoch 1/1
1.0987583121830873


Train the dense net with the entire dataset.

In [81]:
# Final fit: a fresh dense net trained on every sample, with no validation split.
dense_net = get_dense_model(
    data_formation[0].shape,
    siamese_train_predictions[0].shape,
)
dense_net.fit(
    x=[data_formation, siamese_train_predictions],
    y=label_data_formation,
    batch_size=16,
    epochs=1,
)

Epoch 1/1


<keras.callbacks.History at 0x7fdbd8a3d748>