# Implementation of baseline using Artificial Neural Network with Original + Win-loss + feature vectors feature set and BL

Necessary files: logistic_regression_functions.py, ann_functions.py, testing_functions.py, win_loss_functions.py, feature_vectors_functions.py, BL_17.csv, BL_18.csv, BL_19.csv, A_BL.csv, H_BL.csv, A_BL_before_threshold.csv, H_BL_before_threshold.csv

In [None]:
import logistic_regression_functions
import ann_functions
import testing_functions
import win_loss_functions
import feature_vectors_functions
import pandas as pd
from datetime import datetime
import requests
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import *
from tqdm import tqdm

The baseline is implemented according to the paper "An Improved Prediction System for Football a Match Result" by C. P. Igiri and E. O. Nwachukwu from 2014. The original features as described in the paper are used and the model is evaluated on the BL dataset.

The authors used only one season for training, so I did the same. I used season 2016/17 for training, season 2017/18 as the validation set, and the second half of season 2018/19 for testing. The validation set is used to select the best checkpoint of the training according to the validation accuracy.

In [None]:
# Win-loss features and labels of the training season
X_train_win_loss, y_train = win_loss_functions.create_data(
    ['BL_17.csv'], skip_rounds=6, return_names=True)
# Validation season; BL_17.csv is handed over as the second argument
results_val_win_loss, matches_per_round = win_loss_functions.create_data_single(
    'BL_18.csv', ['BL_17.csv'], skip_rounds=6, return_names=True)
# Testing season; the dates are requested as well because they are needed
# for dividing the testing season into slices
results_test_win_loss, matches_per_round = win_loss_functions.create_data_single(
    'BL_19.csv', ['BL_17.csv', 'BL_18.csv'],
    skip_rounds=6, return_names=True, return_dates=True)

Processing BL_17.csv season file.


In [None]:
# Original (paper) features for the training, validation and testing seasons.
# The two integers after the file name are passed through unchanged; their
# meaning is defined by ann_functions.create_data_single.
results_train_originals, matches_per_round = ann_functions.create_data_single(
    'BL_17.csv', 19, 17,
    logistic_regression_functions.team_names_map_bl,
    logistic_regression_functions.secondary_team_names_map_bl)
results_val_originals, matches_per_round = ann_functions.create_data_single(
    'BL_18.csv', 19, 18,
    logistic_regression_functions.team_names_map_bl,
    logistic_regression_functions.secondary_team_names_map_bl)
# The dates are requested as well for dividing the testing season into slices
results_test_originals, matches_per_round = ann_functions.create_data_single(
    'BL_19.csv', 19, 19,
    logistic_regression_functions.team_names_map_bl,
    logistic_regression_functions.secondary_team_names_map_bl,
    return_dates=True)
# Separate the label column (FTR) from the training features
y_train_originals = results_train_originals['FTR']
X_train_originals = results_train_originals.drop(columns='FTR')

In [None]:
# Join the original and the win-loss feature sets column-wise. Columns that
# appear in both sets are kept once (first occurrence wins).
X_train_mix = pd.concat([X_train_originals, X_train_win_loss], axis=1)
X_train_mix = X_train_mix.loc[:, ~X_train_mix.columns.duplicated()].copy()

results_val_mix = pd.concat([results_val_originals, results_val_win_loss], axis=1)
results_val_mix = results_val_mix.loc[:, ~results_val_mix.columns.duplicated()].copy()

results_test_mix = pd.concat([results_test_originals, results_test_win_loss], axis=1)
results_test_mix = results_test_mix.loc[:, ~results_test_mix.columns.duplicated()].copy()

In [None]:
# Feature-vector tables (A_* and H_* files), in the regular variant ...
A, H = pd.read_csv('A_BL.csv'), pd.read_csv('H_BL.csv')
# ... and in the variant saved before the threshold was applied
A_before_threshold, H_before_threshold = (
    pd.read_csv('A_BL_before_threshold.csv'),
    pd.read_csv('H_BL_before_threshold.csv'),
)

In [None]:
# Extend the training, validation and testing frames with the feature vectors
# taken from A and H
X_train = feature_vectors_functions.add_feature_vector(
    X_train_mix, A, H)
results_val = feature_vectors_functions.add_feature_vector(
    results_val_mix, A, H)
results_test = feature_vectors_functions.add_feature_vector(
    results_test_mix, A, H)

In [None]:
# Separate the validation features from the labels (column FTR)
X_val = results_val.drop(columns='FTR')
y_val = results_val['FTR']
# Shift the labels by one: the classes need to go from 0 to 2, not from -1 to 1.
y_val += 1
y_train += 1

The authors did not say which ANN architecture they used. I used 3 dense layers with 64, 32 and 16 neurons, and dropout to reduce overfitting.

I always let the model train for 100 epochs and took the weights from the best epoch according to validation accuracy.

To use all the data available, the first half of the testing season was added to the training data.

In [None]:
# Row index at which the second half of the testing season starts.
# Some rounds at the beginning of the season are ignored, which is why the
# factor is 11 rounds.
start_test_index = matches_per_round * 11

In [None]:
# Features and labels of the testing-season rows that are appended to the
# training data (selected by prepare_test_to_append from start_test_index)
X_test_to_append, y_test_to_append = testing_functions.prepare_test_to_append(
    results_test, start_test_index)
# Classes need to go from 0 to 2 here as well
y_test_to_append += 1

In [None]:
# Adding 1st half of testing season to the training data
X_train = pd.concat([X_train, X_test_to_append])
y_train = pd.concat([y_train, y_test_to_append])
# Rounds of the testing dataset
slices = testing_functions.get_slices(results_test, matches_per_round,
                                      start_test_index)
# One path shared by the checkpoint callback and load_weights
weights_path = './weights_model.h5'
weighted_sum = 0
# Number of evaluated matches. Deliberately not called `sum`, so the builtin
# sum() stays usable in the rest of the notebook.
n_evaluated = 0
for slc in tqdm(slices):
    # A new model is created for every slice
    model = ann_functions.func_model((X_train.shape[1],))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Keep only the weights of the epoch with the best validation accuracy
    mc = tf.keras.callbacks.ModelCheckpoint(weights_path,
                                            monitor='val_accuracy',
                                            save_weights_only=True,
                                            save_best_only=True)
    X_test = slc.drop(['FTR', 'Date'], axis=1)
    # Shift the classes from -1..1 to 0..2 on a new Series; an in-place `+=`
    # would write into the FTR column of the slice itself
    y_test = slc['FTR'] + 1
    # Train the model
    history = model.fit(X_train, y_train,
                        epochs=100,
                        batch_size=8,
                        validation_data=(X_val, y_val),
                        verbose=0,
                        callbacks=[mc])
    # Load the best checkpoint
    model.load_weights(weights_path)
    # evaluate() returns [loss, accuracy]; weight the accuracy by slice size
    weighted_sum += model.evaluate(X_test, y_test)[1] * len(y_test)
    n_evaluated += len(y_test)
    # Add the round to the training dataset
    X_train = pd.concat([X_train, X_test])
    y_train = pd.concat([y_train, y_test])
print('')
# Accuracy over all evaluated matches
print(weighted_sum / n_evaluated)

  0%|          | 0/13 [00:00<?, ?it/s]



  8%|▊         | 1/13 [00:21<04:14, 21.24s/it]



 15%|█▌        | 2/13 [00:38<03:29, 19.04s/it]



 23%|██▎       | 3/13 [00:59<03:20, 20.05s/it]



 31%|███       | 4/13 [01:21<03:04, 20.53s/it]



 38%|███▊      | 5/13 [01:42<02:46, 20.79s/it]



 46%|████▌     | 6/13 [02:03<02:26, 20.95s/it]



 54%|█████▍    | 7/13 [02:21<01:59, 19.96s/it]



 62%|██████▏   | 8/13 [02:39<01:36, 19.34s/it]



 69%|██████▉   | 9/13 [03:00<01:19, 19.94s/it]



 77%|███████▋  | 10/13 [03:19<00:58, 19.40s/it]



 85%|████████▍ | 11/13 [03:40<00:39, 19.97s/it]



 92%|█████████▏| 12/13 [04:01<00:20, 20.35s/it]



100%|██████████| 13/13 [04:20<00:00, 20.07s/it]


0.5686274640310823





The testing accuracy is 56.86%.

Experimenting with feature vectors extracted before applying the threshold.

In [None]:
# y_train was shifted and extended during the previous experiment, so it is
# created again from the training season file
X_train_win_loss, y_train = win_loss_functions.create_data(
    ['BL_17.csv'],
    skip_rounds=6,
    return_names=True,
)

Processing BL_17.csv season file.


In [None]:
# Extend the training, validation and testing frames with the feature vectors
# that were extracted before applying the threshold
X_train = feature_vectors_functions.add_feature_vector(
    X_train_mix, A_before_threshold, H_before_threshold)
results_val = feature_vectors_functions.add_feature_vector(
    results_val_mix, A_before_threshold, H_before_threshold)
results_test = feature_vectors_functions.add_feature_vector(
    results_test_mix, A_before_threshold, H_before_threshold)

In [None]:
# Separate the validation features from the labels (column FTR)
X_val = results_val.drop(columns='FTR')
y_val = results_val['FTR']
# Shift the labels by one: the classes need to go from 0 to 2, not from -1 to 1.
y_val += 1
y_train += 1

In [None]:
# Row index at which the second half of the testing season starts.
# Some rounds at the beginning of the season are ignored, which is why the
# factor is 11 rounds.
start_test_index = matches_per_round * 11

In [None]:
# Features and labels of the testing-season rows that are appended to the
# training data (selected by prepare_test_to_append from start_test_index)
X_test_to_append, y_test_to_append = testing_functions.prepare_test_to_append(
    results_test, start_test_index)
# Classes need to go from 0 to 2 here as well
y_test_to_append += 1

In [None]:
# Adding 1st half of testing season to the training data
X_train = pd.concat([X_train, X_test_to_append])
y_train = pd.concat([y_train, y_test_to_append])
# Rounds of the testing dataset
slices = testing_functions.get_slices(results_test, matches_per_round,
                                      start_test_index)
# One path shared by the checkpoint callback and load_weights
weights_path = './weights_model.h5'
weighted_sum = 0
# Number of evaluated matches. Deliberately not called `sum`, so the builtin
# sum() stays usable in the rest of the notebook.
n_evaluated = 0
for slc in tqdm(slices):
    # A new model is created for every slice
    model = ann_functions.func_model((X_train.shape[1],))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Keep only the weights of the epoch with the best validation accuracy
    mc = tf.keras.callbacks.ModelCheckpoint(weights_path,
                                            monitor='val_accuracy',
                                            save_weights_only=True,
                                            save_best_only=True)
    X_test = slc.drop(['FTR', 'Date'], axis=1)
    # Shift the classes from -1..1 to 0..2 on a new Series; an in-place `+=`
    # would write into the FTR column of the slice itself
    y_test = slc['FTR'] + 1
    # Train the model
    history = model.fit(X_train, y_train,
                        epochs=100,
                        batch_size=8,
                        validation_data=(X_val, y_val),
                        verbose=0,
                        callbacks=[mc])
    # Load the best checkpoint
    model.load_weights(weights_path)
    # evaluate() returns [loss, accuracy]; weight the accuracy by slice size
    weighted_sum += model.evaluate(X_test, y_test)[1] * len(y_test)
    n_evaluated += len(y_test)
    # Add the round to the training dataset
    X_train = pd.concat([X_train, X_test])
    y_train = pd.concat([y_train, y_test])
print('')
# Accuracy over all evaluated matches
print(weighted_sum / n_evaluated)

  0%|          | 0/13 [00:00<?, ?it/s]



  8%|▊         | 1/13 [00:15<03:05, 15.48s/it]



 15%|█▌        | 2/13 [00:36<03:27, 18.87s/it]



 23%|██▎       | 3/13 [00:58<03:19, 19.99s/it]



 31%|███       | 4/13 [01:19<03:04, 20.52s/it]



 38%|███▊      | 5/13 [01:35<02:32, 19.02s/it]



 46%|████▌     | 6/13 [01:56<02:18, 19.78s/it]



 54%|█████▍    | 7/13 [02:13<01:52, 18.75s/it]



 62%|██████▏   | 8/13 [02:30<01:30, 18.04s/it]



 69%|██████▉   | 9/13 [02:51<01:16, 19.04s/it]



 77%|███████▋  | 10/13 [03:12<00:59, 19.72s/it]



 85%|████████▍ | 11/13 [03:33<00:40, 20.19s/it]



 92%|█████████▏| 12/13 [03:52<00:19, 19.58s/it]



100%|██████████| 13/13 [04:10<00:00, 19.24s/it]


0.5686274599405675





The testing accuracy is 56.86%.

Experimenting with feature vectors including home feature vectors for the away teams and away feature vectors for the home teams.

In [None]:
# y_train was shifted and extended during the previous experiment, so it is
# created again from the training season file
X_train_win_loss, y_train = win_loss_functions.create_data(
    ['BL_17.csv'],
    skip_rounds=6,
    return_names=True,
)

Processing BL_17.csv season file.


In [None]:
# Extend the training, validation and testing frames with the feature vectors
# from A and H, this time with include_all switched on
X_train = feature_vectors_functions.add_feature_vector(
    X_train_mix, A, H, include_all=True)
results_val = feature_vectors_functions.add_feature_vector(
    results_val_mix, A, H, include_all=True)
results_test = feature_vectors_functions.add_feature_vector(
    results_test_mix, A, H, include_all=True)

In [None]:
# Separate the validation features from the labels (column FTR)
X_val = results_val.drop(columns='FTR')
y_val = results_val['FTR']
# Shift the labels by one: the classes need to go from 0 to 2, not from -1 to 1.
y_val += 1
y_train += 1

In [None]:
# Row index at which the second half of the testing season starts.
# Some rounds at the beginning of the season are ignored, which is why the
# factor is 11 rounds.
start_test_index = matches_per_round * 11

In [None]:
# Features and labels of the testing-season rows that are appended to the
# training data (selected by prepare_test_to_append from start_test_index)
X_test_to_append, y_test_to_append = testing_functions.prepare_test_to_append(
    results_test, start_test_index)
# Classes need to go from 0 to 2 here as well
y_test_to_append += 1

In [None]:
# Adding 1st half of testing season to the training data
X_train = pd.concat([X_train, X_test_to_append])
y_train = pd.concat([y_train, y_test_to_append])
# Rounds of the testing dataset
slices = testing_functions.get_slices(results_test, matches_per_round,
                                      start_test_index)
# One path shared by the checkpoint callback and load_weights
weights_path = './weights_model.h5'
weighted_sum = 0
# Number of evaluated matches. Deliberately not called `sum`, so the builtin
# sum() stays usable in the rest of the notebook.
n_evaluated = 0
for slc in tqdm(slices):
    # A new model is created for every slice
    model = ann_functions.func_model((X_train.shape[1],))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Keep only the weights of the epoch with the best validation accuracy
    mc = tf.keras.callbacks.ModelCheckpoint(weights_path,
                                            monitor='val_accuracy',
                                            save_weights_only=True,
                                            save_best_only=True)
    X_test = slc.drop(['FTR', 'Date'], axis=1)
    # Shift the classes from -1..1 to 0..2 on a new Series; an in-place `+=`
    # would write into the FTR column of the slice itself
    y_test = slc['FTR'] + 1
    # Train the model
    history = model.fit(X_train, y_train,
                        epochs=100,
                        batch_size=8,
                        validation_data=(X_val, y_val),
                        verbose=0,
                        callbacks=[mc])
    # Load the best checkpoint
    model.load_weights(weights_path)
    # evaluate() returns [loss, accuracy]; weight the accuracy by slice size
    weighted_sum += model.evaluate(X_test, y_test)[1] * len(y_test)
    n_evaluated += len(y_test)
    # Add the round to the training dataset
    X_train = pd.concat([X_train, X_test])
    y_train = pd.concat([y_train, y_test])
print('')
# Accuracy over all evaluated matches
print(weighted_sum / n_evaluated)

  0%|          | 0/13 [00:00<?, ?it/s]



  8%|▊         | 1/13 [00:21<04:14, 21.21s/it]



 15%|█▌        | 2/13 [00:42<03:53, 21.21s/it]



 23%|██▎       | 3/13 [01:03<03:31, 21.20s/it]



 31%|███       | 4/13 [01:24<03:10, 21.21s/it]



 38%|███▊      | 5/13 [01:46<02:50, 21.35s/it]



 46%|████▌     | 6/13 [02:07<02:29, 21.31s/it]



 54%|█████▍    | 7/13 [02:23<01:58, 19.68s/it]



 62%|██████▏   | 8/13 [02:40<01:32, 18.60s/it]



 69%|██████▉   | 9/13 [03:01<01:17, 19.42s/it]



 77%|███████▋  | 10/13 [03:18<00:56, 18.74s/it]



 85%|████████▍ | 11/13 [03:36<00:36, 18.41s/it]



 92%|█████████▏| 12/13 [03:54<00:18, 18.20s/it]



100%|██████████| 13/13 [04:12<00:00, 19.40s/it]


0.5620915133968677





The testing accuracy is 56.21%.

Experimenting with feature vectors extracted before applying the threshold and including home feature vectors for the away teams and away feature vectors for the home teams.

In [None]:
# y_train was shifted and extended during the previous experiment, so it is
# created again from the training season file
X_train_win_loss, y_train = win_loss_functions.create_data(
    ['BL_17.csv'],
    skip_rounds=6,
    return_names=True,
)

Processing BL_17.csv season file.


In [None]:
# Adding the feature vectors to the features.
# All three splits use the before-threshold variants of BOTH tables, so the
# training features are built the same way as the validation and testing
# features. The training call previously passed the thresholded H.
# NOTE(review): an accuracy recorded with the old call may no longer match.
X_train = feature_vectors_functions.add_feature_vector(X_train_mix, A_before_threshold, H_before_threshold, include_all=True)
results_val = feature_vectors_functions.add_feature_vector(results_val_mix, A_before_threshold, H_before_threshold, include_all=True)
results_test = feature_vectors_functions.add_feature_vector(results_test_mix, A_before_threshold, H_before_threshold, include_all=True)

In [None]:
# Separate the validation features from the labels (column FTR)
X_val = results_val.drop(columns='FTR')
y_val = results_val['FTR']
# Shift the labels by one: the classes need to go from 0 to 2, not from -1 to 1.
y_val += 1
y_train += 1

In [None]:
# Row index at which the second half of the testing season starts.
# Some rounds at the beginning of the season are ignored, which is why the
# factor is 11 rounds.
start_test_index = matches_per_round * 11

In [None]:
# Features and labels of the testing-season rows that are appended to the
# training data (selected by prepare_test_to_append from start_test_index)
X_test_to_append, y_test_to_append = testing_functions.prepare_test_to_append(
    results_test, start_test_index)
# Classes need to go from 0 to 2 here as well
y_test_to_append += 1

In [None]:
# Adding 1st half of testing season to the training data
X_train = pd.concat([X_train, X_test_to_append])
y_train = pd.concat([y_train, y_test_to_append])
# Rounds of the testing dataset
slices = testing_functions.get_slices(results_test, matches_per_round,
                                      start_test_index)
# One path shared by the checkpoint callback and load_weights
weights_path = './weights_model.h5'
weighted_sum = 0
# Number of evaluated matches. Deliberately not called `sum`, so the builtin
# sum() stays usable in the rest of the notebook.
n_evaluated = 0
for slc in tqdm(slices):
    # A new model is created for every slice
    model = ann_functions.func_model((X_train.shape[1],))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    # Keep only the weights of the epoch with the best validation accuracy
    mc = tf.keras.callbacks.ModelCheckpoint(weights_path,
                                            monitor='val_accuracy',
                                            save_weights_only=True,
                                            save_best_only=True)
    X_test = slc.drop(['FTR', 'Date'], axis=1)
    # Shift the classes from -1..1 to 0..2 on a new Series; an in-place `+=`
    # would write into the FTR column of the slice itself
    y_test = slc['FTR'] + 1
    # Train the model
    history = model.fit(X_train, y_train,
                        epochs=100,
                        batch_size=8,
                        validation_data=(X_val, y_val),
                        verbose=0,
                        callbacks=[mc])
    # Load the best checkpoint
    model.load_weights(weights_path)
    # evaluate() returns [loss, accuracy]; weight the accuracy by slice size
    weighted_sum += model.evaluate(X_test, y_test)[1] * len(y_test)
    n_evaluated += len(y_test)
    # Add the round to the training dataset
    X_train = pd.concat([X_train, X_test])
    y_train = pd.concat([y_train, y_test])
print('')
# Accuracy over all evaluated matches
print(weighted_sum / n_evaluated)

  0%|          | 0/13 [00:00<?, ?it/s]



  8%|▊         | 1/13 [00:15<03:08, 15.75s/it]



 15%|█▌        | 2/13 [00:31<02:52, 15.65s/it]



 23%|██▎       | 3/13 [00:47<02:37, 15.71s/it]



 31%|███       | 4/13 [01:08<02:40, 17.89s/it]



 38%|███▊      | 5/13 [01:29<02:32, 19.09s/it]



 46%|████▌     | 6/13 [01:50<02:18, 19.84s/it]



 54%|█████▍    | 7/13 [02:07<01:53, 18.94s/it]



 62%|██████▏   | 8/13 [02:29<01:38, 19.66s/it]



 69%|██████▉   | 9/13 [02:50<01:20, 20.15s/it]



 77%|███████▋  | 10/13 [03:07<00:57, 19.31s/it]



 85%|████████▍ | 11/13 [03:29<00:39, 19.90s/it]



 92%|█████████▏| 12/13 [03:47<00:19, 19.38s/it]



100%|██████████| 13/13 [04:08<00:00, 19.11s/it]


0.5686274611092861





The testing accuracy is 56.86%.