# Implementation of baseline using Artificial Neural Network with Original + Win-loss feature set and BSA

Necessary files: logistic_regression_functions.py, ann_functions.py, testing_functions.py, win_loss_functions.py, BRA.csv

In [None]:
import logistic_regression_functions
import ann_functions
import testing_functions
import win_loss_functions
import pandas as pd
from datetime import datetime
import requests
import numpy as np
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras.layers import *
from tqdm import tqdm

The baseline is implemented according to the paper "An Improved Prediction System for Football a Match Result" by C. P. Igiri and E. O. Nwachukwu (2014). The original features described in the paper (except for a few that are not available for the BSA) are used, and the model is evaluated on the BSA dataset.

The authors used only one season for training, so I did the same. I used season 2017 for training, season 2018 as the validation set, and the second half of season 2019 for testing. The validation set is used to select the best training checkpoint according to validation accuracy.

In [None]:
# Column names used in BRA.csv mapped to the names the rest of this notebook
# works with.
column_map = {
    'Home': 'HomeTeam', 'Away': 'AwayTeam',
    'HG': 'FTHG', 'AG': 'FTAG', 'Res': 'FTR',
    'PH': 'B365H', 'PD': 'B365D', 'PA': 'B365A',
}
full_dataset = pd.read_csv('BRA.csv')
full_dataset = full_dataset.rename(columns=column_map)

# Write one CSV per season; the file name carries the last two characters of
# the season value (e.g. BSA_17.csv).
for season in full_dataset['Season'].unique():
  in_season = full_dataset['Season'] == season
  season_rows = full_dataset[in_season]
  season_rows.to_csv(f"BSA_{str(season)[-2:]}.csv", index=False)

In [None]:
# Win-loss feature set. The arguments for getting the attacks are not needed,
# so they are left at their defaults. Every call skips the first 6 rounds.
X_train_win_loss, y_train = win_loss_functions.create_data(
    ['BSA_17.csv'],
    skip_rounds=6,
)

# Validation season (2018); BSA_17.csv is passed as the list of other season
# files (presumably the earlier seasons used as history - confirm in
# win_loss_functions).
results_val_win_loss, matches_per_round = win_loss_functions.create_data_single(
    'BSA_18.csv',
    ['BSA_17.csv'],
    skip_rounds=6,
)

# Testing season (2019). Dates are returned as well, because they are needed
# for dividing the testing season into slices.
results_test_win_loss, matches_per_round = win_loss_functions.create_data_single(
    'BSA_19.csv',
    ['BSA_17.csv', 'BSA_18.csv'],
    return_dates=True,
    skip_rounds=6,
)

Processing BSA_17.csv season file.


In [None]:
def _original_features(season_file, **options):
  """Build the original feature set for a single season file.

  Calls ann_functions.create_data_single with the four positional arguments
  after the file name set to None and include_stats=False; any extra keyword
  options are forwarded unchanged.
  """
  return ann_functions.create_data_single(season_file, None, None, None, None,
                                          include_stats=False, **options)


# Original feature set for the training, validation and testing seasons.
results_train_originals, matches_per_round = _original_features('BSA_17.csv')
results_val_originals, matches_per_round = _original_features('BSA_18.csv')
# Dates are returned as well for dividing testing season into slices
results_test_originals, matches_per_round = _original_features('BSA_19.csv',
                                                               return_dates=True)

# Split the training season into the label column (FTR) and the features.
y_train_originals = results_train_originals['FTR']
X_train_originals = results_train_originals.drop(columns='FTR')

In [None]:
def _join_feature_sets(originals, win_loss):
  """Join two feature frames column-wise, keeping the first of any duplicated column name."""
  joined = pd.concat([originals, win_loss], axis=1)
  first_occurrence = ~joined.columns.duplicated()
  return joined.loc[:, first_occurrence].copy()


# Combine the original and the win-loss feature sets for every data split.
X_train = _join_feature_sets(X_train_originals, X_train_win_loss)
results_val = _join_feature_sets(results_val_originals, results_val_win_loss)
results_test = _join_feature_sets(results_test_originals, results_test_win_loss)

In [None]:
# Validation features: everything except the label column.
X_val = results_val.drop('FTR', axis=1)
# The classes need to go from 0 to 2 not from -1 to 1.
# A new Series is built instead of using `+=`: an in-place increment on
# results_val['FTR'] can write through to results_val itself, and then
# re-running this cell would shift the validation labels a second time.
y_val = results_val['FTR'] + 1
# NOTE: y_train is still shifted on every execution of this cell; re-create it
# (re-run the win-loss feature cell) before running this cell again.
y_train = y_train + 1

The authors did not say which ANN architecture they used. I used three dense layers with 64, 32 and 16 neurons, and dropout to reduce overfitting.

I always let the model train for 100 epochs and took the weights from the best epoch according to validation accuracy.

To use all the data available, the first half of the testing season was added to the training data.

In [None]:
# Because some rounds at the beginning of the season are ignored, the second
# half of the season starts after 13 rounds' worth of rows, not at the nominal
# half-way round.
# NOTE(review): 13 presumably equals the first-half round count minus the
# skipped rounds - confirm against the data-creation helpers.
rounds_before_second_half = 13
start_test_index = rounds_before_second_half * matches_per_round

In [None]:
# Features and labels of the testing-season rows before start_test_index;
# they are appended to the training data later on.
X_test_to_append, y_test_to_append = testing_functions.prepare_test_to_append(
    results_test, start_test_index)
# Shift the classes from -1..1 to 0..2. A new object is created instead of
# using `+=`, so that results_test cannot be modified through a shared view
# (whether the helper returns a view or a copy is not visible here).
y_test_to_append = y_test_to_append + 1

For evaluation on the testing dataset, I divided the testing data approximately into rounds. The model is trained on the training dataset, evaluated on one round of the testing dataset, and then that round is added to the training dataset.

In [None]:
# Walk-forward evaluation on the second half of the testing season:
# for every round a fresh model is trained on all data seen so far, the best
# epoch (by validation accuracy) is restored, the round is evaluated, and the
# round is then added to the training data.

# Adding 1st half of testing season to the training data
X_train = pd.concat([X_train, X_test_to_append])
y_train = pd.concat([y_train, y_test_to_append])
# Rounds of the testing dataset
slices = testing_functions.get_slices(results_test, matches_per_round,
                                      start_test_index)
# File holding the weights of the best epoch of the round currently trained
weights_path = './weights_model.h5'
# Sum over rounds of (round accuracy * number of matches in the round)
weighted_sum = 0
# Number of evaluated matches; deliberately not called `sum`, which would
# shadow the builtin for the rest of the notebook
total_matches = 0
for slc in tqdm(slices):
  # A new model is built every round; release the Keras global state left by
  # the previous one so memory does not keep growing across rounds
  tf.keras.backend.clear_session()
  # Creating the model
  model = ann_functions.func_model((X_train.shape[1],))
  model.compile(optimizer='adam',
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])
  # A new callback per round, so "best" is tracked per round and the
  # checkpoint file is overwritten by the current model
  mc = tf.keras.callbacks.ModelCheckpoint(weights_path,
                                          monitor='val_accuracy',
                                          save_weights_only=True,
                                          save_best_only=True)
  X_test = slc.drop(['FTR', 'Date'], axis=1)
  # Shift the classes from -1..1 to 0..2 without modifying the slice itself
  y_test = slc['FTR'] + 1
  # Train the model
  model.fit(X_train, y_train,
            epochs=100,
            batch_size=8,
            validation_data=(X_val, y_val),
            verbose=0,
            callbacks=[mc])
  # Load the best checkpoint
  model.load_weights(weights_path)
  # evaluate() returns [loss, accuracy]; the accuracy is weighted by the
  # round size. verbose=0 keeps the tqdm progress bar readable.
  round_accuracy = model.evaluate(X_test, y_test, verbose=0)[1]
  weighted_sum += round_accuracy * len(y_test)
  total_matches += len(y_test)
  # Add the round to the training dataset
  X_train = pd.concat([X_train, X_test])
  y_train = pd.concat([y_train, y_test])
print('')
# Match-weighted accuracy over all evaluated rounds
print(weighted_sum / total_matches)

  0%|          | 0/14 [00:00<?, ?it/s]



  7%|▋         | 1/14 [00:18<04:02, 18.66s/it]



 14%|█▍        | 2/14 [00:39<04:02, 20.24s/it]



 21%|██▏       | 3/14 [01:01<03:48, 20.76s/it]



 29%|██▊       | 4/14 [01:20<03:22, 20.24s/it]



 36%|███▌      | 5/14 [01:42<03:05, 20.65s/it]



 43%|████▎     | 6/14 [02:02<02:43, 20.49s/it]



 50%|█████     | 7/14 [02:22<02:22, 20.36s/it]



 57%|█████▋    | 8/14 [03:04<02:43, 27.31s/it]



 64%|██████▍   | 9/14 [03:25<02:06, 25.37s/it]



 71%|███████▏  | 10/14 [04:07<02:01, 30.46s/it]



 79%|███████▊  | 11/14 [04:29<01:23, 27.73s/it]



 86%|████████▌ | 12/14 [04:51<00:52, 26.04s/it]



 93%|█████████▎| 13/14 [05:13<00:24, 24.84s/it]



100%|██████████| 14/14 [05:36<00:00, 24.00s/it]


0.5052631716979178





The testing accuracy is 50.53%.