In [1]:
# General libraries
import pandas as pd
import numpy as np
import warnings
import Utilities as ut
warnings.filterwarnings("ignore")

# Neural network libraries
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

# Machine Learning libraries
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

#Open the dataset
df = pd.read_csv("dataset/Other/dataset.csv", index_col = [0])
#Drop useless columns
df = df.drop(["MW", "home_team_name", "away_team_name"],1)

Using TensorFlow backend.


### Prepare X

In [2]:
scaler = StandardScaler()

# Scaling only certains rows (excluding dummies)
df[ut.scale_col] = scaler.fit_transform(df[ut.scale_col])

# Dropping our goal
X = df.drop('Result', 1)

### Prepare y

In [3]:
y = df.Result

le = LabelEncoder()

# Normalize the y
le.fit(["A","D","H"])
y = le.transform(y)

# Reshape the array 
transformed_y = y.reshape(-1, 1)

# Trasform the 3 classes in matrix
ohe = OneHotEncoder()
y = ohe.fit_transform(transformed_y).toarray()

array([[0., 1., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       ...,
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.]])

In [4]:
#Split of the scaled dataset in train and test

# Split in 80/20 the dataframe
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size = 0.20, random_state = 42)

## Tuning parameters

In [5]:
# Set early stopping monitor so the model stops training when it won't improve anymore
esm = EarlyStopping(monitor = 'val_loss', patience = 7)

# Set the optimizer
adam = optimizers.Adam(learning_rate = 0.0001)

## Building a Neural Network
We build a Neural Network in order to predict football match results.

<img src ="https://upload.wikimedia.org/wikipedia/commons/thumb/e/e4/Artificial_neural_network.svg/440px-Artificial_neural_network.svg.png">

In [6]:
# Create a Sequential model
model = Sequential()

# Create the input layer 
model.add(Dense(y.shape[1]*16, input_shape = (X.shape[1],),
                activation = 'relu'))
# Dropping neurons
model.add(Dropout(0.5, seed = 23))

# Create a hidden layer
model.add(Dense(y.shape[1]*8, activation = 'relu'))

# Create the output layer
model.add(Dense(y.shape[1], activation = 'softmax')) # Away, Draw or Home
          
# Compile the module
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics = ['accuracy'])

# Train and fit the model
model.fit(X_train, y_train, batch_size = 100, validation_split = 0.01, epochs = 250,
         verbose = 0, callbacks = [esm])

<keras.callbacks.callbacks.History at 0x14353b550>

In [26]:
# Evaluate the keras model
accuracy =  model.evaluate(X_test, y_test)
print(accuracy[1])

0.5696969628334045


## Visualization

In [7]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 48)                4704      
_________________________________________________________________
dropout_1 (Dropout)          (None, 48)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 24)                1176      
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 75        
Total params: 5,955
Trainable params: 5,955
Non-trainable params: 0
_________________________________________________________________


## There is room for improvement

<b>Neural Networks</b> perform better with a larger dataset and with some particular circumstances. The model tends to overfit and tuning hyperparameters could help to avoid this risk. Furthermore, it looks like the dropout doesn't work or even makes the situation worse: a dataframe with more rows would help to prevent this and would perform better with the dropout.