# Compare betting companies

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [2]:
df = pd.read_csv('../data/matches.csv')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22467 entries, 0 to 22466
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   id                22467 non-null  int64  
 1   date              22467 non-null  object 
 2   season_name       22467 non-null  object 
 3   match_api_id      22467 non-null  int64  
 4   home_team_api_id  22467 non-null  int64  
 5   away_team_api_id  22467 non-null  int64  
 6   win_lose_draw     22467 non-null  object 
 7   B365H             22467 non-null  float64
 8   B365D             22467 non-null  float64
 9   B365A             22467 non-null  float64
 10  BWH               22467 non-null  float64
 11  BWD               22467 non-null  float64
 12  BWA               22467 non-null  float64
 13  IWH               22467 non-null  float64
 14  IWD               22467 non-null  float64
 15  IWA               22467 non-null  float64
 16  LBH               22467 non-null  float6

In [3]:
feature_cols = list(
    set(df.columns.tolist()) - 
    {'id', 'date', 'match_api_id', 'home_team_api_id', 'away_team_api_id', 'win_lose_draw', 'season_name'}
    )
target_label = 'win_lose_draw'

X_B365 = df[[feature for feature in feature_cols if feature.startswith('B365')]]
X_BW = df[[feature for feature in feature_cols if feature.startswith('BW')]]
X_IW = df[[feature for feature in feature_cols if feature.startswith('IW')]]
X_LB = df[[feature for feature in feature_cols if feature.startswith('LB')]]

y = df[target_label]

In [33]:
print(pd.concat([X_B365, y], axis=1).head())
print(X_BW.head())
print(X_IW.head())
print(X_LB.head())

   B365D  B365H  B365A win_lose_draw
0   3.40   1.73   5.00             D
1   3.20   1.95   3.60             D
2   3.30   2.38   2.75             A
3   3.75   1.44   7.50             H
4   3.50   5.00   1.65             A
    BWA   BWH   BWD
0  4.20  1.75  3.35
1  3.95  1.80  3.30
2  2.55  2.40  3.30
3  6.80  1.40  4.00
4  1.60  5.00  3.50
   IWD   IWH  IWA
0  3.2  1.85  3.5
1  3.2  1.90  3.5
2  3.1  2.60  2.3
3  3.9  1.40  6.0
4  3.3  4.00  1.7
   LBD   LBH   LBA
0  3.3  1.80  3.75
1  3.2  1.90  3.50
2  3.2  2.50  2.50
3  3.6  1.44  6.50
4  3.4  4.00  1.72


## Non-Linear Multi Layer Perceptron, 3 hidden layers

In [4]:
from sklearn.neural_network import MLPClassifier
from numpy import mean, std
from sklearn.model_selection import KFold, cross_val_score

### B356

In [5]:
X_train, X_test, y_train, y_test = train_test_split(X_B365, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
mlp = MLPClassifier(random_state=42, activation='relu', hidden_layer_sizes=(10, 10, 10))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.54


In [7]:
scores = cross_val_score(mlp, X_B365, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53198 (0.01047)


### BW

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X_BW, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
mlp = MLPClassifier(random_state=42, activation='relu', hidden_layer_sizes=(10, 10, 10))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.54


In [10]:
scores = cross_val_score(mlp, X_BW, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53162 (0.01124)


### IW

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X_IW, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [12]:
mlp = MLPClassifier(random_state=42, activation='relu', hidden_layer_sizes=(10, 10, 10))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [13]:
scores = cross_val_score(mlp, X_IW, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53033 (0.01127)


### LB

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X_LB, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [15]:
mlp = MLPClassifier(random_state=42, activation='relu', hidden_layer_sizes=(10, 10, 10))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [16]:
scores = cross_val_score(mlp, X_LB, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53051 (0.01090)


## Linear Multi Layer Perceptron, one hidden layer

### B356

In [17]:
X_train, X_test, y_train, y_test = train_test_split(X_B365, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [18]:
mlp = MLPClassifier(random_state=42, activation='identity', hidden_layer_sizes=(100,))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [19]:
scores = cross_val_score(mlp, X_B365, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53216 (0.01081)


### BW

In [20]:
X_train, X_test, y_train, y_test = train_test_split(X_BW, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [21]:
mlp = MLPClassifier(random_state=42, activation='identity', hidden_layer_sizes=(100,))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [22]:
scores = cross_val_score(mlp, X_BW, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53082 (0.01009)


### IW

In [23]:
X_train, X_test, y_train, y_test = train_test_split(X_IW, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [24]:
mlp = MLPClassifier(random_state=42, activation='identity', hidden_layer_sizes=(100,))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [25]:
scores = cross_val_score(mlp, X_IW, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.52878 (0.01265)


### LB

In [26]:
X_train, X_test, y_train, y_test = train_test_split(X_LB, y, test_size=0.2, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [27]:
mlp = MLPClassifier(random_state=42, activation='identity', hidden_layer_sizes=(100,))
mlp.fit(X_train, y_train)

print('Accuracy of MLP classifier on training set: {:.2f}'
     .format(mlp.score(X_train, y_train)))
print('Accuracy of MLP classifier on test set: {:.2f}'
     .format(mlp.score(X_test, y_test)))

Accuracy of MLP classifier on training set: 0.53
Accuracy of MLP classifier on test set: 0.53


In [28]:
scores = cross_val_score(mlp, X_LB, y, cv=KFold(n_splits=10, random_state=42, shuffle=True))
print('Accuracy: %.5f (%.5f)' % (mean(scores), std(scores)))

Accuracy: 0.53016 (0.01125)


# Conclusion

It seems that for all the betting companies, the MLP model performs the same, regardless of the non-linear or linear model and the number of layers.
