In [230]:
import pandas as pd
import numpy as np
from bevel.linear_ordinal_regression import OrderedLogit
import matplotlib.pyplot as plt

In [237]:
def get_df(season):
    """Load one season of EPL games and derive the model features.

    Reads ``data/EPL Games {season}.csv``, removes the columns the analysis
    does not use, orders the games chronologically and adds:

    * ``logTMratio`` -- log(HTMValue / ATMValue)
    * ``logTMratioSeason`` -- log(HTMValueSeason / ATMValueSeason)
    * ``win.ord`` -- full-time result coded as 2 (more home goals),
      1 (equal goals) or 0 (more away goals)

    Parameters
    ----------
    season : str
        Season label as used in the file name, e.g. ``'18-19'``.

    Returns
    -------
    pandas.DataFrame
        One row per game, sorted by ``Date`` with a fresh 0..n-1 index.
    """
    # Half-time stats, match stats and every bookmaker column that the
    # analysis never reads.
    unused_columns = [
        'Div', 'HTHG', 'HTAG', 'HTR', 'Referee',
        'HS', 'AS', 'HST', 'AST', 'HF', 'AF', 'HC', 'AC', 'HY', 'AY', 'HR', 'AR',
        'BWH', 'BWD', 'BWA', 'IWH', 'IWD', 'IWA', 'PSH', 'PSD', 'PSA',
        'WHH', 'WHD', 'WHA', 'VCH', 'VCD', 'VCA',
        'Bb1X2', 'BbMxH', 'BbAvH', 'BbMxD', 'BbAvD', 'BbMxA', 'BbAvA',
        'BbOU', 'BbMx>2.5', 'BbAv>2.5', 'BbMx<2.5', 'BbAv<2.5',
        'BbAH', 'BbAHh', 'BbMxAHH', 'BbAvAHH', 'BbMxAHA', 'BbAvAHA',
        'PSCH', 'PSCD', 'PSCA',
    ]
    games = pd.read_csv(f'data/EPL Games {season}.csv').drop(columns=unused_columns)

    # Parse the dates before sorting so the order is chronological, not lexical.
    games['Date'] = pd.to_datetime(games['Date'], format="%Y-%m-%d")
    games = games.sort_values('Date').reset_index(drop=True)

    # Log-ratios are symmetric around 0: positive favours the home side.
    games['logTMratio'] = np.log(games['HTMValue'] / games['ATMValue'])
    games['logTMratioSeason'] = np.log(games['HTMValueSeason'] / games['ATMValueSeason'])

    home_goals = games['FTHG']
    away_goals = games['FTAG']
    games['win.ord'] = np.select(
        [home_goals > away_goals, away_goals > home_goals],
        [2, 0],
        default=1,
    )
    return games

We will use the 2017-2018 season as the training data and will test our model on the 2018-2019 season.

In [288]:
# Test season (2018-2019) and training season (2017-2018), each loaded and
# feature-engineered by get_df.
EPL1819, EPL1718 = (get_df(season_label) for season_label in ('18-19', '17-18'))

In [289]:
def get_results(df_train, df_test, keys):
    """Fit an ordered logit on one frame and score it on another.

    An ``OrderedLogit`` is fitted on ``df_train[keys]`` against
    ``df_train['win.ord']`` and then evaluated on ``df_test``.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames containing the columns in ``keys`` plus ``'win.ord'``.
        The ``'win.ord'`` values of ``df_test`` are used directly as column
        positions into the predicted probability matrix.
    keys : list of str
        Feature columns passed to the model.

    Returns
    -------
    success_rate : float
        Fraction of test rows whose predicted class equals ``'win.ord'``.
    brier_score : float
        Mean over test rows of the summed squared difference between the
        predicted probabilities and the one-hot encoded outcome.
    df_final : pandas.DataFrame
        Copy of ``df_test`` with added ``'pred'`` and ``'correct'`` columns.
    """
    model = OrderedLogit()
    model.fit(df_train[keys], df_train['win.ord'])

    # Brier score: compare the probability rows with one-hot observed outcomes.
    predicted = model.predict_probabilities(df_test[keys])
    observed = np.eye(predicted.shape[1])[df_test['win.ord'].to_numpy()]
    brier_score = np.square(predicted - observed).sum(axis=1).mean()

    # Work on a copy so the caller's test frame is left untouched.
    df_final = df_test.copy()
    df_final['pred'] = model.predict_class(df_final[keys])
    df_final['correct'] = df_final['pred'] == df_final['win.ord']
    success_rate = np.mean(df_final['correct'])

    return success_rate, brier_score, df_final

We will create ordinal logistic regression models to forecast the results of the 2018-2019 season based on the log of the ratio of the teams' TMValues. We will consider the TMValues at the beginning of the season as well as the most recently updated TMValues at the time of each game. We will then compare these models with the predictions implied by the B365 betting odds.

In [290]:
# Same train/test split for both models; only the feature differs:
# start-of-season squad values vs. values as of each game.
success_rate_season, brier_score_season, df_season = get_results(
    df_train=EPL1718,
    df_test=EPL1819,
    keys=['logTMratioSeason'],
)
success_rate_updated, brier_score_updated, df_updated = get_results(
    df_train=EPL1718,
    df_test=EPL1819,
    keys=['logTMratio'],
)

In [291]:
# Bookmaker benchmark built from the Bet365 odds columns.
# Column order (away, draw, home) lines up with the 0/1/2 coding of 'win.ord'.
b365_odds = df_season[['B365A', 'B365D', 'B365H']].to_numpy()

# Inverse odds, rescaled so that every row sums to 1.
inverse_odds = 1 / b365_odds
pred_prob = inverse_odds / inverse_odds.sum(axis=1, keepdims=True)

# One-hot encoding of the observed outcome, then the same Brier formula
# as in get_results.
true_prob = np.eye(pred_prob.shape[1])[df_season['win.ord'].to_numpy()]
brier_score_B365 = np.square(pred_prob - true_prob).sum(axis=1).mean()

# The bookmaker's pick is the outcome with the lowest odds.
df_season['pred_B365'] = b365_odds.argmin(axis=1)
df_season['correct_B365'] = df_season['pred_B365'] == df_season['win.ord']
success_rate_B365 = df_season['correct_B365'].mean()

In [292]:
# Share of test-season games whose outcome each model called correctly.
for model_name, model_rate in (
    ('TMValueSeason', success_rate_season),
    ('TMValue', success_rate_updated),
    ('B365', success_rate_B365),
):
    print(f'Success Rate {model_name} model: {model_rate*100:.2f}%')

Success Rate TMValueSeason model: 57.63%
Success Rate TMValue model: 58.42%
Success Rate B365 model: 58.42%


In [301]:
# Brier score of each model's probabilities on the test season (lower is better).
for model_name, model_brier in (
    ('TMValueSeason', brier_score_season),
    ('TMValue', brier_score_updated),
    ('B365', brier_score_B365),
):
    print(f'Brier Score {model_name} model: {model_brier:.4f}')

Brier Score TMValueSeason model: 0.5393
Brier Score TMValue model: 0.5391
Brier Score B365 model: 0.5221


In [308]:
# Pairwise agreement: fraction of games on which two models pick the same
# outcome. df_season and df_updated are both copies of the same test frame,
# so their rows line up by index.
season_vs_updated = (df_updated['pred'] == df_season['pred']).mean()
season_vs_b365 = (df_season['pred'] == df_season['pred_B365']).mean()
updated_vs_b365 = (df_updated['pred'] == df_season['pred_B365']).mean()

print(f"TMValueSeason and TMValue models agree on  {season_vs_updated*100:.2f}% of the games")
print(f"TMValueSeason and B365 models agree on {season_vs_b365*100:.2f}% of the games")
print(f"TMValue and B365 models agree on  {updated_vs_b365*100:.2f}% of the games")

TMValueSeason and TMValue models agree on  96.32% of the games
TMValueSeason and B365 models agree on 87.11% of the games
TMValue and B365 models agree on  88.68% of the games


In [310]:
# Games on which the two TMValue-based models pick different outcomes.
df_updated.loc[df_season['pred'].ne(df_updated['pred'])]

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,B365H,B365D,B365A,HTMValue,ATMValue,HTMValueSeason,ATMValueSeason,logTMratio,logTMratioSeason,win.ord,pred,correct
45,2018-09-15,Huddersfield Town,Crystal Palace,0,1,A,3.3,3.1,2.5,116.1,215.33,108.35,179.33,-0.61772,-0.503861,0,0,True
70,2018-10-05,Brighton & Hove Albion,West Ham United,1,0,H,2.7,3.3,2.87,177.0,285.5,169.1,307.5,-0.478092,-0.597985,2,2,True
109,2018-11-05,Huddersfield Town,Fulham FC,1,0,H,2.3,3.5,3.3,120.6,250.5,108.35,146.5,-0.73098,-0.301659,2,0,False
146,2018-12-05,Fulham FC,Leicester City,1,1,D,3.0,3.5,2.45,250.5,344.5,146.5,303.0,-0.318635,-0.726707,1,2,False
164,2018-12-15,Fulham FC,West Ham United,0,2,A,2.9,3.6,2.5,250.5,287.5,146.5,307.5,-0.137764,-0.74145,0,2,False
196,2018-12-30,Burnley FC,West Ham United,2,0,H,3.3,3.2,2.25,181.75,287.5,156.75,307.5,-0.458591,-0.673823,2,2,True
214,2019-01-12,Cardiff City,Huddersfield Town,0,0,D,2.25,3.25,3.6,94.25,116.1,56.4,108.35,-0.208501,-0.652898,1,2,False
220,2019-01-19,Wolverhampton Wanderers,Leicester City,4,3,H,2.37,3.2,3.4,232.05,336.0,136.8,303.0,-0.370158,-0.795213,2,2,True
222,2019-01-19,Southampton FC,Everton FC,2,1,H,2.75,3.4,2.7,249.1,435.5,228.85,348.5,-0.55864,-0.420571,2,0,False
225,2019-01-19,AFC Bournemouth,West Ham United,2,0,H,2.37,3.7,3.0,231.5,304.0,137.25,307.5,-0.272448,-0.806671,2,2,True


The TMValue model has a slightly higher success rate (3 more correct games, to be precise) than the TMValueSeason model. This is expected, since the TMValue is updated throughout the season whereas the TMValueSeason is not, although the difference may not be significant. Overall, there is strong agreement between the models. Perhaps a bit surprisingly, the TMValue model has exactly the same success rate as the B365 model. The difference between the two lies in the Brier score: the B365 model has a smaller Brier score, meaning it yields more accurate probabilities.

To get a sense of whether the TMValue leads to better results than the TMValueSeason, we will compare the success rates of both across the different seasons by simply picking the team with the higher TMValueSeason (respectively, TMValue) as the winner.

In [352]:
def _favourite_success_rate(games, home_col, away_col):
    """Fraction of games won by the side picked from the two value columns.

    The home side is picked when its value is strictly higher; otherwise the
    away side is picked, so a draw is never predicted.
    """
    picked = np.where(games[home_col] > games[away_col], 'H', 'A')
    return np.mean(picked == games['FTR'])


# Season labels '11-12' ... '18-19', matching the data file names.
index = [f'{str(year)[-2:]}-{str(year + 1)[-2:]}' for year in range(2011, 2019)]

table = []
for season_label in index:
    season_games = pd.read_csv(f'data/EPL Games {season_label}.csv')
    table.append([
        _favourite_success_rate(season_games, 'HTMValueSeason', 'ATMValueSeason') * 100,
        _favourite_success_rate(season_games, 'HTMValue', 'ATMValue') * 100,
    ])

pd.DataFrame(table, index=index, columns=['TMValueSeason', 'TMValue']).round(2)

Unnamed: 0,TMValueSeason,TMValue
11-12,52.11,51.05
12-13,50.0,49.47
13-14,52.7,53.78
14-15,47.5,47.5
15-16,41.16,42.48
16-17,56.84,54.21
17-18,50.26,51.05
18-19,57.89,57.11


The results show that, in every season, the predictive power of TMValueSeason and TMValue is essentially the same, suggesting that the higher success rate of the TMValue model compared to the TMValueSeason model above could have been just a coincidence. While the TMValue is generally captured on the 1st and 15th of every month, it is a reflection of player evaluation by the community. A player's value is only revised when a member of the community proposes a change and there is sufficient agreement; therefore, it does not capture the player's current form. In other words, the TMValue reflects long-term value rather than short-term form.

In future work, we will try to capture the teams' form, for example through the current league position and the outcomes of the last 5 games, in order to try to improve the model's performance. 

Below, we show the breakdown of the predictions with respect to the actual outcomes.

In [354]:
# Actual full-time result (rows) vs. the outcome picked from the B365 odds (columns).
pd.crosstab(index=df_season['FTR'], columns=df_season['pred_B365'], dropna=True)

pred_B365,0,2
FTR,Unnamed: 1_level_1,Unnamed: 2_level_1
A,78,50
D,19,52
H,37,144


In [355]:
# Actual full-time result (rows) vs. the TMValueSeason model's prediction (columns).
pd.crosstab(index=df_season['FTR'], columns=df_season['pred'], dropna=True)

pred,0,2
FTR,Unnamed: 1_level_1,Unnamed: 2_level_1
A,72,56
D,17,54
H,34,147


In [356]:
# Actual full-time result (rows) vs. the TMValue model's prediction (columns).
pd.crosstab(index=df_updated['FTR'], columns=df_updated['pred'], dropna=True)

pred,0,2
FTR,Unnamed: 1_level_1,Unnamed: 2_level_1
A,73,55
D,14,57
H,32,149


In [204]:
# Share of drawn games in the training and test seasons.
# Both lines are printed explicitly: a notebook cell only displays the value of
# its LAST bare expression, so the 2017-2018 figure was previously computed and
# silently discarded.
print(f"Percentage of draws in 2017-2018 season: {(EPL1718['FTR'] == 'D').mean()*100:.2f}%")
print(f"Percentage of draws in 2018-2019 season: {(EPL1819['FTR'] == 'D').mean()*100:.2f}%")

'Percentage of draws in 2018-2019 season: 18.68%'

Given that 26.1% of the games in the training data and 18.7% in the test data end in a draw, it seems surprising that a draw is never predicted by our model.

Most errors in the models come from not predicting any draws and from predicting wins for the away team.