In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
df1819 = pd.read_csv('../CSV/season-1819_csv.csv') 
df1920 = pd.read_csv('../CSV/season-1920_csv.csv')
#load in csv's for each season 

In [3]:
df1819['Season'] = '18/19'
df1920['Season'] = '19/20'

In [8]:
df1819.iloc[:2, :26]

Unnamed: 0,Div,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,...,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A
0,E0,10/08/2018,Man United,Leicester,2,1,H,1,0,H,...,8,2,5,2,1,0,0,1.57,3.9,7.5
1,E0,11/08/2018,Bournemouth,Cardiff,2,0,H,1,0,H,...,9,7,4,1,1,0,0,1.9,3.6,4.5


In [5]:
df1819.columns 

Index(['Div', 'Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'HTHG',
       'HTAG', 'HTR', 'Referee', 'HS', 'AS', 'HST', 'AST', 'HF', 'AF', 'HC',
       'AC', 'HY', 'AY', 'HR', 'AR', 'B365H', 'B365D', 'B365A', 'BWH', 'BWD',
       'BWA', 'IWH', 'IWD', 'IWA', 'PSH', 'PSD', 'PSA', 'WHH', 'WHD', 'WHA',
       'VCH', 'VCD', 'VCA', 'Bb1X2', 'BbMxH', 'BbAvH', 'BbMxD', 'BbAvD',
       'BbMxA', 'BbAvA', 'BbOU', 'BbMx>2.5', 'BbAv>2.5', 'BbMx<2.5',
       'BbAv<2.5', 'BbAH', 'BbAHh', 'BbMxAHH', 'BbAvAHH', 'BbMxAHA', 'BbAvAHA',
       'PSCH', 'PSCD', 'PSCA', 'Season'],
      dtype='object')

In [5]:
df1920.head(2)

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,...,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,Season
0,E0,09/08/2019,20:00,Liverpool,Norwich,4,1,H,4,0,...,-2.25,1.91,1.99,1.94,1.98,1.99,2.07,1.9,1.99,19/20
1,E0,10/08/2019,12:30,West Ham,Man City,0,5,A,0,1,...,1.75,1.95,1.95,1.96,1.97,2.07,1.98,1.97,1.92,19/20


### limit both data frames to essential columns; Bet365 was the bookie chosen to run against

In [6]:
df1819 = df1819[['Date', 'HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG', 'Referee', 'B365H', 'B365D', 'B365A', 'Season']]

In [7]:
df1920 = df1920[['Date', 'HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG', 'Referee', 'B365H', 'B365D', 'B365A', 'Season']]

In [8]:
df = pd.concat([df1819, df1920])

In [9]:
df.columns

Index(['Date', 'HomeTeam', 'AwayTeam', 'FTR', 'FTHG', 'FTAG', 'Referee',
       'B365H', 'B365D', 'B365A', 'Season'],
      dtype='object')

In [11]:
df.Date = pd.to_datetime(df.Date,  format = '%d/%m/%Y')

In [12]:
df.head()

Unnamed: 0,Date,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season
0,2018-08-10,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19
1,2018-08-11,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19
2,2018-08-11,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19
3,2018-08-11,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19
4,2018-08-11,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19


In [13]:
df.dtypes

Date        datetime64[ns]
HomeTeam            object
AwayTeam            object
FTR                 object
FTHG                 int64
FTAG                 int64
Referee             object
B365H              float64
B365D              float64
B365A              float64
Season              object
dtype: object

In [14]:
df.reset_index(drop= True, inplace = True) #eliminates the two df's original indexes

In [17]:
df.set_index('Date', drop =True, inplace = True)

In [18]:
df['Link'] = [str(i)[:10]+n[:5] for i, n in zip(df.index, df.HomeTeam)]
#again create links between the date and team to act as keys for merging

In [19]:
df.head()

Unnamed: 0_level_0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Link
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2018-08-10,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19,2018-08-10Man U
2018-08-11,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19,2018-08-11Bourn
2018-08-11,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19,2018-08-11Fulha
2018-08-11,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19,2018-08-11Hudde
2018-08-11,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19,2018-08-11Newca


In [20]:
ELO = pd.read_csv('../CSV/LP_ELO.csv') #load in the ELO and League Position csv made previously

In [21]:
ELO.head()

Unnamed: 0,Club,Elo,Link,Position,Team,Points,Goal_Difference,Played,Date
0,Liverpool,1914.838867,2018-08-10Liver,1.0,Liverpool,0.0,0.0,0.0,2018-08-10
1,Liverpool,1915.229004,2018-08-11Liver,1.0,Liverpool,0.0,0.0,0.0,2018-08-10
2,Liverpool,1915.229004,2018-08-12Liver,1.0,Liverpool,0.0,0.0,0.0,2018-08-10
3,Liverpool,1915.229004,2018-08-13Liver,1.0,Liverpool,3.0,4.0,1.0,2018-08-13
4,Liverpool,1915.229004,2018-08-14Liver,1.0,Liverpool,3.0,4.0,1.0,2018-08-14


In [22]:
home_elo = pd.merge(left = df, right = ELO, left_on = 'Link', right_on = 'Link', how = 'left')
home_elo.tail()

Unnamed: 0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Link,Club,Elo,Position,Team,Points,Goal_Difference,Played,Date
515,Southampton,Watford,H,2,1,M Oliver,2.1,3.4,3.5,19/20,2019-11-30South,Southampton,1641.895264,19.0,Southampton,9.0,-18.0,13.0,2019-11-30
516,Norwich,Arsenal,D,2,2,P Tierney,4.0,4.33,1.75,19/20,2019-12-01Norwi,Norwich,1608.490601,19.0,Norwich,10.0,-15.0,13.0,2019-12-01
517,Wolves,Sheffield United,D,1,1,D Coote,1.9,3.5,4.2,19/20,2019-12-01Wolve,Wolves,1760.041748,6.0,Wolves,19.0,2.0,13.0,2019-12-01
518,Leicester,Everton,H,2,1,G Scott,1.61,4.0,5.25,19/20,2019-12-01Leice,Leicester,1826.061523,3.0,Leicester,29.0,23.0,13.0,2019-12-01
519,Man United,Aston Villa,D,2,2,C Pawson,1.45,4.5,7.0,19/20,2019-12-01Man U,Man United,1824.794556,11.0,Man United,17.0,4.0,13.0,2019-12-01


In [23]:
home_elo['Date'] = [i for i in df.index]
home_elo.set_index('Date', drop = True, inplace=True) #regain the date index after merge

In [24]:
home_elo.head()

Unnamed: 0_level_0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Link,Club,Elo,Position,Team,Points,Goal_Difference,Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2018-08-10,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19,2018-08-10Man U,Man United,1882.255249,1.0,Man United,0.0,0.0,0.0
2018-08-11,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19,2018-08-11Bourn,Bournemouth,1670.799805,1.0,Bournemouth,0.0,0.0,0.0
2018-08-11,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19,2018-08-11Fulha,Fulham,1631.047729,1.0,Fulham,0.0,0.0,0.0
2018-08-11,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19,2018-08-11Hudde,Huddersfield,1564.29187,1.0,Huddersfield,0.0,0.0,0.0
2018-08-11,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19,2018-08-11Newca,Newcastle,1668.042847,1.0,Newcastle,0.0,0.0,0.0


In [25]:
cols_to_drop = ['Club', 'Team', 'Link']

In [26]:
home_elo.drop(columns = cols_to_drop , inplace = True)

In [27]:
home_elo.rename(columns = {
    'Elo': 'Home_ELO',
    'Position': 'Home_LP',
    'Points': 'Home_Points',
    'Goal_Difference': 'Home_GD',
    'Played': 'Home_Played'
}, inplace = True)

#renaming of columns to be specifically Home to allow for similar Away Team columns

In [28]:
home_elo.head(20) #double check df is how we want it

Unnamed: 0_level_0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Home_ELO,Home_LP,Home_Points,Home_GD,Home_Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2018-08-10,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19,1882.255249,1.0,0.0,0.0,0.0
2018-08-11,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19,1670.799805,1.0,0.0,0.0,0.0
2018-08-11,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19,1631.047729,1.0,0.0,0.0,0.0
2018-08-11,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19,1564.29187,1.0,0.0,0.0,0.0
2018-08-11,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19,1668.042847,1.0,0.0,0.0,0.0
2018-08-11,Watford,Brighton,H,2,0,J Moss,2.37,3.2,3.4,18/19,1623.49585,1.0,0.0,0.0,0.0
2018-08-11,Wolves,Everton,D,2,2,C Pawson,2.37,3.3,3.3,18/19,1590.322388,1.0,0.0,0.0,0.0
2018-08-12,Arsenal,Man City,A,0,2,M Oliver,4.0,3.8,1.95,18/19,1819.932617,1.0,0.0,0.0,0.0
2018-08-12,Liverpool,West Ham,H,4,0,A Taylor,1.25,6.5,14.0,18/19,1915.229004,1.0,0.0,0.0,0.0
2018-08-12,Southampton,Burnley,D,0,0,G Scott,1.85,3.5,5.0,18/19,1650.654053,1.0,0.0,0.0,0.0


In [29]:
df = home_elo
#reset df and repeat for the away team

In [30]:
df['Link'] = [str(i)[:10]+n[:5] for i, n in zip(df.index, df.AwayTeam)]

In [31]:
df.Link.head()

Date
2018-08-10    2018-08-10Leice
2018-08-11    2018-08-11Cardi
2018-08-11    2018-08-11Cryst
2018-08-11    2018-08-11Chels
2018-08-11    2018-08-11Totte
Name: Link, dtype: object

In [32]:
away_elo = pd.merge(left = df, right = ELO, left_on = 'Link', right_on = 'Link', how = 'left')
away_elo.head()

Unnamed: 0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,...,Home_Played,Link,Club,Elo,Position,Team,Points,Goal_Difference,Played,Date
0,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19,...,0.0,2018-08-10Leice,Leicester,1694.578125,1.0,Leicester,0.0,0.0,0.0,2018-08-10
1,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19,...,0.0,2018-08-11Cardi,Cardiff,1573.711304,1.0,Cardiff,0.0,0.0,0.0,2018-08-10
2,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19,...,0.0,2018-08-11Cryst,Crystal Palace,1690.268555,1.0,Crystal Palace,0.0,0.0,0.0,2018-08-10
3,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19,...,0.0,2018-08-11Chels,Chelsea,1834.489136,1.0,Chelsea,0.0,0.0,0.0,2018-08-10
4,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19,...,0.0,2018-08-11Totte,Tottenham,1912.118164,1.0,Tottenham,0.0,0.0,0.0,2018-08-10


In [33]:
away_elo['Date'] = [i for i in df.index]
away_elo.set_index('Date',drop = True, inplace=True)

In [34]:
away_elo.drop(columns = cols_to_drop , inplace = True)

In [35]:
away_elo.rename(columns = {
    'Elo': 'Away_ELO',
    'Position': 'Away_LP',
    'Points': 'Away_Points',
    'Goal_Difference': 'Away_GD',
    'Played': 'Away_Played'
}, inplace = True)

In [36]:
away_elo.head()

Unnamed: 0_level_0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Home_ELO,Home_LP,Home_Points,Home_GD,Home_Played,Away_ELO,Away_LP,Away_Points,Away_GD,Away_Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2018-08-10,Man United,Leicester,H,2,1,A Marriner,1.57,3.9,7.5,18/19,1882.255249,1.0,0.0,0.0,0.0,1694.578125,1.0,0.0,0.0,0.0
2018-08-11,Bournemouth,Cardiff,H,2,0,K Friend,1.9,3.6,4.5,18/19,1670.799805,1.0,0.0,0.0,0.0,1573.711304,1.0,0.0,0.0,0.0
2018-08-11,Fulham,Crystal Palace,A,0,2,M Dean,2.5,3.4,3.0,18/19,1631.047729,1.0,0.0,0.0,0.0,1690.268555,1.0,0.0,0.0,0.0
2018-08-11,Huddersfield,Chelsea,A,0,3,C Kavanagh,6.5,4.0,1.61,18/19,1564.29187,1.0,0.0,0.0,0.0,1834.489136,1.0,0.0,0.0,0.0
2018-08-11,Newcastle,Tottenham,A,1,2,M Atkinson,3.9,3.5,2.04,18/19,1668.042847,1.0,0.0,0.0,0.0,1912.118164,1.0,0.0,0.0,0.0


In [37]:
df = away_elo

In [38]:
df.tail(20)

Unnamed: 0_level_0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,Home_ELO,Home_LP,Home_Points,Home_GD,Home_Played,Away_ELO,Away_LP,Away_Points,Away_GD,Away_Played
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-11-23,West Ham,Tottenham,A,2,3,M Oliver,4.2,4.33,1.7,19/20,1707.627197,16.0,13.0,-6.0,12.0,1838.901978,14.0,14.0,1.0,12.0
2019-11-23,Arsenal,Southampton,D,2,2,S Attwell,1.45,4.75,6.5,19/20,1849.495361,6.0,17.0,-1.0,12.0,1638.995972,19.0,8.0,-18.0,12.0
2019-11-23,Bournemouth,Wolves,A,1,2,S Hooper,2.75,3.3,2.6,19/20,1707.710693,9.0,16.0,0.0,12.0,1755.309814,8.0,16.0,1.0,12.0
2019-11-23,Brighton,Leicester,A,0,2,M Dean,3.75,3.5,2.0,19/20,1653.084717,11.0,15.0,-2.0,12.0,1822.116699,2.0,26.0,21.0,12.0
2019-11-23,Crystal Palace,Liverpool,A,1,2,K Friend,7.5,4.75,1.4,19/20,1740.198364,12.0,15.0,-6.0,12.0,2061.503418,1.0,34.0,18.0,12.0
2019-11-23,Everton,Norwich,A,0,2,A Taylor,1.36,5.1,8.0,19/20,1736.123413,15.0,14.0,-5.0,12.0,1594.017212,20.0,7.0,-17.0,12.0
2019-11-23,Watford,Burnley,A,0,3,P Tierney,2.3,3.4,3.1,19/20,1661.537842,18.0,8.0,-15.0,12.0,1693.949951,10.0,15.0,-1.0,12.0
2019-11-23,Man City,Chelsea,H,2,1,M Atkinson,1.44,5.0,6.5,19/20,2017.376831,4.0,25.0,22.0,12.0,1899.854248,3.0,26.0,10.0,12.0
2019-11-24,Sheffield United,Man United,D,3,3,A Marriner,3.5,3.4,2.1,19/20,1679.544678,8.0,17.0,4.0,12.0,1831.053589,10.0,16.0,4.0,12.0
2019-11-25,Aston Villa,Newcastle,H,2,0,L Mason,2.05,3.4,3.7,19/20,1636.698364,17.0,11.0,-3.0,12.0,1717.314209,14.0,15.0,-7.0,12.0


In [40]:
df.shape

(520, 20)

In [46]:
df.reset_index(drop= True, inplace = True) #date is no longer required however position will be so reset index here

In [61]:
def home_points(x, df = df, time_frame = 5):
    """ a function to determine how many points a home team has gained over a certain period of previous matches"""
    
    team_h = x.HomeTeam
    season = x.Season
    #print(team_h)
    gw = x.Home_Played
    #print(gw)
    diff = gw - time_frame
    target_h_a = df[((df.HomeTeam == team_h) | (df.AwayTeam == team_h))  & (df.Season == season)]
    
    target_h   = target_h_a[(target_h_a.HomeTeam == team_h)]
    
    target_h_d = target_h[(target_h.Home_Played == diff)]
    
    target_a = target_h_a[(target_h_a.AwayTeam == team_h)]
    
    target_a_d = target_a[(target_a.Away_Played) == diff]
    # print(type(target_h))
    #return target_h
    if target_h_d.loc[:, 'HomeTeam'].any() == team_h:
        points = x.Home_Points - target_h_d.Home_Points
        #print(target_h.Home_Points)
        
    elif target_a_d.loc[:, 'AwayTeam'].any() == team_h:
        points = x.Home_Points - target_a_d.Away_Points
        #print(target_h.Away_Points.item())
        
    else:
        return 0
        
    
    
    return points.item()

In [62]:
home_points(df.iloc[82], df = df, time_frame = 5) # a test

11.0

In [63]:
df['home_last_match'] = df.apply(lambda x: home_points(x, df= df, time_frame = 1), axis = 1)
df['home_last_3'] = df.apply(lambda x: home_points(x, df= df, time_frame = 3), axis = 1)
df['home_last_5'] = df.apply(lambda x: home_points(x, df= df, time_frame = 5), axis = 1)
#create columns for the last match, last 3 matches and last 5 matches to act as predictors

In [66]:
def away_points(x, df = df, time_frame = 5):
    """ a function to determine how many points an away team has gained over a certain period of previous matches"""
    team_h = x.AwayTeam
    season = x.Season
    #print(team_h)
    gw = x.Away_Played
    #print(gw)
    diff = gw - time_frame
    target_h_a = df[((df.HomeTeam == team_h) | (df.AwayTeam == team_h))  & (df.Season == season)]
    
    target_h   = target_h_a[(target_h_a.HomeTeam == team_h)]
    
    target_h_d = target_h[(target_h.Home_Played == diff)]
    
    target_a = target_h_a[(target_h_a.AwayTeam == team_h)]
    
    target_a_d = target_a[(target_a.Away_Played) == diff]
    
    if target_h_d.loc[:, 'HomeTeam'].any() == team_h:
        points = x.Away_Points - target_h_d.Home_Points
        #print(target_h.Home_Points)
        
    elif target_a_d.loc[:, 'AwayTeam'].any() == team_h:
        points = x.Away_Points - target_a_d.Away_Points
        #print(target_h.Away_Points.item())
        
    else:
        return 0
        
    
    
    return points.item()

In [67]:
df['away_last_match'] = df.apply(lambda x: away_points(x, df= df, time_frame = 1), axis = 1)
df['away_last_3'] = df.apply(lambda x: away_points(x, df= df, time_frame = 3), axis = 1)
df['away_last_5'] = df.apply(lambda x: away_points(x, df= df, time_frame = 5), axis = 1)
#repeat for the away team

In [215]:
df.tail()

Unnamed: 0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Season,...,Away_GD,Away_Played,home_last_match,home_last_3,home_last_5,away_last_match,away_last_3,away_last_5,Home_Points/Played,Away_Points/Played
515,Southampton,Watford,H,2,1,M Oliver,2.1,3.4,3.5,19/20,...,-18.0,13.0,1.0,1.0,2.0,0.0,3.0,5.0,0.692308,0.615385
516,Norwich,Arsenal,D,2,2,P Tierney,4.0,4.33,1.75,19/20,...,-1.0,13.0,3.0,3.0,4.0,1.0,2.0,3.0,0.769231,1.384615
517,Wolves,Sheffield United,D,1,1,D Coote,1.9,3.5,4.2,19/20,...,4.0,13.0,3.0,7.0,9.0,1.0,5.0,9.0,1.461538,1.384615
518,Leicester,Everton,H,2,1,G Scott,1.61,4.0,5.25,19/20,...,-7.0,13.0,3.0,9.0,15.0,0.0,4.0,7.0,2.230769,1.076923
519,Man United,Aston Villa,D,2,2,C Pawson,1.45,4.5,7.0,19/20,...,-1.0,13.0,1.0,4.0,8.0,3.0,3.0,6.0,1.307692,1.076923


In [71]:
df['Home_Points/Played'] = df.Home_Points / df.Home_Played
df['Away_Points/Played'] = df.Away_Points / df.Away_Played
#take away points dependence on number of matches played as this may not always be the same between oponents

In [72]:
df.fillna(0, inplace = True)

In [216]:
final_df = df.to_csv('../CSV/final_df.csv', index=False)

## double check everything below this has already been copied in future notebooks

In [226]:
X = df[[#'HomeTeam', 'AwayTeam',
        'Home_ELO', 'Home_LP', 'Home_Points/Played',
        'Home_GD', 
        #'home_last_match', 
        'home_last_3', 'home_last_5', 
        'Away_ELO', 'Away_LP', 'Away_Points/Played',
        'Away_GD',
        #'away_last_match',
        'away_last_3', 'away_last_5'
       ]]
y = df.FTR
X = pd.get_dummies(X)

In [227]:
y.value_counts(normalize = True)

H    0.463462
A    0.326923
D    0.209615
Name: FTR, dtype: float64

In [228]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 1, shuffle = True)

In [229]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('cls', LogisticRegression(multi_class = 'ovr', solver = 'liblinear')),
    #('gridsearch', GridSearchCV(cv = 5, estimator = 'cls', param_grid = param_grid ))
    #('cls', RandomForestClassifier(criterion = 'gini', n_estimators = 10))
])

pipeline.fit(X_train, y_train)

print(pipeline.score(X_train, y_train))
print(pipeline.score(X_test, y_test))
print(cross_val_score(pipeline, X_train, y_train, cv = 5).mean())

0.5714285714285714
0.5705128205128205
0.5578136162402031


In [209]:
param_grid = {'C': np.logspace(-8, -2, 100),
             'solver': ['lbfgs', 'liblinear'],
             }

In [210]:
GS = GridSearchCV(cv = 5,
                  estimator =  LogisticRegression(multi_class = 'ovr', max_iter = 10000),
                  param_grid = param_grid, n_jobs = -1, verbose = 1 )

In [221]:
GS.fit(X_train, y_train)

Fitting 5 folds for each of 200 candidates, totalling 1000 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    2.2s
[Parallel(n_jobs=-1)]: Done 1000 out of 1000 | elapsed:    5.5s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=10000, multi_class='ovr',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='warn',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid={'C': array([1.00000000...
       1.23284674e-03, 1.41747416e-03, 1.62975083e-03, 1.87381742e-03,
       2.15443469e-03, 2.47707636e-03, 2.84803587e-03, 3.27454916e-03,
       3.76493581e-03, 4.32876128e-03, 4.97702356e-03, 5.72236766e-03,
       6.57933225e-03, 7.56463328e-03, 8.69749003e-03, 1.00000000e-02]),
       

In [222]:
model = GS.best_estimator_
model 

LogisticRegression(C=2.310129700083158e-06, class_weight=None, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=10000, multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [223]:
GS.best_score_ 

0.5669642857142857

In [224]:
print(cross_val_score(model, X_train, y_train, cv = 5).mean())
print(model.score(X_train, y_train))
print(model.score(X_test, y_test)) #0.42

0.56600790513834
0.5714285714285714
0.53125


In [77]:
len(model.coef_)

3

In [78]:
pd.DataFrame(list(zip(X.columns, model.coef_[2])),
            columns = ['feature', 'coef']).sort_values(by='coef', ascending = False)

Unnamed: 0,feature,coef
0,Home_ELO,0.004147878
3,Home_GD,0.0008578129
5,Away_LP,5.225662e-05
2,Home_Points/Played,2.644531e-05
20,HomeTeam_Liverpool,1.455742e-05
40,AwayTeam_Fulham,1.159594e-05
21,HomeTeam_Man City,1.057739e-05
33,AwayTeam_Bournemouth,8.709213e-06
19,HomeTeam_Leicester,7.740941e-06
31,AwayTeam_Arsenal,7.586266e-06


In [152]:
rf_params = {'bootstrap': [True, False],
 'max_depth': [ 1, 2, 3, 4, 5, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2],
 'min_samples_split': [4, 5, 6, 7, 8],
 'n_estimators': [10, 11, 12, 13, 14, 15]}

In [153]:
GS = GridSearchCV(cv = 5,
                  estimator = RandomForestClassifier(criterion = 'gini'),
                  param_grid =rf_params, n_jobs = 4, verbose = 1 )

In [154]:
GS.fit(X_train, y_train)

Fitting 5 folds for each of 1440 candidates, totalling 7200 fits


[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done 181 tasks      | elapsed:    2.2s
[Parallel(n_jobs=4)]: Done 4381 tasks      | elapsed:   16.4s
[Parallel(n_jobs=4)]: Done 7200 out of 7200 | elapsed:   26.7s finished


GridSearchCV(cv=5, error_score='raise-deprecating',
             estimator=RandomForestClassifier(bootstrap=True, class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators='warn', n_jobs=None,
                                              oob_score=False,
                                              random_state=None, verbose=0,
                                              warm_start=False),
             iid

In [156]:
model = GS.best_estimator_
GS.best_score_                 

0.5769230769230769

In [83]:
model

RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
                       max_depth=3, max_features='auto', max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=2, min_samples_split=4,
                       min_weight_fraction_leaf=0.0, n_estimators=10,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [157]:
print(cross_val_score(model, X_train, y_train, cv = 5).mean())
print(model.score(X_train, y_train))
print(model.score(X_test, y_test))

0.5411675982245603
0.6307692307692307
0.5076923076923077


In [98]:
confusion_matrix(y_test, model.predict(X_test))

array([[ 2,  0, 14],
       [ 2,  0, 17],
       [ 1,  0, 20]])

In [107]:
model.feature_importances_

array([0.06084958, 0.08990839, 0.03964689, 0.072233  , 0.08600418,
       0.14197951, 0.05352937, 0.08483439, 0.05950445, 0.        ,
       0.0440289 , 0.        , 0.        , 0.        , 0.01384564,
       0.        , 0.        , 0.0503128 , 0.        , 0.01023424,
       0.        , 0.        , 0.        , 0.        , 0.01275466,
       0.        , 0.        , 0.        , 0.00565954, 0.00435781,
       0.        , 0.02936243, 0.        , 0.        , 0.03317661,
       0.04479678, 0.03085454, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.0321263 , 0.        , 0.        ,
       0.        , 0.        , 0.        ])

In [87]:
pd.DataFrame(confusion_matrix(y, pipeline.predict(X)), columns = pipeline.steps[-1][1].classes_,
                                                         index = pipeline.steps[-1][1].classes_)

Unnamed: 0,A,D,H
A,109,7,54
D,29,22,58
H,39,10,192


In [89]:
print(classification_report(y, model.predict(X)))

              precision    recall  f1-score   support

           A       0.65      0.41      0.50       170
           D       1.00      0.01      0.02       109
           H       0.53      0.90      0.66       241

    accuracy                           0.55       520
   macro avg       0.73      0.44      0.39       520
weighted avg       0.67      0.55      0.47       520



In [42]:
df.tail(10)

Unnamed: 0,HomeTeam,AwayTeam,FTR,FTHG,FTAG,Referee,B365H,B365D,B365A,Home_ELO,Home_LP,Home_Points,Home_GD,Home_GW,Away_ELO,Away_LP,Away_Points,Away_GD,Away_GW
370,Brighton,Man City,A,1,4,M Oliver,19.0,8.5,1.16,1618.852905,17.0,36.0,-25.0,38.0,2034.380981,1.0,98.0,72.0,38.0
371,Burnley,Arsenal,A,1,3,M Dean,3.25,3.8,2.2,1688.669556,15.0,40.0,-23.0,38.0,1873.2677,5.0,70.0,22.0,38.0
372,Crystal Palace,Bournemouth,H,5,3,R East,1.9,4.2,3.8,1734.742065,12.0,49.0,-2.0,38.0,1697.811768,14.0,45.0,-14.0,38.0
373,Fulham,Newcastle,A,0,4,K Friend,2.5,3.6,2.9,1599.815308,19.0,26.0,-47.0,38.0,1700.610352,13.0,45.0,-6.0,38.0
374,Leicester,Chelsea,D,0,0,A Taylor,2.4,3.75,2.9,1736.503174,9.0,52.0,3.0,38.0,1870.400146,3.0,72.0,24.0,38.0
375,Liverpool,Wolves,H,2,0,M Atkinson,1.3,6.0,11.0,2035.113281,2.0,97.0,67.0,38.0,1721.086304,7.0,57.0,1.0,38.0
376,Man United,Cardiff,A,0,2,J Moss,1.28,6.5,11.0,1859.725342,6.0,66.0,11.0,38.0,1585.044189,18.0,34.0,-35.0,38.0
377,Southampton,Huddersfield,D,1,1,L Probert,1.44,4.75,8.5,1679.398438,16.0,39.0,-20.0,38.0,1515.806152,20.0,16.0,-54.0,38.0
378,Tottenham,Everton,D,2,2,A Marriner,2.2,3.5,3.5,1902.648438,4.0,71.0,28.0,38.0,1763.89563,8.0,54.0,8.0,38.0
379,Watford,West Ham,A,1,4,C Kavanagh,2.25,3.75,3.2,1701.092041,11.0,50.0,-7.0,38.0,1712.852173,10.0,52.0,-3.0,38.0


In [43]:
1 / pipeline.predict_proba(X)[-10:]

array([[  1.19861241,  10.76172546,  13.74003062],
       [  1.78523878,   8.7080925 ,   3.07677984],
       [ 14.43420753,   8.39384695,   1.23215646],
       [  2.74400382,   2.02814498,   7.01716858],
       [  1.89527108,  10.24574888,   2.66830652],
       [ 28.26152848,   6.9153787 ,   1.21949588],
       [205.43977723,   4.43250514,   1.29950068],
       [ 16.33705284,   3.1750647 ,   1.60298736],
       [  9.12673919,   7.51807123,   1.32027319],
       [  1.88306284,   9.20400033,   2.7754505 ]])

In [44]:
pipeline.predict(X)[-10:]

array(['A', 'A', 'H', 'D', 'A', 'H', 'H', 'H', 'H', 'A'], dtype=object)

In [45]:
X

Unnamed: 0,Home_ELO,Home_LP,Home_Points,Home_GD,Away_ELO,Away_LP,Away_Points,Away_GD,HomeTeam_Arsenal,HomeTeam_Bournemouth,...,AwayTeam_Leicester,AwayTeam_Liverpool,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Newcastle,AwayTeam_Southampton,AwayTeam_Tottenham,AwayTeam_Watford,AwayTeam_West Ham,AwayTeam_Wolves
0,1882.255249,7.0,3.0,1.0,1694.578125,13.0,0.0,-1.0,0,0,...,1,0,0,0,0,0,0,0,0,0
1,1670.799805,3.0,3.0,2.0,1573.711304,15.0,0.0,-2.0,0,1,...,0,0,0,0,0,0,0,0,0,0
2,1631.047729,15.0,0.0,-2.0,1690.268555,3.0,3.0,2.0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1564.291870,19.0,0.0,-3.0,1834.489136,2.0,3.0,3.0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1668.042847,13.0,0.0,-1.0,1912.118164,7.0,3.0,1.0,0,0,...,0,0,0,0,0,0,1,0,0,0
5,1623.495850,3.0,3.0,2.0,1630.092163,15.0,0.0,-2.0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1590.322388,9.0,1.0,0.0,1696.834473,9.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,1819.932617,15.0,0.0,-2.0,1972.182007,3.0,3.0,2.0,1,0,...,0,0,1,0,0,0,0,0,0,0
8,1915.229004,1.0,3.0,4.0,1670.878174,20.0,0.0,-4.0,0,0,...,0,0,0,0,0,0,0,0,1,0
9,1650.654053,11.0,1.0,0.0,1690.084106,11.0,1.0,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [173]:
X

Unnamed: 0,Home_ELO,Home_LP,Home_Points/Played,Home_GD,Away_ELO,Away_LP,Away_Points/Played,Away_GD,HomeTeam_Arsenal,HomeTeam_Aston Villa,...,AwayTeam_Man City,AwayTeam_Man United,AwayTeam_Newcastle,AwayTeam_Norwich,AwayTeam_Sheffield United,AwayTeam_Southampton,AwayTeam_Tottenham,AwayTeam_Watford,AwayTeam_West Ham,AwayTeam_Wolves
0,1882.255249,1.0,0.000000,0.0,1694.578125,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1670.799805,1.0,0.000000,0.0,1573.711304,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1631.047729,1.0,0.000000,0.0,1690.268555,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1564.291870,1.0,0.000000,0.0,1834.489136,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,1668.042847,1.0,0.000000,0.0,1912.118164,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,1,0,0,0
5,1623.495850,1.0,0.000000,0.0,1630.092163,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,1590.322388,1.0,0.000000,0.0,1696.834473,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,1819.932617,1.0,0.000000,0.0,1972.182007,1.0,0.000000,0.0,1,0,...,1,0,0,0,0,0,0,0,0,0
8,1915.229004,1.0,0.000000,0.0,1670.878174,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,1,0
9,1650.654053,1.0,0.000000,0.0,1690.084106,1.0,0.000000,0.0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [166]:
def prediction_odds(x, i, DF):
    if x == 'H':
        return DF.loc[i, 'B365H']
    elif x == 'D':
        return DF.loc[i, 'B365D']
    elif x == 'A':
        return DF.loc[i, 'B365A']

In [167]:
def profit_calculation(model = pipeline, df = df, X_test = X_test):
    test_predictions = model.predict(X_test)
    indices = list(X_test.index)
    #print(len(indices))
    #print(len(test_predictions))
    cols = ['HomeTeam', 'AwayTeam', 'FTR', 'B365H', 'B365D', 'B365A']
    test_df = df.loc[indices, cols]
    #print(test_df.shape)
    test_df['prediction'] = test_predictions
    bets = []
    for x, i in list(zip(test_df.prediction, indices)):
        bets.append(prediction_odds(x, i, df))
    test_df['bet_odds'] = bets
    test_df['win'] = (test_df.FTR == test_df.prediction)*1
    test_df['profits'] = test_df.bet_odds * test_df.win
    total_profit = (test_df.profits.sum() - len(test_df))
    #print(total_profit)
    return total_profit #test_df

In [168]:
X_test.shape

(130, 60)

In [225]:
profit_calculation(model = model, df = df, X_test = X_test)

-8.299999999999997

In [179]:
money = []
for i in range(100):
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state =i, shuffle = True)
    #pipeline = Pipeline([
    #('scaler', StandardScaler()),
    #('cls', LogisticRegression(multi_class = 'ovr', solver = 'lbfgs', max_iter = 100000))
    #('cls', RandomForestClassifier(criterion = 'gini', n_estimators = 10))
    #])

    model.fit(X_train, y_train)
    
    
    money.append(profit_calculation(model = model, X_test = X_test))

In [180]:
(sum(money) / len(money)) #/(len(X_test) * 0.3) 

2.984699999999998

In [181]:
max(money), min(money)

(31.72999999999999, -18.64)

In [53]:
df.HomeTeam.unique()

array(['Man United', 'Bournemouth', 'Fulham', 'Huddersfield', 'Newcastle',
       'Watford', 'Wolves', 'Arsenal', 'Liverpool', 'Southampton',
       'Cardiff', 'Chelsea', 'Everton', 'Leicester', 'Tottenham',
       'West Ham', 'Brighton', 'Burnley', 'Man City', 'Crystal Palace'],
      dtype=object)

In [54]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state =1)

In [157]:
pipeline = Pipeline([
    ('scaler', MinMaxScaler()),
    ('cls', LogisticRegression(multi_class = 'ovr', solver = 'liblinear')),
    #('gridsearch', GridSearchCV())
])

pipeline.fit(X_train, y_train)

print(pipeline.score(X_train, y_train))
print(pipeline.score(X_test, y_test))
print(cross_val_score(pipeline, X_train, y_train, cv = 5).mean())

0.6730769230769231
0.5269230769230769
0.5730925752013432


In [182]:
wkend = pd.read_csv('weekend_matches.csv')

In [57]:
T = pd.get_dummies(wkend)

In [58]:
missing_cols = set(X.columns) - set(T.columns)

In [59]:
hey = {1, 1, 1, 1, 0, 0, 1}
for c in hey:
    print(c)

0
1


In [60]:
for c in missing_cols:
    T[c] = 0

In [61]:
T = T[X.columns]

In [62]:
pipeline.predict_proba(T)

array([[0.32617339, 0.22136573, 0.45246088],
       [0.36896356, 0.34211031, 0.28892613],
       [0.20395422, 0.51726159, 0.27878419],
       [0.36806224, 0.39331528, 0.23862248],
       [0.44090727, 0.48114233, 0.07795041],
       [0.35240731, 0.37766777, 0.26992493],
       [0.53796708, 0.3728827 , 0.08915022],
       [0.26575082, 0.21241558, 0.52183361]])

In [63]:
eg = pipeline.predict_proba(T)

In [64]:
for i in eg:
    print(i)
    print('break')

[0.32617339 0.22136573 0.45246088]
break
[0.36896356 0.34211031 0.28892613]
break
[0.20395422 0.51726159 0.27878419]
break
[0.36806224 0.39331528 0.23862248]
break
[0.44090727 0.48114233 0.07795041]
break
[0.35240731 0.37766777 0.26992493]
break
[0.53796708 0.3728827  0.08915022]
break
[0.26575082 0.21241558 0.52183361]
break


In [65]:
wkend[['AwayTeam', 'HomeTeam']]

Unnamed: 0,AwayTeam,HomeTeam
0,Tottenham,West Ham
1,Southampton,Arsenal
2,Wolves,Bournemouth
3,Leicester,Brighton
4,Liverpool,Crystal Palace
5,Norwich,Everton
6,Burnley,Watford
7,Chelsea,Man City


In [186]:
def predictor(df, X = X, pipeline = pipeline):
    T = pd.get_dummies(df)
    missing_cols = set(X.columns) - set(T.columns)
    for c in missing_cols:
        T[c] = 0
    T = T[X.columns]
    df['prediction'] = pipeline.predict(T)
    probs = pipeline.predict_proba(T)
    
    df['Home_prob'] = [x[2] for x in probs]
    df['Draw_prob'] = [x[1] for x in probs]
    df['Away_prob'] = [x[0] for x in probs]
    
    df['Home_odds'] = [1/x[2] for x in probs]
    df['Draw_odds'] = [1/x[1] for x in probs]
    df['Away_odds'] = [1/x[0] for x in probs]
    
    return df.drop(columns = [#'Home_ELO', 
                              'Home_Points/Played', 
                              #'Away_ELO',
                              'Away_Points/Played'])

In [187]:
predictor(wkend, X = X, pipeline = model)

Unnamed: 0,HomeTeam,AwayTeam,Home_ELO,Home_LP,Home_GD,Away_ELO,Away_LP,Away_GD,prediction,Home_prob,Draw_prob,Away_prob,Home_odds,Draw_odds,Away_odds
0,Crystal Palace,Bournmouth,1737,11,-5,1683,12,-2,H,0.533029,0.215983,0.250989,1.876072,4.630003,3.984242
1,Burnley,Man City,1684,10,0,1995,3,23,A,0.209506,0.201717,0.588777,4.773128,4.957448,1.698436
2,Chelsea,Aston Villa,1869,4,8,1642,15,-1,H,0.670406,0.200197,0.129398,1.491634,4.995086,7.728114
3,Leicester,Watford,1821,2,24,1627,20,-19,H,0.648086,0.204843,0.147072,1.543005,4.881797,6.799413
4,Man United,Tottenham,1795,9,4,1840,5,3,H,0.451365,0.211583,0.337052,2.215499,4.726281,2.966904
5,Southampton,Norwich,1639,18,-17,1604,19,-15,H,0.505289,0.223127,0.271583,1.979064,4.481745,3.682113
6,Wolves,West Ham,1745,6,2,1702,13,-6,H,0.525765,0.215391,0.258843,1.90199,4.64271,3.86334
7,Liverpool,Everton,2044,1,20,1701,17,-8,H,0.742489,0.180957,0.076554,1.346821,5.526186,13.062632
8,Sheffield United,Newcastle,1673,7,4,1698,14,-9,H,0.458115,0.220488,0.321397,2.18286,4.535391,3.111414
9,Arsenal,Brighton,1813,8,-1,1632,16,-5,H,0.636348,0.206459,0.157194,1.571468,4.84358,6.361583
