### Web Scraping & Analysis of English Premier League (EPL)

In [6]:
# importing modules and packages

import re
import requests
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
pd.options.display.max_columns = None

In [4]:
#scraping the base url form the website www.football-data.co.uk
url="https://www.football-data.co.uk/englandm.php"

# Make a GET request to fetch the raw HTML content
html_content = requests.get(url).text

# Parse the html content
soup = BeautifulSoup(html_content, "lxml")

#print(soup.prettify()) # print the parsed data of html

Creating a list of all the csv files scraped from the website. This code will scrape any new season data that is uploaded on the website.

In [7]:
data = []

''' removing unwanted season as the dataset attribute format does not match the format of the newer data
unwanted = ['Season 1993/1994','Season 1994/1995','Season 1995/1996','Season 1996/1997','Season 1997/1998',
'Season 1998/1999','Season 1999/2000','Season 2000/2001','Season 2001/2002'] '''

for link in soup.find_all('a', href=True, text=re.compile('Premier League')):
    data.append("https://www.football-data.co.uk/"+link.get("href"))

data = data[1:20]
print(data)

len(data)

['https://www.football-data.co.uk/mmz4281/2021/E0.csv', 'https://www.football-data.co.uk/mmz4281/1920/E0.csv', 'https://www.football-data.co.uk/mmz4281/1819/E0.csv', 'https://www.football-data.co.uk/mmz4281/1718/E0.csv', 'https://www.football-data.co.uk/mmz4281/1617/E0.csv', 'https://www.football-data.co.uk/mmz4281/1516/E0.csv', 'https://www.football-data.co.uk/mmz4281/1415/E0.csv', 'https://www.football-data.co.uk/mmz4281/1314/E0.csv', 'https://www.football-data.co.uk/mmz4281/1213/E0.csv', 'https://www.football-data.co.uk/mmz4281/1112/E0.csv', 'https://www.football-data.co.uk/mmz4281/1011/E0.csv', 'https://www.football-data.co.uk/mmz4281/0910/E0.csv', 'https://www.football-data.co.uk/mmz4281/0809/E0.csv', 'https://www.football-data.co.uk/mmz4281/0708/E0.csv', 'https://www.football-data.co.uk/mmz4281/0607/E0.csv', 'https://www.football-data.co.uk/mmz4281/0506/E0.csv', 'https://www.football-data.co.uk/mmz4281/0405/E0.csv', 'https://www.football-data.co.uk/mmz4281/0304/E0.csv', 'https://

19

Reading all the csv files and appending it to a single dataset. The code piece df{i} will dynamically take in any season data which is scraped from the website.

In [9]:
frames = []

for i in range(len(data)):
    globals()[f"df{i}"] = pd.read_csv(data[i], encoding="latin-1", on_bad_lines='skip')
    df_object = globals()[f"df{i}"]
    frames.append(df_object)

In [10]:
df = pd.DataFrame()

for d in frames:
    df =  df.append(d, ignore_index=True)

In [11]:
df.head()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,LBH,LBD,LBA,SJH,SJD,SJA,GBH,GBD,GBA,BSH,BSD,BSA,SBH,SBD,SBA,GB>2.5,GB<2.5,GBAHH,GBAHA,GBAH,LBAHH,LBAHA,LBAH,B365AH,SOH,SOD,SOA,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52
0,E0,12/09/2020,12:30,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,A,C Kavanagh,5.0,13.0,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,6.0,4.33,1.53,5.5,4.25,1.57,6.0,3.9,1.57,6.16,4.51,1.56,6.5,4.2,1.53,6.5,4.2,1.55,6.55,4.55,1.6,5.94,4.34,1.55,1.72,2.1,1.8,2.13,1.84,2.18,1.76,2.1,1.0,1.93,1.97,1.96,1.96,2.0,1.99,1.93,1.95,5.0,4.0,1.66,5.5,4.0,1.62,5.25,3.9,1.67,5.48,3.98,1.69,5.5,3.8,1.65,5.5,3.9,1.67,5.75,4.2,1.71,5.36,3.93,1.67,2.0,1.8,2.06,1.86,2.1,1.92,2.0,1.84,0.75,2.01,1.89,2.02,1.91,2.13,1.92,2.02,1.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,E0,12/09/2020,15:00,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,H,J Moss,5.0,9.0,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,3.1,3.25,2.37,3.0,3.2,2.45,3.15,2.95,2.4,3.32,3.29,2.4,3.2,3.2,2.35,3.2,3.2,2.4,3.36,3.36,2.5,3.18,3.22,2.39,2.2,1.66,2.34,1.68,2.36,1.73,2.24,1.67,0.25,1.85,2.05,1.88,2.05,1.88,2.07,1.84,2.03,3.0,3.25,2.4,3.0,3.3,2.4,3.05,2.9,2.45,3.09,3.27,2.54,3.1,3.1,2.45,3.1,3.25,2.45,3.25,3.33,2.55,3.08,3.22,2.47,2.2,1.66,2.26,1.72,2.27,1.78,2.18,1.7,0.25,1.78,2.13,1.79,2.17,1.85,2.18,1.79,2.12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,E0,12/09/2020,17:30,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,H,M Oliver,22.0,6.0,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,1.28,6.0,9.5,1.26,6.25,10.5,1.35,5.0,8.5,1.31,6.25,9.92,1.27,6.0,10.0,1.3,5.75,10.5,1.35,6.5,10.75,1.3,5.96,9.68,1.53,2.5,1.56,2.6,1.56,2.68,1.52,2.53,-1.5,1.95,1.95,1.97,1.95,2.0,2.08,1.9,1.97,1.25,6.0,11.0,1.25,6.25,11.0,1.3,6.0,9.0,1.28,6.34,11.38,1.25,6.0,12.0,1.29,6.0,11.5,1.3,6.75,12.27,1.28,6.16,10.63,1.5,2.62,1.51,2.76,1.53,2.82,1.5,2.62,-1.5,1.85,2.05,1.85,2.08,1.9,2.16,1.84,2.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,E0,12/09/2020,20:00,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,D,S Attwell,15.0,15.0,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,2.15,3.4,3.4,2.15,3.4,3.4,2.15,3.15,3.4,2.18,3.61,3.5,2.15,3.5,3.4,2.15,3.4,3.6,2.24,3.7,3.6,2.15,3.48,3.42,1.9,1.9,2.0,1.91,2.05,1.95,1.97,1.86,-0.5,2.07,1.72,2.17,1.78,2.17,1.81,2.12,1.75,1.95,3.6,3.75,1.95,3.7,3.75,2.05,3.25,3.75,2.04,3.59,3.92,2.0,3.5,3.8,2.0,3.5,3.9,2.07,3.78,3.99,2.01,3.57,3.79,1.9,1.9,2.0,1.92,2.0,2.05,1.91,1.92,-0.5,2.03,1.87,2.04,1.88,2.09,1.91,2.02,1.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,E0,13/09/2020,14:00,West Brom,Leicester,0.0,3.0,A,0.0,0.0,D,A Taylor,7.0,13.0,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,3.8,3.6,1.95,3.7,3.6,2.0,3.85,3.2,2.0,4.0,3.59,2.0,3.8,3.6,1.95,4.0,3.5,1.95,4.0,3.82,2.04,3.87,3.57,1.97,1.9,1.9,2.0,1.91,2.02,2.03,1.92,1.9,0.5,1.91,1.99,1.92,2.0,1.93,2.02,1.88,1.97,3.25,3.4,2.2,3.3,3.4,2.2,3.35,3.0,2.3,3.38,3.38,2.32,3.3,3.3,2.25,3.3,3.3,2.3,3.55,3.5,2.38,3.32,3.33,2.28,2.2,1.66,2.23,1.74,2.28,1.82,2.15,1.73,0.25,1.92,1.98,1.93,1.99,1.95,2.01,1.91,1.97,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [12]:
#exporting th dataset to ./data folder in the project for further analysis and visualizations

df.to_csv(r'./data/df.csv',index=False)

### Data cleaning and subsetting required data
Remove betting data as we don't need it for prediction. Dropping any unnecessary columns as well. 

In [13]:
#we only require the first 22 columns for the purpose of prediction

df_new = df.iloc[:,:23]
df_new = df_new.drop(['Div','Date','Time'],axis=1)

table_features = df.iloc[:,:7]
table_features = table_features.drop(['FTHG','FTAG','Div','Date'],axis=1)

In [21]:
df_new.head()

Unnamed: 0,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR
0,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,A,C Kavanagh,5.0,13.0,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0
1,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,H,J Moss,5.0,9.0,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0
2,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,H,M Oliver,22.0,6.0,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0
3,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,D,S Attwell,15.0,15.0,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0
4,West Brom,Leicester,0.0,3.0,A,0.0,0.0,D,A Taylor,7.0,13.0,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0


In [22]:
df_new.shape[0]

7131

>>> Add formulas

In [23]:
avg_home_scored = df_new.FTHG.sum()*1.0 / df_new.shape[0]
avg_away_scored = df_new.FTAG.sum()*1.0 / df_new.shape[0]

avg_home_conceded = avg_away_scored
avg_away_conceded = avg_home_scored

print("Average number of goals at home =",avg_home_scored)
print("Average number of goals away =", avg_away_scored)
print("Average number of goals conceded at home =",avg_home_conceded)
print("Average number of goals conceded away",avg_away_conceded)

Average number of goals at home = 1.5201234048520544
Average number of goals away = 1.1531342027766092
Average number of goals conceded at home = 1.1531342027766092
Average number of goals conceded away 1.5201234048520544


Creating 2 groupby objects - one for Home Teams and the other for Away Teams.

In [24]:
result_home = df_new.groupby(['HomeTeam'])
result_away = df_new.groupby('AwayTeam')

>Some Terminology fot understanding purpose:

HGS - Home Goals Scored

AGC - Away Goals Scored

HGC - Home Goals Conceded

AGC - Away Goals Conceded

>Goals Scored:
>>HGS = Full-Time Goal Scored (home) / Total Home Games Played 

>>AGC = Full-Time Goal Scored (away) / Total Away Games Played


>Goals Conceded
>>HGC = Full-Time Goal Conceded (home) / Total Home Games Played

>>AGC = Full-Time Goal Conceded (away) / Total Away Games Played

In [25]:
table = pd.DataFrame()

table['HGS'] = result_home['FTHG'].sum()
table['HGC'] = result_home['FTAG'].sum()
table['AGS'] = result_away['FTAG'].sum()
table['AGC'] = result_away['FTHG'].sum()

table.head()

Unnamed: 0_level_0,HGS,HGC,AGS,AGC
HomeTeam,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Arsenal,748.0,308.0,584.0,440.0
Aston Villa,373.0,353.0,339.0,495.0
Birmingham,157.0,136.0,107.0,213.0
Blackburn,252.0,213.0,201.0,312.0
Blackpool,30.0,37.0,25.0,41.0


In [26]:
table.reset_index(inplace=True)
table.head()

Unnamed: 0,HomeTeam,HGS,HGC,AGS,AGC
0,Arsenal,748.0,308.0,584.0,440.0
1,Aston Villa,373.0,353.0,339.0,495.0
2,Birmingham,157.0,136.0,107.0,213.0
3,Blackburn,252.0,213.0,201.0,312.0
4,Blackpool,30.0,37.0,25.0,41.0


In [27]:
#renaming the columns

table.columns.values[0] = "Team"
table.head()

Unnamed: 0,Team,HGS,HGC,AGS,AGC
0,Arsenal,748.0,308.0,584.0,440.0
1,Aston Villa,373.0,353.0,339.0,495.0
2,Birmingham,157.0,136.0,107.0,213.0
3,Blackburn,252.0,213.0,201.0,312.0
4,Blackpool,30.0,37.0,25.0,41.0


>Some Terminology for understanding purpose:

HAS - Home Attack Score

AAS - Away Attack Score
 
HDS - Home Defense Score

DS - Away Defense Score

> Attack Score = Average Goals Scored / League Average Goal Scored

> Defense Score = Average Goals Conceded / League Average Goal Conceded

In [28]:
#Assuming number of home games = number of away games
num_games = df_new.shape[0]/40

In [29]:
#Home/Away Attack & Defense Score

table['HAS'] = (table['HGS'] / num_games) / avg_home_scored
table['AAS'] = (table['AGS'] / num_games) / avg_away_scored
table['HDS'] = (table['HGC'] / num_games) / avg_home_conceded
table['ADS'] = (table['AGC'] / num_games) / avg_away_conceded
table.head()

Unnamed: 0,Team,HGS,HGC,AGS,AGC,HAS,AAS,HDS,ADS
0,Arsenal,748.0,308.0,584.0,440.0,2.760148,2.840812,1.498237,1.623616
1,Aston Villa,373.0,353.0,339.0,495.0,1.376384,1.649033,1.717135,1.826568
2,Birmingham,157.0,136.0,107.0,213.0,0.579336,0.520491,0.661559,0.785978
3,Blackburn,252.0,213.0,201.0,312.0,0.929889,0.977745,1.036118,1.151292
4,Blackpool,30.0,37.0,25.0,41.0,0.110701,0.12161,0.179983,0.151292


In [30]:
cols = ['HAS', 'AAS', 'HDS', 'ADS']

table[cols] = table[cols].round(3)

table.head()

Unnamed: 0,Team,HGS,HGC,AGS,AGC,HAS,AAS,HDS,ADS
0,Arsenal,748.0,308.0,584.0,440.0,2.76,2.841,1.498,1.624
1,Aston Villa,373.0,353.0,339.0,495.0,1.376,1.649,1.717,1.827
2,Birmingham,157.0,136.0,107.0,213.0,0.579,0.52,0.662,0.786
3,Blackburn,252.0,213.0,201.0,312.0,0.93,0.978,1.036,1.151
4,Blackpool,30.0,37.0,25.0,41.0,0.111,0.122,0.18,0.151


### Working on the feature table

feature_table contains all the fixtures in the current season | 
ftr = full time result | 
hst = home shots on target | 
ast = away shots on target |

To create the feature table which is going to be used for prediction we need to slice some data from the 'df' just created and map the respective home and away team values for Attack and Defense Score respectively.

In [32]:
# slicing first 23 columns from df table for prediction

feature_table = df.iloc[:,:23]

feature_table = feature_table[['HomeTeam','AwayTeam','FTR','HST','AST']]

f_HAS = []
f_HDS = []
f_AAS = []
f_ADS = []
for index,row in feature_table.iterrows():
    f_HAS.append(table[table['Team'] == row['HomeTeam']]['HAS'].values)
    f_HDS.append(table[table['Team'] == row['HomeTeam']]['HDS'].values)
    f_AAS.append(table[table['Team'] == row['AwayTeam']]['AAS'].values)
    f_ADS.append(table[table['Team'] == row['AwayTeam']]['ADS'].values)
    
feature_table['HAS'] = f_HAS
feature_table['HDS'] = f_HDS
feature_table['AAS'] = f_AAS
feature_table['ADS'] = f_ADS
feature_table.head()

Unnamed: 0,HomeTeam,AwayTeam,FTR,HST,AST,HAS,HDS,AAS,ADS
0,Fulham,Arsenal,A,2.0,6.0,[1.28],[1.649],[2.841],[1.624]
1,Crystal Palace,Southampton,H,3.0,5.0,[0.675],[1.056],[1.124],[1.251]
2,Liverpool,Leeds,H,6.0,3.0,[2.642],[1.352],[0.389],[0.387]
3,West Ham,Newcastle,A,3.0,2.0,[1.59],[2.004],[1.523],[1.959]
4,West Brom,Leicester,A,1.0,7.0,[1.077],[1.741],[1.06],[0.845]


In [33]:
#method to convert the datatype of certain colummns

from array import *
def conversion(array_array,col_name):
    list_array = array_array.tolist()
    list_list = []
    for arr in list_array:
        lst = arr.tolist()
        list_list.append(lst)
    df_col = pd.DataFrame(list_list, columns = [col_name])
    return df_col

In [34]:
df_has = conversion(feature_table["HAS"], "has")
df_hds = conversion(feature_table["HDS"], "hds")
df_aas = conversion(feature_table["AAS"], "aas")
df_ads = conversion(feature_table["ADS"], "ads")

In [35]:
final_table = feature_table.join([df_has,df_hds,df_aas,df_ads])

In [36]:
final_table.drop(columns=["HAS","HDS","AAS","ADS"],inplace=True)

In [37]:
final_table

Unnamed: 0,HomeTeam,AwayTeam,FTR,HST,AST,has,hds,aas,ads
0,Fulham,Arsenal,A,2.0,6.0,1.280,1.649,2.841,1.624
1,Crystal Palace,Southampton,H,3.0,5.0,0.675,1.056,1.124,1.251
2,Liverpool,Leeds,H,6.0,3.0,2.642,1.352,0.389,0.387
3,West Ham,Newcastle,A,3.0,2.0,1.590,2.004,1.523,1.959
4,West Brom,Leicester,A,1.0,7.0,1.077,1.741,1.060,0.845
...,...,...,...,...,...,...,...,...,...
7126,Leeds,Aston Villa,H,5.0,6.0,0.273,0.365,1.649,1.827
7127,Man City,Southampton,A,7.0,3.0,2.786,1.508,1.124,1.251
7128,Sunderland,Arsenal,A,3.0,13.0,0.908,1.479,2.841,1.624
7129,Tottenham,Blackburn,A,7.0,9.0,2.402,1.766,0.978,1.151


In [38]:
final_table.rename(columns = {'has':'HAS', 'hds':'HDS','aas':'AAS', 'ads':'ADS'}, inplace = True)

In [39]:
final_table

Unnamed: 0,HomeTeam,AwayTeam,FTR,HST,AST,HAS,HDS,AAS,ADS
0,Fulham,Arsenal,A,2.0,6.0,1.280,1.649,2.841,1.624
1,Crystal Palace,Southampton,H,3.0,5.0,0.675,1.056,1.124,1.251
2,Liverpool,Leeds,H,6.0,3.0,2.642,1.352,0.389,0.387
3,West Ham,Newcastle,A,3.0,2.0,1.590,2.004,1.523,1.959
4,West Brom,Leicester,A,1.0,7.0,1.077,1.741,1.060,0.845
...,...,...,...,...,...,...,...,...,...
7126,Leeds,Aston Villa,H,5.0,6.0,0.273,0.365,1.649,1.827
7127,Man City,Southampton,A,7.0,3.0,2.786,1.508,1.124,1.251
7128,Sunderland,Arsenal,A,3.0,13.0,0.908,1.479,2.841,1.624
7129,Tottenham,Blackburn,A,7.0,9.0,2.402,1.766,0.978,1.151


In [41]:
#Converts FTR - Full Time Results (Home,Away or Draw) into numeric values

def transformResult(row):
    if(row.FTR == 'H'):
        return 1
    elif(row.FTR == 'A'):
        return -1
    else:
        return 0

In [42]:
final_table["Result"] = final_table.apply(lambda row: transformResult(row),axis=1)
final_table.tail()

Unnamed: 0,HomeTeam,AwayTeam,FTR,HST,AST,HAS,HDS,AAS,ADS,Result
7126,Leeds,Aston Villa,H,5.0,6.0,0.273,0.365,1.649,1.827,1
7127,Man City,Southampton,A,7.0,3.0,2.786,1.508,1.124,1.251,-1
7128,Sunderland,Arsenal,A,3.0,13.0,0.908,1.479,2.841,1.624,-1
7129,Tottenham,Blackburn,A,7.0,9.0,2.402,1.766,0.978,1.151,-1
7130,West Brom,Newcastle,D,3.0,9.0,1.077,1.741,1.523,1.959,0


In [43]:
#exporting final_table to ./data folder of the project

final_table.to_csv(r'./data/final_table.csv',index=False)

### Prediction Model

In [44]:
from numpy import loadtxt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [45]:
final_table.dropna(inplace=True)
final_table

Unnamed: 0,HomeTeam,AwayTeam,FTR,HST,AST,HAS,HDS,AAS,ADS,Result
0,Fulham,Arsenal,A,2.0,6.0,1.280,1.649,2.841,1.624,-1
1,Crystal Palace,Southampton,H,3.0,5.0,0.675,1.056,1.124,1.251,1
2,Liverpool,Leeds,H,6.0,3.0,2.642,1.352,0.389,0.387,1
3,West Ham,Newcastle,A,3.0,2.0,1.590,2.004,1.523,1.959,-1
4,West Brom,Leicester,A,1.0,7.0,1.077,1.741,1.060,0.845,-1
...,...,...,...,...,...,...,...,...,...,...
7126,Leeds,Aston Villa,H,5.0,6.0,0.273,0.365,1.649,1.827,1
7127,Man City,Southampton,A,7.0,3.0,2.786,1.508,1.124,1.251,-1
7128,Sunderland,Arsenal,A,3.0,13.0,0.908,1.479,2.841,1.624,-1
7129,Tottenham,Blackburn,A,7.0,9.0,2.402,1.766,0.978,1.151,-1


In [46]:
final_table.isnull().values.any()

False

In [None]:
X_train = final_table[['HST','AST','HAS','HDS','AAS','ADS',]]
y_train = final_table['Result']

In [None]:
from sklearn.model_selection import train_test_split
X_train,test_X,y_train,test_y=train_test_split(X_train,y_train,random_state=0)

In [None]:
model1 = LogisticRegression()
model1.fit(X_train, y_train)
y_pred1 = model1.fit(X_train,y_train).predict(X_train)
y_pred1

array([-1,  1,  1, ...,  1, -1,  1])

In [None]:
accuracy_score(y_pred1,y_train)

0.5679820460071068

In [None]:
model2 = KNeighborsClassifier()
model2.fit(X_train, y_train)
y_pred2 = model2.fit(X_train,y_train).predict(X_train)
y_pred2

array([-1, -1,  1, ...,  1, -1,  1])

In [None]:
accuracy_score(y_pred2,y_train)

0.6495230970637741

In [None]:
model3 =  LinearSVC()
y_pred3 = model3.fit(X_train, y_train).predict(X_train)
y_pred3



array([-1,  1,  1, ...,  1, -1,  1])

In [None]:
accuracy_score(y_pred3,y_train)

0.5676080044884982

### Predicting Recent Performance

In [None]:
#dropping certain columns not needed for recent performance prediction purposes

pred_table = final_table.drop(['HST','AST'],axis=1)
pred_table

Unnamed: 0,HomeTeam,AwayTeam,FTR,HAS,HDS,AAS,ADS,Result
0,Fulham,Arsenal,A,1.280,1.649,2.841,1.624,-1
1,Crystal Palace,Southampton,H,0.675,1.056,1.124,1.251,1
2,Liverpool,Leeds,H,2.642,1.352,0.389,0.387,1
3,West Ham,Newcastle,A,1.590,2.004,1.523,1.959,-1
4,West Brom,Leicester,A,1.077,1.741,1.060,0.845,-1
...,...,...,...,...,...,...,...,...
7126,Leeds,Aston Villa,H,0.273,0.365,1.649,1.827,1
7127,Man City,Southampton,A,2.786,1.508,1.124,1.251,-1
7128,Sunderland,Arsenal,A,0.908,1.479,2.841,1.624,-1
7129,Tottenham,Blackburn,A,2.402,1.766,0.978,1.151,-1


In [None]:
# Adding next week fixtures from understat.com

new_fixtures = pd.DataFrame( [['Crystal Palace','Leeds','D',0,0,0,0],
                             ['West Ham','Arsenal','D',0,0,0,0],
                             ['Everton','Chelsea','D',0,0,0,0],
                             ['Newcastle','Liverpool','D',0,0,0,0],
                             ['Wolves','Brighton','D',0,0,0,0],
                             ['Watford','Burnley','D',0,0,0,0],
                             ['Tottenham','Leicester','D',0,0,0,0],
                             ['Southampton','Crystal Palace','D',0,0,0,0],
                             ['Aston Villa','Norwich','D',0,0,0,0],
                             ['Leeds','Man City','D',0,0,0,0]],columns=['HomeTeam','AwayTeam', 'FTR',
                                                                            'HAS','HDS','AAS','ADS'])

In [None]:
new_fixtures.head()

Unnamed: 0,HomeTeam,AwayTeam,FTR,HAS,HDS,AAS,ADS
0,Crystal Palace,Leeds,D,0,0,0,0
1,West Ham,Arsenal,D,0,0,0,0
2,Everton,Chelsea,D,0,0,0,0
3,Newcastle,Liverpool,D,0,0,0,0
4,Wolves,Brighton,D,0,0,0,0


In [None]:
#Converts results (H,A or D) into numeric values

def transformResult(row):
    if(row.FTR == 'H'):
        return 1
    elif(row.FTR == 'A'):
        return -1
    else:
        return 0

In [None]:
new_fixtures["Result"] = new_fixtures.apply(lambda row: transformResult(row),axis=1)
new_fixtures.head()

Unnamed: 0,HomeTeam,AwayTeam,FTR,HAS,HDS,AAS,ADS,Result
0,Crystal Palace,Leeds,D,0,0,0,0,0
1,West Ham,Arsenal,D,0,0,0,0,0
2,Everton,Chelsea,D,0,0,0,0,0
3,Newcastle,Liverpool,D,0,0,0,0,0
4,Wolves,Brighton,D,0,0,0,0,0


In [None]:
new_HAS = []
new_HDS = []
new_AAS = []
new_ADS = []
for index,row in new_fixtures.iterrows():
    new_HAS.append(table[table['Team'] == row['HomeTeam']]['HAS'].values[0])
    new_HDS.append(table[table['Team'] == row['HomeTeam']]['HDS'].values[0])
    new_AAS.append(table[table['Team'] == row['AwayTeam']]['AAS'].values[0])
    new_ADS.append(table[table['Team'] == row['AwayTeam']]['ADS'].values[0])
    
new_fixtures['HAS'] = new_HAS
new_fixtures['HDS'] = new_HDS
new_fixtures['AAS'] = new_AAS
new_fixtures['ADS'] = new_ADS
new_fixtures

Unnamed: 0,HomeTeam,AwayTeam,FTR,HAS,HDS,AAS,ADS,Result
0,Crystal Palace,Leeds,D,0.675,1.056,0.389,0.387,0
1,West Ham,Arsenal,D,1.59,2.004,2.841,1.624,0
2,Everton,Chelsea,D,2.096,1.858,2.778,1.332,0
3,Newcastle,Liverpool,D,1.69,1.917,2.709,1.561,0
4,Wolves,Brighton,D,0.587,0.91,0.306,0.413,0
5,Watford,Burnley,D,0.513,0.773,0.598,0.856,0
6,Tottenham,Leicester,D,2.402,1.766,1.06,0.845,0
7,Southampton,Crystal Palace,D,1.173,1.333,0.924,1.026,0
8,Aston Villa,Norwich,D,1.376,1.717,0.394,0.841,0
9,Leeds,Man City,D,0.273,0.365,2.758,1.601,0


In [None]:
train_x = pred_table[['HAS','HDS','AAS','ADS']]
train_y = pred_table['Result']

In [None]:
X_test = new_fixtures[['HAS','HDS','AAS','ADS']]
y_test = new_fixtures['Result']

In [None]:
clf = KNeighborsClassifier()
model = clf.fit(train_x,train_y)
pred = clf.predict(X_test)

In [None]:
pred

array([ 0, -1, -1, -1,  0, -1,  1,  1,  1,  1])

In [None]:
this_week = new_fixtures[['HomeTeam','AwayTeam']]
this_week['Result']=pred

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  this_week['Result']=pred


In [None]:
this_week

Unnamed: 0,HomeTeam,AwayTeam,Result
0,Crystal Palace,Leeds,0
1,West Ham,Arsenal,-1
2,Everton,Chelsea,-1
3,Newcastle,Liverpool,-1
4,Wolves,Brighton,0
5,Watford,Burnley,-1
6,Tottenham,Leicester,1
7,Southampton,Crystal Palace,1
8,Aston Villa,Norwich,1
9,Leeds,Man City,1


In [None]:
def transformResultBack(row,col_name):
    if(row[col_name] == 1):
        return 'H'
    elif(row[col_name] == -1):
        return 'A'
    else:
        return 'D'

In [None]:
this_week["Full Time Result"] = this_week.apply(lambda row: transformResultBack(row,"Result"),axis=1)

this_week.drop(["Result"],axis=1,inplace=True)
this_week

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  this_week["Full Time Result"] = this_week.apply(lambda row: transformResultBack(row,"Result"),axis=1)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().drop(


Unnamed: 0,HomeTeam,AwayTeam,Full Time Result
0,Crystal Palace,Leeds,D
1,West Ham,Arsenal,A
2,Everton,Chelsea,A
3,Newcastle,Liverpool,A
4,Wolves,Brighton,D
5,Watford,Burnley,A
6,Tottenham,Leicester,H
7,Southampton,Crystal Palace,H
8,Aston Villa,Norwich,H
9,Leeds,Man City,H


In [78]:
this_week.to_csv(r'./data/this_week.csv',index=False)

In [62]:
df.head()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,LBH,LBD,LBA,SJH,SJD,SJA,GBH,GBD,GBA,BSH,BSD,BSA,SBH,SBD,SBA,GB>2.5,GB<2.5,GBAHH,GBAHA,GBAH,LBAHH,LBAHA,LBAH,B365AH,SOH,SOD,SOA,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52
0,E0,12/09/2020,12:30,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,A,C Kavanagh,5.0,13.0,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,6.0,4.33,1.53,5.5,4.25,1.57,6.0,3.9,1.57,6.16,4.51,1.56,6.5,4.2,1.53,6.5,4.2,1.55,6.55,4.55,1.6,5.94,4.34,1.55,1.72,2.1,1.8,2.13,1.84,2.18,1.76,2.1,1.0,1.93,1.97,1.96,1.96,2.0,1.99,1.93,1.95,5.0,4.0,1.66,5.5,4.0,1.62,5.25,3.9,1.67,5.48,3.98,1.69,5.5,3.8,1.65,5.5,3.9,1.67,5.75,4.2,1.71,5.36,3.93,1.67,2.0,1.8,2.06,1.86,2.1,1.92,2.0,1.84,0.75,2.01,1.89,2.02,1.91,2.13,1.92,2.02,1.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,E0,12/09/2020,15:00,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,H,J Moss,5.0,9.0,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,3.1,3.25,2.37,3.0,3.2,2.45,3.15,2.95,2.4,3.32,3.29,2.4,3.2,3.2,2.35,3.2,3.2,2.4,3.36,3.36,2.5,3.18,3.22,2.39,2.2,1.66,2.34,1.68,2.36,1.73,2.24,1.67,0.25,1.85,2.05,1.88,2.05,1.88,2.07,1.84,2.03,3.0,3.25,2.4,3.0,3.3,2.4,3.05,2.9,2.45,3.09,3.27,2.54,3.1,3.1,2.45,3.1,3.25,2.45,3.25,3.33,2.55,3.08,3.22,2.47,2.2,1.66,2.26,1.72,2.27,1.78,2.18,1.7,0.25,1.78,2.13,1.79,2.17,1.85,2.18,1.79,2.12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,E0,12/09/2020,17:30,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,H,M Oliver,22.0,6.0,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,1.28,6.0,9.5,1.26,6.25,10.5,1.35,5.0,8.5,1.31,6.25,9.92,1.27,6.0,10.0,1.3,5.75,10.5,1.35,6.5,10.75,1.3,5.96,9.68,1.53,2.5,1.56,2.6,1.56,2.68,1.52,2.53,-1.5,1.95,1.95,1.97,1.95,2.0,2.08,1.9,1.97,1.25,6.0,11.0,1.25,6.25,11.0,1.3,6.0,9.0,1.28,6.34,11.38,1.25,6.0,12.0,1.29,6.0,11.5,1.3,6.75,12.27,1.28,6.16,10.63,1.5,2.62,1.51,2.76,1.53,2.82,1.5,2.62,-1.5,1.85,2.05,1.85,2.08,1.9,2.16,1.84,2.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,E0,12/09/2020,20:00,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,D,S Attwell,15.0,15.0,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,2.15,3.4,3.4,2.15,3.4,3.4,2.15,3.15,3.4,2.18,3.61,3.5,2.15,3.5,3.4,2.15,3.4,3.6,2.24,3.7,3.6,2.15,3.48,3.42,1.9,1.9,2.0,1.91,2.05,1.95,1.97,1.86,-0.5,2.07,1.72,2.17,1.78,2.17,1.81,2.12,1.75,1.95,3.6,3.75,1.95,3.7,3.75,2.05,3.25,3.75,2.04,3.59,3.92,2.0,3.5,3.8,2.0,3.5,3.9,2.07,3.78,3.99,2.01,3.57,3.79,1.9,1.9,2.0,1.92,2.0,2.05,1.91,1.92,-0.5,2.03,1.87,2.04,1.88,2.09,1.91,2.02,1.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,E0,13/09/2020,14:00,West Brom,Leicester,0.0,3.0,A,0.0,0.0,D,A Taylor,7.0,13.0,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,3.8,3.6,1.95,3.7,3.6,2.0,3.85,3.2,2.0,4.0,3.59,2.0,3.8,3.6,1.95,4.0,3.5,1.95,4.0,3.82,2.04,3.87,3.57,1.97,1.9,1.9,2.0,1.91,2.02,2.03,1.92,1.9,0.5,1.91,1.99,1.92,2.0,1.93,2.02,1.88,1.97,3.25,3.4,2.2,3.3,3.4,2.2,3.35,3.0,2.3,3.38,3.38,2.32,3.3,3.3,2.25,3.3,3.3,2.3,3.55,3.5,2.38,3.32,3.33,2.28,2.2,1.66,2.23,1.74,2.28,1.82,2.15,1.73,0.25,1.92,1.98,1.93,1.99,1.95,2.01,1.91,1.97,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [63]:
def change_into_datetime(col):
    df[col]=pd.to_datetime(df[col])
change_into_datetime("Date")
df.head()

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,LBH,LBD,LBA,SJH,SJD,SJA,GBH,GBD,GBA,BSH,BSD,BSA,SBH,SBD,SBA,GB>2.5,GB<2.5,GBAHH,GBAHA,GBAH,LBAHH,LBAHA,LBAH,B365AH,SOH,SOD,SOA,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52
0,E0,2020-12-09,12:30,Fulham,Arsenal,0.0,3.0,A,0.0,1.0,A,C Kavanagh,5.0,13.0,2.0,6.0,12.0,12.0,2.0,3.0,2.0,2.0,0.0,0.0,6.0,4.33,1.53,5.5,4.25,1.57,6.0,3.9,1.57,6.16,4.51,1.56,6.5,4.2,1.53,6.5,4.2,1.55,6.55,4.55,1.6,5.94,4.34,1.55,1.72,2.1,1.8,2.13,1.84,2.18,1.76,2.1,1.0,1.93,1.97,1.96,1.96,2.0,1.99,1.93,1.95,5.0,4.0,1.66,5.5,4.0,1.62,5.25,3.9,1.67,5.48,3.98,1.69,5.5,3.8,1.65,5.5,3.9,1.67,5.75,4.2,1.71,5.36,3.93,1.67,2.0,1.8,2.06,1.86,2.1,1.92,2.0,1.84,0.75,2.01,1.89,2.02,1.91,2.13,1.92,2.02,1.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,E0,2020-12-09,15:00,Crystal Palace,Southampton,1.0,0.0,H,1.0,0.0,H,J Moss,5.0,9.0,3.0,5.0,14.0,11.0,7.0,3.0,2.0,1.0,0.0,0.0,3.1,3.25,2.37,3.0,3.2,2.45,3.15,2.95,2.4,3.32,3.29,2.4,3.2,3.2,2.35,3.2,3.2,2.4,3.36,3.36,2.5,3.18,3.22,2.39,2.2,1.66,2.34,1.68,2.36,1.73,2.24,1.67,0.25,1.85,2.05,1.88,2.05,1.88,2.07,1.84,2.03,3.0,3.25,2.4,3.0,3.3,2.4,3.05,2.9,2.45,3.09,3.27,2.54,3.1,3.1,2.45,3.1,3.25,2.45,3.25,3.33,2.55,3.08,3.22,2.47,2.2,1.66,2.26,1.72,2.27,1.78,2.18,1.7,0.25,1.78,2.13,1.79,2.17,1.85,2.18,1.79,2.12,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,E0,2020-12-09,17:30,Liverpool,Leeds,4.0,3.0,H,3.0,2.0,H,M Oliver,22.0,6.0,6.0,3.0,9.0,6.0,9.0,0.0,1.0,0.0,0.0,0.0,1.28,6.0,9.5,1.26,6.25,10.5,1.35,5.0,8.5,1.31,6.25,9.92,1.27,6.0,10.0,1.3,5.75,10.5,1.35,6.5,10.75,1.3,5.96,9.68,1.53,2.5,1.56,2.6,1.56,2.68,1.52,2.53,-1.5,1.95,1.95,1.97,1.95,2.0,2.08,1.9,1.97,1.25,6.0,11.0,1.25,6.25,11.0,1.3,6.0,9.0,1.28,6.34,11.38,1.25,6.0,12.0,1.29,6.0,11.5,1.3,6.75,12.27,1.28,6.16,10.63,1.5,2.62,1.51,2.76,1.53,2.82,1.5,2.62,-1.5,1.85,2.05,1.85,2.08,1.9,2.16,1.84,2.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,E0,2020-12-09,20:00,West Ham,Newcastle,0.0,2.0,A,0.0,0.0,D,S Attwell,15.0,15.0,3.0,2.0,13.0,7.0,8.0,7.0,2.0,2.0,0.0,0.0,2.15,3.4,3.4,2.15,3.4,3.4,2.15,3.15,3.4,2.18,3.61,3.5,2.15,3.5,3.4,2.15,3.4,3.6,2.24,3.7,3.6,2.15,3.48,3.42,1.9,1.9,2.0,1.91,2.05,1.95,1.97,1.86,-0.5,2.07,1.72,2.17,1.78,2.17,1.81,2.12,1.75,1.95,3.6,3.75,1.95,3.7,3.75,2.05,3.25,3.75,2.04,3.59,3.92,2.0,3.5,3.8,2.0,3.5,3.9,2.07,3.78,3.99,2.01,3.57,3.79,1.9,1.9,2.0,1.92,2.0,2.05,1.91,1.92,-0.5,2.03,1.87,2.04,1.88,2.09,1.91,2.02,1.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,E0,2020-09-13,14:00,West Brom,Leicester,0.0,3.0,A,0.0,0.0,D,A Taylor,7.0,13.0,1.0,7.0,12.0,9.0,2.0,5.0,1.0,1.0,0.0,0.0,3.8,3.6,1.95,3.7,3.6,2.0,3.85,3.2,2.0,4.0,3.59,2.0,3.8,3.6,1.95,4.0,3.5,1.95,4.0,3.82,2.04,3.87,3.57,1.97,1.9,1.9,2.0,1.91,2.02,2.03,1.92,1.9,0.5,1.91,1.99,1.92,2.0,1.93,2.02,1.88,1.97,3.25,3.4,2.2,3.3,3.4,2.2,3.35,3.0,2.3,3.38,3.38,2.32,3.3,3.3,2.25,3.3,3.3,2.3,3.55,3.5,2.38,3.32,3.33,2.28,2.2,1.66,2.23,1.74,2.28,1.82,2.15,1.73,0.25,1.92,1.98,1.93,1.99,1.95,2.01,1.91,1.97,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


slicing first 322 matches

In [67]:
df_complete = df.iloc[323: , :]
df_complete = df_complete.sort_index(ascending=False)
df_complete

Unnamed: 0,Div,Date,Time,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR,B365H,B365D,B365A,BWH,BWD,BWA,IWH,IWD,IWA,PSH,PSD,PSA,WHH,WHD,WHA,VCH,VCD,VCA,MaxH,MaxD,MaxA,AvgH,AvgD,AvgA,B365>2.5,B365<2.5,P>2.5,P<2.5,Max>2.5,Max<2.5,Avg>2.5,Avg<2.5,AHh,B365AHH,B365AHA,PAHH,PAHA,MaxAHH,MaxAHA,AvgAHH,AvgAHA,B365CH,B365CD,B365CA,BWCH,BWCD,BWCA,IWCH,IWCD,IWCA,PSCH,PSCD,PSCA,WHCH,WHCD,WHCA,VCCH,VCCD,VCCA,MaxCH,MaxCD,MaxCA,AvgCH,AvgCD,AvgCA,B365C>2.5,B365C<2.5,PC>2.5,PC<2.5,MaxC>2.5,MaxC<2.5,AvgC>2.5,AvgC<2.5,AHCh,B365CAHH,B365CAHA,PCAHH,PCAHA,MaxCAHH,MaxCAHA,AvgCAHH,AvgCAHA,Bb1X2,BbMxH,BbAvH,BbMxD,BbAvD,BbMxA,BbAvA,BbOU,BbMx>2.5,BbAv>2.5,BbMx<2.5,BbAv<2.5,BbAH,BbAHh,BbMxAHH,BbAvAHH,BbMxAHA,BbAvAHA,LBH,LBD,LBA,SJH,SJD,SJA,GBH,GBD,GBA,BSH,BSD,BSA,SBH,SBD,SBA,GB>2.5,GB<2.5,GBAHH,GBAHA,GBAH,LBAHH,LBAHA,LBAH,B365AH,SOH,SOD,SOA,Unnamed: 48,Unnamed: 49,Unnamed: 50,Unnamed: 51,Unnamed: 52
7130,E0,2003-11-05,,West Brom,Newcastle,2.0,2.0,D,0.0,1.0,A,G Barber,8.0,14.0,3.0,9.0,9.0,8.0,2.0,8.0,0.0,0.0,0.0,0.0,4.000,3.50,1.727,,,,,,,,,,3.75,3.3,1.80,,,,,,,,,,1.80,2.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.500,3.20,1.909,,,,3.85,3.40,1.82,,,,3.750,3.4,1.800,1.75,1.95,,,,,,,,3.75,3.4,1.80,,,,,
7129,E0,2003-11-05,,Tottenham,Blackburn,0.0,4.0,A,0.0,2.0,A,A D'Urso,11.0,13.0,7.0,9.0,9.0,12.0,6.0,3.0,1.0,1.0,1.0,0.0,2.500,3.50,2.300,,,,,,,,,,2.62,3.5,2.20,,,,,,,,,,1.85,1.95,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,3.000,3.20,2.100,,,,2.60,3.30,2.40,,,,2.600,3.4,2.300,1.80,1.90,,,,,,,,2.63,3.4,2.25,,,,,
7128,E0,2003-11-05,,Sunderland,Arsenal,0.0,4.0,A,0.0,2.0,A,P Durkin,7.0,20.0,3.0,13.0,16.0,12.0,6.0,10.0,1.0,0.0,0.0,0.0,5.000,3.40,1.615,,,,,,,,,,4.20,3.5,1.66,,,,,,,,,,1.75,2.05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,4.000,3.40,1.727,,,,4.75,3.60,1.63,,,,4.500,3.6,1.615,1.70,2.00,,,,,,,,5.00,3.4,1.62,,,,,
7127,E0,2003-11-05,,Man City,Southampton,0.0,1.0,A,0.0,1.0,A,M Dean,17.0,6.0,7.0,3.0,14.0,14.0,10.0,4.0,3.0,4.0,0.0,0.0,1.615,3.40,5.000,,,,,,,,,,1.61,3.5,4.50,,,,,,,,,,1.75,2.05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.615,3.50,4.500,,,,1.65,3.50,4.75,,,,1.615,3.4,5.000,1.70,2.00,,,,,,,,1.53,3.5,5.50,,,,,
7126,E0,2003-11-05,,Leeds,Aston Villa,3.0,1.0,H,1.0,1.0,D,M Halsey,11.0,18.0,5.0,6.0,18.0,14.0,1.0,8.0,0.0,1.0,0.0,0.0,1.833,3.25,3.750,,,,,,,,,,1.80,3.3,3.75,,,,,,,,,,1.80,2.00,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.800,3.25,3.750,,,,1.83,3.35,3.85,,,,1.800,3.4,3.750,1.75,1.95,,,,,,,,1.80,3.4,3.75,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
327,E0,2021-04-26,20:00,Leicester,Crystal Palace,2.0,1.0,H,0.0,1.0,A,G Scott,13.0,4.0,5.0,2.0,13.0,12.0,7.0,2.0,0.0,1.0,0.0,0.0,1.400,4.33,8.500,1.45,4.4,7.50,1.47,4.30,7.00,1.48,4.36,8.24,1.47,4.2,8.00,1.45,4.2,8.00,1.50,4.46,8.50,1.47,4.26,7.71,2.00,1.80,2.09,1.81,2.10,1.85,2.04,1.79,-1.00,1.85,2.05,1.85,2.09,1.88,2.11,1.82,2.06,1.40,4.75,8.00,1.42,4.5,8.00,1.43,4.60,7.50,1.44,4.75,8.25,1.42,4.5,8.00,1.44,4.5,8.00,1.47,4.84,8.54,1.43,4.64,7.95,1.72,2.10,1.78,2.15,1.84,2.28,1.76,2.09,-1.25,2.07,1.86,2.07,1.87,2.09,1.91,2.03,1.86,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
326,E0,2021-04-25,19:00,Aston Villa,West Brom,2.0,2.0,D,1.0,1.0,D,S Attwell,24.0,10.0,11.0,4.0,14.0,9.0,11.0,4.0,0.0,2.0,0.0,0.0,1.850,3.60,4.200,1.87,3.6,4.20,1.85,3.65,4.20,1.90,3.80,4.28,1.88,3.6,4.20,1.83,3.6,4.40,1.93,3.82,4.40,1.88,3.67,4.19,1.90,1.90,1.95,1.95,2.06,1.95,1.94,1.88,-0.50,1.87,2.03,1.90,2.03,1.93,2.06,1.88,2.01,2.05,3.50,3.60,2.00,3.5,3.75,2.10,3.45,3.60,2.10,3.57,3.76,2.05,3.5,3.70,2.05,3.5,3.75,2.22,3.60,3.90,2.08,3.48,3.66,1.95,1.95,1.94,1.96,1.97,2.05,1.91,1.92,-0.25,1.80,2.14,1.81,2.13,1.85,2.14,1.80,2.09,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
325,E0,2021-04-25,14:00,Leeds,Man United,0.0,0.0,D,0.0,0.0,D,C Pawson,6.0,16.0,3.0,4.0,21.0,11.0,2.0,6.0,4.0,1.0,0.0,0.0,4.000,4.20,1.750,4.10,4.0,1.80,4.00,4.10,1.80,4.17,4.20,1.83,4.00,4.0,1.80,4.10,3.9,1.80,4.30,4.25,1.92,4.05,4.05,1.82,1.57,2.37,1.58,2.52,1.65,2.52,1.59,2.40,0.75,1.83,2.07,1.86,2.08,1.87,2.13,1.83,2.05,4.20,4.00,1.75,4.25,3.9,1.80,4.30,3.85,1.80,4.42,3.98,1.83,4.33,3.9,1.78,4.50,3.8,1.80,4.65,4.11,1.85,4.35,3.91,1.80,1.80,2.00,1.85,2.06,1.88,2.17,1.79,2.05,0.75,1.84,2.09,1.86,2.07,1.96,2.12,1.86,2.04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
324,E0,2021-04-25,12:00,Wolves,Burnley,0.0,4.0,A,0.0,3.0,A,D England,12.0,14.0,2.0,7.0,8.0,9.0,8.0,5.0,1.0,2.0,0.0,0.0,2.000,3.40,3.900,2.00,3.4,3.90,2.10,3.25,3.85,2.12,3.32,3.99,2.05,3.3,3.80,2.05,3.2,4.00,2.15,3.46,4.05,2.07,3.31,3.88,2.30,1.61,2.35,1.65,2.42,1.69,2.31,1.62,-0.50,2.08,1.82,2.12,1.82,2.14,1.87,2.08,1.82,2.30,3.10,3.40,2.25,3.1,3.50,2.35,3.05,3.35,2.44,3.06,3.50,2.20,3.2,3.60,2.38,3.1,3.40,2.46,3.40,3.65,2.35,3.09,3.42,2.50,1.53,2.58,1.56,2.58,1.61,2.43,1.57,-0.25,2.05,1.88,2.05,1.88,2.08,1.98,2.02,1.87,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [None]:
df_analysis = df_complete.sort_index(ascending=False)