# Reading the season datasets and combining them into one

In [1]:
import pandas as pd
import os
from scipy.stats import poisson

# Lift pandas' display limits so rendered DataFrames are never truncated
# (None means "no limit" for both settings).
pd.options.display.max_columns = None
pd.options.display.max_rows = None

# Folder that holds the CSV files to combine.
# NOTE(review): absolute, machine-specific path — consider moving it to a
# configurable location so the notebook runs on other machines.
directory = 'C:/Users/99451/Desktop/MODEL/eng_prem'

# Define the columns you want to extract from each CSV file
columns_to_keep = ['Date', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'FTR', 'B365H', 'B365D', 'B365A']

# os.listdir() returns names in arbitrary order. Sort them so the concatenated
# row order is identical on every run: the rolling/shifted features computed
# from this frame depend on row order.
# NOTE(review): this assumes the file names sort chronologically — confirm,
# or sort the combined frame by a parsed 'Date' instead.
csv_files = sorted(name for name in os.listdir(directory) if name.endswith('.csv'))
if not csv_files:
    # Fail with a message that names the folder instead of pandas' generic
    # "No objects to concatenate".
    raise ValueError(f"No .csv files found in {directory!r}")

dfs = []
for filename in csv_files:
    # Malformed lines are skipped; the files are read as latin-1.
    season_df = pd.read_csv(os.path.join(directory, filename), on_bad_lines='skip', encoding='latin-1')
    # Keep only the shared columns (raises KeyError if a file lacks one).
    dfs.append(season_df[columns_to_keep])

# Single combined frame with a fresh 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)

print("Number of rows:", df.shape[0])
print("Number of columns:", df.shape[1])
df.head()

Number of rows: 8214
Number of columns: 9


Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,B365H,B365D,B365A
0,17/08/2002,Blackburn,Sunderland,0.0,0.0,D,1.727,3.25,4.333
1,17/08/2002,Charlton,Chelsea,2.0,3.0,A,2.8,3.25,2.2
2,17/08/2002,Everton,Tottenham,2.0,2.0,D,2.25,3.25,2.75
3,17/08/2002,Fulham,Bolton,4.0,1.0,H,1.727,3.25,4.333
4,17/08/2002,Leeds,Man City,3.0,0.0,H,1.667,3.4,4.5


# Adding Moving Averages

In [2]:
# Per-team form: for every row, the mean of the goals column over up to five
# preceding rows of the same HomeTeam / AwayTeam group. The shift(1) applied
# after the rolling mean removes the current match from its own average, so
# a team's first row in a group is NaN.
team_form_columns = [
    # (new column, grouping column, goals column)
    ('FTHGS', 'HomeTeam', 'FTHG'),  # home side: goals scored at home
    ('FTHGC', 'HomeTeam', 'FTAG'),  # home side: goals conceded at home
    ('FTAGS', 'AwayTeam', 'FTAG'),  # away side: goals scored away
    ('FTAGC', 'AwayTeam', 'FTHG'),  # away side: goals conceded away
]
for form_col, team_col, goals_col in team_form_columns:
    df[form_col] = df.groupby(team_col)[goals_col].transform(
        lambda goals: goals.rolling(5, min_periods=1).mean().shift(1)
    )

# Implied probabilities from the B365 odds: invert each price and divide by
# the sum of the three inverses so the probabilities add up to 1.
inv_home = 1 / df['B365H']
inv_draw = 1 / df['B365D']
inv_away = 1 / df['B365A']
total = inv_home + inv_draw + inv_away

df['FTH'] = inv_home / total
df['FTD'] = inv_draw / total
df['FTA'] = inv_away / total
df['FTDA'] = df['FTD'] + df['FTA']  # draw-or-away probability

# League-wide baseline: mean goals over up to 50 preceding rows of the whole
# frame (again shifted by one row to leave out the current match).
for league_col, goals_col in (('league_home_goals', 'FTHG'), ('league_away_goals', 'FTAG')):
    df[league_col] = df[goals_col].rolling(50, min_periods=1).mean().shift(1)

# Attack/defence strengths: team form relative to the league baseline.
df['home_attack'] = df['FTHGS'].div(df['league_home_goals'])
df['home_defence'] = df['FTHGC'].div(df['league_away_goals'])
df['away_attack'] = df['FTAGS'].div(df['league_away_goals'])
df['away_defence'] = df['FTAGC'].div(df['league_home_goals'])

# Expected goals: attack strength x opposing defence strength x league mean.
df['home_xg'] = df['home_attack'].mul(df['away_defence']).mul(df['league_home_goals'])
df['away_xg'] = df['away_attack'].mul(df['home_defence']).mul(df['league_away_goals'])
df['xg_dif'] = df['home_xg'].sub(df['away_xg'])

# Spot-check: first seven rows where Everton is the home team.
df.loc[df['HomeTeam'].eq('Everton')].head(7)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,B365H,B365D,B365A,FTHGS,FTHGC,FTAGS,FTAGC,FTH,FTD,FTA,FTDA,league_home_goals,league_away_goals,home_attack,home_defence,away_attack,away_defence,home_xg,away_xg,xg_dif
2,17/08/2002,Everton,Tottenham,2.0,2.0,D,2.25,3.25,2.75,,,,,0.398329,0.275766,0.325905,0.601671,1.0,1.5,,,,,,,
24,28/08/2002,Everton,Birmingham,1.0,1.0,D,1.67,3.4,4.5,2.0,2.0,0.0,2.0,0.536974,0.263749,0.199277,0.463026,1.625,1.041667,1.230769,1.92,0.0,1.230769,2.461538,0.0,2.461538
51,14/09/2002,Everton,Middlesbrough,2.0,1.0,H,2.3,3.2,2.7,1.5,1.5,0.0,0.5,0.389014,0.279604,0.331382,0.610986,1.52,1.1,0.986842,1.363636,0.0,0.328947,0.493421,0.0,0.493421
72,28/09/2002,Everton,Fulham,2.0,0.0,H,2.25,3.25,2.75,1.666667,1.333333,1.666667,1.0,0.398329,0.275766,0.325905,0.601671,1.36,1.04,1.22549,1.282051,1.602564,0.735294,1.22549,2.136752,-0.911262
89,19/10/2002,Everton,Arsenal,2.0,1.0,H,5.5,3.5,1.533,1.75,1.0,2.5,1.0,0.16236,0.255137,0.582504,0.83764,1.42,1.14,1.232394,0.877193,2.192982,0.704225,1.232394,2.192982,-0.960588
123,09/11/2002,Everton,Charlton,1.0,0.0,H,1.667,3.4,4.5,1.8,1.0,0.6,1.0,0.537421,0.263494,0.199085,0.462579,1.3,1.06,1.384615,0.943396,0.566038,0.769231,1.384615,0.566038,0.818578
141,23/11/2002,Everton,West Brom,1.0,0.0,H,1.444,3.6,7.0,1.6,0.6,0.6,1.4,0.622124,0.249541,0.128335,0.377876,1.28,0.94,1.25,0.638298,0.638298,1.09375,1.75,0.382979,1.367021
