In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Compatibility shim: make the name `np.bool` resolve to NumPy's boolean
# scalar type `np.bool_` for any code that still refers to the old alias.
np.bool = np.bool_

# Premier League 2019/20 fixtures table from FBref. Only rows that have a
# score, both xG values and a referee recorded are kept.
url = 'https://fbref.com/en/comps/9/2019-2020/schedule/2019-2020-Premier-League-Scores-and-Fixtures'
required_columns = ['Score', 'xG', 'xG.1', 'Referee']

season_tables = pd.read_html(url, attrs={'id': 'sched_2019-2020_9_1'})
df20 = season_tables[0].dropna(subset=required_columns)
df20.head()

In [None]:
# Premier League 2020/21 fixtures table from FBref. Rows missing a score,
# either xG value or a referee are dropped, then the season is stacked
# underneath 2019/20.
url = 'https://fbref.com/en/comps/9/2020-2021/schedule/2020-2021-Premier-League-Scores-and-Fixtures'
required_columns = ['Score', 'xG', 'xG.1', 'Referee']

season_tables = pd.read_html(url, attrs={'id': 'sched_2020-2021_9_1'})
df21 = season_tables[0].dropna(subset=required_columns)
first = pd.concat([df20, df21])
len(first)

In [None]:
# Premier League 2021/22 fixtures table from FBref. Rows missing a score,
# either xG value or a referee are dropped, then the season is appended to
# the running table `first`.
url = 'https://fbref.com/en/comps/9/2021-2022/schedule/2021-2022-Premier-League-Scores-and-Fixtures'
required_columns = ['Score', 'xG', 'xG.1', 'Referee']

season_tables = pd.read_html(url, attrs={'id': 'sched_2021-2022_9_1'})
df22 = season_tables[0].dropna(subset=required_columns)
second = pd.concat([first, df22])
len(second)

In [None]:
# Premier League 2022/23 fixtures table from FBref. Rows missing a score,
# either xG value or a referee are dropped, then the season is appended to
# the running table `second`.
url = 'https://fbref.com/en/comps/9/2022-2023/schedule/2022-2023-Premier-League-Scores-and-Fixtures'
required_columns = ['Score', 'xG', 'xG.1', 'Referee']

season_tables = pd.read_html(url, attrs={'id': 'sched_2022-2023_9_1'})
df23 = season_tables[0].dropna(subset=required_columns)
third = pd.concat([second, df23])
len(third)

In [None]:
# Premier League 2023/24 fixtures table from FBref. Rows missing a score,
# either xG value or a referee are dropped, then the season is appended to
# the running table `third`.
url = 'https://fbref.com/en/comps/9/2023-2024/schedule/2023-2024-Premier-League-Scores-and-Fixtures'
required_columns = ['Score', 'xG', 'xG.1', 'Referee']

season_tables = pd.read_html(url, attrs={'id': 'sched_2023-2024_9_1'})
df24 = season_tables[0].dropna(subset=required_columns)
fourth = pd.concat([third, df24])
len(fourth)

In [None]:
# Premier League 2024/25 fixtures table from FBref.
# The season is pinned in the URL, matching the other season cells: the
# season-less ".../comps/9/schedule/..." address follows whichever season is
# current, while the table id requested below is specific to 2024-2025.
# NOTE(review): assumes FBref uses the same URL pattern for 2024-2025 as for
# the earlier seasons -- confirm the page resolves.
url = 'https://fbref.com/en/comps/9/2024-2025/schedule/2024-2025-Premier-League-Scores-and-Fixtures'

df25 = pd.read_html(url, attrs={'id': 'sched_2024-2025_9_1'})[0]
# Keep only rows that have a score, both xG values and a referee recorded.
df25 = df25.dropna(subset=['Score', 'xG', 'xG.1', 'Referee'])
df25.head()

In [None]:
# Append 2024/25 to the table of earlier seasons. ignore_index=True gives the
# combined frame a fresh 0..n-1 index; without it every season keeps its own
# row labels, so the same label would appear several times.
finaldf = pd.concat([fourth, df25], ignore_index=True)
finaldf.shape[0]

In [None]:
# Preview the first rows of the combined table.
finaldf.head()

In [None]:
# Break the 'Score' text apart on its '–' separator and store the two halves
# as numeric 'home_goals' and 'away_goals' columns.
score_parts = finaldf['Score'].str.split('–', expand=True)
finaldf[['home_goals', 'away_goals']] = score_parts.astype(float)
finaldf.head()

In [None]:
# Keep only the columns used from here on. .copy() makes `data` an
# independent frame, so later modifications of `data` never write into a
# slice of `finaldf` (which is what raises pandas' SettingWithCopyWarning).
data = finaldf[['Home','Away','xG','xG.1','home_goals','away_goals']].copy()
data.head()

In [None]:
# Blend expected goals with actual goals: 70% xG, 30% goals scored.
XG_WEIGHT = 0.70
GOALS_WEIGHT = 0.30

# assign() returns a new frame instead of writing columns onto `data` in
# place, so the cell is safe to re-run and never writes through a slice.
data = data.assign(
    HomeGoals=data['xG'] * XG_WEIGHT + (data['home_goals'] * GOALS_WEIGHT),
    AwayGoals=data['xG.1'] * XG_WEIGHT + (data['away_goals'] * GOALS_WEIGHT),
)
data.head()

In [None]:
# Give the team columns explicit names.
team_column_names = {'Home': 'HomeTeam', 'Away': 'AwayTeam'}
data = data.rename(columns=team_column_names)
data.head()

In [None]:
def _rounded_ratio(numerator, denominator):
    """Return ``numerator / denominator`` as a NumPy float rounded to two decimals."""
    return np.float64(numerator / denominator).round(2)


def team_scoring_prediction(data, home_team, away_team):
    """Estimate the goal expectancy of both sides of a fixture.

    Each team's scoring and conceding averages are compared with the
    league-wide averages to obtain attack and defence strengths, which are
    then combined into an expected number of goals per side. Every
    intermediate figure is printed.

    Parameters
    ----------
    data : pandas.DataFrame
        One row per match with the columns 'HomeTeam', 'AwayTeam',
        'HomeGoals' and 'AwayGoals'.
    home_team : str
        Team name looked up in the 'HomeTeam' column; only its home matches
        are used.
    away_team : str
        Team name looked up in the 'AwayTeam' column; only its away matches
        are used.

    Returns
    -------
    tuple
        ``(home_team_goal_exp, away_team_goal_exp)``, both rounded to two
        decimals.

    Raises
    ------
    ValueError
        If ``data`` has no rows, or if a team has no matching rows. Without
        this check the averages would be 0/0 and silently become NaN.
    """
    n_matches = data.shape[0]
    if n_matches == 0:
        raise ValueError("data contains no matches")

    # Matches played at home by the home side / away by the away side.
    home_rows = data.loc[data['HomeTeam'] == home_team]
    away_rows = data.loc[data['AwayTeam'] == away_team]
    if home_rows.empty:
        raise ValueError("no home matches found for team " + repr(home_team))
    if away_rows.empty:
        raise ValueError("no away matches found for team " + repr(away_team))

    # NOTE: every intermediate value is rounded to two decimals before it is
    # reused, so the final figures carry that rounding with them.

    # League-wide average goals per match.
    total_home_goals_mean = _rounded_ratio(data['HomeGoals'].sum(), n_matches)
    print("Average mean Home goal scored in EPL: " +str(total_home_goals_mean))

    total_away_goals_mean = _rounded_ratio(data['AwayGoals'].sum(), n_matches)
    print("Average mean Away goal scored in EPL: " +str(total_away_goals_mean))

    # Home side: goals scored and conceded per home match.
    home_team_home_goals_score_mean = _rounded_ratio(home_rows['HomeGoals'].sum(), home_rows.shape[0])
    print("Average Home goal scored by Home team in EPL: " + str(home_team_home_goals_score_mean))

    home_team_home_goals_concede_mean = _rounded_ratio(home_rows['AwayGoals'].sum(), home_rows.shape[0])
    print("Average Home goal conceded by Home team in EPL: " + str(home_team_home_goals_concede_mean))

    # Home side strengths relative to the league averages.
    home_team_att_strength = _rounded_ratio(home_team_home_goals_score_mean, total_home_goals_mean)
    print("Home team attacking strength: " + str(home_team_att_strength))

    home_team_def_strength = _rounded_ratio(home_team_home_goals_concede_mean, total_away_goals_mean)
    print("Home team defensive strength: " + str(home_team_def_strength))

    # Away side: goals scored and conceded per away match.
    away_team_away_goals_score_mean = _rounded_ratio(away_rows['AwayGoals'].sum(), away_rows.shape[0])
    print("Average Away goal scored by Away team in EPL: " + str(away_team_away_goals_score_mean))

    away_team_away_goals_concede_mean = _rounded_ratio(away_rows['HomeGoals'].sum(), away_rows.shape[0])
    print("Average Away goal conceded by Away team in EPL: "+ str(away_team_away_goals_concede_mean))

    # Away side strengths relative to the league averages.
    away_team_att_strength = _rounded_ratio(away_team_away_goals_score_mean, total_away_goals_mean)
    print("Away team attacking strength: " + str(away_team_att_strength))

    away_team_def_strength = _rounded_ratio(away_team_away_goals_concede_mean, total_home_goals_mean)
    print("Away team defensive strength: " + str(away_team_def_strength))

    # Goal expectancy: own attack strength x opponent defence strength x
    # the league average for that venue.
    home_team_goal_exp = (home_team_att_strength * away_team_def_strength * total_home_goals_mean).round(2)
    print("Home team goal expectance: "+ str(home_team_goal_exp))

    away_team_goal_exp = (away_team_att_strength * home_team_def_strength * total_away_goals_mean).round(2)
    print("Away team goal expectance: " + str(away_team_goal_exp))

    return home_team_goal_exp, away_team_goal_exp

In [None]:
# Fixture to model: home side first, away side second.
fixture_home, fixture_away = 'Manchester City', 'Arsenal'
home_team_goal, away_team_goal = team_scoring_prediction(data, fixture_home, fixture_away)

In [None]:
from numpy import random
# Draw simulated goal counts for each side from a Poisson distribution whose
# mean is that side's goal expectancy. A seeded generator is used so that the
# draws, and every probability derived from them, are the same on each run.
rng = np.random.default_rng(42)
home_team_poission = rng.poisson(lam=home_team_goal, size=100000)
away_team_poission = rng.poisson(lam=away_team_goal, size=100000)

In [None]:
def home_team_goal_prob(n, samples=None):
    """Estimate the probability that the home team scores exactly ``n`` goals.

    Parameters
    ----------
    n : int
        Goal count to look for.
    samples : array-like of int, optional
        Simulated goal counts. Defaults to the notebook-level
        ``home_team_poission`` array.

    Returns
    -------
    tuple
        ``(prob, goals)``: ``goals`` is the number of draws equal to ``n`` and
        ``prob`` is that count divided by the total number of draws.

    Raises
    ------
    ValueError
        If there are no draws to count.
    """
    if samples is None:
        samples = home_team_poission
    draws = np.asarray(samples)
    if draws.size == 0:
        raise ValueError("samples must contain at least one draw")

    # Count over every draw (not a fixed prefix of the array). When nothing
    # matches, the result is simply (0.0, 0).
    goals = int(np.count_nonzero(draws == n))
    prob = goals / draws.size
    return prob, goals

In [None]:
def away_team_goal_prob(n, samples=None):
    """Estimate the probability that the away team scores exactly ``n`` goals.

    Parameters
    ----------
    n : int
        Goal count to look for.
    samples : array-like of int, optional
        Simulated goal counts. Defaults to the notebook-level
        ``away_team_poission`` array.

    Returns
    -------
    tuple
        ``(prob, goals)``: ``goals`` is the number of draws equal to ``n`` and
        ``prob`` is that count divided by the total number of draws.

    Raises
    ------
    ValueError
        If there are no draws to count.
    """
    if samples is None:
        samples = away_team_poission
    draws = np.asarray(samples)
    if draws.size == 0:
        raise ValueError("samples must contain at least one draw")

    # Count over every draw (not a fixed prefix of the array). When nothing
    # matches, the result is simply (0.0, 0).
    goals = int(np.count_nonzero(draws == n))
    prob = goals / draws.size
    return prob, goals

In [None]:
 home_0, g = home_team_goal_prob(0)
print(home_0, g)

home_1, g = home_team_goal_prob(1)
print(home_1, g)

home_2, g = home_team_goal_prob(2)
print(home_2, g)

home_3, g = home_team_goal_prob(3)
print(home_3, g)

home_4, g = home_team_goal_prob(4)
print(home_4, g)

home_5, g = home_team_goal_prob(5)
print(home_5, g)

In [None]:
# Probability that the away team scores exactly k goals, for k = 0..5.
# Each probability is printed together with the raw number of matching draws.
away_probs = []
for k in range(6):
    prob_k, g = away_team_goal_prob(k)
    print(prob_k, g)
    away_probs.append(prob_k)

# Individual names are kept because the following cells refer to them.
away_0, away_1, away_2, away_3, away_4, away_5 = away_probs

In [None]:
# One-column frame of the home probabilities; the row label is the goal count.
home_chance = [home_0, home_1, home_2, home_3, home_4, home_5]
home_chance_frame = pd.DataFrame({'Probs': home_chance})
home_chance_frame

In [None]:
# One-column frame of the away probabilities; the row label is the goal count.
away_chance = [away_0, away_1, away_2, away_3, away_4, away_5]
away_chance_frame = pd.DataFrame({'Probs': away_chance})
away_chance_frame

In [None]:
# Scoreline matrix: each cell is (home probability) x (away probability),
# with home goals on the rows and away goals on the columns. Values are
# rounded to three decimals.
scoreline_products = home_chance_frame.dot(away_chance_frame.T)
df_cross = scoreline_products.round(3)
df_cross

In [None]:
# Heatmap of the scoreline matrix (rows: home goals, columns: away goals).
fig, ax = plt.subplots(figsize=(8, 6))
heat_map = sns.heatmap(df_cross, linewidth=1, annot=False, cmap=plt.cm.Oranges, ax=ax)
ax.set_title("Scoreline probability")
# Flip the y-axis so the row for zero home goals sits at the bottom.
ax.invert_yaxis()
ax.set_xlabel('Away')
ax.set_ylabel('Home')
plt.show()

In [None]:
def printDiagonalSums(mat, n=None):
    """Return the sum of the principal diagonal of ``mat``.

    Despite its name, the function prints nothing; it only returns the sum.

    Parameters
    ----------
    mat : indexable
        Anything supporting ``mat[i][i]``: a list of lists, a 2-D array, or a
        DataFrame whose row and column labels are both 0..n-1.
    n : int, optional
        Number of leading diagonal cells to add up. Defaults to ``len(mat)``,
        i.e. the whole diagonal.

    Returns
    -------
    number
        ``mat[0][0] + mat[1][1] + ... + mat[n-1][n-1]`` (0 when ``n`` is 0).
    """
    if n is None:
        n = len(mat)

    # Walk the diagonal directly instead of scanning all n*n cells.
    principal = 0
    for i in range(n):
        principal += mat[i][i]
    return principal

In [None]:
# Draw probability = sum of the whole diagonal (equal home and away goals).
# The size is taken from df_cross itself so that no diagonal cell is skipped.
principal = printDiagonalSums(df_cross, df_cross.shape[0])
print(principal)

# Keep the upper triangle, diagonal included (away goals >= home goals);
# every other cell becomes NaN. The built-in `bool` is used for the mask
# dtype because the `np.bool` alias is not available in every NumPy release.
df_cross_up = df_cross.where(np.triu(np.ones(df_cross.shape)).astype(bool))
print(df_cross_up)

In [None]:
draw = principal

# df_cross_up holds the diagonal plus everything above it, so:
#   home win = total - (diagonal + above)
#   away win = (diagonal + above) - diagonal
home_team_win = df_cross.sum().sum() - df_cross_up.sum().sum()
away_team_win = df_cross_up.sum().sum() - principal

# Convert to percentages first and round afterwards; rounding before the
# multiplication by 100 leaves floating-point noise in the printed values.
home_pct = np.round(home_team_win * 100, 1)
draw_pct = np.round(draw * 100, 1)
away_pct = np.round(away_team_win * 100, 1)

print("home win chance: " + str(home_pct))
print("draw chance: " + str(draw_pct))
print("away chance: " + str(away_pct))

print("machineball total chance: " + str(np.round(home_pct + draw_pct + away_pct, 1)))

In [None]:
# Bookmaker odds for the three outcomes (used below as 1 / odds).
bookie_home_team_win_odds = 1.82
bookie_draw = 3.74
bookie_away_team_win_odds = 4.7

# Implied chance of each outcome: the reciprocal of its odds.
Bookie_home_win = 1/ bookie_home_team_win_odds
Bookie_draw = 1/ bookie_draw
Bookie_away_win = 1/ bookie_away_team_win_odds

# All three percentages are rounded to two decimals so they are comparable
# and the printed total is the sum of the printed parts.
bookie_home_pct = np.round(Bookie_home_win * 100, 2)
bookie_draw_pct = np.round(Bookie_draw * 100, 2)
bookie_away_pct = np.round(Bookie_away_win * 100, 2)

print("Bookie home win chance: " + str(bookie_home_pct))
print("Bookie draw chance: " + str(bookie_draw_pct))
print("Bookie away win chance: " + str(bookie_away_pct))
print("Bookie total chance: " + str(np.round(bookie_home_pct + bookie_draw_pct + bookie_away_pct, 2)))

In [None]:
## Save plot
#plt.savefig('C:/Users/Louis/OneDrive/Documents/Python/images/Heatmaps/fulham_away.png', dpi=1200, bbox_inches = "tight")