In [1]:
import pandas as pd
import numpy as np
import requests
import time
from io import StringIO
from bs4 import BeautifulSoup

In [211]:
# FBref pages scraped by this notebook, grouped by competition.

# Premier League (competition id 9 in the URL).
premier_24_25_url = "https://fbref.com/en/comps/9/Premier-League-Stats"
premier_23_24_url = "https://fbref.com/en/comps/9/2023-2024/stats/2023-2024-Premier-League-Stats"

# Championship (competition id 10 in the URL).
championship_23_24_url = "https://fbref.com/en/comps/10/2023-2024/2023-2024-Championship-Stats"
championship_22_23_url = "https://fbref.com/en/comps/10/2022-2023/stats/2022-2023-Championship-Stats"

In [212]:
def fetch_data_new_season(url, table_id="results2024-202591_overall", timeout=30):
    """Return the squads listed in a league table on an FBref page.

    Parameters
    ----------
    url : str
        Page that contains the league table.
    table_id : str, optional
        HTML ``id`` of the table to parse. The default is the id that used to
        be hard-coded here; pass another id to read a different season's table.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        A single ``Squad`` column with a default integer index, in the order
        the table lists the clubs.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. when rate limited).
    ValueError
        Raised by ``pandas.read_html`` when no table with ``table_id`` exists.
    """
    response = requests.get(url, timeout=timeout)
    # Fail here with the real HTTP error instead of letting read_html complain
    # that an error page contains no matching table.
    response.raise_for_status()

    league_table = pd.read_html(StringIO(response.text), attrs={'id': table_id})[0]

    # Only the club names are needed downstream.
    return league_table[['Squad']]

In [227]:
# Squad names from the Premier League table page; the helper keeps only the 'Squad' column.
premier_24_25 = fetch_data_new_season(premier_24_25_url)

In [228]:
def fetch_data(url, table_id, timeout=30):
    """Scrape one squad-stats table and return its per-90 npxG column.

    Parameters
    ----------
    url : str
        Page that contains the table.
    table_id : str
        HTML ``id`` of the table to parse.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        Three columns: ``ID`` (row position within the table), ``''`` (the
        blank-named column that came from the table's first column --
        presumably the squad label, since later cells rename it to ``Squad``)
        and ``npxG`` taken from the ``Per 90 Minutes`` group.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (e.g. when rate limited).
    ValueError
        Raised by ``pandas.read_html`` when no table with ``table_id`` exists.
    """
    response = requests.get(url, timeout=timeout)
    # Surface blocked / rate-limited responses as an HTTP error instead of a
    # confusing parse failure further down.
    response.raise_for_status()

    df = pd.read_html(StringIO(response.text), attrs={'id': table_id}, index_col=[0])[0]

    # Move the first table column out of the index and give it the top-level
    # label 'ID' so it can be selected next to the 'Per 90 Minutes' group.
    df = df.reset_index().rename(columns={"index": "ID"})
    df = df[['ID', 'Per 90 Minutes']]

    # Flatten the two-level header to its second level; the former index
    # column ends up with an empty-string name.
    df.columns = df.columns.get_level_values(1)
    df = df[['', 'npxG']]

    # Expose the row position as 'ID'; later cells join tables on this column.
    df = df.reset_index().rename(columns={"index": "ID"})

    return df

In [229]:
# For every season page, pull the "for" table first and the "against" table
# second (same request order as before).
championship_22_23_for, championship_22_23_against = (
    fetch_data(championship_22_23_url, table_id)
    for table_id in ("stats_squads_standard_for", "stats_squads_standard_against")
)
championship_23_24_for, championship_23_24_against = (
    fetch_data(championship_23_24_url, table_id)
    for table_id in ("stats_squads_standard_for", "stats_squads_standard_against")
)
premier_23_24_for, premier_23_24_against = (
    fetch_data(premier_23_24_url, table_id)
    for table_id in ("stats_squads_standard_for", "stats_squads_standard_against")
)

In [230]:
# 'ID' is the row position assigned by fetch_data, so each merge pairs the
# "for" and "against" tables row by row. '_x' is the blank-named column of the
# "for" table after suffixing; it becomes 'Squad' and its '_y' twin is dropped.
championship_22_23 = (
    championship_22_23_for
    .merge(championship_22_23_against, on='ID', how='inner')
    .rename(columns={"_x": "Squad", "npxG_x": "npxG_for", "npxG_y": "npxG_against"})
    [['Squad', 'npxG_for', 'npxG_against']]
)

championship_23_24 = (
    championship_23_24_for
    .merge(championship_23_24_against, on='ID', how='inner')
    .rename(columns={"_x": "Squad", "npxG_x": "npxG_for", "npxG_y": "npxG_against"})
    [['Squad', 'npxG_for', 'npxG_against']]
)

In [231]:
# Pair the "for" and "against" tables by row position ('ID'), then keep the
# squad label from the "for" side together with both npxG columns.
premier_23_24 = (
    premier_23_24_for
    .merge(premier_23_24_against, on='ID', how='inner')
    .rename(columns={"_x": "Squad", "npxG_x": "npxG_for", "npxG_y": "npxG_against"})
    [['Squad', 'npxG_for', 'npxG_against']]
)

In [232]:
# Squads present in both the 2022-23 Championship frame and the 2023-24
# Premier League frame. '_x' columns come from the Championship side and
# '_y' columns from the Premier League side.
promoted_teams = (
    championship_22_23
    .merge(premier_23_24, on='Squad', how='inner')
    .rename(columns={
        "npxG_for_x": "npxG_for_champ",
        "npxG_against_x": "npxG_against_champ",
        "npxG_for_y": "npxG_for_premier",
        "npxG_against_y": "npxG_against_premier",
    })
)

In [233]:
# Column totals over the promoted teams, Championship side first.
champ_sum_for = promoted_teams['npxG_for_champ'].sum()
champ_sum_against = promoted_teams['npxG_against_champ'].sum()
prem_sum_for = promoted_teams['npxG_for_premier'].sum()
prem_sum_against = promoted_teams['npxG_against_premier'].sum()

# Premier League total divided by Championship total, per direction.
multiplier_for = prem_sum_for / champ_sum_for
multiplier_against = prem_sum_against / champ_sum_against

In [234]:
# Disabled: would add multiplier-adjusted Championship npxG columns to promoted_teams.
#promoted_teams['npxG_for_adj'] = promoted_teams['npxG_for_champ'] * multiplier_for
#promoted_teams['npxG_against_adj'] = promoted_teams['npxG_against_champ'] * multiplier_against

In [244]:
# Right join: keep every squad of the new-season table, in that table's order;
# squads missing from the 2023-24 Premier League frame get NaN npxG values.
premier_merged = premier_23_24.merge(premier_24_25, on='Squad', how='right')

In [245]:
# Attach the 2023-24 Championship figures; overlapping column names get the
# '_x' (Premier League) and '_y' (Championship) suffixes.
premier_merged = premier_merged.merge(championship_23_24, on='Squad', how='left')

In [246]:
# Scale the Championship figures with the league multipliers.
# The values are looked up from championship_23_24 on every run instead of
# rescaling the '_y' columns in place, so re-executing this cell cannot apply
# the multipliers twice. Squads without a Championship row stay NaN.
_champ_23_24_by_squad = championship_23_24.set_index('Squad')
premier_merged['npxG_for_y'] = np.round(
    premier_merged['Squad'].map(_champ_23_24_by_squad['npxG_for']) * multiplier_for, 2
)
premier_merged['npxG_against_y'] = np.round(
    premier_merged['Squad'].map(_champ_23_24_by_squad['npxG_against']) * multiplier_against, 2
)

In [247]:
# Prefer the Premier League value ('_x'); where it is missing, fall back to
# the scaled Championship value ('_y'). Only the squad and the two resulting
# columns are kept.
premier_merged = (
    premier_merged
    .assign(
        npxG_for_pred=lambda frame: frame['npxG_for_x'].fillna(frame['npxG_for_y']),
        npxG_against_pred=lambda frame: frame['npxG_against_x'].fillna(frame['npxG_against_y']),
    )
    [['Squad', 'npxG_for_pred', 'npxG_against_pred']]
)

In [248]:
# team_id is assigned by row position and later joined against the FPL 'id'
# column, so the row order must not depend on how the scraped table happens to
# be sorted. Sorting by squad name reproduces the alphabetical order shown in
# this notebook's output.
# NOTE(review): assumes FPL numbers its teams alphabetically -- confirm against
# the `teams` frame each season.
premier_merged = (
    premier_merged
    .drop(columns='team_id', errors='ignore')  # allows the cell to be re-run
    .sort_values('Squad', ignore_index=True)
)
# Size the id range from the frame instead of hard-coding 20 clubs.
premier_merged.insert(0, 'team_id', range(1, len(premier_merged) + 1))

In [249]:
# Show the frame: team_id, Squad and the two predicted npxG columns.
premier_merged

Unnamed: 0,team_id,Squad,npxG_for_pred,npxG_against_pred
0,1,Arsenal,1.8,0.68
1,2,Aston Villa,1.59,1.53
2,3,Bournemouth,1.41,1.38
3,4,Brentford,1.47,1.43
4,5,Brighton,1.37,1.33
5,6,Chelsea,1.71,1.42
6,7,Crystal Palace,1.2,1.31
7,8,Everton,1.36,1.29
8,9,Fulham,1.3,1.5
9,10,Ipswich Town,1.11,2.09


In [251]:
# Download the FPL bootstrap payload and keep the team id / name columns.
# The timeout stops the cell hanging on a stalled connection, and
# raise_for_status reports an HTTP error directly rather than as a JSON or
# KeyError on the lines below.
r = requests.get("https://fantasy.premierleague.com/api/bootstrap-static/", timeout=30)
r.raise_for_status()
fpl_data = r.json()
teams = pd.DataFrame(fpl_data['teams'])[['id', 'name', 'short_name']]

In [252]:
# Show the FPL teams frame (id, name, short_name).
teams

Unnamed: 0,id,name,short_name
0,1,Arsenal,ARS
1,2,Aston Villa,AVL
2,3,Bournemouth,BOU
3,4,Brentford,BRE
4,5,Brighton,BHA
5,6,Chelsea,CHE
6,7,Crystal Palace,CRY
7,8,Everton,EVE
8,9,Fulham,FUL
9,10,Ipswich,IPS


In [257]:
# Join the FPL teams to the predictions on the numeric id (FPL 'id' against
# the positional 'team_id'), keeping the FPL short name next to the squad.
teams_df = (
    teams
    .merge(premier_merged, left_on='id', right_on='team_id', how='inner')
    [['team_id', 'Squad', 'short_name', 'npxG_for_pred', 'npxG_against_pred']]
)

In [272]:
# Attacking multiplier: each team's predicted npxG for, divided by the mean
# over all teams and rounded to two decimals. 'Average' is only an
# intermediate column and is not part of the final selection.
stats_for = (
    teams_df
    .assign(Average=lambda frame: frame['npxG_for_pred'].mean())
    .assign(attacking_multiplier=lambda frame: np.round(frame['npxG_for_pred'] / frame['Average'], 2))
    [['team_id', 'Squad', 'short_name', 'npxG_for_pred', 'attacking_multiplier']]
)

In [275]:
# Defensive multiplier: each team's predicted npxG against, divided by the
# mean over all teams and rounded to two decimals. 'Average' is only an
# intermediate column and is not part of the final selection.
stats_against = (
    teams_df
    .assign(Average=lambda frame: frame['npxG_against_pred'].mean())
    .assign(defensive_multiplier=lambda frame: np.round(frame['npxG_against_pred'] / frame['Average'], 2))
    [['team_id', 'Squad', 'short_name', 'npxG_against_pred', 'defensive_multiplier']]
)

In [277]:
# Output locations for the two per-team tables.
# NOTE(review): these are absolute, machine-specific paths; the export only
# works where this directory exists.
team_for_csv_file_path = "C:/Users/erknud3/fpl-optimization/model/data/team_for_season_data.csv"
team_against_csv_file_path = "C:/Users/erknud3/fpl-optimization/model/data/team_against_season_data.csv"

# Write both frames without the pandas index column, attacking table first.
stats_for.to_csv(path_or_buf=team_for_csv_file_path, index=False)
stats_against.to_csv(path_or_buf=team_against_csv_file_path, index=False)