In [6]:
import pandas as pd

from utils.general import get_data_path
from utils.get_ids import external_team_name_to_fpl_name, get_team_id

SEASON = "2024-25"

gws_dfs: dict[int, pd.DataFrame] = {}
for i in range(1, 35+1):
    df = pd.read_csv(get_data_path(SEASON, f"team_gws/gw{i}.csv"))
    df.drop(columns=["xP", "P", "P vs xP", "C 1/3", "C PA", "TPA", "T", "xCS", "CS"], inplace=True)
    df["team_id"] = df['Team'].apply(
        external_team_name_to_fpl_name
                                    ).apply(get_team_id, args=("name", SEASON)).astype(int)
    df['vs_team_id'] = df['vs Team'].apply(
        external_team_name_to_fpl_name
                                    ).apply(get_team_id, args=("name", SEASON)).astype(int)
    
    df.drop(columns=["Team", "vs Team"], inplace=True)
    df['gw'] = i

    gws_dfs[i] = df

dfs_list = [df for df in gws_dfs.values()]
merged_df = pd.concat(dfs_list)
last_10 = pd.concat(dfs_list[-10:])

In [7]:
grouped = merged_df.groupby(["team_id", "Home?"]).agg(
    avg_shots=("Shots", "mean"),
    avg_shots_on_target=("SoT", "mean"),
    avg_shots_in_box=("SiB", "mean"),
    avg_xG = ("xG", "mean"),
    avg_goals = ("G", "mean"),
    avg_cc = ("CC", "mean"),
    avg_xA = ("xA", "mean"),
    avg_assists = ("A", "mean"),
    avg_xGC = ("xGC", "mean"),
    avg_GC = ("GC", "mean"),
    game_count = ("gw", "count"),
).reset_index()

vs_grouped = last_10.groupby(["vs_team_id", "Home?"]).agg(
    avg_shots=("Shots", "mean"),
    avg_shots_on_target=("SoT", "mean"),
    avg_shots_in_box=("SiB", "mean"),
    avg_xG = ("xG", "mean"),
    avg_goals = ("G", "mean"),
    avg_cc = ("CC", "mean"),
    avg_xA = ("xA", "mean"),
    avg_assists = ("A", "mean"),
    avg_xGC = ("xGC", "mean"),
    avg_GC = ("GC", "mean"),
    game_count = ("gw", "count"),
).reset_index()

grouped["avg_shots_in_box_against"] = grouped.apply(
    lambda row: vs_grouped[
        (vs_grouped['vs_team_id'] == row['team_id']) & 
        (vs_grouped['Home?'] == row['Home?'])
    ]['avg_shots_in_box'].values[0],
    axis=1
)

grouped['avg_shots_against'] = grouped.apply(
    lambda row: vs_grouped[
        (vs_grouped['vs_team_id'] == row['team_id']) & 
        (vs_grouped['Home?'] == row['Home?'])
    ]['avg_shots'].values[0],
    axis=1
) 
grouped['avg_shots_on_target_against'] = grouped.apply(
    lambda row: vs_grouped[
        (vs_grouped['vs_team_id'] == row['team_id']) & 
        (vs_grouped['Home?'] == row['Home?'])
    ]['avg_shots_on_target'].values[0],
    axis=1
)

grouped['avg_cc_against'] = grouped.apply(
    lambda row: vs_grouped[
        (vs_grouped['vs_team_id'] == row['team_id']) & 
        (vs_grouped['Home?'] == row['Home?'])
    ]['avg_cc'].values[0],
    axis=1
)

grouped[grouped['team_id'] == 6]

Unnamed: 0,team_id,Home?,avg_shots,avg_shots_on_target,avg_shots_in_box,avg_xG,avg_goals,avg_cc,avg_xA,avg_assists,avg_xGC,avg_GC,game_count,avg_shots_in_box_against,avg_shots_against,avg_shots_on_target_against,avg_cc_against
10,6,False,14.176471,4.647059,9.882353,1.688235,1.647059,10.705882,1.205882,1.235294,1.535294,1.352941,17,5.0,8.0,2.666667,5.666667
11,6,True,18.166667,6.722222,11.833333,1.988889,1.888889,14.055556,1.55,1.333333,0.983333,1.0,18,7.25,9.75,3.25,7.0


In [8]:
######## Probably could get them from the data but i decided to do it this way
STAT_WEIGHTS = {
    # attack
    'avg_shots': 0.8, 
    'avg_shots_on_target': 0.9,
    'avg_shots_in_box': 1,
    'avg_xG': 1,
    'avg_goals': 1,
    'avg_cc': 0.5,
    'avg_xA': 0.7,
    'avg_assists': 0.7,
    # defense
    'avg_GC': 1,
    'avg_xGC': 1,
    'avg_shots_against': 0.8,
    'avg_shots_on_target_against': 1,
    'avg_shots_in_box_against': 1,
    'avg_cc_against': 0.5
}

# team_id -> {home_attack_rating, away_attack_rating, home_defense_rating, away_defense_rating}
team_ratings = {}

for team_id in grouped['team_id'].unique():
    # attack stats
    home_attack_stats = grouped[(grouped['team_id'] == team_id) & (grouped['Home?'])]
    home_attack_stats = home_attack_stats[['avg_shots', 'avg_shots_on_target', 'avg_shots_in_box', 'avg_xG', 'avg_goals',
                              'avg_cc', 'avg_xA', 'avg_assists']]
    
    home_attack_stats_sum = home_attack_stats.apply(lambda row: sum(row * STAT_WEIGHTS[col] for col in home_attack_stats.columns), axis=1)
    home_attack_stats_sum = home_attack_stats_sum.sum(axis=1)

    away_attack_stats = grouped[(grouped['team_id'] == team_id) & (~grouped['Home?'])]
    away_attack_stats = away_attack_stats[['avg_shots', 'avg_shots_on_target', 'avg_shots_in_box', 'avg_xG', 'avg_goals',
                              'avg_cc', 'avg_xA', 'avg_assists']]
    away_attack_stats_sum = away_attack_stats.apply(lambda row: sum(row * STAT_WEIGHTS[col] for col in away_attack_stats.columns), axis=1)
    away_attack_stats_sum = away_attack_stats_sum.sum(axis=1)

    # defence stats
    home_defense_stats = grouped[(grouped['team_id'] == team_id) & (grouped['Home?'])]
    home_defense_stats = home_defense_stats[['avg_GC', 'avg_xGC', 'avg_shots_against',
                                             'avg_shots_on_target_against', 'avg_cc_against']]

    home_defense_stats_sum = home_defense_stats.apply(lambda row: sum(row * STAT_WEIGHTS[col] for col in home_defense_stats.columns), axis=1)
    home_defense_stats_sum = home_defense_stats_sum.sum(axis=1)

    away_defense_stats = grouped[(grouped['team_id'] == team_id) & (~grouped['Home?'])]
    away_defense_stats = away_defense_stats[['avg_GC', 'avg_xGC', 'avg_shots_against', 'avg_shots_in_box_against',
                                             'avg_shots_on_target_against', 'avg_cc_against']]

    away_defense_stats_sum = away_defense_stats.apply(lambda row: sum(row * STAT_WEIGHTS[col] for col in away_defense_stats.columns), axis=1)
    away_defense_stats_sum = away_defense_stats_sum.sum(axis=1)

    team_ratings[int(team_id)] = {
        "home_attack_rating": float(home_attack_stats_sum.values[0]),
        "away_attack_rating": float(away_attack_stats_sum.values[0]),
        "home_defense_rating": float(home_defense_stats_sum.values[0]),
        "away_defense_rating": float(away_defense_stats_sum.values[0])
    }

team_ratings_df = pd.DataFrame.from_dict(team_ratings, orient='index')

# normalize the ratings
team_ratings_df[['home_attack_rating', 'away_attack_rating']] = (team_ratings_df[['home_attack_rating', 'away_attack_rating']]
.apply(lambda x: (x - x.min()) / (x.max() - x.min())))

team_ratings_df[['home_defense_rating', 'away_defense_rating']] = (team_ratings_df[['home_defense_rating', 'away_defense_rating']]
.apply(lambda x: (x.max() - x) / (x.max() - x.min())))

team_ratings_df.sort_values(by='home_attack_rating', ascending=False)

Unnamed: 0,home_attack_rating,away_attack_rating,home_defense_rating,away_defense_rating
6,1.0,0.605838,0.775932,0.868449
12,0.95705,1.0,0.594061,1.0
13,0.876567,0.643018,0.697344,0.906427
1,0.841815,0.436622,1.0,0.845056
15,0.737282,0.402535,0.61825,0.676694
18,0.736277,0.462487,0.406804,0.537355
3,0.729843,0.678486,0.785834,0.355868
2,0.70376,0.251083,0.560088,0.354818
4,0.562096,0.238387,0.377873,0.35368
5,0.535511,0.458205,0.681261,0.461838


In [9]:
# add team id column instead of index
team_ratings_df.reset_index(inplace=True)
team_ratings_df.rename(columns={'index': 'team_id'}, inplace=True)
team_ratings_df.to_csv(get_data_path(SEASON, "team_ratings.csv"), index=False)

In [None]:
import pandas as pd

ddf = pd.read