In [6]:
import pandas as pd

# Directory that holds the raw CSV exports; repoint the notebook by editing
# this single value instead of five separate path strings.
DATA_DIR = "../data/input"

# Load the raw tables used by this notebook (one DataFrame per CSV file).
appearances = pd.read_csv(f"{DATA_DIR}/appearances.csv")
players = pd.read_csv(f"{DATA_DIR}/players.csv")
clubs = pd.read_csv(f"{DATA_DIR}/clubs.csv")
games = pd.read_csv(f"{DATA_DIR}/games.csv")
competitions = pd.read_csv(f"{DATA_DIR}/competitions.csv")


In [8]:
# Attach club name, competition name and season to every appearance row.
#
# The cell is safe to re-run: `rename` ignores labels that were already
# renamed, and the three enrichment columns are dropped from `appearances`
# before being merged back in, so a second execution neither raises KeyError
# nor produces `_x` / `_y` duplicate columns.

# Club lookup: player_club_id -> club_name
clubs = clubs.rename(columns={"club_id": "player_club_id", "name": "club_name"})[
    ["player_club_id", "club_name"]
]

# Competition lookup: competition_id -> competition_name
competitions = competitions.rename(columns={"name": "competition_name"})[
    ["competition_id", "competition_name"]
]

# Season lookup (from games table): game_id -> season
games = games[["game_id", "season"]]

# Left joins keep every appearance row. validate="many_to_one" makes pandas
# raise MergeError if a lookup key is duplicated; without it such duplicates
# would silently multiply appearance rows and inflate any later aggregation.
appearances = (
    appearances
    .drop(columns=["club_name", "competition_name", "season"], errors="ignore")
    .merge(clubs, on="player_club_id", how="left", validate="many_to_one")
    .merge(competitions, on="competition_id", how="left", validate="many_to_one")
    .merge(games, on="game_id", how="left", validate="many_to_one")
)


In [9]:
# Keep only the appearance rows whose competition_name is exactly "premier-league".
# NOTE(review): the match is on the name string alone - confirm no other
# competition in the data shares this name.
df_prem = appearances.loc[appearances["competition_name"].eq("premier-league")]


In [10]:
# Collapse the filtered appearances into one row per
# (player_id, player_name, season, club_name) combination.
player_stats = (
    df_prem
    .groupby(by=["player_id", "player_name", "season", "club_name"])
    .agg(
        # number of non-null appearance ids in the group
        games_played=pd.NamedAgg(column="appearance_id", aggfunc="count"),
        # totals over all appearances in the group
        total_minutes=pd.NamedAgg(column="minutes_played", aggfunc="sum"),
        goals=pd.NamedAgg(column="goals", aggfunc="sum"),
        assists=pd.NamedAgg(column="assists", aggfunc="sum"),
    )
    # turn the group keys back into ordinary columns
    .reset_index()
)


In [None]:
# Remove players with no minutes to avoid division by zero, then derive the
# per-90 rates.
#
# Everything is built in a single chain: `.assign` returns a new DataFrame, so
# no column is ever written onto a filtered slice of the previous frame (the
# pattern that can trigger pandas' SettingWithCopyWarning).
# Rates are expressed per 90 minutes played: value / (total_minutes / 90).
player_stats = (
    player_stats
    .loc[player_stats["total_minutes"] > 0]
    .assign(
        goal_contributions=lambda stats: stats["goals"] + stats["assists"],
        goals_per_90=lambda stats: stats["goals"] / (stats["total_minutes"] / 90),
        assists_per_90=lambda stats: stats["assists"] / (stats["total_minutes"] / 90),
        # `goal_contributions` is available here because assign evaluates its
        # keyword arguments in order.
        contrib_per_90=lambda stats: stats["goal_contributions"] / (stats["total_minutes"] / 90),
    )
)


In [None]:
# Minimum minutes for a player-season to be ranked; per-90 rates computed
# from fewer minutes are dominated by short-term cameos.
MIN_MINUTES = 900

# Rank every player-season by goal contributions per 90 minutes, best first.
player_stats = player_stats.sort_values(by="contrib_per_90", ascending=False)

# Filter out short-term cameos (< MIN_MINUTES minutes). `top_players` has to be
# assigned here: the following cell displays it, and with the assignment
# commented out a fresh kernel fails with NameError.
top_players = player_stats[player_stats["total_minutes"] >= MIN_MINUTES]


In [13]:
# Bare last expression: the notebook renders the `top_players` DataFrame as the cell output.
top_players

Unnamed: 0,player_id,player_name,season,club_name,games_played,total_minutes,goals,assists,goal_contributions,goals_per_90,assists_per_90,contrib_per_90
810,26399,Sergio Agüero,2013,Manchester City Football Club,23,1538,17,10,27,0.994798,0.585176,1.579974
1567,44352,Luis Suárez,2013,Liverpool Football Club,33,2965,31,17,48,0.940978,0.516020,1.456998
3907,148455,Mohamed Salah,2024,Liverpool Football Club,31,2755,27,17,44,0.882033,0.555354,1.437387
6050,418560,Erling Haaland,2022,Manchester City Football Club,35,2777,36,8,44,1.166727,0.259273,1.425999
1869,50935,Chicharito,2012,Manchester United Football Club,22,947,10,5,15,0.950370,0.475185,1.425554
...,...,...,...,...,...,...,...,...,...,...,...,...
5161,278166,Amari'i Bell,2023,Luton Town,21,1723,0,0,0,0.000000,0.000000,0.000000
5874,391005,Mads Roerslev,2022,Brentford Football Club,20,1102,0,0,0,0.000000,0.000000,0.000000
5158,277114,Jason Denayer,2016,Sunderland AFC,24,1879,0,0,0,0.000000,0.000000,0.000000
1762,48002,Angelo Ogbonna,2022,West Ham United Football Club,16,1236,0,0,0,0.000000,0.000000,0.000000
