In [1]:
import pandas as pd
from pathlib import Path

def normalize_squad_column(df: pd.DataFrame) -> pd.DataFrame:
    """Return a new frame whose team-name column is called ``"Squad"`` and is trimmed.

    The input frame is never modified: the result is always a separate
    DataFrame, regardless of which column name the input used.

    Parameters
    ----------
    df : pd.DataFrame
        Frame containing either a ``"Squad"`` column or a ``"Team"`` column.
        If both are present, ``"Squad"`` is used and ``"Team"`` is left as is.

    Returns
    -------
    pd.DataFrame
        Copy of ``df`` with a ``"Squad"`` column whose values were converted
        with ``astype(str)`` and stripped of leading/trailing whitespace.

    Raises
    ------
    ValueError
        If neither a ``"Squad"`` nor a ``"Team"`` column exists.
    """
    cols = df.columns.tolist()
    if "Squad" in cols:
        # Copy explicitly so the assignment below cannot write into the
        # caller's frame (the "Team" branch already gets a new frame from rename).
        normalized = df.copy()
    elif "Team" in cols:
        normalized = df.rename(columns={"Team": "Squad"})
    else:
        raise ValueError(f"No 'Squad' or 'Team' column found in {cols}")

    # astype(str) also stringifies non-text values, so .str.strip() is always valid.
    normalized["Squad"] = normalized["Squad"].astype(str).str.strip()
    return normalized


def merge_squad_and_leaderboard(season_dir: str | Path) -> pd.DataFrame:
    """Left-join a season's leaderboard CSV with its squad stats and save the result.

    Reads ``squad_player_stats.csv`` and the first (in sorted order) CSV whose
    name matches ``*Team_Leaderboard*.csv`` from ``season_dir``, normalizes the
    team-name column of both to ``"Squad"``, keeps one row per squad in each,
    and merges them with the leaderboard on the left. The merged table is
    written next to the inputs as ``<prefix>_squad_team_merged.csv``, where
    ``<prefix>`` is the leaderboard file stem up to ``"_Team_Leaderboard"``.

    Parameters
    ----------
    season_dir : str | Path
        Directory holding the season's CSV files.

    Returns
    -------
    pd.DataFrame
        One row per leaderboard squad. Non-key columns present in both files
        receive pandas' default ``_x`` / ``_y`` suffixes.

    Raises
    ------
    FileNotFoundError
        If ``squad_player_stats.csv`` or a leaderboard CSV is missing.
    ValueError
        If either CSV has neither a ``"Squad"`` nor a ``"Team"`` column.
    """
    season_dir = Path(season_dir)
    merged_suffix = "_squad_team_merged.csv"

    squad_path = season_dir / "squad_player_stats.csv"
    if not squad_path.exists():
        raise FileNotFoundError(f"No squad_player_stats.csv in {season_dir}")
    # NOTE(review): the displayed result contains an "Unnamed: 0" column, which
    # usually means an input CSV was saved with its index - confirm and consider
    # reading with index_col=0.
    squad_df = normalize_squad_column(pd.read_csv(squad_path))
    squad_df = squad_df.drop_duplicates(subset=["Squad"])

    # glob yields matches in filesystem order, so sort to make the choice of
    # leaderboard file deterministic. Skip files this function wrote itself:
    # when the leaderboard stem has no "_Team_Leaderboard" separator the output
    # name still matches the pattern and would be picked up on a re-run.
    lb_files = sorted(
        path
        for path in season_dir.glob("*Team_Leaderboard*.csv")
        if not path.name.endswith(merged_suffix)
    )
    if not lb_files:
        raise FileNotFoundError(f"No Team_Leaderboard CSV found in {season_dir}")
    lb_path = lb_files[0]

    lb_df = normalize_squad_column(pd.read_csv(lb_path))
    lb_df = lb_df.drop_duplicates(subset=["Squad"])

    # Both sides are unique on "Squad" at this point, so the join cannot
    # multiply rows; validate makes that invariant explicit instead of
    # de-duplicating again after the merge.
    merged = lb_df.merge(squad_df, on="Squad", how="left", validate="one_to_one")

    base_code = lb_path.stem.split("_Team_Leaderboard")[0]
    out_path = season_dir / f"{base_code}{merged_suffix}"

    merged.to_csv(out_path, index=False)
    print(f"[{season_dir.name}] merged file saved to: {out_path.name}")

    return merged

In [13]:
# Merge the 2020-2021 season's leaderboard with its squad stats. The call also
# writes the merged CSV into that season directory and prints the file name.
# The path is relative to the kernel's working directory. As the last expression
# in the cell, the returned DataFrame is what the notebook displays.
merge_squad_and_leaderboard("../CSV_files/Season_2020-2021")

[Season_2020-2021] merged file saved to: 2020_2021_squad_team_merged.csv


Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,...,Possession_Against,Progressive_Passes_Received_Per90_Against,Shots_Per_90,Shots_On_Target_Per_90,Sh/90_Against,SoT/90_Against,Shot_Creating_Actions_Per90,Goal_Creating_Actions_Per90,SCA90_Against,GCA90_Against
0,1,Manchester City,38,27,5,6,83,32,51,86,...,36.1,22.11,15.53,5.55,7.18,2.37,28.08,3.71,12.87,1.42
1,2,Manchester Utd,38,21,11,6,73,44,29,74,...,44.2,32.11,13.55,5.16,11.16,3.61,25.08,3.34,19.63,1.76
2,3,Liverpool,38,20,9,9,68,42,26,69,...,37.6,25.05,15.84,5.37,8.53,3.55,28.11,2.89,15.29,1.95
3,4,Chelsea,38,19,10,9,58,36,22,67,...,38.6,27.76,14.37,5.08,8.74,2.71,26.79,2.5,15.45,1.47
4,5,Leicester City,38,20,6,12,68,50,18,66,...,45.4,32.55,12.45,4.47,9.63,3.47,22.71,3.0,17.32,2.16
5,6,West Ham,38,19,8,11,62,47,15,65,...,57.1,44.18,12.21,4.16,12.08,3.76,21.66,2.79,21.74,2.03
6,7,Tottenham,38,18,8,12,68,45,23,62,...,48.3,39.45,11.53,4.39,12.53,3.82,20.68,3.16,23.39,2.0
7,8,Arsenal,38,18,7,13,55,39,16,61,...,46.2,30.66,11.92,3.68,10.84,3.32,21.39,2.5,18.71,1.55
8,9,Leeds United,38,18,5,15,62,54,8,59,...,42.4,31.26,13.63,5.08,14.45,4.97,24.32,2.84,25.03,2.26
9,10,Everton,38,17,8,13,47,48,-1,59,...,53.5,43.45,10.39,3.76,13.18,4.16,18.68,2.11,23.45,2.24
