In [1]:
import pandas as pd
from pathlib import Path

In [2]:
# Both frames must expose the team name under the same column ("Squad"); fix that here.
def normalize_squad_column(df):
    """Return a copy of ``df`` whose team-name column is called ``"Squad"``.

    The frame must contain either a ``"Squad"`` column or a ``"Team"`` column.
    If ``"Squad"`` is present it is used as-is (even when ``"Team"`` also
    exists); otherwise ``"Team"`` is renamed to ``"Squad"``. The values of the
    resulting column are cast to ``str`` and stripped of surrounding whitespace.

    The input frame is never modified: the result is always a new DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding a ``"Squad"`` or ``"Team"`` column.

    Returns
    -------
    pandas.DataFrame
        New frame with a whitespace-stripped, string-typed ``"Squad"`` column.

    Raises
    ------
    ValueError
        If neither a ``"Squad"`` nor a ``"Team"`` column is present.
    """
    cols = df.columns.tolist()
    if "Squad" in cols:
        # Copy first so the column assignment below cannot write into the
        # caller's frame (the rename branch already yields a new frame).
        normalized = df.copy()
    elif "Team" in cols:
        normalized = df.rename(columns={"Team": "Squad"})
    else:
        raise ValueError(f"No 'Squad' or 'Team' column found in {cols}")
    normalized["Squad"] = normalized["Squad"].astype(str).str.strip()
    return normalized

In [3]:
# Main function for merging the stats and leaderboard tables.
def merge_squad_and_leaderboard(season_dir: str | Path) -> pd.DataFrame:
    """Merge a season's leaderboard with its squad stats and save the result.

    Reads ``squad_player_stats.csv`` and a ``*Team_Leaderboard*.csv`` file from
    ``season_dir``, normalizes the team column of both to ``"Squad"`` via
    ``normalize_squad_column``, keeps the first row per squad in each table and
    left-joins the squad stats onto the leaderboard. The merged table is written
    to ``<prefix>_squad_team_merged.csv`` in the same folder, where ``<prefix>``
    is the part of the leaderboard file stem before ``"_Team_Leaderboard"``.

    Parameters
    ----------
    season_dir : str or pathlib.Path
        Folder containing the two input CSV files; also receives the output.

    Returns
    -------
    pandas.DataFrame
        One row per leaderboard squad, with the squad-stats columns attached
        (missing where a squad has no stats row).

    Raises
    ------
    FileNotFoundError
        If ``squad_player_stats.csv`` or a leaderboard CSV is missing.
    ValueError
        Propagated from ``normalize_squad_column`` when an input has neither a
        ``"Squad"`` nor a ``"Team"`` column.
    """
    season_dir = Path(season_dir)
    merged_suffix = "_squad_team_merged"

    # Squad_player_stats.csv handling
    squad_path = season_dir / "squad_player_stats.csv"
    if not squad_path.exists():
        raise FileNotFoundError(f"No squad_player_stats.csv in {season_dir}")
    squad_df = normalize_squad_column(pd.read_csv(squad_path))
    squad_df = squad_df.drop_duplicates(subset=["Squad"])

    # Team_Leaderboard.csv handling
    # glob() yields entries in filesystem order, so sort to make the choice
    # reproducible when several files match. Files this function wrote earlier
    # are skipped: for a leaderboard stem without a "_Team_Leaderboard" part
    # (e.g. "Team_Leaderboard.csv") the output name matches the pattern too.
    lb_files = sorted(
        candidate
        for candidate in season_dir.glob("*Team_Leaderboard*.csv")
        if not candidate.stem.endswith(merged_suffix)
    )
    if not lb_files:
        raise FileNotFoundError(f"No Team_Leaderboard CSV found in {season_dir}")
    lb_path = lb_files[0]
    lb_df = normalize_squad_column(pd.read_csv(lb_path))
    lb_df = lb_df.drop_duplicates(subset=["Squad"])

    # Merging the two df-s
    # Both sides are unique on "Squad" after the de-duplication above, so the
    # left join cannot multiply rows; validate= states that invariant explicitly.
    merged = lb_df.merge(squad_df, on="Squad", how="left", validate="one_to_one")

    base_code = lb_path.stem.split("_Team_Leaderboard")[0]
    out_path = season_dir / f"{base_code}{merged_suffix}.csv"
    merged.to_csv(out_path, index=False)
    print(f"[{season_dir.name}] merged file saved to: {out_path.name}")

    return merged

In [4]:
# Run the merge: pass the season folder that holds the squad stats and the
# leaderboard CSVs - the merged file is written into that same folder.
merge_squad_and_leaderboard(season_dir="../CSV_files/Season_2025-2026")

[Season_2025-2026] merged file saved to: 2025_2026_squad_team_merged.csv


Unnamed: 0,Rk,Squad,MP,W,D,L,GF,GA,GD,Pts,...,Possession_Against,Progressive_Passes_Received_Per90_Against,Shots_Per_90,Shots_On_Target_Per_90,Sh/90_Against,SoT/90_Against,Shot_Creating_Actions_Per90,Goal_Creating_Actions_Per90,SCA90_Against,GCA90_Against
0,1,Arsenal,13,9,3,1,25,7,18,30,...,41.5,22.85,14.23,4.69,7.31,2.08,24.54,3.54,13.23,1.0
1,2,Manchester City,14,9,1,4,32,16,16,28,...,43.4,28.0,14.07,5.07,9.21,3.14,25.36,3.71,15.5,1.71
2,3,Chelsea,13,7,3,3,24,12,12,24,...,42.6,30.85,14.08,5.31,9.23,2.69,24.46,3.46,15.23,1.54
3,4,Aston Villa,13,7,3,3,16,11,5,24,...,46.5,31.46,11.46,3.69,12.15,3.0,20.38,2.23,21.69,1.38
4,5,Brighton,13,6,4,3,21,16,5,22,...,50.1,34.77,12.38,4.38,11.0,3.23,21.38,2.54,20.54,1.92
5,6,Sunderland,13,6,4,3,17,13,4,22,...,56.8,44.15,9.62,2.77,14.38,4.46,17.08,2.23,25.23,1.69
6,7,Manchester Utd,13,6,3,4,21,20,1,21,...,47.7,32.08,14.92,5.0,11.0,3.69,26.85,2.62,19.08,2.62
7,8,Liverpool,13,7,0,6,20,20,0,21,...,38.8,25.92,14.77,4.08,11.08,3.69,26.54,2.69,19.54,2.77
8,9,Everton,14,6,3,5,15,17,-2,21,...,55.6,41.86,10.5,2.86,13.21,3.93,18.93,1.86,22.79,2.14
9,10,Crystal Palace,13,5,5,3,17,11,6,20,...,57.1,38.92,12.0,3.92,10.85,3.08,20.54,2.15,19.08,1.62
