In [1]:
import pandas as pd

# Read the Wyscout export into a DataFrame
file_path = "/Users/marclambertes/Downloads/Wyscout Database/USL 2025.xlsx"  # Adjust if using another path
df = pd.read_excel(file_path)

# Trim leading/trailing whitespace from the column headers so the metric
# names below can be matched against them exactly.
df.columns = df.columns.str.strip()

# Candidate metrics feeding the attacking rating
attack_metrics = [
    "Goals",
    "xG",
    "Assists",
    "Shots per 90",
    "Key passes per 90",
    "xA per 90",
    "Touches in box per 90",
    "Dribbles per 90",
    "Offensive duels per 90",
    "Shot assists per 90",
    "Smart passes per 90",
    "Through passes per 90",
    "Crosses per 90",
]

# Candidate metrics feeding the defensive rating.
# NOTE(review): all of these are later min-max scaled in the same direction
# (larger value -> larger rating), including "Fouls per 90" and
# "Conceded goals per 90" -- confirm that is intended.
defense_metrics = [
    "Interceptions per 90",
    "Sliding tackles per 90",
    "Shots blocked per 90",
    "Defensive duels per 90",
    "Defensive duels won, %",
    "Aerial duels per 90",
    "Aerial duels won, %",
    "Successful defensive actions per 90",
    "Fouls per 90",
    "Conceded goals per 90",
    "Prevented goals per 90",
]

# Keep only the metrics that are actually present in the loaded sheet
present_columns = set(df.columns)
attack_metrics = [metric for metric in attack_metrics if metric in present_columns]
defense_metrics = [metric for metric in defense_metrics if metric in present_columns]

# Coerce every rating input to a number; unparseable or missing cells become 0
rating_inputs = attack_metrics + defense_metrics
numeric_inputs = df[rating_inputs].apply(pd.to_numeric, errors='coerce')
df[rating_inputs] = numeric_inputs.fillna(0)

# Normalize metrics and compute ratings
def normalize(df, cols, name):
    """Add a 0-100 rating column built from min-max scaled metric columns.

    Each column in ``cols`` is rescaled over all rows so that its minimum
    maps to 0 and its maximum maps to 1. The row-wise mean of the rescaled
    columns, multiplied by 100, is written to ``df[name]``.

    Args:
        df: DataFrame holding the metric columns. It is modified in place.
        cols: List of column labels to combine into the rating.
        name: Label of the rating column to create (or overwrite).

    Returns:
        The same ``df`` object, now containing the ``name`` column.
    """
    if len(cols) == 0:
        # A mean over zero columns is NaN for every row, which would then
        # turn any rating derived from this one into NaN as well. Score the
        # rating as 0 instead, matching how missing metric values are
        # treated as 0 elsewhere in this script.
        df[name] = 0.0
        return df

    values = df[cols]
    low = values.min()
    span = values.max() - low
    # A constant column has zero span; dividing by 1 instead makes it
    # contribute exactly 0 without adding an epsilon that would bias every
    # other column (the best row must reach exactly 1.0, not 0.9999...).
    norm = (values - low) / span.where(span != 0, 1)
    df[name] = norm.mean(axis=1) * 100
    return df

# Build one 0-100 rating per metric group
for metric_group, rating_label in (
    (attack_metrics, "Attacking Rating"),
    (defense_metrics, "Defensive Rating"),
):
    df = normalize(df, metric_group, rating_label)

# Relative importance of each side of the game in the combined score
attack_weight = 0.6
defense_weight = 0.4

# Weighted sum of the two ratings
weighted_attack = attack_weight * df["Attacking Rating"]
weighted_defense = defense_weight * df["Defensive Rating"]
df["Combined Rating"] = weighted_attack + weighted_defense

# Percentile comparisons (team and global)
def _min_max_pct(values):
    """Rescale a Series linearly so its minimum is 0 and its maximum is 100."""
    low = values.min()
    span = values.max() - low
    if not span > 0:
        # Zero span (all values equal, or a single value) or NaN span (no
        # valid values): every valid entry scores 0 and NaN stays NaN.
        return (values - low) * 0.0
    return (values - low) / span * 100


def add_percentile_comparisons(df, rating_col, team_col="Team"):
    """Add team-relative and dataset-wide 0-100 versions of a rating column.

    Two columns are written to ``df``:

    * ``"<rating_col> (Team %)"``   - the rating min-max scaled within each
      group of rows sharing the same ``team_col`` value.
    * ``"<rating_col> (Global %)"`` - the rating min-max scaled over all rows.

    Despite the function name, these are linear min-max scores, not rank
    percentiles: the lowest value in a group scores 0 and the highest scores
    100. A group whose values are all equal (including a single-row group)
    scores 0 throughout.

    Args:
        df: DataFrame containing ``rating_col`` and ``team_col``. It is
            modified in place.
        rating_col: Label of the numeric column to rescale.
        team_col: Label of the column used to group rows for the team score.

    Returns:
        The same ``df`` object with the two new columns added.
    """
    # Dividing by the exact span (rather than span + epsilon) lets the best
    # row in each group reach exactly 100.
    df[f"{rating_col} (Team %)"] = df.groupby(team_col)[rating_col].transform(
        _min_max_pct
    )
    df[f"{rating_col} (Global %)"] = _min_max_pct(df[rating_col])
    return df

# Attach the team-relative and whole-dataset scores for each rating
rating_columns = ["Attacking Rating", "Defensive Rating", "Combined Rating"]
for col in rating_columns:
    df = add_percentile_comparisons(df, col)

# Columns to keep: identity fields first, then each rating followed by its
# team-relative and whole-dataset scores.
output_cols = ["Player", "Team", "Position"]
for rating in rating_columns:
    output_cols += [rating, f"{rating} (Team %)", f"{rating} (Global %)"]

final_df = df[output_cols]

# Write the result to an Excel workbook in the working directory (optional)
final_df.to_excel("Football_Performance_Ratings.xlsx", index=False)

# Show the first ten rows as a quick sanity check
print(final_df.head(10))


           Player               Team       Position  Attacking Rating  \
0     A. Crognale        San Antonio            RCB          5.140256   
1      R. Sánchez        San Antonio             GK          0.986768   
2     J. Medranda        San Antonio             LB         14.900471   
3      M. Taintor        San Antonio            LCB          8.788965   
4    J. Hernández        San Antonio  LCMF, AMF, CF         17.359322   
5     A. Tambakis  New Mexico United             GK          1.928684   
6      C. Gloster  New Mexico United             LB          6.481749   
7       T. Maples  New Mexico United       LCB, RCB         13.314538   
8       N. Gordon    Monterey Bay FC            RCB          4.534348   
9  Nico Campuzano    Monterey Bay FC             GK          0.160628   

   Attacking Rating (Team %)  Attacking Rating (Global %)  Defensive Rating  \
0                  29.473646                    11.600370         31.217477   
1                   5.658018          