In [1]:
import pandas as pd
from scipy.stats import zscore

# Load the dataset
data = pd.read_excel('/Users/marclambertes/Downloads/Wyscout/EP.xlsx')

# Define metrics for each role and weights
roles_metrics_weights = {
    "Finisher": {"Goals": 0.2, "Shots on target %": 0.15, "xG": 0.15, "Conversion rate %": 0.15, "Touches in box per 90": 0.2, "Shot accuracy %": 0.15},
    "Target": {"Aerial duels won %": 0.2, "Goals": 0.15, "Touches in box per 90": 0.15, "xG": 0.15, "Hold up play per 90": 0.2, "Headers on target %": 0.15},
    "Roamer": {"Key passes per 90": 0.2, "Dribbles per 90": 0.15, "Assists": 0.15, "Progressive passes per 90": 0.15, "Touches in final third per 90": 0.2, "Carries into box per 90": 0.15},
    "Wide Threat": {"Crosses per 90": 0.2, "Touches in box per 90": 0.15, "Dribbles per 90": 0.15, "Shot creating actions per 90": 0.15, "Key passes per 90": 0.2, "Passes into box per 90": 0.15},
    "Unlocker": {"Key passes per 90": 0.2, "Cross accuracy %": 0.15, "Assists": 0.15, "Progressive passes per 90": 0.15, "Shot creating actions per 90": 0.2, "Passes into box per 90": 0.15},
    "Outlet": {"Progressive passes received per 90": 0.2, "Fouls won per 90": 0.15, "Dribbles per 90": 0.15, "Touches in final third per 90": 0.15, "Pass completion %": 0.2, "Carries into box per 90": 0.15},
    "Box Crasher": {"Goals": 0.2, "Touches in box per 90": 0.15, "Dribbles per 90": 0.15, "xG": 0.15, "Shot accuracy %": 0.2, "Key passes per 90": 0.15},
    "Creator": {"Key passes per 90": 0.2, "Assists": 0.2, "Shot creating actions per 90": 0.2, "Progressive passes per 90": 0.15, "Passes into box per 90": 0.15, "Crosses per 90": 0.1},
    "Orchestrator": {"Pass accuracy %": 0.2, "Progressive passes per 90": 0.2, "Touches per 90": 0.15, "Switches of play per 90": 0.15, "Key passes per 90": 0.15, "Shot creating actions per 90": 0.15},
    "Box to Box": {"Defensive duels won %": 0.2, "Progressive runs per 90": 0.2, "Goals": 0.15, "Key passes per 90": 0.15, "Touches per 90": 0.15, "Dribbles per 90": 0.15},
    "Distributor": {"Progressive passes per 90": 0.2, "Long passes per 90": 0.2, "Switches of play per 90": 0.2, "Pass accuracy %": 0.15, "Touches per 90": 0.15, "Passes into box per 90": 0.1},
    "Builder": {"Pass accuracy %": 0.2, "Defensive duels won %": 0.2, "Touches per 90": 0.15, "Progressive passes per 90": 0.15, "Long passes per 90": 0.15, "Interceptions per 90": 0.15},
    "Overlapper": {"Crosses per 90": 0.2, "Dribbles per 90": 0.2, "Assists": 0.15, "Touches in final third per 90": 0.15, "Progressive runs per 90": 0.15, "Passes into box per 90": 0.15},
    "Progressor": {"Progressive passes per 90": 0.2, "Dribbles per 90": 0.2, "Touches per 90": 0.15, "Switches of play per 90": 0.15, "Carries into box per 90": 0.15, "Passes into box per 90": 0.15},
    "Safety": {"Defensive duels won %": 0.2, "Clearances per 90": 0.2, "Interceptions per 90": 0.15, "Pass accuracy %": 0.15, "Blocks per 90": 0.15, "Aerial duels won %": 0.15},
    "Aggressor": {"Tackles per 90": 0.2, "Interceptions per 90": 0.2, "Defensive duels won %": 0.15, "Clearances per 90": 0.15, "Fouls per 90": 0.15, "Blocks per 90": 0.15},
    "Spreader": {"Long passes per 90": 0.2, "Pass accuracy %": 0.2, "Switches of play per 90": 0.2, "Touches per 90": 0.15, "Progressive passes per 90": 0.15, "Passes into box per 90": 0.1},
    "Anchor": {"Clearances per 90": 0.2, "Blocks per 90": 0.2, "Interceptions per 90": 0.15, "Aerial duels won %": 0.15, "Defensive duels won %": 0.15, "Pass accuracy %": 0.15}
}

# Standardize (Z-score) relevant metrics
def standardize_metrics(data, metrics):
    standardized = {}
    for metric in metrics:
        if metric in data.columns:
            standardized[metric] = zscore(data[metric].fillna(0))
        else:
            print(f"Metric '{metric}' not found in dataset.")
    return pd.DataFrame(standardized)

# Calculate role scores
def calculate_role_scores(data, role_definitions):
    role_scores = {}
    for role, metrics_weights in role_definitions.items():
        metrics = metrics_weights.keys()
        weights = metrics_weights.values()

        # Standardize metrics for the role
        standardized = standardize_metrics(data, metrics)

        # Compute weighted Z-scores for the role
        role_scores[role] = standardized.dot(list(weights))

    return pd.DataFrame(role_scores)

# Compute scores for all roles
role_scores = calculate_role_scores(data, roles_metrics_weights)

# Combine role scores with player names and teams
final_scores = pd.concat([data[["Player", "Team"]], role_scores], axis=1)

# Save results to an Excel file
output_path = '/mnt/data/player_role_scores.xlsx'
final_scores.to_excel(output_path, index=False)

print(f"Role scores calculated and saved to {output_path}.")

Metric 'Shots on target %' not found in dataset.
Metric 'Conversion rate %' not found in dataset.
Metric 'Shot accuracy %' not found in dataset.


ValueError: Dot product shape mismatch, (500, 3) vs (6,)