<a href="https://colab.research.google.com/github/marclamberts/football-analysis/blob/main/The_Athletic_roles.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import math

import matplotlib.pyplot as plt
import pandas as pd
from google.colab import drive
from scipy.stats import zscore

# Attach Google Drive to the Colab runtime so the workbook below is reachable
drive.mount('/content/drive')

# Read the Eredivisie 2023-2024 workbook (one named worksheet) into a DataFrame
data = pd.read_excel(
    io='/content/drive/My Drive/Database Men 2023-2024/Eredivisie - 2023-2024.xlsx',
    sheet_name='Search results (500)',
)

# Role definitions: role name -> {dataset column name: weight}.
# Every role weights its metrics equally, so each inner mapping is built from
# the ordered list of column names plus the single shared weight.
roles_metrics_weights = {
    "Finisher": dict.fromkeys(
        ["Goals", "xG", "Shots per 90", "Shots on target, %", "Goal conversion, %"], 0.2),
    "Target": dict.fromkeys(
        ["Aerial duels won, %", "Goals", "Head goals", "xG", "Touches in box per 90"], 0.2),
    "Roamer": dict.fromkeys(
        ["Dribbles per 90", "Successful dribbles, %", "Touches in box per 90",
         "Progressive runs per 90", "Accelerations per 90"], 0.2),
    "Wide Threat": dict.fromkeys(
        ["Crosses per 90", "Accurate crosses, %", "Dribbles per 90",
         "Progressive runs per 90", "Touches in box per 90"], 0.2),
    "Unlocker": dict.fromkeys(
        ["Key passes per 90", "Smart passes per 90", "Accurate smart passes, %",
         "Passes to penalty area per 90", "Accurate passes to penalty area, %"], 0.2),
    "Outlet": dict.fromkeys(
        ["Touches in box per 90", "Progressive runs per 90", "Received passes per 90",
         "Fouls suffered per 90", "Dribbles per 90"], 0.2),
    "Box Crasher": dict.fromkeys(
        ["Goals", "xG", "Touches in box per 90", "Shots per 90", "Progressive runs per 90"], 0.2),
    "Creator": dict.fromkeys(
        ["Key passes per 90", "Shot assists per 90", "Smart passes per 90",
         "Passes to final third per 90", "Accurate passes to final third, %"], 0.2),
    "Orchestrator": dict.fromkeys(
        ["Passes per 90", "Accurate passes, %", "Forward passes per 90",
         "Accurate forward passes, %", "Progressive passes per 90"], 0.2),
    "Box to Box": dict.fromkeys(
        ["Defensive duels per 90", "Defensive duels won, %", "Progressive runs per 90",
         "Goals", "Assists"], 0.2),
    "Distributor": dict.fromkeys(
        ["Passes per 90", "Accurate passes, %", "Long passes per 90",
         "Accurate long passes, %", "Progressive passes per 90"], 0.2),
    "Builder": dict.fromkeys(
        ["Passes per 90", "Accurate passes, %", "Defensive duels won, %",
         "Interceptions per 90", "Progressive passes per 90"], 0.2),
    "Overlapper": dict.fromkeys(
        ["Crosses per 90", "Accurate crosses, %", "Dribbles per 90",
         "Progressive runs per 90", "Passes to penalty area per 90"], 0.2),
    "Progressor": dict.fromkeys(
        ["Progressive passes per 90", "Accurate progressive passes, %", "Dribbles per 90",
         "Progressive runs per 90", "Touches in box per 90"], 0.2),
    "Safety": dict.fromkeys(
        ["Defensive duels won, %", "Interceptions per 90", "Accurate passes, %",
         "Shots blocked per 90", "Aerial duels won, %"], 0.2),
    "Aggressor": dict.fromkeys(
        ["Defensive duels per 90", "Sliding tackles per 90", "Interceptions per 90",
         "Fouls per 90", "Shots blocked per 90"], 0.2),
    "Spreader": dict.fromkeys(
        ["Long passes per 90", "Accurate long passes, %", "Progressive passes per 90",
         "Accurate progressive passes, %"], 0.25),
    "Anchor": dict.fromkeys(
        ["Defensive duels won, %", "Interceptions per 90", "Shots blocked per 90",
         "Aerial duels won, %", "Accurate passes, %"], 0.2),
}

# Standardize (Z-score) relevant metrics
def standardize_metrics(data, metrics):
    """Return z-scored copies of the requested metric columns.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table; each requested metric is looked up as a column name.
    metrics : iterable of str
        Column names to standardize.

    Returns
    -------
    pandas.DataFrame
        One column per metric that exists in ``data``, in the order requested,
        carrying the same index as ``data`` so it can be aligned back to it.
        Metrics absent from ``data`` are reported via ``print`` and omitted.
    """
    standardized = {}
    for metric in metrics:
        if metric not in data.columns:
            print(f"Metric '{metric}' not found in dataset.")
            continue

        # Missing values are imputed with 0 before standardizing.
        filled = data[metric].fillna(0)

        if filled.nunique() <= 1:
            # Zero variance (or no rows): zscore would divide by zero and turn
            # the whole column into NaN, so treat every player as average.
            standardized[metric] = [0.0] * len(filled)
        else:
            # Hand zscore a plain float array so the result is an ndarray
            # regardless of how the installed scipy treats pandas objects.
            standardized[metric] = zscore(filled.to_numpy(dtype=float))

    # Values are plain sequences, so the index is attached positionally.
    return pd.DataFrame(standardized, index=data.index)

# Calculate role scores
def calculate_role_scores(data, role_definitions):
    """Compute one weighted z-score per player for every role.

    Parameters
    ----------
    data : pandas.DataFrame
        Player table containing the metric columns named in ``role_definitions``.
    role_definitions : dict[str, dict[str, float]]
        Mapping of role name -> {metric column name: weight}.

    Returns
    -------
    pandas.DataFrame
        One column per role, indexed like ``data``. A role is omitted when
        none of its metrics could be standardized.

    Notes
    -----
    Weights are matched to metrics by name. When some of a role's metrics are
    missing from ``data``, the remaining weights are rescaled to keep the
    role's original weight total, so scores stay on a comparable scale.
    """
    role_scores = {}
    for role, metrics_weights in role_definitions.items():
        # Standardize metrics for the role
        standardized = standardize_metrics(data, metrics_weights.keys())
        if standardized.empty:
            continue

        # Keep only the weights whose metric survived standardization; the
        # positional weight list used to break as soon as one was missing.
        all_weights = pd.Series(metrics_weights, dtype=float)
        weights = all_weights.reindex(standardized.columns)

        if len(weights) < len(all_weights):
            available = weights.sum()
            if available != 0:
                weights = weights * (all_weights.sum() / available)

        # Compute weighted Z-scores for the role
        role_scores[role] = standardized.dot(weights).to_numpy()

    return pd.DataFrame(role_scores, index=data.index)

# Score every player against every role definition
role_scores = calculate_role_scores(data, roles_metrics_weights)

# Place the identifying columns in front of the per-role scores (column-wise concat)
final_scores = pd.concat(
    objs=[data[["Player", "Team"]], role_scores],
    axis="columns",
)

# Write the table to an Excel workbook on the mounted Drive, without the index column
output_path = '/content/drive/My Drive/player_role_scores.xlsx'
final_scores.to_excel(excel_writer=output_path, index=False)

print(f"Role scores calculated and saved to {output_path}.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Role scores calculated and saved to /content/drive/My Drive/player_role_scores.xlsx.


In [None]:
from scipy import stats
from math import pi

# Specify the player name
player_name = "J. Hato"

# Extract player data; if several rows share the name, the first one is used below
player = final_scores.loc[final_scores['Player'] == player_name].reset_index(drop=True)

if player.empty:
    raise ValueError(f"Player {player_name} not found in dataset.")

# Keep only the role-score columns: 'Player' and 'Team' are identifiers, not
# metrics, and a percentile of a text column is meaningless.
params = [column for column in final_scores.columns if column not in ("Player", "Team")]

# Percentile rank of the player's score within the full score table
# (the same frame the player row was taken from), floored to a whole number.
percentiles = [
    math.floor(stats.percentileofscore(final_scores[param], player[param].iloc[0]))
    for param in params
]

# Create a DataFrame for visualization, best role first
percentile_df = pd.DataFrame({
    'Metric': params,
    'Percentile': percentiles
}).sort_values(by='Percentile', ascending=False)

# Assign colors based on percentile ranges
def assign_color(percentile):
    """Map a percentile rank to a bar color.

    Bands are contiguous: up to 25 -> 'red', above 25 up to 50 -> 'orange',
    above 50 up to 75 -> 'yellow', anything higher -> 'green'. Whole-number
    inputs get the same color as before; fractional inputs such as 25.5 now
    land in the next band instead of falling through to 'green'.

    Parameters
    ----------
    percentile : int or float
        Percentile rank to classify.

    Returns
    -------
    str
        One of 'red', 'orange', 'yellow', 'green'.
    """
    if percentile <= 25:
        return 'red'
    if percentile <= 50:
        return 'orange'
    if percentile <= 75:
        return 'yellow'
    return 'green'

percentile_df['Color'] = percentile_df['Percentile'].apply(assign_color)

# Plot a horizontal bar chart with one colored bar per row of percentile_df,
# using an explicit Figure/Axes pair instead of pyplot's implicit current figure.
fig, ax = plt.subplots(figsize=(12, 8))
bars = ax.barh(
    percentile_df['Metric'],
    percentile_df['Percentile'],
    color=percentile_df['Color'].tolist(),
)

# Add labels to the bars
for bar, percentile in zip(bars, percentile_df['Percentile']):
    ax.text(
        bar.get_width() + 2,  # Position label slightly to the right of the bar
        bar.get_y() + bar.get_height() / 2,  # Vertically center the label
        str(percentile),
        va='center', fontsize=10
    )

# Leave room past 100 so the label next to a 100th-percentile bar stays inside the axes
ax.set_xlim(0, 110)
ax.set_xlabel('Percentile', fontsize=14)
ax.set_ylabel('Metrics', fontsize=14)
ax.set_title(f'Percentile Ranks for {player_name}', fontsize=16)
ax.invert_yaxis()  # Highest percentile at the top
fig.tight_layout()
plt.show()

NameError: name 'math' is not defined