## Player Metrics and Sub-Role Analysis

# In [None]:

import pandas as pd

# Read the 'Search results (500)' sheet of the workbook into a DataFrame
file_path = '/mnt/data/Eerste Divisie - 2023-2024.xlsx'
data = pd.read_excel(io=file_path, sheet_name='Search results (500)')

# Sub-role categorization
def categorize_sub_role(position):
    """Map a position value to a coarse sub-role label.

    The position codes are matched by substring, so multi-position values
    such as ``'RCB, RB'`` and prefixed/suffixed codes such as ``'LCMF'`` are
    accepted. When codes from several roles are present, the first matching
    role in the priority order below wins.

    Args:
        position: Position value for one player. Anything that is not a
            string (e.g. NaN from an empty spreadsheet cell, or None) is
            treated as unknown.

    Returns:
        One of 'Goalkeeper', 'Center Back', 'Full Back',
        'Defensive Midfielder', 'Central Midfielder', 'Attacking Midfielder',
        'Winger', 'Striker' or 'Other'.
    """
    # A missing cell arrives as float NaN; ``'GK' in nan`` would raise
    # TypeError and abort the whole ``apply``.
    if not isinstance(position, str):
        return 'Other'

    # Ordered (role, codes) pairs; order is the match priority.
    # 'CB' also covers 'LCB'/'RCB' and 'CMF' covers 'LCMF'/'RCMF'.
    # Wing-backs are listed explicitly: 'LWB'/'RWB' contain neither 'LB' nor
    # 'RB', and would otherwise match the winger codes 'LW'/'RW'.
    role_codes = (
        ('Goalkeeper', ('GK',)),
        ('Center Back', ('CB',)),
        ('Full Back', ('LB', 'RB', 'LWB', 'RWB')),
        ('Defensive Midfielder', ('DMF',)),
        ('Central Midfielder', ('CMF',)),
        ('Attacking Midfielder', ('AMF',)),
        ('Winger', ('LW', 'RW')),
        ('Striker', ('CF', 'ST')),
    )
    for role, codes in role_codes:
        if any(code in position for code in codes):
            return role
    return 'Other'

# Label every player with a coarse sub-role derived from the Position column
data['Sub_Role'] = data['Position'].apply(categorize_sub_role)

# Expanded metrics
# Normalize key statistics per 90 minutes.
# Rows with no (or non-positive) minutes would divide by zero and produce
# inf (or NaN for 0/0); mask them so the per-90 rate is NaN instead of an
# infinite value that would dominate any downstream sum or sort.
minutes_played = data['Minutes played']
nineties_played = minutes_played.where(minutes_played > 0) / 90
data['Goals_per_90'] = data['Goals'] / nineties_played
data['Assists_per_90'] = data['Assists'] / nineties_played

# Columns that are already per-90 in the sheet are copied under
# underscore-style names so later code can refer to them uniformly.
data['Key_Passes_per_90'] = data['Key passes per 90']
data['Dribbles_per_90'] = data['Dribbles per 90']
data['Tackles_per_90'] = data['Tackles per 90']
data['Interceptions_per_90'] = data['Interceptions per 90']
data['Shots_on_Target_per_90'] = data['Shots on target per 90']
# Percentages are rescaled by 1/100
data['Pass_Completion_Percentage'] = data['Pass completion, %'] / 100
data['Aerial_Duels_Won_Percentage'] = data['Aerial duels won, %'] / 100
data['Progressive_Passes_per_90'] = data['Progressive passes per 90']
data['Progressive_Carries_per_90'] = data['Progressive carries per 90']
data['Crosses_per_90'] = data['Crosses per 90']
data['Touches_in_Attacking_Box_per_90'] = data['Touches in attacking box per 90']
data['Recoveries_per_90'] = data['Recoveries per 90']
data['Passes_to_Final_Third_per_90'] = data['Passes to final third per 90']
data['Key_Defensive_Actions_per_90'] = data['Tackles per 90'] + data['Interceptions per 90'] + data['Clearances per 90']

# Derived metrics (plain sums/differences of the columns above)
data['Goal_Contribution_per_90'] = data['Goals_per_90'] + data['Assists_per_90']
data['Defensive_Actions_per_90'] = data['Tackles_per_90'] + data['Interceptions_per_90']
# Total goals minus total xG (not a per-90 value)
data['xG_Difference'] = data['Goals'] - data['xG']
data['Offensive_Actions_per_90'] = data['Dribbles_per_90'] + data['Key_Passes_per_90'] + data['Crosses_per_90']
data['Total_Progressive_Actions_per_90'] = data['Progressive_Passes_per_90'] + data['Progressive_Carries_per_90'] + data['Passes_to_Final_Third_per_90']
data['Attacking_Efficiency'] = data['Touches_in_Attacking_Box_per_90'] + data['Shots_on_Target_per_90'] + data['Goals_per_90']
data['Defensive_Efficiency'] = data['Key_Defensive_Actions_per_90'] + data['Recoveries_per_90']

# Define weights for metrics by sub-role.
# Maps each Sub_Role label to a {column name: weight} table. The tables are
# consumed by calculate_weighted_score, which multiplies each column value by
# its weight and silently skips any key that is not a column of the row.
# There is no 'Other' entry: the lookup below uses .get(..., {}), so players
# in that sub-role score 0.
sub_role_weights = {
    'Goalkeeper': {
        # NOTE(review): 'saves' is lower-case unlike every other column name
        # used here; if no such column exists it is silently ignored — confirm.
        'saves': 3,
        'Prevented goals per 90': 3,
        'Exits per 90': 2,
        'Back passes received as GK per 90': 2,
        'Recoveries_per_90': 2,
    },
    'Center Back': {
        # NOTE(review): the '.1' suffix looks like the name pandas gives to a
        # duplicated column header — confirm this is the intended column.
        'Aerial duels per 90.1': 3,
        'Tackles_per_90': 3,
        'Clearances per 90': 3,
        'Interceptions_per_90': 2,
        'Pass_Completion_Percentage': 2,
        'Defensive_Efficiency': 3,
    },
    'Full Back': {
        'Tackles_per_90': 3,
        'Interceptions_per_90': 2,
        'Dribbles_per_90': 2,
        'Key_Passes_per_90': 2,
        'Pass_Completion_Percentage': 3,
        'Total_Progressive_Actions_per_90': 2,
        'Crosses_per_90': 2,
    },
    'Defensive Midfielder': {
        'Tackles_per_90': 3,
        'Interceptions_per_90': 3,
        'Pass_Completion_Percentage': 2,
        'Key_Passes_per_90': 2,
        'Recoveries_per_90': 2,
    },
    'Central Midfielder': {
        'Assists_per_90': 2,
        'Key_Passes_per_90': 3,
        'Dribbles_per_90': 2,
        'Tackles_per_90': 2,
        'Pass_Completion_Percentage': 3,
        'Total_Progressive_Actions_per_90': 2,
        'Passes_to_Final_Third_per_90': 2,
    },
    'Attacking Midfielder': {
        'Goals_per_90': 3,
        'Key_Passes_per_90': 3,
        'Dribbles_per_90': 3,
        'Assists_per_90': 2,
        'xG_Difference': 2,
        'Offensive_Actions_per_90': 3,
        'Attacking_Efficiency': 3,
    },
    'Winger': {
        'Goals_per_90': 2,
        'Key_Passes_per_90': 3,
        'Dribbles_per_90': 3,
        'Assists_per_90': 3,
        'xG_Difference': 2,
        'Offensive_Actions_per_90': 3,
        'Crosses_per_90': 2,
        'Touches_in_Attacking_Box_per_90': 3,
    },
    'Striker': {
        'Goals_per_90': 3,
        # NOTE(review): 'xG' is used raw here; elsewhere it is subtracted from
        # total Goals, so it is presumably a total rather than a per-90
        # value — confirm mixing it with per-90 metrics is intended.
        'xG': 3,
        'Shots_on_Target_per_90': 3,
        'Dribbles_per_90': 2,
        'Aerial_Duels_Won_Percentage': 2,
        'Attacking_Efficiency': 3,
    }
}

# Calculate weighted scores for sub-roles
def calculate_weighted_score(row, role_weights):
    """Return the weighted sum of a player's metrics.

    Args:
        row: Mapping-like record of one player (supports ``in`` and
            ``[]`` by metric name), e.g. a DataFrame row.
        role_weights: Mapping of metric name to its weight.

    Returns:
        The sum of ``row[metric] * weight`` over every metric that is
        present in ``row`` and not missing (NaN/None); ``0`` when nothing
        contributes.
    """
    return sum(
        row[name] * factor
        for name, factor in role_weights.items()
        if name in row and not pd.isna(row[name])
    )

def _score_for_row(player_row):
    """Score one player with the weight table of that player's sub-role."""
    # Sub-roles without a weight table fall back to an empty one
    weights_for_role = sub_role_weights.get(player_row['Sub_Role'], {})
    return calculate_weighted_score(player_row, weights_for_role)


data['Sub_Role_Weighted_Score'] = data.apply(_score_for_row, axis=1)

# Keep the reporting columns and rank players from highest to lowest score
final_data = data[[
    'Player', 'Team', 'Sub_Role', 'Goals', 'Assists',
    'Goals_per_90', 'Assists_per_90', 'Defensive_Actions_per_90',
    'Key_Passes_per_90', 'Dribbles_per_90', 'Shots_on_Target_per_90',
    'xG_Difference', 'Offensive_Actions_per_90',
    'Total_Progressive_Actions_per_90', 'Attacking_Efficiency',
    'Defensive_Efficiency', 'Sub_Role_Weighted_Score',
]].sort_values(by='Sub_Role_Weighted_Score', ascending=False)

# Show the ranked table of expanded metrics and scores to the user
import ace_tools as tools
tools.display_dataframe_to_user(name="Expanded Metrics and Weighted Scores", dataframe=final_data)
    