In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load data from Excel file
file_path = "/Users/marclambertes/Downloads/Outswinger FC - Expected Goals/Bundesliga xG 04022025.xlsx"
df = pd.read_excel(file_path)

# Aggregate metrics per player
player_metrics = df.groupby('PlayerId').agg({
    'xG': 'sum',  # Sum of expected goals
    'Goal': 'sum',  # Sum of actual goals
    'isBigChance': 'sum',  # Total big chances
    'isGoal': 'sum',  # Total goals from big chances
    'isAssistedShot': 'sum',  # Total assisted shots
    'isIntentionalAssist': 'sum',  # Total intentional assists
    'timeMin': 'sum',  # Total minutes
    'timeSec': 'sum'  # Total seconds
}).reset_index()

# Calculate derived metrics
player_metrics['xG_per_Shot'] = player_metrics['xG']  # Assume xG as shot quality proxy
player_metrics['Goal_Performance'] = player_metrics['Goal'] - player_metrics['xG']  # Goal overperformance
player_metrics['BigChance_Conversion'] = np.where(player_metrics['isBigChance'] > 0,
                                                  player_metrics['isGoal'] / player_metrics['isBigChance'], 0)
player_metrics['Assist_Contribution'] = np.where(player_metrics['isAssistedShot'] > 0,
                                                 player_metrics['isIntentionalAssist'] / player_metrics['isAssistedShot'], 0)
player_metrics['MinutesPlayed'] = player_metrics['timeMin'] + player_metrics['timeSec'] / 60  # Total minutes played

# Normalize the metrics using MinMaxScaler
scaler = MinMaxScaler()

normalized_metrics = player_metrics[['xG_per_Shot', 'Goal_Performance', 'BigChance_Conversion',
                                      'Assist_Contribution', 'MinutesPlayed']].fillna(0)

scaled_values = scaler.fit_transform(normalized_metrics)
scaled_df = pd.DataFrame(scaled_values, columns=['xG_per_Shot_norm', 'Goal_Performance_norm',
                                                  'BigChance_Conversion_norm', 'Assist_Contribution_norm',
                                                  'MinutesPlayed_norm'])

# Merge normalized values back into the DataFrame
player_metrics = pd.concat([player_metrics[['PlayerId']], scaled_df], axis=1)

# Calculate the Goal Impact Rating (GIR)
weight_xG_per_Shot = 0.3
weight_Goal_Performance = 0.4
weight_BigChance_Conversion = 0.15
weight_Assist_Contribution = 0.1
weight_MinutesPlayed = 0.05

player_metrics['GIR'] = (weight_xG_per_Shot * player_metrics['xG_per_Shot_norm'] +
                         weight_Goal_Performance * player_metrics['Goal_Performance_norm'] +
                         weight_BigChance_Conversion * player_metrics['BigChance_Conversion_norm'] +
                         weight_Assist_Contribution * player_metrics['Assist_Contribution_norm'] +
                         weight_MinutesPlayed * player_metrics['MinutesPlayed_norm'])

# Display the results (GIR for each player)
pe_scores = player_metrics[['PlayerId', 'GIR']]
print(pe_scores)

# Save the results back to an Excel file
pe_scores.to_excel("GIR_Player_Scores.xlsx", index=False)


TypeError: '>' not supported between instances of 'str' and 'int'