In [8]:
import pandas as pd

# Load the data
df = pd.read_csv("/Users/marclambertes/Python/Matches/Men/2024-2025/A-League 2024-2025/Sydney 2-3 Melbourne City.csv")

# Filter relevant events
goals = df[df['typeId'] == 16]  # Goals
shots = df[df['typeId'].isin([13, 14, 15])]  # All shot types
passes = df[df['typeId'] == 1]  # Passes
defensive_actions = df[df['typeId'].isin([7, 8, 12])]  # Tackles, Clearances, Interceptions
aerial_duels = df[df['typeId'] == 44]  # Aerial duels
failed_controls = df[df['typeId'] == 45]  # Failed to control ball
ball_recoveries = df[df['typeId'] == 49]  # Ball recoveries
dispossessions = df[df['typeId'] == 50]  # Dispossessed
errors = df[df['typeId'] == 51]  # Errors

# Get team IDs
teams = df['contestantId'].unique()
team_names = {teams[0]: "Sydney FC", teams[1]: "Melbourne City"}

# --- 1. Goal Contributions ---
goal_contributions = goals.groupby(['contestantId', 'playerName']).size().reset_index(name='goals')

# --- 2. Key Passes (Passes leading to shots) ---
# Create a time-ordered index with event IDs for better matching
df = df.sort_values(['periodId', 'timeMin', 'timeSec'])
df['next_event_type'] = df['typeId'].shift(-1)
df['next_event_player'] = df['playerName'].shift(-1)

# Get all passes that immediately precede shots
key_passes = df[
    (df['typeId'] == 1) &  # Current event is pass
    (df['next_event_type'].isin([13, 14, 15])) &  # Next event is any shot type
    (df['contestantId'] == df['contestantId'].shift(-1))  # Same team sequence
].groupby(['contestantId', 'playerName']).size().reset_index(name='key_passes')

# --- 3. Defensive Actions ---
defensive_stats = defensive_actions.groupby(['contestantId', 'playerName']).size().reset_index(name='defensive_actions')

# --- 4. Shot Statistics ---
# Create shot outcome columns
shots['shot_outcome'] = 'off_target'  # Default to off-target
shots.loc[shots['typeId'].isin([14, 15]), 'shot_outcome'] = 'on_target'

shot_stats = shots.groupby(['contestantId', 'playerName']).size().reset_index(name='total_shots')
shot_outcomes = shots.groupby(['contestantId', 'playerName', 'shot_outcome']).size().unstack(fill_value=0)
shot_outcomes = shot_outcomes.reset_index()

# --- 5. Aerial Duels ---
aerial_stats = aerial_duels.groupby(['contestantId', 'playerName', 'outcome']).size().unstack(fill_value=0)
aerial_stats.columns = ['aerial_lost', 'aerial_won']
aerial_stats = aerial_stats.reset_index()
aerial_stats['aerial_duels'] = aerial_stats['aerial_won'] + aerial_stats['aerial_lost']

# --- 6. Failed Controls ---
failed_control_stats = failed_controls.groupby(['contestantId', 'playerName']).size().reset_index(name='failed_controls')

# --- 7. Ball Recoveries ---
recovery_stats = ball_recoveries.groupby(['contestantId', 'playerName']).size().reset_index(name='ball_recoveries')

# --- 8. Dispossessions ---
dispossession_stats = dispossessions.groupby(['contestantId', 'playerName']).size().reset_index(name='dispossessions')

# --- 9. Errors ---
error_stats = errors.groupby(['contestantId', 'playerName']).size().reset_index(name='errors')

# --- Merge Data ---
player_stats = pd.merge(
    goal_contributions,
    key_passes,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    defensive_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    shot_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    shot_outcomes,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    aerial_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    failed_control_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    recovery_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    dispossession_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

player_stats = pd.merge(
    player_stats,
    error_stats,
    on=['contestantId', 'playerName'],
    how='outer'
).fillna(0)

# --- Calculate Goal Difference Added (GDA) ---
# Enhanced heuristic with additional metrics
player_stats['GDA'] = (
    player_stats['goals'] * 1.0 +                     # Full weight for goals
    player_stats['key_passes'] * 0.4 +                # Increased weight for key passes
    player_stats['on_target'] * 0.3 +                 # On-target shots
    player_stats['off_target'] * 0.1 +                # Off-target shots
    player_stats['aerial_won'] * 0.2 +                # Aerial duels won
    player_stats['ball_recoveries'] * 0.15 -          # Ball recoveries
    player_stats['defensive_actions'] * 0.1 -         # Defensive actions (already counted in recoveries)
    player_stats['aerial_lost'] * 0.1 -               # Aerial duels lost
    player_stats['failed_controls'] * 0.15 -          # Failed ball controls
    player_stats['dispossessions'] * 0.2 -            # Dispossessions
    player_stats['errors'] * 0.3                      # Errors
)

# Add team names and clean up columns
player_stats['team'] = player_stats['contestantId'].map(team_names)
player_stats = player_stats[[
    'team', 'playerName', 'goals', 'total_shots', 'on_target', 'off_target',
    'key_passes', 'defensive_actions', 'aerial_duels', 'aerial_won', 'aerial_lost',
    'failed_controls', 'ball_recoveries', 'dispossessions', 'errors', 'GDA'
]]

# Sort by GDA and clean up
player_stats = player_stats.sort_values('GDA', ascending=False)

# --- Save to Excel ---
output_path = "/Users/marclambertes/Python/Player_GDA_Analysis_Enhanced.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # All players combined
    player_stats.to_excel(writer, sheet_name='All Players', index=False)
    
    # By team
    for team in team_names.values():
        team_df = player_stats[player_stats['team'] == team]
        team_df.to_excel(writer, sheet_name=team[:31], index=False)  # Sheet name limit

print(f"Analysis complete. Results saved to {output_path}")

# --- Display Top Performers ---
print("\nTop 5 Players Overall:")
print(player_stats.head(5).to_string(index=False))

for team in team_names.values():
    print(f"\nTop 3 Players for {team}:")
    print(player_stats[player_stats['team'] == team].head(3).to_string(index=False))

Analysis complete. Results saved to /Users/marclambertes/Python/Player_GDA_Analysis_Enhanced.xlsx

Top 5 Players Overall:
          team playerName  goals  total_shots  on_target  off_target  key_passes  defensive_actions  aerial_duels  aerial_won  aerial_lost  failed_controls  ball_recoveries  dispossessions  errors  GDA
     Sydney FC  M. Caputo    1.0          5.0        4.0         1.0         0.0                2.0          10.0         4.0          6.0              0.0              3.0             1.0     0.0 2.55
     Sydney FC  A. Lopane    1.0          2.0        2.0         0.0         2.0                5.0           1.0         1.0          0.0              3.0              7.0             1.0     0.0 2.50
Melbourne City  J. Lolley    0.0          1.0        1.0         0.0         5.0                3.0           1.0         1.0          0.0              2.0              1.0             1.0     0.0 1.85
     Sydney FC  M. Memeti    1.0          2.0        2.0         0.0  