In [2]:
import pandas as pd
import numpy as np

# Load dataset
file_path = "/Users/marclambertes/Downloads/Outswinger FC - Expected Goals/Bundesliga xG 04022025.xlsx"  # Update with your file path
df = pd.read_excel(file_path)

# Force the coordinate and outcome columns to numeric dtypes.
# Step 1: anything that cannot be parsed as a number becomes NaN.
# Step 2: every NaN (originally missing or unparseable) is replaced by 0.
# NOTE(review): zero-filling means rows with a missing coordinate are kept as
# x=0 / y=0 and rows with a missing outcome count as "no goal", rather than
# being excluded -- confirm this is the intended treatment.
numeric_columns = ['x', 'y', 'isGoal']
coerced = df[numeric_columns].apply(pd.to_numeric, errors='coerce')
df[numeric_columns] = coerced.fillna(0)

### Zone Efficiency Metrics ###
# Label every row with one of twelve bands along the x axis.  The labels are
# listed in the same order as the bands, from the lowest x values to the highest.
zone_names = [
    'Deep Left Defensive Zone',
    'Left Defensive Zone',
    'Left Midfield Deep',
    'Left Midfield Advanced',
    'Central Deep Defensive Zone',
    'Central Midfield Zone',
    'Central Attacking Zone',
    'Central Advanced Zone',
    'Right Midfield Advanced',
    'Right Midfield Deep',
    'Right Defensive Zone',
    'Deep Right Defensive Zone',
]

# The eleven interior boundaries that separate the twelve bands.
x_boundaries = [8.25, 16.5, 24.75, 33, 41.25, 49.5, 57.75, 66, 74.25, 82.5, 90.75]
shot_x = df['x']

# The first band is open below (x <= first boundary), the last band is open
# above (x > last boundary), and every band in between is the half-open
# interval (lower, upper].
conditions = (
    [shot_x <= x_boundaries[0]]
    + [
        (shot_x > lower) & (shot_x <= upper)
        for lower, upper in zip(x_boundaries[:-1], x_boundaries[1:])
    ]
    + [shot_x > x_boundaries[-1]]
)

# A row that satisfies none of the masks receives the 'Unknown' label.
df['PitchZone'] = np.select(conditions, zone_names, default='Unknown')

# Zone efficiency calculation
def calculate_efficiency(group):
    """Return the share of rows in ``group`` that are goals.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows to evaluate; must contain an ``isGoal`` column whose values
        sum to the number of goals.

    Returns
    -------
    The sum of ``isGoal`` divided by the number of rows, or ``0`` when
    ``group`` has no rows.
    """
    goals_scored = group['isGoal'].sum()
    attempt_count = len(group)
    # Guard against dividing by zero for an empty group.
    if attempt_count <= 0:
        return 0
    return goals_scored / attempt_count

# Goals per attempt for every pitch zone, as a Series indexed by 'PitchZone'.
# 'isGoal' is selected explicitly (as a one-column frame) before apply() so the
# applied function never receives the grouping column itself; letting apply()
# operate on grouping columns is deprecated in pandas 2.2 and emits a
# DeprecationWarning.  The function only needs the row count and 'isGoal',
# so the values are unchanged.
zone_efficiency = df.groupby('PitchZone')[['isGoal']].apply(calculate_efficiency)
print("Zone Efficiency:")
print(zone_efficiency)


Zone Efficiency:
PitchZone
Central Advanced Zone        0.000000
Central Attacking Zone       0.333333
Central Midfield Zone        0.000000
Deep Left Defensive Zone     1.000000
Deep Right Defensive Zone    0.200000
Left Defensive Zone          1.000000
Left Midfield Advanced       0.000000
Right Defensive Zone         0.121911
Right Midfield Advanced      0.042373
Right Midfield Deep          0.048430
dtype: float64
