In [60]:
import pandas as pd
import math

# Define the file path to your CSV
file_path = "/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/Heerenveen 3-3 Twente.csv"  # Replace with the path to your CSV

# Load the CSV data into a pandas DataFrame
df = pd.read_csv(file_path)

# Define the list of qualifier columns that contain qualifier IDs
type_cols = [col for col in df.columns if '/qualifierId' in col]

# Add 'endX' and 'endY' columns for storing the end position of the pass
df['endX'] = 0.0
df['endY'] = 0.0
for i in range(len(df)):
    df1 = df.iloc[i:i+1,:]
    
    # Calculate endX (the X position of the pass end)
    for j in range(len(type_cols)):
        col = df1[type_cols[j]].values[0]
        if col == 140:  # Check for 'endX' qualifier
            endx = df1.loc[:,'qualifier/%i/value' %j].values[0]
            df['endX'][i] = endx
    
    # Calculate endY (the Y position of the pass end)
    for k in range(len(type_cols)):
        col = df1[type_cols[k]].values[0]
        if col == 141:  # Check for 'endY' qualifier
            endy = df1.loc[:,'qualifier/%i/value' %k].values[0]
            df['endY'][i] = endy

# After endX and endY are updated, detect Key Pass (based on typeId 210)
df['Key pass'] = 0
for i in range(len(df)):
    df1 = df.iloc[i:i+1,:]
    for j in range(len(type_cols)):
        col = df1[type_cols[j]].values[0]
        if col == 210:  # Key pass event (typeId == 210)
            df['Key pass'][i] = 1

# After endX and endY are updated, detect Cross (based on typeId 2)
df['Cross'] = 0
for i in range(len(df)):
    df1 = df.iloc[i:i+1,:]
    for j in range(len(type_cols)):
        col = df1[type_cols[j]].values[0]
        if col == 2:  # Cross event (typeId == 2)
            df['Cross'][i] = 1

# Define a function to calculate the distance to goal (for shots, etc.)
def calculate_distance_to_goal(x, y):
    goal_x = 105  # Assuming the goal is at x=105
    goal_y = 68 / 2  # Assuming the goal is centered vertically at 34
    distance = math.sqrt((goal_x - x) ** 2 + (goal_y - y) ** 2)
    return distance

# Define value assignment function with position-based weighting (including Receiving and Key Pass)
def assign_event_value(row):
    value = 0
    category = None
    
    # Calculate the distance to goal for shots (distance to goal logic remains the same)
    distance_to_goal = calculate_distance_to_goal(row['x'], row['y'])
    
    # Initialize all categories to 0
    shots_value = 0
    passes_value = 0
    dribbles_value = 0
    defensive_actions_value = 0
    fouls_value = 0
    receiving_value = 0
    cross_value = 0  # Initialize Cross value
    key_pass_value = 0  # Initialize Key Pass value

    # Determine the zone based on the X-coordinate
    if row['x'] < 35:
        zone = 'defensive'
    elif row['x'] < 70:
        zone = 'middle'
    else:
        zone = 'attacking'

    # Shot values (typeId 13, 14, 15, 16) with distance-based weighting
    if row['typeId'] in [13, 14, 15]:
        shots_value = -0.2  # Shots that are not goals (negative value for missed shots)
        if distance_to_goal < 20:  # Close range shots
            shots_value = 1.2
        elif distance_to_goal < 40:  # Mid-range shots
            shots_value = 0.8
    
    elif row['typeId'] == 16 and row['outcome'] == 1:  # Goal
        shots_value = 1.5  # Goal for typeId 16 (outcome 1 means Goal)
        if distance_to_goal < 20:  # Close range goal
            shots_value = 2.0
    
    # Passes (typeId 1) with position-based weighting
    if row['typeId'] == 1:
        if row['outcome'] == 1:  # Successful pass
            passes_value = 0.5
            if zone == 'attacking':  # If in attacking zone
                passes_value = 1.0
            elif zone == 'middle':  # If in middle zone
                passes_value = 0.7
        elif row['outcome'] == 0:  # Unsuccessful pass
            passes_value = -0.3  # Negative for unsuccessful passes
    
    # Dribbles (typeId 3) with position-based weighting
    if row['typeId'] == 3:
        if row['outcome'] == 1:  # Successful dribble
            dribbles_value = 0.8
            if zone == 'attacking':  # In the attacking zone
                dribbles_value = 1.2
            elif zone == 'middle':  # In the middle zone
                dribbles_value = 1.0
        elif row['outcome'] == 0:  # Unsuccessful dribble
            dribbles_value = -0.4  # Negative for unsuccessful dribbles
    
    # Defensive Actions (typeId 7, 8) with position-based weighting
    if row['typeId'] in [7, 8]:
        if row['outcome'] == 1:  # Successful defensive action
            defensive_actions_value = 1.0
            if zone == 'defensive':  # If in the defensive zone
                defensive_actions_value = 1.5
        elif row['outcome'] == 0:  # Failed defensive action
            defensive_actions_value = -0.3  # Negative for failed defensive actions
    
    # Additional Defensive Actions (typeId 4) with position-based weighting
    if row['typeId'] == 4:
        if row['outcome'] == 1:  # Successful defensive action
            defensive_actions_value = 1.0
        elif row['outcome'] == 0:  # Failed defensive action
            defensive_actions_value = -0.3  # Negative for failed defensive actions
    
    # Fouls (typeId 5) - Fouls are negative regardless of position
    if row['typeId'] == 5:
        fouls_value = -0.5  # Committed fouls are always negative
    
    # Receiving pass logic
    if not pd.isna(row['endX']) and not pd.isna(row['endY']):
        receiving_value = 0.7  # Base value for receiving the ball successfully
    
    # Key Pass logic (based on typeId 210)
    if row['Key pass'] == 1:  # Check if it's a Key Pass
        key_pass_value = 1.0  # Assign value for Key Pass
    
    # Cross value (based on typeId 2)
    if row['Cross'] == 1:  # If it's a cross
        cross_value = 1.0  # Assign a value for cross
    
    # Return the individual category values including Key Pass and Cross
    return (shots_value, passes_value, dribbles_value, defensive_actions_value, fouls_value, receiving_value, cross_value, key_pass_value)

# Apply the value assignment function to each row in the DataFrame
df[['shots_value', 'passes_value', 'dribbles_value', 'defensive_actions_value', 'fouls_value', 'receiving_value', 'cross_value', 'key_pass_value']] = df.apply(assign_event_value, axis=1, result_type='expand')

# Create a new DataFrame that sums up event values by player, contestantId
event_summary = df.groupby(['playerName', 'contestantId']).agg(
    shots_value=('shots_value', 'sum'),
    passes_value=('passes_value', 'sum'),
    dribbles_value=('dribbles_value', 'sum'),
    defensive_actions_value=('defensive_actions_value', 'sum'),
    fouls_value=('fouls_value', 'sum'),
    receiving_value=('receiving_value', 'sum'),
    cross_value=('cross_value', 'sum'),
    key_pass_value=('key_pass_value', 'sum')
)

# Add a total event value column
event_summary['total_event_value'] = event_summary.sum(axis=1)

# Calculate Z-scores for each event category and the total event value
event_summary_z_scores = event_summary.apply(lambda x: (x - x.mean()) / x.std(), axis=0)

# Save the summary DataFrame with Z-scores to an Excel file
event_summary_z_scores.to_excel("player_event_summary_with_zone_weighting.xlsx", index=True)

# Display the summarized DataFrame with Z-scores
print(event_summary_z_scores)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['endX'][i] = endx
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['endY'][i] = endy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Key pass'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Cross'][i] = 1


                                              shots_value  passes_value  \
playerName         contestantId                                           
A. Jahanbakhsh     4vd2t5schmvvufrfib7f2vjdf     0.506617      0.348200   
A. Van Hoorenbeeck 4tic29sox7m39fy1ztgv0jsiq    -0.623529     -0.937780   
A. Verschueren     4tic29sox7m39fy1ztgv0jsiq    -0.764798     -0.684385   
B. Kuipers         4tic29sox7m39fy1ztgv0jsiq    -0.623529     -0.646376   
B. van Rooij       4tic29sox7m39fy1ztgv0jsiq    -0.623529      0.278516   
C. Nunnely         4vd2t5schmvvufrfib7f2vjdf    -0.623529     -0.735064   
D. Rallis          4vd2t5schmvvufrfib7f2vjdf    -0.764798     -0.430990   
D. Rots            4tic29sox7m39fy1ztgv0jsiq     1.636764     -0.361306   
E. Gürbüz          4vd2t5schmvvufrfib7f2vjdf     1.354228     -0.703390   
E. van Ee          4vd2t5schmvvufrfib7f2vjdf     2.555008      1.266757   
G. Lagerbielke     4tic29sox7m39fy1ztgv0jsiq     0.435983     -0.608367   
L. Smans           4vd2t5