In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

# Load the dataset
file_path = '/Users/marclambertes/Python/Matches/Men/2024-2025/Eredivisie 2024-2025/AZ 1-0 Fortuna.csv'
df = pd.read_csv(file_path)


# Filter relevant columns (pass, shot, assist, player information)
df_filtered = df[['timeMin', 'timeSec', 'contestantId', 'typeId', 'outcome', 'x', 'y', 'assist', 'playerId', 'playerName', 'keyPass']]

# Split contestantId into Home and Away teams
df_filtered['home_team'] = np.where(df_filtered['contestantId'] == 1, 1, 0)
df_filtered['away_team'] = np.where(df_filtered['contestantId'] == 2, 1, 0)

# Create endX and endY locations
type_cols = [col for col in df_filtered.columns if '/qualifierId' in col]
df_filtered['endX'] = 0.0
df_filtered['endY'] = 0.0
for i in range(len(df_filtered)):
    df1 = df_filtered.iloc[i:i+1, :]
    for j in range(len(type_cols)):
        col = df1[type_cols[j]].values[0]
        if col == 140:  # endX
            endx = df1.loc[:, 'qualifier/%i/value' % j].values[0]
            df_filtered['endX'][i] = endx
        if col == 141:  # endY
            endy = df1.loc[:, 'qualifier/%i/value' % j].values[0]
            df_filtered['endY'][i] = endy

# Calculate Pass Length and Pass Angle
df_filtered['pass_length'] = 0.0
df_filtered['pass_angle'] = 0.0
for i in range(len(df_filtered)):
    df1 = df_filtered.iloc[i:i+1, :]
    for j in range(len(type_cols)):
        col = df1[type_cols[j]].values[0]
        if col == 213:  # Angle
            angle = df1.loc[:, 'qualifier/%i/value' % j].values[0]
            df_filtered['pass_angle'][i] = angle
        if col == 212:  # Length
            length = df1.loc[:, 'qualifier/%i/value' % j].values[0]
            df_filtered['pass_length'][i] = length

# Filter passes (typeId = 1), shots (typeId = 13, 14, 15, 16), and assists
passes_df = df_filtered[df_filtered['typeId'] == 1]
shots_df = df_filtered[df_filtered['typeId'].isin([13, 14, 15, 16])]
assists_df = df_filtered[df_filtered['assist'] == 1]
key_passes_df = df_filtered[df_filtered['keyPass'] == 1]  # Key passes that lead to potential goals

# Load the EPV grid
epv = pd.read_csv("epv_grid.csv", header=None)  # Assuming epv_grid.csv was saved earlier
epv = np.array(epv)  # Convert to numpy array
epv_rows, epv_cols = epv.shape  # Get rows and columns

# Ensure coordinate columns are numeric
df_filtered['x'] = pd.to_numeric(df_filtered['x'], errors='coerce')
df_filtered['y'] = pd.to_numeric(df_filtered['y'], errors='coerce')
df_filtered['endX'] = pd.to_numeric(df_filtered['endX'], errors='coerce')
df_filtered['endY'] = pd.to_numeric(df_filtered['endY'], errors='coerce')

# Map start and end coordinates to EPV zones
df_filtered['x1_bin'] = pd.cut(df_filtered['x'], bins=epv_cols, labels=False).astype('Int64')
df_filtered['y1_bin'] = pd.cut(df_filtered['y'], bins=epv_rows, labels=False).astype('Int64')
df_filtered['x2_bin'] = pd.cut(df_filtered['endX'], bins=epv_cols, labels=False).astype('Int64')
df_filtered['y2_bin'] = pd.cut(df_filtered['endY'], bins=epv_rows, labels=False).astype('Int64')

# Calculate start and end zone EPV values
def get_epv_value(bin_indices, epv_grid):
    if pd.notnull(bin_indices[0]) and pd.notnull(bin_indices[1]):
        return epv_grid[int(bin_indices[1])][int(bin_indices[0])]
    return np.nan  # Return NaN if indices are invalid

df_filtered['start_zone_value'] = df_filtered[['x1_bin', 'y1_bin']].apply(lambda x: get_epv_value(x, epv), axis=1)
df_filtered['end_zone_value'] = df_filtered[['x2_bin', 'y2_bin']].apply(lambda x: get_epv_value(x, epv), axis=1)

# Ensure that the EPV values are numeric
df_filtered['start_zone_value'] = pd.to_numeric(df_filtered['start_zone_value'], errors='coerce')
df_filtered['end_zone_value'] = pd.to_numeric(df_filtered['end_zone_value'], errors='coerce')

# Compute EPV for the action
df_filtered['epv'] = df_filtered['end_zone_value'] - df_filtered['start_zone_value']

# Check for any NaN or invalid EPV values after calculation
if df_filtered['epv'].isnull().any():
    print("Warning: There are missing or invalid EPV values.")

# Strategy-specific calculations
def calculate_caro_kann(passes_df):
    # Short passes and pass difficulty
    short_passes_df = passes_df[passes_df['pass_length'] < 15]
    short_pass_success = short_passes_df['outcome'].sum() / len(short_passes_df) if len(short_passes_df) > 0 else 0
    return short_pass_success

def calculate_scotch_game(passes_df):
    # Focus on passes through the central part of the field, and passes moving forward
    central_passes_df = passes_df[(passes_df['x'] > 40) & (passes_df['x'] < 60)]  # Central area
    central_pass_success = central_passes_df['outcome'].sum() / len(central_passes_df) if len(central_passes_df) > 0 else 0
    return central_pass_success

def calculate_nimzo_indian(passes_df):
    # Lateral passes (wide passes, switching play)
    lateral_passes_df = passes_df[passes_df['x'] < 30]
    lateral_pass_success = len(lateral_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return lateral_pass_success

def calculate_sicilian_defense(key_passes_df, shots_df):
    # Key passes leading to shots
    key_passes_leading_to_shots = key_passes_df[key_passes_df['timeSec'].isin(shots_df['timeSec'])]
    goal_added_value = len(key_passes_leading_to_shots) / len(key_passes_df) if len(key_passes_df) > 0 else 0
    return goal_added_value

def calculate_king_indian_defense(passes_df):
    # Transition passes (from defensive third to attacking third)
    transition_passes_df = passes_df[passes_df['y'] < 50]  # Defensive third
    transition_pass_success = len(transition_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return transition_pass_success

def calculate_roy_lopez(passes_df):
    # Short passes for tiki-taka build-up
    short_passes_df = passes_df[passes_df['pass_length'] < 15]
    short_pass_success = short_passes_df['outcome'].sum() / len(short_passes_df) if len(short_passes_df) > 0 else 0
    return short_pass_success

def calculate_queens_gambit(passes_df):
    # Wide passes for possession
    wide_passes_df = passes_df[passes_df['x'] > 60]  # Passes across the field
    queens_gambit_goal_added_value = len(wide_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return queens_gambit_goal_added_value

def calculate_english_opening(passes_df):
    # Flexible passes across the field
    english_opening_passes_df = passes_df[passes_df['x'] < 60]
    english_opening_value_per_pass = len(english_opening_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return english_opening_value_per_pass

def calculate_french_defense(passes_df):
    # Passes completed in defensive third with controlled progression
    french_defense_passes_df = passes_df[passes_df['x'] < 30]  # Defensive area
    french_defense_pass_success = french_defense_passes_df['outcome'].sum() / len(french_defense_passes_df) if len(french_defense_passes_df) > 0 else 0
    return french_defense_pass_success

def calculate_alekhine_defense(passes_df):
    # Passes made under pressure
    alekhine_defense_passes_df = passes_df[passes_df['outcome'] == 1]  # Successful passes under pressure
    alekhine_defense_goal_added_value = len(alekhine_defense_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return alekhine_defense_goal_added_value

def calculate_grunfeld_defense(passes_df, shots_df):
    # Fast counter-attacks
    grunfeld_defense_passes_df = passes_df[passes_df['timeSec'].isin(shots_df['timeSec'])]
    grunfeld_defense_value_per_pass = len(grunfeld_defense_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return grunfeld_defense_value_per_pass

def calculate_pirc_defense(passes_df, shots_df):
    # Passes leading to direct attacks
    pirc_defense_passes_df = passes_df[passes_df['timeSec'].isin(shots_df['timeSec'])]
    pirc_defense_value_per_pass = len(pirc_defense_passes_df) / len(passes_df) if len(passes_df) > 0 else 0
    return pirc_defense_value_per_pass

# Calculate metrics for each strategy
caro_kann_pass_success = calculate_caro_kann(passes_df)
scotch_game_pass_success = calculate_scotch_game(passes_df)
nimzo_indian_value_per_pass = calculate_nimzo_indian(passes_df)
sicilian_defense_goal_added_value = calculate_sicilian_defense(key_passes_df, shots_df)
king_indian_defense_value_per_pass = calculate_king_indian_defense(passes_df)
ruy_lopez_pass_success = calculate_roy_lopez(passes_df)
queens_gambit_goal_added_value = calculate_queens_gambit(passes_df)
english_opening_value_per_pass = calculate_english_opening(passes_df)
french_defense_pass_success = calculate_french_defense(passes_df)
alekhine_defense_goal_added_value = calculate_alekhine_defense(passes_df)
grunfeld_defense_value_per_pass = calculate_grunfeld_defense(passes_df, shots_df)
pirc_defense_value_per_pass = calculate_pirc_defense(passes_df, shots_df)

# Store strategy results
strategies = {
    'Caro-Kann': [caro_kann_pass_success],
    'Scotch Game': [scotch_game_pass_success],
    'Nimzo-Indian': [nimzo_indian_value_per_pass],
    'Sicilian Defense': [sicilian_defense_goal_added_value],
    'King’s Indian Defense': [king_indian_defense_value_per_pass],
    'Ruy-Lopez': [ruy_lopez_pass_success],
    'Queen’s Gambit': [queens_gambit_goal_added_value],
    'English Opening': [english_opening_value_per_pass],
    'French Defense': [french_defense_pass_success],
    'Alekhine Defense': [alekhine_defense_goal_added_value],
    'Grünfeld Defense': [grunfeld_defense_value_per_pass],
    'Pirc Defense': [pirc_defense_value_per_pass]
}

# Create a DataFrame and save to Excel
strategy_df = pd.DataFrame(strategies)
output_file_path = 'strategy_metrics_with_impact_scores_detailed.xlsx'
strategy_df.to_excel(output_file_path, sheet_name='Strategy Metrics')

output_file_path




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['home_team'] = np.where(df_filtered['contestantId'] == 1, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['away_team'] = np.where(df_filtered['contestantId'] == 2, 1, 0)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['endX'] = 0.0
A value is trying to be set



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['end_zone_value'] = df_filtered[['x2_bin', 'y2_bin']].apply(lambda x: get_epv_value(x, epv), axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['start_zone_value'] = pd.to_numeric(df_filtered['start_zone_value'], errors='coerce')
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-c

'strategy_metrics_with_impact_scores_detailed.xlsx'