Import Libraries

In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
pd.set_option("display.max_rows", 101)
pd.set_option("display.max_columns", 101)

import matplotlib.pyplot as plt
import os

# set up initial path to data
# DATADIR is a relative path, so it is resolved against the notebook's working directory.
DATADIR = os.path.join("sample-data","data")
# Identifier of the match passed to the Metrica_IO loaders in the cells below.
game_id = 2 # let's look at sample match 2

Import Metrics

In [None]:
import Metrica_IO as mio
import Metrica_Viz as mviz

Read datasets

In [None]:
# Event log for the selected match
events = mio.read_event_data(DATADIR, game_id)

# Tracking data, one frame per side (Home is read first, then Away)
tracking_home, tracking_away = (
    mio.tracking_data(DATADIR, game_id, side) for side in ('Home', 'Away')
)

# Convert positions from Metrica units to meters
tracking_home, tracking_away = map(
    mio.to_metric_coordinates, (tracking_home, tracking_away)
)

Merge the two tracking DataFrames based on the 'Time [s]' column, which is common to both

In [None]:
# Combine both sides into one frame keyed by time:
#   1. outer-join on 'Time [s]' so a timestamp present in either frame is kept,
#   2. order the rows chronologically,
#   3. rebuild a clean 0..n-1 index,
#   4. replace every missing value with 0.
# NOTE(review): after step 4 a missing position is indistinguishable from a real
# coordinate of 0 — confirm this is acceptable for the distance-based metrics below.
tracking_data = (
    pd.merge(tracking_away, tracking_home, on='Time [s]', how='outer')
    .sort_values(by='Time [s]')
    .reset_index(drop=True)
    .fillna(0)
)

# Single DataFrame containing tracking data for both teams
tracking_data

In [None]:
# Columns carrying the '_y' merge suffix that are not needed any further
columns_to_remove = ['ball_x_y', 'ball_y_y', 'Period_y']

# Drop them along the column axis and keep the trimmed frame
tracking_data = tracking_data.drop(columns_to_remove, axis=1)

In [None]:
tracking_data

In [None]:
# Read the event data for the selected match
event_data = mio.read_event_data(DATADIR, game_id)

# Bit of housekeeping: unit conversion from Metrica data units to meters.
# The conversion is applied to the frame that was just read, so this cell no
# longer depends on (or touches) the `events` variable created in an earlier
# cell and gives the same result every time it is re-run.
event_data = mio.to_metric_coordinates(event_data)

In [None]:
event_data

In [None]:
tracking_data

In [None]:
# Columns that together identify one event; used to collapse rows duplicated by the join
event_columns = ['Team', 'Type', 'Subtype', 'Period', 'Start Frame', 'Start Time [s]',
                 'End Frame', 'End Time [s]', 'From', 'To',
                 'Start X', 'Start Y', 'End X', 'End Y']

# The tracking frame carries its own period column ('Period_x' after the Home/Away
# merge). Renaming it below would create a second column called 'Period', so it is
# left out of the join; the event data already provides 'Period'.
tracking_for_events = tracking_data.drop(columns=['Period_x'], errors='ignore')

# Attach to every event the tracking row recorded at the event's END time
merged_end = pd.merge(event_data, tracking_for_events,
                      left_on='End Time [s]', right_on='Time [s]', how='left')

# Strip the '_x' merge suffix. Only a trailing '_x' is removed, so a column that
# merely contains '_x' somewhere in its name is never truncated.
merged_end = merged_end.rename(columns=lambda col: col[:-2] if col.endswith('_x') else col)

# Keep one row per event and rebuild a clean 0..n-1 index
merged_end = merged_end.drop_duplicates(subset=event_columns).reset_index(drop=True)

In [None]:
merged_end.to_csv('merged_end.csv', index=False)

In [None]:
# Columns that together identify one event; used to collapse rows duplicated by the join
event_columns = ['Team', 'Type', 'Subtype', 'Period', 'Start Frame', 'Start Time [s]',
                 'End Frame', 'End Time [s]', 'From', 'To',
                 'Start X', 'Start Y', 'End X', 'End Y']

# The tracking frame carries its own period column ('Period_x' after the Home/Away
# merge). Renaming it below would create a second column called 'Period', so it is
# left out of the join; the event data already provides 'Period'.
tracking_for_events = tracking_data.drop(columns=['Period_x'], errors='ignore')

# Attach to every event the tracking row recorded at the event's START time
merged_start = pd.merge(event_data, tracking_for_events,
                        left_on='Start Time [s]', right_on='Time [s]', how='left')

# Strip the '_x' merge suffix. Only a trailing '_x' is removed, so a column that
# merely contains '_x' somewhere in its name is never truncated.
merged_start = merged_start.rename(columns=lambda col: col[:-2] if col.endswith('_x') else col)

# Keep one row per event and rebuild a clean 0..n-1 index
merged_start = merged_start.drop_duplicates(subset=event_columns).reset_index(drop=True)

In [None]:
merged_start.to_csv('merged_start.csv', index=False)

In [None]:
merged_start


In [None]:
merged_end

#### Define Metrics

In [None]:
# Keep only the rows whose event type is a pass
pass_start = merged_start.loc[merged_start['Type'].eq('PASS')]

In [None]:
pass_start = pd.DataFrame(pass_start)

In [None]:
type(pass_start)

In [None]:
pass_start

Calculate the number of opponents within 5 m of the ball

In [None]:
# Count, for every pass, the opposing players within `radius_m` metres of the
# pass start location ('Start X', 'Start Y').
radius_m = 5

# Rows whose Team is neither 'Home' nor 'Away' keep the initial value 0
pass_start['app_num'] = 0

for team, opponent in (('Home', 'Away'), ('Away', 'Home')):
    team_rows = pass_start['Team'] == team

    # Player x columns are named '<Side>_<id>' and the matching y columns
    # '<Side>_<id>_y'. Reading them from the frame avoids hard-coding the
    # jersey-number ranges of one particular match.
    x_cols = [col for col in pass_start.columns
              if col.startswith(f'{opponent}_') and not col.endswith('_y')]
    y_cols = [f'{col}_y' for col in x_cols]

    # (passes x players) coordinate offsets from the pass start location
    dx = (pass_start.loc[team_rows, x_cols].to_numpy(dtype=float)
          - pass_start.loc[team_rows, ['Start X']].to_numpy(dtype=float))
    dy = (pass_start.loc[team_rows, y_cols].to_numpy(dtype=float)
          - pass_start.loc[team_rows, ['Start Y']].to_numpy(dtype=float))

    # A NaN distance compares False, so players without a position are not counted
    within_radius = (dx ** 2 + dy ** 2) ** 0.5 <= radius_m
    pass_start.loc[team_rows, 'app_num'] = within_radius.sum(axis=1)

In [None]:
pass_start

Calculate the number of players from the team in possession of the ball within 5 m of the ball

In [None]:
# Count, for every pass, the players of the passing team within `radius_m`
# metres of the pass start location ('Start X', 'Start Y').
# All columns of the passing team are scanned, so the player on the ball is
# included in the count whenever his tracked position lies within the radius.
radius_m = 5

# Rows whose Team is neither 'Home' nor 'Away' keep the initial value 0
pass_start['teammate_num'] = 0

for team in ('Home', 'Away'):
    team_rows = pass_start['Team'] == team

    # Player x columns are named '<Side>_<id>' and the matching y columns
    # '<Side>_<id>_y'. Reading them from the frame avoids hard-coding the
    # jersey-number ranges of one particular match.
    x_cols = [col for col in pass_start.columns
              if col.startswith(f'{team}_') and not col.endswith('_y')]
    y_cols = [f'{col}_y' for col in x_cols]

    # (passes x players) coordinate offsets from the pass start location
    dx = (pass_start.loc[team_rows, x_cols].to_numpy(dtype=float)
          - pass_start.loc[team_rows, ['Start X']].to_numpy(dtype=float))
    dy = (pass_start.loc[team_rows, y_cols].to_numpy(dtype=float)
          - pass_start.loc[team_rows, ['Start Y']].to_numpy(dtype=float))

    # A NaN distance compares False, so players without a position are not counted
    within_radius = (dx ** 2 + dy ** 2) ** 0.5 <= radius_m
    pass_start.loc[team_rows, 'teammate_num'] = within_radius.sum(axis=1)


In [None]:
pass_start

Determine the ratio of opponents to players in possession of the ball.

In [None]:
# Ratio of nearby opponents to nearby teammates at the start of the pass.
# A teammate count of 0 is masked to NaN first, so such rows get a NaN ratio
# instead of a division-by-zero error. Computing the column in one step also
# gives it a float dtype from the start (no float written into an int column).
teammates_near = pass_start['teammate_num']
pass_start['ratio_s'] = pass_start['app_num'] / teammates_near.where(teammates_near != 0)

In [None]:
pass_start

Calculate the ratio this time for the merged_end dataset

In [None]:
# Passes only, as an independent copy so the metric columns added below do not
# interact with merged_end.
pass_end = merged_end[merged_end['Type'] == 'PASS'].copy()

# Players are counted within this many metres of the pass END location.
# merged_end holds the tracking positions at the end of each event, so both the
# opponent and the teammate counts are measured from ('End X', 'End Y').
radius_m = 5

# Rows whose Team is neither 'Home' nor 'Away' keep the initial value 0
pass_end['app_num'] = 0
pass_end['teammate_num'] = 0

for team, opponent in (('Home', 'Away'), ('Away', 'Home')):
    team_rows = pass_end['Team'] == team
    end_x = pass_end.loc[team_rows, ['End X']].to_numpy(dtype=float)
    end_y = pass_end.loc[team_rows, ['End Y']].to_numpy(dtype=float)

    # app_num counts the opposing side, teammate_num the passing side
    for count_col, side in (('app_num', opponent), ('teammate_num', team)):
        # Player x columns are named '<Side>_<id>' and the matching y columns
        # '<Side>_<id>_y'. Reading them from the frame avoids hard-coding the
        # jersey-number ranges of one particular match.
        x_cols = [col for col in pass_end.columns
                  if col.startswith(f'{side}_') and not col.endswith('_y')]
        y_cols = [f'{col}_y' for col in x_cols]

        # (passes x players) coordinate offsets from the pass end location
        dx = pass_end.loc[team_rows, x_cols].to_numpy(dtype=float) - end_x
        dy = pass_end.loc[team_rows, y_cols].to_numpy(dtype=float) - end_y

        # A NaN distance compares False, so players without a position are not counted
        within_radius = (dx ** 2 + dy ** 2) ** 0.5 <= radius_m
        pass_end.loc[team_rows, count_col] = within_radius.sum(axis=1)

# Ratio of nearby opponents to nearby teammates at the end of the pass.
# A teammate count of 0 is masked to NaN first, so such rows get a NaN ratio
# instead of a division-by-zero error.
teammates_near = pass_end['teammate_num']
pass_end['ratio_e'] = pass_end['app_num'] / teammates_near.where(teammates_near != 0)



In [None]:
pass_end

#1 Find the difference between the two ratios >> The more the better

In [None]:
# Change in the opponent/teammate ratio between the start and the end of each pass.
# The subtraction aligns the two Series on their index labels, so a row of
# pass_start is paired with the pass_end row carrying the same label.
# NOTE(review): this presumes both frames label the same event identically — confirm.
pass_start['diff_ratio'] = pass_start['ratio_s'] - pass_end['ratio_e']

In [None]:
pass_start

#2 Calculate the difference between the number of opponents and the number of teammates near the player who received the pass. >> The less the better

In [None]:
# Surplus of nearby opponents over nearby teammates, using the counts stored in pass_end
pass_end['diff_app_mate'] = pass_end['app_num'] - pass_end['teammate_num']

In [None]:
pass_end

#3 Determine the distance covered by the ball during the pass.

In [None]:
# Signed length of every pass: the Euclidean distance between the start and end
# locations, made negative when the pass ends at a smaller x than it started.
start_end_coords = zip(
    pass_start['Start X'].tolist(),
    pass_start['Start Y'].tolist(),
    pass_start['End X'].tolist(),
    pass_start['End Y'].tolist(),
)

pass_vectors = [
    (-1 if x1 < x0 else 1) * ((x1 - x0)**2 + (y1 - y0)**2) ** 0.5
    for x0, y0, x1, y1 in start_end_coords
]

# Store the signed lengths alongside the passes
pass_start['PassVector'] = pass_vectors

In [None]:
pass_start

In [None]:
pass_end['End X'].unique()

In [None]:
pass_end['End Y'].unique()

#4 Determine if the pass receiver is in the penalty area of the opposing team

In [None]:
# Drop only the passes that lack a period or an end location, the values this
# metric needs. An unrestricted dropna() would also discard every pass that has
# a null in any other column (for example a pass without a Subtype).
pass_end.dropna(subset=['Period', 'End X', 'End Y'], inplace=True)

In [None]:
# Cast the columns used by the penalty-area check to numeric dtypes.
# The integer cast fails if 'Period' still contains missing values.
# Convert 'Period' column to numeric (integer) type
pass_end['Period'] = pass_end['Period'].astype(int)

# Ensure 'End X' and 'End Y' columns are in numeric format (float or int)
pass_end['End X'] = pass_end['End X'].astype(float)
pass_end['End Y'] = pass_end['End Y'].astype(float)

In [None]:
pass_end['penalty_area'] = 0  # Initialize the new column with zeros

In [None]:
# Print every column name with its positional index (handy for iloc-based selection).
# NOTE(review): this lists pass_start's columns, while the next cell selects from
# pass_end by position — confirm the positions used there match in both frames.
for i, column in enumerate(pass_start.columns):
    print(f"Column {i}: {column}")

In [None]:
# Flag passes that end inside a penalty area.
#   |End X| >= penalty_edge_x          -> deep enough towards a goal line
#   |End Y| <= penalty_half_width      -> within the width of the area
# The y range is symmetric about the pitch centre line (the previous lower
# bound of -20.6 did not match the upper bound of 20.16).
# NOTE(review): 43.5 and the side each team is matched with per period are
# taken over unchanged — confirm them against the pitch dimensions and the
# direction of play of this match.
penalty_edge_x = 43.5
penalty_half_width = 20.16

# Column 3 is the event 'Period'; it is selected by position because the
# merged frame can contain more than one column with that label.
period = pass_end.iloc[:, 3]
team = pass_end['Team']
end_x = pass_end['End X']
end_y = pass_end['End Y']

within_width = end_y.between(-penalty_half_width, penalty_half_width)

# Team/period combinations checked against the negative-x and positive-x end
negative_x_side = ((period == 1) & (team == 'Home')) | ((period == 2) & (team == 'Away'))
positive_x_side = ((period == 1) & (team == 'Away')) | ((period == 2) & (team == 'Home'))

in_penalty_area = within_width & (
    (negative_x_side & (end_x <= -penalty_edge_x))
    | (positive_x_side & (end_x >= penalty_edge_x))
)
pass_end.loc[in_penalty_area, 'penalty_area'] = 1
#0.4

#5 Determine next event of the Pass

In [None]:
# For every event, record the type, subtype and team of the event in the next row.
# The value is kept only on rows whose own Type is 'PASS'; every other row, and
# every missing value (e.g. the last row or a next event without subtype), gets 0.
is_pass = merged_start['Type'] == 'PASS'

for source_col, target_col in (
    ('Type', 'next_event'),
    ('Subtype', 'next_subtype'),
    ('Team', 'next_team'),
):
    following = merged_start[source_col].shift(-1)
    merged_start[target_col] = following.where(is_pass, 0).fillna(0)

In [None]:
merged_start

In [None]:
# Event types to inspect
types = ['PASS', 'BALL LOST', 'CHALLENGE', 'RECOVERY', 'BALL OUT', 'SHOT', 'FAULT RECEIVED', 'CARD']

# Map each event type to the distinct subtypes observed for it
unique_subtypes = {
    typ: merged_start.loc[merged_start['Type'] == typ, 'Subtype'].unique()
    for typ in types
}

# Print the results
for typ in unique_subtypes:
    subtypes = unique_subtypes[typ]
    print(f"Type: {typ} {subtypes}\n")

In [None]:
# Distinct (team, next event, next subtype, next team) combinations present in the data
combo_columns = ['Team', 'next_event', 'next_subtype', 'next_team']
unique_combinations = merged_start[combo_columns].drop_duplicates()

# The same combinations as a list of plain tuples
unique_combinations_list = [
    combo for combo in unique_combinations.itertuples(index=False, name=None)
]

unique_combinations_list

In [None]:
# Print every column of merged_start together with its positional index
position = 0
for column in merged_start.columns:
    print(f"Column {position}: {column}")
    position += 1


Assign Rates

In [None]:
# Define the conditions and their corresponding rates
# Rating table for the event that follows a pass.
#   Key:   (team of the pass, Type of the next event, Subtype of the next event,
#           team of the next event); a Subtype of 0 stands for "no subtype",
#           because missing values are replaced by 0 when the next_* columns are built.
#   Value: the rate assigned to that combination. Combinations that are not
#          listed here are rated 0 by calculate_r_next_event.
# NOTE(review): the Home and Away halves are not mirror images of each other
# (e.g. 'BALL OUT' without subtype is -0.1 for Away but 0.4 for Home) — confirm
# whether that asymmetry is intentional.
conditions = {
    ('Away', 'SHOT', 'ON TARGET-GOAL', 'Away') : 1.0 ,
    ('Away', 'SHOT', 'ON TARGET-SAVED', 'Away') : 0.9 ,
    ('Away', 'SHOT', 'OFF TARGET-OUT', 'Away') : 0.8 ,
    ('Away', 'SHOT', 'OFF TARGET-HEAD-OUT', 'Away') : 0.8 ,
    ('Away', 'PASS', 'CROSS', 'Away') : 0.6 ,
    ('Away', 'PASS', 0, 'Away') : 0.5 ,
    ('Away', 'PASS', 'HEAD', 'Away') : 0.5 , 
    ('Away', 'BALL OUT', 'CROSS', 'Away') : 0.4 ,
    ('Away', 'CHALLENGE', 'AERIAL-WON', 'Away') : 0.3 , 
    ('Away', 'CHALLENGE', 'TACKLE-WON', 'Away') : 0.3 ,
    ('Away', 'CHALLENGE', 'GROUND-ADVANTAGE-WON', 'Away') : 0.3,
    ('Away', 'CHALLENGE', 'DRIBBLE-WON', 'Away') : 0.3,
    ('Away', 'CHALLENGE', 'GROUND-WON', 'Away') : 0.3,
    ('Away', 'CHALLENGE', 'TACKLE-ADVANTAGE-WON', 'Away') : 0.3,
    ('Away', 'CHALLENGE', 'AERIAL-LOST', 'Home') : 0.2,
    ('Away', 'CHALLENGE', 'TACKLE-LOST', 'Home') : 0.2,
    ('Away', 'CHALLENGE', 'GROUND-ADVANTAGE-LOST', 'Home') : 0.2,
    ('Away', 'CHALLENGE', 'GROUND-LOST', 'Home') : 0.2,
    ('Away', 'RECOVERY', 'THEFT', 'Home') : -0.5,
    ('Away', 'RECOVERY', 'INTERCEPTION', 'Home') : -0.5,
    ('Away', 'BALL LOST', 'CROSS-INTERCEPTION', 'Away') : -0.4,
    ('Away', 'BALL LOST', 'INTERCEPTION', 'Away') : -0.3,
    ('Away', 'BALL LOST', 'HEAD-INTERCEPTION', 'Away') : -0.3,
    ('Away', 'BALL LOST', 'FORCED', 'Away') : -0.25,
    ('Away', 'BALL LOST', 'THEFT', 'Away') : -0.25,
    ('Away', 'BALL LOST', 0, 'Away') : -0.2,
    ('Away', 'BALL LOST', 'HAND BALL', 'Away') : -0.2,
    ('Away', 'CHALLENGE', 'GROUND-FAULT-LOST', 'Home') : -0.1,
    ('Away', 'CHALLENGE', 'TACKLE-WON', 'Home') : -0.1,
    ('Away', 'CHALLENGE', 'GROUND-WON', 'Home') : -0.1,
    ('Away', 'CHALLENGE', 'TACKLE-LOST', 'Away') : -0.1,
    ('Away', 'PASS', 0, 'Home') : -0.1,
    ('Away', 'BALL OUT', 0, 'Away') : -0.1,
    ('Away', 'CHALLENGE', 'TACKLE-FAULT-WON', 'Away') : -0.1,
    ('Away', 'CHALLENGE', 'GROUND-LOST', 'Away') : -0.1,
    ('Away', 'CHALLENGE', 'GROUND', 'Home') : -0.1,
    ('Home', 'SHOT', 'ON TARGET-GOAL', 'Home') : 1.0 ,
    ('Home', 'SHOT', 'ON TARGET-SAVED', 'Home') : 0.9 ,
    ('Home', 'SHOT', 'OFF TARGET-OUT', 'Home') : 0.8,
    ('Home', 'SHOT', 'HEAD-OFF TARGET-OUT', 'Home') : 0.8,
    ('Home', 'PASS', 'CROSS', 'Home') : 0.6,
    ('Home', 'PASS', 'DEEP BALL', 'Home') : 0.5,
    ('Home', 'PASS', 0, 'Home') : 0.5,
    ('Home', 'PASS', 'HEAD', 'Home') : 0.5,
    ('Home', 'BALL OUT', 0, 'Home') : 0.4,
    ('Home', 'BALL OUT', 'CLEARANCE', 'Home') : 0.4,
    ('Home', 'CHALLENGE', 'GROUND-WON', 'Home') : 0.3,
    ('Home', 'CHALLENGE', 'AERIAL-WON', 'Home') : 0.3,
    ('Home', 'CHALLENGE', 'AERIAL-LOST', 'Away') : 0.2,
    ('Home', 'CHALLENGE', 'TACKLE-LOST', 'Away') : 0.2,
    ('Home', 'CHALLENGE', 'GROUND-LOST', 'Away') : 0.2,
    ('Home', 'RECOVERY', 'INTERCEPTION', 'Away') : -0.5,
    ('Home', 'RECOVERY', 'THEFT', 'Away') : -0.5,
    ('Home', 'BALL LOST', 'CROSS-INTERCEPTION', 'Home') : -0.4,
    ('Home', 'BALL LOST', 'INTERCEPTION', 'Home') : -0.3,
    ('Home', 'BALL LOST', 'HEAD-INTERCEPTION', 'Home') : -0.3,
    ('Home', 'BALL LOST', 'FORCED', 'Home') : -0.25,
    ('Home', 'BALL LOST', 'HEAD', 'Home') : -0.2,
    ('Home', 'BALL LOST', 0, 'Home') : -0.2,
    ('Home', 'CHALLENGE', 'TACKLE-FAULT-WON', 'Home') : -0.1,
    ('Home', 'CHALLENGE', 'TACKLE-FAULT-LOST', 'Away') : -0.1,
    ('Home', 'CHALLENGE', 'TACKLE-LOST', 'Home') : -0.1,
    ('Home', 'CHALLENGE', 'TACKLE-WON', 'Away') : -0.1,
    ('Home', 'CHALLENGE', 'GROUND-WON', 'Away') : -0.1,
    ('Home', 'CHALLENGE', 'GROUND-FAULT-WON', 'Home') : -0.1,
    ('Home', 'CHALLENGE', 'GROUND-FAULT-LOST', 'Away') : -0.1,
    ('Home', 'CHALLENGE', 'GROUND-FAULT-LOST', 'Home') : -0.1,
}

# Define a function to calculate 'r_next_event' based on the conditions
def calculate_r_next_event(row, rates=None):
    """Return the rate of the event that follows the pass described by `row`.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide the keys 'Team', 'next_event', 'next_subtype' and 'next_team'.
    rates : dict, optional
        Maps (Team, next_event, next_subtype, next_team) tuples to a rate.
        Defaults to the module-level `conditions` table.

    Returns
    -------
    The rate stored for the row's combination, or 0 when the combination is
    not present in the table.
    """
    if rates is None:
        rates = conditions
    # One hash lookup replaces scanning and comparing every table entry per row
    key = (row['Team'], row['next_event'], row['next_subtype'], row['next_team'])
    return rates.get(key, 0)

# Rate every row with calculate_r_next_event and store the result
next_event_rates = merged_start.apply(calculate_r_next_event, axis=1)
merged_start['r_next_event'] = next_event_rates

# Show only the rows that have a recorded next event
has_next_event = merged_start['next_event'] != 0
merged_start[has_next_event]

#6 Rate the pass itself based on its subtype

In [None]:
# Rate assigned to a pass according to its subtype
pass_subtype_valuation_dict = {
    'CROSS': 0.7,
    'DEEP BALL': 0.2,
    'HEAD': 0.2,
    'GOAL KICK': 0.1,
    'HEAD-INTERCEPTION-CLEARANCE': 0.5
}

# New column 'r_pass_sub': the subtype's rate for PASS rows, 0 for every other
# row and for passes whose subtype is not in the table
merged_start['r_pass_sub'] = merged_start.apply(
    lambda row: pass_subtype_valuation_dict.get(row['Subtype'], 0) if row['Type'] == 'PASS' else 0,
    axis=1,
)

In [None]:
# Check the rating: show the passes that have a subtype
filter_start = merged_start.loc[merged_start['Type'] == 'PASS']
filter_start.loc[filter_start['Subtype'].notna()]

In [None]:
merged_start

In [None]:
pass_start