Import Libraries

In [None]:
%load_ext autoreload
%autoreload 2
import pandas as pd
pd.set_option("display.max_rows", 101)
pd.set_option("display.max_columns", 101)

import matplotlib.pyplot as plt
import os

# Folder that holds the sample data, given relative to the working directory;
# it is handed to the Metrica_IO readers in the cells below.
DATADIR = os.path.join("sample-data","data")
game_id = 2 # let's look at sample match 2

Import the Metrica helper modules

In [None]:
import Metrica_IO as mio
import Metrica_Viz as mviz

Read datasets

In [None]:
# Event log of the selected match
events = mio.read_event_data(DATADIR, game_id)

# Tracking data of each side, converted from Metrica units to meters as soon
# as it has been read
tracking_home = mio.to_metric_coordinates(mio.tracking_data(DATADIR, game_id, 'Home'))
tracking_away = mio.to_metric_coordinates(mio.tracking_data(DATADIR, game_id, 'Away'))

Merge the two tracking DataFrames based on the 'Time [s]' column, which is common to both

In [None]:
# Outer-join the away and home tracking frames on their shared 'Time [s]'
# column, order the result chronologically and renumber the rows from 0.
# Columns that exist in both inputs come out of the merge with pandas' default
# '_x' (away, left frame) and '_y' (home, right frame) suffixes.
# NOTE(review): every remaining NaN is replaced by 0. After the conversion to
# meters this presumably puts a missing position at the pitch origin - confirm
# that this is intended before relying on distances computed from it.
tracking_data = (
    pd.merge(tracking_away, tracking_home, on='Time [s]', how='outer')
    .sort_values(by='Time [s]')
    .reset_index(drop=True)
    .fillna(0)
)

# A single DataFrame now holds the tracking data of both teams
tracking_data

In [None]:
# Specify the columns to be removed
columns_to_remove = ['ball_x_y', 'ball_y_y', 'Period_y']

# Use the drop method to remove the specified columns
tracking_data = tracking_data.drop(columns=columns_to_remove)

In [None]:
tracking_data

In [None]:
# Read the event data of the selected match
event_data = mio.read_event_data(DATADIR, game_id)

# Unit conversion from Metrica units to meters.
# The freshly read frame is converted (not `events` from the earlier cell), so
# this cell is self-contained: its result depends only on the read above, and
# re-running it never converts an already converted frame a second time.
event_data = mio.to_metric_coordinates(event_data)

In [None]:
event_data

In [None]:
tracking_data

In [None]:
# Attach to every event the tracking row recorded at the event's END time
merged_end = pd.merge(event_data, tracking_data, left_on='End Time [s]', right_on='Time [s]', how='left')

# Strip the trailing '_x' from column names so that a player's x-coordinate
# column is named after the player alone (the '_y' columns keep their suffix).
# Only a genuine suffix is stripped, and a stripped name that would repeat an
# existing label (tracking 'Period_x' vs. the event's own 'Period') receives a
# '_tracking' suffix instead, so every column label stays unique and
# merged_end['Period'] keeps returning a single column.
renamed_columns = []
for col in merged_end.columns:
    new_name = col[:-2] if col.endswith('_x') else col
    if new_name in renamed_columns:
        new_name = f'{new_name}_tracking'
    renamed_columns.append(new_name)
merged_end.columns = renamed_columns

# Keep one row per event (duplicates are judged on the event columns only)
# and renumber the rows from 0
event_columns = ['Team', 'Type', 'Subtype', 'Period', 'Start Frame', 'Start Time [s]', 'End Frame', 'End Time [s]', 'From', 'To', 'Start X', 'Start Y', 'End X', 'End Y']
merged_end = merged_end.drop_duplicates(subset=event_columns).reset_index(drop=True)

In [None]:
merged_end.to_csv('merged_end.csv', index=False)

In [None]:
# Attach to every event the tracking row recorded at the event's START time
merged_start = pd.merge(event_data, tracking_data, left_on='Start Time [s]', right_on='Time [s]', how='left')

# Strip the trailing '_x' from column names so that a player's x-coordinate
# column is named after the player alone (the '_y' columns keep their suffix).
# Only a genuine suffix is stripped, and a stripped name that would repeat an
# existing label (tracking 'Period_x' vs. the event's own 'Period') receives a
# '_tracking' suffix instead, so every column label stays unique and
# merged_start['Period'] keeps returning a single column.
renamed_columns = []
for col in merged_start.columns:
    new_name = col[:-2] if col.endswith('_x') else col
    if new_name in renamed_columns:
        new_name = f'{new_name}_tracking'
    renamed_columns.append(new_name)
merged_start.columns = renamed_columns

# Keep one row per event (duplicates are judged on the event columns only)
# and renumber the rows from 0
event_columns = ['Team', 'Type', 'Subtype', 'Period', 'Start Frame', 'Start Time [s]', 'End Frame', 'End Time [s]', 'From', 'To', 'Start X', 'Start Y', 'End X', 'End Y']
merged_start = merged_start.drop_duplicates(subset=event_columns).reset_index(drop=True)

In [None]:
merged_start.to_csv('merged_start.csv', index=False)

In [None]:
merged_start


In [None]:
merged_end

#### Define Metrics

In [None]:
# Keep only the events whose type is PASS
pass_start = merged_start.loc[merged_start['Type'].eq('PASS')]

In [None]:
pass_start = pd.DataFrame(pass_start)

In [None]:
type(pass_start)

In [None]:
pass_start

Calculate the number of opponents within 5 m of the ball

In [None]:
pass_start['app_num'] = 0  # stays 0 for rows whose team is neither Home nor Away

# x-coordinate columns of the OPPOSING players, keyed by the passing team.
# Assumes Away player IDs run from 15 to 26 and Home player IDs from 1 to 14.
opponent_x_columns = {
    'Home': [f'Away_{player}' for player in range(15, 27)],
    'Away': [f'Home_{player}' for player in range(1, 15)],
}

# Walk over the passes and count the opponents no more than 5 away from the
# pass start position
for index, row in pass_start.iterrows():
    x_columns = opponent_x_columns.get(row['Team'])
    if x_columns is None:
        continue
    nearby = 0
    for x_col in x_columns:
        # The matching y coordinate lives in the column '<x column>_y'
        gap = ((row['Start X'] - row[x_col]) ** 2 + (row['Start Y'] - row[f'{x_col}_y']) ** 2) ** 0.5
        if gap <= 5:
            nearby += 1
    pass_start.at[index, 'app_num'] = nearby

In [None]:
pass_start

Calculate the number of players from the team in possession of the ball within 5 m of the ball

In [None]:
pass_start['teammate_num'] = 0  # stays 0 for rows whose team is neither Home nor Away

# x-coordinate columns of the passing team's OWN players, keyed by team.
# Assumes Home player IDs run from 1 to 14 and Away player IDs from 15 to 26.
teammate_x_columns = {
    'Home': [f'Home_{player}' for player in range(1, 15)],
    'Away': [f'Away_{player}' for player in range(15, 27)],
}

# Walk over the passes and count the players of the passing team no more than
# 5 away from the pass start position; the count is stored in 'teammate_num'
for index, row in pass_start.iterrows():
    x_columns = teammate_x_columns.get(row['Team'])
    if x_columns is None:
        continue
    nearby = 0
    for x_col in x_columns:
        # The matching y coordinate lives in the column '<x column>_y'
        gap = ((row['Start X'] - row[x_col]) ** 2 + (row['Start Y'] - row[f'{x_col}_y']) ** 2) ** 0.5
        if gap <= 5:
            nearby += 1
    pass_start.at[index, 'teammate_num'] = nearby


In [None]:
pass_start

Determine the ratio of opponents to players in possession of the ball.

In [None]:
# Ratio of nearby opponents to nearby teammates at the start of each pass.
# Computed column-wise so that 'ratio_s' is a float column from the outset
# (the row-by-row version wrote fractions into a column initialised with the
# integer 0), and so that a pass with no teammate in range gets NaN instead of
# a division by zero.
teammates_nearby = pass_start['teammate_num']
pass_start['ratio_s'] = pass_start['app_num'] / teammates_nearby.where(teammates_nearby > 0)

In [None]:
pass_start

Calculate the ratio this time for the merged_end dataset

In [None]:
# Passes only, as an independent copy so that new columns can be added freely
pass_end = merged_end[merged_end['Type'] == 'PASS'].copy()


def _count_players_within(frame, x_col, y_col, player_x_columns, radius=5):
    """Count, per row of `frame`, how many listed players are within `radius` of a point.

    Parameters
    ----------
    frame : DataFrame holding both the reference point and the player columns.
    x_col, y_col : names of the columns with the reference point's coordinates.
    player_x_columns : names of the players' x-coordinate columns; each
        player's y coordinate is read from the column '<x column>_y'.
    radius : largest distance (inclusive) that still counts as "within".

    Returns
    -------
    Series of integer counts, indexed like `frame`. A distance that is NaN
    never counts as within the radius.
    """
    player_y_columns = [f'{name}_y' for name in player_x_columns]
    # (n_rows, n_players) coordinate differences; the point columns are kept
    # two-dimensional so they broadcast across the player axis
    dx = frame[player_x_columns].to_numpy(dtype=float) - frame[[x_col]].to_numpy(dtype=float)
    dy = frame[player_y_columns].to_numpy(dtype=float) - frame[[y_col]].to_numpy(dtype=float)
    within = (dx ** 2 + dy ** 2) ** 0.5 <= radius
    return pd.Series(within.sum(axis=1), index=frame.index)


# Assumes Home player IDs run from 1 to 14 and Away player IDs from 15 to 26
home_x_columns = [f'Home_{player}' for player in range(1, 15)]
away_x_columns = [f'Away_{player}' for player in range(15, 27)]

# pass_end carries the tracking row of the pass END time, so both counts are
# taken around the END position of the pass. (Opponents used to be measured
# from the START position, which mixed the two ends of the pass.)
home_nearby = _count_players_within(pass_end, 'End X', 'End Y', home_x_columns)
away_nearby = _count_players_within(pass_end, 'End X', 'End Y', away_x_columns)

passer_is_home = pass_end['Team'] == 'Home'
passer_is_away = pass_end['Team'] == 'Away'

## Number of opponents within 5 m of the pass end position
## (0 for rows whose team is neither Home nor Away)
pass_end['app_num'] = away_nearby.where(passer_is_home, home_nearby.where(passer_is_away, 0))

## Number of players of the team in possession within 5 m of the pass end position
## (0 for rows whose team is neither Home nor Away)
pass_end['teammate_num'] = home_nearby.where(passer_is_home, away_nearby.where(passer_is_away, 0))

## Ratio of opponents to teammates around the pass end position.
## A pass with no teammate in range gets NaN instead of a division by zero.
teammates_nearby = pass_end['teammate_num']
pass_end['ratio_e'] = pass_end['app_num'] / teammates_nearby.where(teammates_nearby > 0)



In [None]:
pass_end

#1 Find the difference between the two ratios >> The more the better

In [None]:
# Change in the opponent/teammate ratio between the start and the end of each
# pass; the subtraction pairs the rows of the two frames by index label
pass_start['diff_ratio'] = pass_start['ratio_s'].sub(pass_end['ratio_e'])

In [None]:
pass_start

#2 Calculate the difference between the number of opponents and the number of teammates near the player who received the pass. >> The less the better

In [None]:
# Surplus of opponents over teammates counted for the end of each pass
pass_end['diff_app_mate'] = pass_end['app_num'].sub(pass_end['teammate_num'])

In [None]:
pass_end

#3 Determine the distance covered by the ball during the pass.

In [None]:
def _signed_pass_length(start_x, start_y, end_x, end_y):
    """Return the straight-line length of a pass, negated when it ends at a smaller x than it starts."""
    length = ((end_x - start_x) ** 2 + (end_y - start_y) ** 2) ** 0.5
    return -length if end_x < start_x else length


# One signed length per pass: the magnitude is the distance between the start
# and end positions, the sign follows the direction of travel along x
endpoints = pass_start[['Start X', 'Start Y', 'End X', 'End Y']].itertuples(index=False, name=None)
pass_vectors = [_signed_pass_length(x0, y0, x1, y1) for x0, y0, x1, y1 in endpoints]

# Store the values in the 'PassVector' column
pass_start['PassVector'] = pass_vectors

In [None]:
pass_start

In [None]:
pass_end['End X'].unique()

In [None]:
pass_end['End Y'].unique()

#4 Determine whether the pass receiver is in the opposing team's penalty area

In [None]:
# Drop only the passes that lack a field the penalty-area check below needs.
# A bare dropna() removes a row as soon as ANY of its columns is NaN, so passes
# would also be lost over gaps in unrelated columns (for example an empty
# 'Subtype' - presumably common; verify against the event file).
pass_end = pass_end.dropna(subset=['Period', 'End X', 'End Y'])

In [None]:
# Cast the columns used by the penalty-area check to plain numeric types:
# 'Period' to integer, the pass end coordinates to float
for column, target_type in (('Period', int), ('End X', float), ('End Y', float)):
    pass_end[column] = pass_end[column].astype(target_type)

In [None]:
# Flag the passes that end inside the opponent's penalty area (1) or not (0).
# Columns are addressed by NAME instead of by hard-coded position, so the
# result no longer depends on the exact number and order of the columns in
# pass_end. (The positions 0, 3, 12, 13 used before presumably stood for
# 'Team', 'Period', 'End X', 'End Y' - confirm against the event columns.)
penalty_area_min_abs_x = 43.5   # NOTE(review): check against the pitch length used for the metric conversion
penalty_area_max_abs_y = 20.16

period = pass_end['Period']
if isinstance(period, pd.DataFrame):
    # The label 'Period' occurs more than once; the event's own period comes
    # first because the event columns are the left side of the merge
    period = period.iloc[:, 0]
team = pass_end['Team']
end_x = pass_end['End X']
end_y = pass_end['End Y']

# Same four cases as before: Home in period 1 and Away in period 2 are checked
# against the negative-x end, the other two combinations against the positive-x end
targets_negative_x = ((period == 1) & (team == 'Home')) | ((period == 2) & (team == 'Away'))
targets_positive_x = ((period == 1) & (team == 'Away')) | ((period == 2) & (team == 'Home'))

# between() is inclusive on both ends, matching -20.16 <= y <= 20.16
inside_y_band = end_y.between(-penalty_area_max_abs_y, penalty_area_max_abs_y)
inside_penalty_area = inside_y_band & (
    (targets_negative_x & (end_x <= -penalty_area_min_abs_x))
    | (targets_positive_x & (end_x >= penalty_area_min_abs_x))
)

pass_end['penalty_area'] = inside_penalty_area.astype(int)

In [None]:
pass_end[pass_end['penalty_area'] == 1]