In [None]:
#| default_exp  Feature.pressurelines

# Dynamic Pressure Lines Feature

In this module, we compute the pressure-lines feature described in the paper [A framework for the fine-grained evaluation of the instantaneous expected value of soccer possessions](https://link.springer.com/article/10.1007/s10994-021-05989-6).

In [None]:
#| export 

import pandas as pd
import numpy as np
import collections
import math 
from sklearn.cluster import KMeans

In [None]:
# Locations of the two CSV inputs read by this notebook: the tracking data
# and the tracking-to-event mapping.
# NOTE(review): these are absolute paths on one machine; point them at your
# own copy of the data before running.
tracking_path = "/home/islam/Downloads/data/skillCorner_tracking_df.csv"
events_map_path = "/home/islam/Downloads/data/skillCorner_opta_mapping.csv"

In [None]:
# Load both CSVs into DataFrames. low_memory=False makes pandas parse the
# tracking file in one pass rather than in chunks, which avoids mixed-dtype
# inference on its columns.
tracking_df = pd.read_csv(tracking_path,low_memory=False)
map_events_df = pd.read_csv(events_map_path)

# Cluster

To compute this feature, we use KMeans clustering. For each frame, we compute the pressure lines of the possession team.

To compute that, we identify for each frame:
+ ball coordinates (`x`,`y`)
+ possession side (`home` or `away`)
+ `cluster input` : players coordinates of the possession side

After that, we apply the clustering and order the resulting clusters from closest to farthest from the ball, using these steps:
+ compute centroid for each cluster
+ compute distance between each centroid and ball 

In [None]:
#| export 

def _cluster(frame_df, n_clusters=3, random_state=None):
    """Group one frame's possession-side players into pressure lines.

    KMeans is run twice on the same player coordinates: once with every y
    set to 0 (clusters separated along x, returned as the "vertical" lines)
    and once with every x set to 0 (the "horizontal" lines). The clusters of
    each run are then ordered by the distance between the cluster's 2-D
    centroid (computed from the original, un-zeroed coordinates) and the
    ball, nearest first.

    Parameters
    ----------
    frame_df : pd.DataFrame
        Tracking data for a single frame. The first row supplies
        ``possessionSide``, ``ball_x``, ``ball_y`` and the
        ``<side>_player_<jersey>_player_id`` values; coordinates are read
        from the ``<side>_player_<jersey>_x`` / ``_y`` columns.
        Assumes one wide row per frame with x/y columns adjacent and in
        x-then-y order -- TODO confirm against the tracking export.
    n_clusters : int, default 3
        Number of clusters requested from KMeans.
    random_state : int, RandomState instance or None, default None
        Forwarded to KMeans so a run can be made reproducible. ``None``
        keeps the previous (unseeded) behaviour.

    Returns
    -------
    tuple[list[str], list[str]]
        ``(vertical_lines, horizontal_lines)``. Each list holds one
        comma-joined string of player ids per cluster, nearest-to-ball
        first.
    """

    def _input(frame_df, jerseyn_goal="1"):
        """Return (N x 2 coordinate array, player ids) for the clustered team."""
        # NOTE(review): the team is taken from `possessionSide`; an earlier
        # revision called this variable `opponent_team`. Confirm which side
        # the feature is meant to describe.
        team = frame_df.iloc[0]["possessionSide"]

        coords = frame_df.loc[:, frame_df.columns.str.startswith(team)]
        coords = coords.loc[:, coords.columns.str.endswith(("_x", "_y"))]

        # Exclude the player wearing `jerseyn_goal` (presumably the
        # goalkeeper) so that player never forms part of a line.
        excluded = team + "_player_" + jerseyn_goal
        coords = coords.drop([excluded + "_x", excluded + "_y"], axis=1)
        # Players with no tracking data in this frame have all-NaN columns.
        coords = coords.dropna(axis=1, how='all')

        # Swap only the trailing "_x" for "_player_id"; a blanket
        # str.replace("x", ...) would also rewrite any other "x" in the name.
        id_cols = [
            col[:-len("_x")] + "_player_id" for col in coords.columns
            if col.endswith("_x")
        ]
        player_ids = [frame_df.iloc[0][col] for col in id_cols]

        return np.reshape(coords.values, (-1, 2)), player_ids

    def centeroid(coord):
        """Return the (x, y) mean of a sequence of 2-D points."""
        xs, ys = zip(*coord)
        return sum(xs) / len(xs), sum(ys) / len(ys)

    def ball_dist(centroid_coord, ball_coord):
        """Return the Euclidean distance between a centroid and the ball."""
        return math.hypot(ball_coord[0] - centroid_coord[0],
                          ball_coord[1] - centroid_coord[1])

    def sort_cluster(labels, players_id, coords, ball):
        """Return comma-joined player ids per cluster, nearest cluster first."""
        players_by_label = collections.defaultdict(list)
        points_by_label = collections.defaultdict(list)
        for label, player, point in zip(labels, players_id, coords):
            players_by_label[label].append(player)
            points_by_label[label].append(point)

        distance = {
            label: ball_dist(centeroid(points), ball)
            for label, points in points_by_label.items()
        }
        # `ordered` already holds the cluster labels in the wanted order, so
        # they are used directly as keys. Indexing the ordered list with the
        # labels again (as before) applied the permutation twice and returned
        # the clusters in the wrong order.
        ordered = sorted(distance, key=distance.get)

        return [",".join(players_by_label[label]) for label in ordered]

    def cluster_output(dataset, players_id, Vpl=True):
        """Cluster on one axis only and return the ball-sorted player groups."""
        X = np.array(dataset, copy=True)
        # Vertical lines ignore y; horizontal lines ignore x.
        X[:, 1 if Vpl else 0] = 0

        km = KMeans(n_clusters=n_clusters, random_state=random_state)
        labels = km.fit_predict(X)

        # Centroids are measured on the original 2-D positions, not on X.
        return sort_cluster(labels, players_id, dataset, ball_coord)

    ball_coord = [frame_df.iloc[0]["ball_x"], frame_df.iloc[0]["ball_y"]]

    cluster_input, players_id = _input(frame_df)

    vpl_cluster = cluster_output(cluster_input, players_id)
    hpl_cluster = cluster_output(cluster_input, players_id, Vpl=False)

    return vpl_cluster, hpl_cluster

In [None]:
#| export 

def Pressureline_feature(tracking: pd.DataFrame,
                         mapevents: pd.DataFrame) -> pd.DataFrame:
    """Compute the pressure-line feature for every frame referenced by an event.

    Tracking rows whose ``frameId`` appears in ``mapevents["frameId"]`` are
    grouped by frame, and each group is passed to ``_cluster``. Frames whose
    first row has ``possessionSide == "unknown"`` are skipped.

    Parameters
    ----------
    tracking : pd.DataFrame
        Tracking data with at least ``frameId`` and ``possessionSide``
        columns, plus whatever columns ``_cluster`` reads.
    mapevents : pd.DataFrame
        Event mapping with a ``frameId`` column selecting the frames to
        process.

    Returns
    -------
    pd.DataFrame
        One row per processed frame, in order of first appearance in
        ``tracking``, with columns ``frameId``, ``vPressureline1..3`` and
        ``hPressureline1..3``. The columns are present even when no frame
        qualifies.
    """
    columns = [
        "frameId",
        "vPressureline1", "vPressureline2", "vPressureline3",
        "hPressureline1", "hPressureline2", "hPressureline3",
    ]

    frame_ids = mapevents["frameId"].drop_duplicates().tolist()
    event_frames = tracking[tracking["frameId"].isin(frame_ids)]

    # Collect plain dicts and build the frame once: concatenating inside the
    # loop copies the whole accumulated result on every iteration.
    records = []
    # A single groupby replaces one full scan of `tracking` per row;
    # sort=False keeps frames in their order of first appearance.
    for frame_id, frame_df in event_frames.groupby("frameId", sort=False):
        if frame_df.iloc[0]["possessionSide"] == "unknown":
            continue

        vpl, hpl = _cluster(frame_df)
        records.append(
            dict(frameId=frame_id,
                 vPressureline1=vpl[0],
                 vPressureline2=vpl[1],
                 vPressureline3=vpl[2],
                 hPressureline1=hpl[0],
                 hPressureline2=hpl[1],
                 hPressureline3=hpl[2]))

    return pd.DataFrame(records, columns=columns)

In [None]:
# Build the pressure-line feature table from the loaded tracking and
# event-mapping frames, then display it as the cell output.
pl_feature = Pressureline_feature(tracking_df,map_events_df)
pl_feature

Unnamed: 0,frameId,vPressureline1,vPressureline2,vPressureline3,hPressureline1,hPressureline2,hPressureline3
0,741,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,9f43f887be46814fd9e7190b0296432ddd2c83ea3ac1aa...
1,757,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,9f43f887be46814fd9e7190b0296432ddd2c83ea3ac1aa...
2,794,a295a138484cecf22212ad9f65be919fd385667e19fc88...,433ac3bb16333f5538cae2c0874dfa5042e62951b43237...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...
3,795,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,433ac3bb16333f5538cae2c0874dfa5042e62951b43237...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...
4,796,433ac3bb16333f5538cae2c0874dfa5042e62951b43237...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...
...,...,...,...,...,...,...,...
1475,68400,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...
1476,68401,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...
1477,68411,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...
1478,68412,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...,aa96464d2088f91d0f9fe2e78e6d609854dc28860f10cc...,04583a17d5b509c171892bcbf4d3a5b9ed2dfa519c06b4...,eed08a1e908beb8c77510b8d8ee0dab681d611fe5a7716...,a295a138484cecf22212ad9f65be919fd385667e19fc88...
