In [None]:
#| default_exp  Features.pressurelines

In [None]:
%load_ext autoreload
%autoreload 2
from IPython.core.debugger import set_trace

# Dynamic Pressure Lines Feature

In this module, we compute the pressure lines feature as described in <cite id="g6ews"><a href="#zotero%7C7448072%2FYJQRGD6I">(Fernández, Bornn, and Cervone 2021)</a></cite>. 

In [None]:
#| export

import collections
import os
import re

import numpy as np
import pandas as pd
from fastcore.foundation import L
from sklearn.cluster import KMeans

As usual, we start by reading some data from local disk. We will pick a particular event from the `tracking/event` mapping and select the associated frame:

In [None]:
## locations of the local input files (relative to this notebook)
tracking_event_path = "../data/tracking_event_mapping.csv"
tracking_path = "../data/tracking_df.csv"
lineup_path = "../data/lineup.csv"

## load the event -> frame mapping, the tracking frames (indexed by frameId) and the lineup
tracking_event_mapping = pd.read_csv(tracking_event_path, low_memory=False)
tracking_df = pd.read_csv(tracking_path, low_memory=False).set_index("frameId")
lineup = pd.read_csv(lineup_path)

## pick the frame-id associated with a randomly picked event; the fixed seed makes
## "Restart & Run All" select the same event (and hence the same frame) every time
RANDOM_SEED = 42
frame_id = tracking_event_mapping.sample(n=1, random_state=RANDOM_SEED).frameId.values[0]

## select the frame
frame = tracking_df.loc[frame_id]

## `PressureLines` class

The `PressureLines` class takes care of preparing the required inputs for the clustering algorithm, collecting the algorithm's results, and presenting them in a user-friendly way:

In [None]:
#| export


class PressureLines:
    """Prepare the clustering inputs used to compute pressure lines for one frame.

    Parameters
    ----------
    frame : pandas.Series
        One tracking frame. Its `name` is used as the frame id and its index is
        expected to hold `<side>_player_<n>_player_id`, `<side>_player_<n>_x`,
        `<side>_player_<n>_y`, `ball_x` and `ball_y` labels.
    events_to_frame : pandas.DataFrame
        Event/frame mapping with (at least) `frameId` and `teamId` columns.
    lineup : pandas.DataFrame
        Lineup with (at least) `position`, `playerId`, `teamId` and `side` columns.
    clustering_algo : callable
        Factory of the clustering estimator; it is called with `*args, **kwars`.

    Raises
    ------
    IndexError
        If no event is mapped to the frame, or the team in possession is
        missing from the lineup.
    """

    def __init__(self, frame, events_to_frame, lineup, clustering_algo, *args, **kwars):
        self.frame, self.lineup, self.events_to_frame = frame, lineup, events_to_frame
        self.algo = clustering_algo(*args, **kwars)
        self.gk_ids = lineup[lineup.position == "GK"].playerId.tolist()
        self.frame_id = frame.name

        # Team of the (first) event mapped to this frame.
        self.possession_team_id = self._first_value(
            events_to_frame.loc[events_to_frame.frameId == self.frame_id, "teamId"],
            f"no event is mapped to frame {self.frame_id!r}",
        )
        # Side taken from the first lineup row of that team.
        self.possession_side = self._first_value(
            self.lineup.loc[self.lineup.teamId == self.possession_team_id, "side"],
            f"team {self.possession_team_id!r} is not in the lineup",
        )

    @staticmethod
    def _first_value(selection, reason):
        "Return the first value of `selection`, raising a descriptive `IndexError` when it is empty"
        values = selection.values
        if len(values) == 0:
            # Same exception type as a bare `.values[0]`, but with a message that explains why.
            raise IndexError(reason)
        return values[0]

    def _prepare_inputs(self):
        "Prepare necessary inputs to pass to the clustering algorithm (stored in `self.inputs`)"
        suffix = "_player_id"
        records = []
        for pid_col in self.frame.filter(like=suffix).index.tolist():
            # e.g. "away_player_11_player_id" -> "away_player_11"
            player_str = pid_col.removesuffix(suffix)
            jersey = player_str.removeprefix("away_player_").removeprefix("home_player_")
            records.append(
                {
                    "playerId": self.frame[pid_col],
                    "playerStr": player_str,
                    "jerseyNumber": int(jersey),
                    "side": "home" if pid_col.startswith("home") else "away",
                    "x": self.frame[player_str + "_x"],
                    "y": self.frame[player_str + "_y"],
                }
            )

        # The ball is appended as one extra row with jersey number 0.
        records.append(
            {
                "playerId": "ball",
                "playerStr": "ball",
                "jerseyNumber": 0,
                "side": "ball",
                "x": self.frame["ball_x"],
                "y": self.frame["ball_y"],
            }
        )

        # Rows with any missing value are discarded.
        self.inputs = pd.DataFrame(records).dropna(axis=0)

    def fit(self):
        "Run the clustering algorithm and prepare the output"
        # Placeholder: not implemented yet.
        pass

    def plot(self):
        "Plot the resulting clusters on a football pitch"
        # Placeholder: not implemented yet.
        pass

In [None]:
# Build the feature object for the selected frame, using a 3-cluster KMeans as
# the clustering algorithm, then assemble the table exposed as `press.inputs`.
press = PressureLines(
    frame=frame,
    events_to_frame=tracking_event_mapping,
    lineup=lineup,
    clustering_algo=KMeans,
    n_clusters=3,
)
press._prepare_inputs()

In [None]:
# One row per player (plus the ball) that has no missing value: id, label, jersey number, side, x, y.
press.inputs

Unnamed: 0,playerId,playerStr,jerseyNumber,side,x,y
0,3536a7999c8445b5e976a97ac29ddaeb67f8617ff3b8d7...,away_player_11,11,away,28.84,32.945507
1,959d1756675b93dfda464e3c7c5edf58f038f0608006b4...,away_player_12,12,away,34.58,27.249275
2,957e96878335293e1b2d97a906ccf182c725390214158f...,away_player_15,15,away,55.4,25.031884
3,6789255fa015dd9f081e05d224552ca72e61afd17309ed...,away_player_19,19,away,30.39,12.88058
4,d8db35953e75ba6c25c946c1fadea2dca90114fccbe767...,away_player_1,1,away,82.49,32.906087
6,fb80b18384aa51fa727536f3f44101528774e988c3aee3...,away_player_24,24,away,37.85,55.661449
8,d852020e305114abbdef2c6680626fd9743629a930f194...,away_player_29,29,away,31.62,52.074203
9,4fdd6d44f27fccd6926b99b66b96ac31acb58d78aa3c5a...,away_player_3,3,away,44.79,9.608696
10,27a873ab8064d003025d1809775a404ea6d66f3ce24ea8...,away_player_4,4,away,54.87,43.037101
11,a093f6b55254780919cddeec4b607b4b72b97497f7acfa...,away_player_5,5,away,46.23,27.751884


In [None]:
#| hide
from nbdev import nbdev_export
# Run nbdev's export step (presumably writes the `#| export` cells to the module named by `default_exp` — see nbdev docs).
nbdev_export()