In [None]:
#| default_exp  pitch_control

In [None]:
%load_ext autoreload
%autoreload 2
from IPython.core.debugger import set_trace

<center><h1>Pitch Control</h1></center>

In this module, we will compute the pitch control feature as described in <a href="https://www.researchgate.net/publication/327139841_Beyond_Expected_Goals">Spearman's paper</a>.

We will reuse some code from <a href="https://github.com/Friends-of-Tracking-Data-FoTD/LaurieOnTracking/blob/master/Tutorial3_PitchControl.py">Laurie's Tutorial 3</a> and rewrite it in a more optimized, vectorized form. You can also check [this tutorial](https://www.youtube.com/watch?v=5X1cSehLg6s&feature=youtu.be&ab_channel=FriendsofTracking) for more information.

In [None]:
#| export

from pathlib import Path
from typing import Any, Callable, Optional, Tuple

import numpy as np
import pandas as pd
from fastcore.foundation import L

In [None]:
import sys
# Print arrays in full (no "..." summarization) and format floats in fixed-point notation.
np.set_printoptions(threshold=sys.maxsize)
np.set_printoptions(formatter={'float_kind':'{:f}'.format})

As usual, we start by reading some data from local disk. We will pick a particular event from the `tracking/event` mapping and select the associated frame:

In [None]:
# Folder holding the CSV inputs (relative path).
data_path = Path("../data")

# Event-to-frame mapping, and the tracking table indexed by frameId.
tracking_event_mapping = pd.read_csv(data_path / "tracking_event_mapping.csv", low_memory=False)
tracking_df = pd.read_csv(data_path / "tracking_vel_df.csv", low_memory=False).set_index("frameId")

## pick the frame-id associated with a randomly picked event
# NOTE(review): sample() is called without random_state, so a different event is drawn on every run.
frame_id = tracking_event_mapping.sample(n=1).frameId.values[0]

## select the frame
frame = tracking_df.loc[frame_id]
# Line-up table; its position, playerId, teamId and side columns are read by the inputs class below.
lineup = pd.read_csv(data_path / "lineup.csv")

In [None]:
# Replace the randomly selected frame with a fixed frame id (presumably to keep the outputs below stable).
frame = tracking_df.loc[135036]

## Prepare the inputs

In [None]:
class inputs_:
    """Collect the pitch control inputs for a single tracking frame.

    Parameters
    ----------
    frame : pandas.Series
        One row of the tracking table. Its ``name`` is the frame id and it is
        read for ``ball_x``/``ball_y`` and, for every ``<player>_player_id``
        entry, the matching ``<player>_x``, ``_y``, ``_vx`` and ``_vy`` entries.
    events_to_frame : pandas.DataFrame
        Mapping with ``frameId`` and ``teamId`` columns; the row matching the
        frame id gives the team in possession.
    lineup : pandas.DataFrame
        Table with ``playerId``, ``teamId``, ``side`` and ``position`` columns.

    Call ``_prepare_inputs()`` after construction to build ``inputs``, ``att``,
    ``defe``, ``att_gk_id``, ``def_gk_id``, ``att_tracking`` and ``def_tracking``.
    """

    # Column order of the per-player table built by `_prepare_inputs`.
    _columns = ["playerId", "playerStr", "jerseyNumber", "side", "x", "y", "vx", "vy"]

    def __init__(self, frame, events_to_frame, lineup):
        self.frame, self.lineup, self.events_to_frame = frame, lineup, events_to_frame
        self.frame_id = frame.name
        self.gk_ids = lineup[lineup.position == "GK"].playerId.tolist()
        self.possession_team_id = events_to_frame.loc[
            events_to_frame.frameId == self.frame_id, "teamId"
        ].values[0]
        self.possession_side = self.lineup.loc[
            self.lineup.teamId == self.possession_team_id, "side"
        ].values[0]
        # startswith() is the same test as re.match("home", ...) and does not
        # depend on the `re` module, which this notebook never imports.
        self.opponent_side = "away" if self.possession_side.startswith("home") else "home"
        self.ball = [self.frame["ball_x"], self.frame["ball_y"]]

    def _prepare_inputs(self):
        """Build the per-player table and the per-team tracking arrays."""
        _player_id_cols = self.frame.filter(like="_player_id").index.tolist()

        records = []
        for pid_col in _player_id_cols:
            # e.g. "home_player_7_player_id" -> "home_player_7"
            player_str = pid_col.removesuffix("_player_id")
            records.append(
                {
                    "playerId": self.frame[pid_col],
                    "playerStr": player_str,
                    "jerseyNumber": int(
                        player_str.removeprefix("away_player_").removeprefix("home_player_")
                    ),
                    "side": "home" if pid_col.startswith("home") else "away",
                    "x": self.frame[player_str + "_x"],
                    "y": self.frame[player_str + "_y"],
                    "vx": self.frame[player_str + "_vx"],
                    "vy": self.frame[player_str + "_vy"],
                }
            )

        # Explicit columns keep the frame usable even when no player is found;
        # dropna removes players with any missing value in this frame.
        self.inputs = pd.DataFrame(records, columns=self._columns).dropna(axis=0)
        self.att = self.inputs[self.inputs["side"] == self.possession_side]
        self.defe = self.inputs[self.inputs["side"] == self.opponent_side]

        # `value in series` tests the index labels, not the values, so the
        # membership test is done against the actual player ids instead.
        att_ids = set(self.att["playerId"].tolist())
        def_ids = set(self.defe["playerId"].tolist())
        self.att_gk_id = self.gk_ids[0] if self.gk_ids[0] in att_ids else self.gk_ids[1]
        self.def_gk_id = self.gk_ids[0] if self.gk_ids[0] in def_ids else self.gk_ids[1]

        # One row per player, columns x, y, vx, vy.
        self.att_tracking = self.att[["x", "y", "vx", "vy"]].to_numpy()
        self.def_tracking = self.defe[["x", "y", "vx", "vy"]].to_numpy()
        


In [None]:
# Build the model inputs for the selected frame.
inputs = inputs_(frame, tracking_event_mapping, lineup)
inputs._prepare_inputs()
# Per-team arrays with one row per player and columns x, y, vx, vy.
att_tracking = inputs.att_tracking
def_tracking = inputs.def_tracking

## Model parameters

We define the model parameters as described below.

In [None]:
def default_model_params(time_to_control_veto=3):
    '''Return the key parameters for the model, as described in Spearman 2018.

    Parameters
    ----------
    time_to_control_veto : float
        Scales the "sufficient head start" thresholds `time_to_control_att`
        and `time_to_control_def`.

    Returns
    -------
    dict
        Model parameters, numerical integration settings and the two
        short-cut thresholds.
    '''
    params = {}
    # model parameters
    params['max_player_accel'] = 7. # maximum player acceleration m/s/s, not used in this implementation
    params['max_player_speed'] = 5. # maximum player speed m/s
    params['reaction_time'] = 0.7 # seconds, time taken for player to react and change trajectory. Roughly determined as vmax/amax
    params['tti_sigma'] = 0.45 # Standard deviation of sigmoid function in Spearman 2018 ('s') that determines uncertainty in player arrival time
    params['kappa_def'] =  1. # kappa parameter in Spearman 2018 (=1.72 in the paper) that gives the advantage defending players to control ball, I have set to 1 so that home & away players have same ball control probability
    params['lambda_att'] = 4.3 # ball control parameter for attacking team
    params['lambda_def'] = 4.3 * params['kappa_def'] # ball control parameter for defending team
    params['lambda_gk'] = params['lambda_def']*3.0 # make goal keepers much quicker to control ball (because they can catch it)
    params['average_ball_speed'] = 15. # average ball travel speed in m/s
    # numerical parameters for model evaluation
    params['int_dt'] = 0.04 # integration timestep (dt)
    params['max_int_time'] = 10 # upper limit on integral time
    params['model_converge_tol'] = 0.01 # assume convergence when PPCF>0.99 at a given location.
    # The following are 'short-cut' parameters. We do not need to calculate PPCF explicitly when a player has a sufficient head start.
    # A sufficient head start is when a player arrives at the target location at least 'time_to_control' seconds before the next player
    head_start_scale = time_to_control_veto*np.log(10)
    sigmoid_width = np.sqrt(3)*params['tti_sigma']/np.pi
    params['time_to_control_att'] = head_start_scale * (sigmoid_width + 1/params['lambda_att'])
    # the defending threshold is read by solve_eq and calculate_pitch_control, so it must be defined as well
    params['time_to_control_def'] = head_start_scale * (sigmoid_width + 1/params['lambda_def'])

    return params

In [None]:
# Notebook-level parameter dict; later cells read it directly and as a default argument.
params = default_model_params()

This function creates the integration-time array for a single target position.

In [None]:
def arange(ball_travel_time,step=params['int_dt'],end=params['max_int_time']):
    """Return the integration times for one target position.

    The times run from one `step` before `ball_travel_time` up to (but not
    including) `ball_travel_time + end`, spaced by `step`. The defaults are
    taken from the notebook-level `params` when the function is defined.
    """
    first_time = ball_travel_time - step
    stop_time = ball_travel_time + end
    return np.arange(first_time, stop_time, step)

We initialize this for test purposes.

In [None]:
# array of  target position 'shape 7140,2'
vector_target = np.array([[i, j] for i in range(105) for j in range(68)])

#array of ball_travel_time for all targets 'shape 7140,1'
ball_travel_time_arr = np.linalg.norm( vector_target - inputs.ball ,axis = 1)/params['average_ball_speed']


dT_array = np.array(list(map(arange,ball_travel_time_arr)),dtype=object)
min_col_dtarray = min(i.shape for i in dT_array)[0]
dT_array = np.array([np.resize(array,(min_col_dtarray)) for array in dT_array])

## Simple time to intercept
First we have to compute the `reaction location` after the player continues moving from current position at current velocity for `reaction_time` seconds and then we compute the `time to intercept` after the player runs at `full speed` to the `target position`.

### Reaction location
$\vec{r} = \vec{r}_{j}(t) + \vec{v}_{j}(t) rt. $

$ \vec{r}_{j}$ = Starting position of the player,  $ \vec{v}_{j}$ = Current velocity of the player,  $ rt$  = reaction time in seconds.

### Time to intercept
$t_{exp}(t, \vec{r}) = t_{rea} + \frac{\lVert t - \vec{r} \rVert}{s}.$

$t$ = Target location, $\vec{r}$ = Reaction location, $t_{rea}$ = Reaction time in seconds, $s$ = Max player speed.

In [None]:
def reaction_location(team,params):
    """Project every player forward at constant velocity for the reaction time.

    `team` is indexed as a 2-D array whose columns 0-1 hold the position and
    columns 2-3 the velocity; `params['reaction_time']` is the elapsed time.
    Returns one (x, y) row per player.
    """
    positions = team[:,[0,1]]
    velocities = team[:,[2,3]]
    displacement = velocities * params['reaction_time']
    return positions + displacement

We compute the reaction location for all players. The final shape is `(11, 2)`.

In [None]:
# Reaction locations of the attacking and defending players.
r_att = reaction_location(att_tracking,params)
r_def = reaction_location(def_tracking,params)

In [None]:
# Display the attacking team's reaction locations, one (x, y) row per player.
r_att

array([[53.430000, 52.842899],
       [58.810000, 41.755942],
       [90.220000, 36.926957],
       [59.600000, 22.351304],
       [66.410000, 51.039420],
       [69.800000, 56.873623],
       [68.940000, 37.439420],
       [57.780000, 47.087536],
       [39.760000, 24.381449],
       [55.190000, 46.713043],
       [39.290000, 42.731594]])

In [None]:
def simple_time_to_intercept(vector_target,r,params):
    """Time for every player to reach every target position.

    Each player spends `params['reaction_time']` reacting and then covers the
    straight-line distance from the reaction location to the target at
    `params['max_player_speed']`.

    Parameters
    ----------
    vector_target : array-like, shape (n_targets, 2)
        Target positions.
    r : array-like, shape (n_players, 2)
        Reaction location of each player.
    params : dict
        Must provide 'reaction_time' and 'max_player_speed'.

    Returns
    -------
    numpy.ndarray, shape (n_targets, n_players)
        Arrival time of each player at each target.
    """
    targets = np.asarray(vector_target, dtype=float)
    # reshape keeps an empty player list usable: it becomes shape (0, 2)
    reaction_points = np.asarray(r, dtype=float).reshape(-1, targets.shape[-1])

    # Broadcast (n_targets, 1, 2) against (1, n_players, 2) so all distances are
    # computed in one pass instead of growing the result column by column.
    offsets = targets[:, np.newaxis, :] - reaction_points[np.newaxis, :, :]
    distances = np.linalg.norm(offsets, axis=2)

    return params['reaction_time'] + distances / params['max_player_speed']

We compute the time to intercept for each player in the team and each target position. The final shape is `(7140, 11)`.

In [None]:
# Arrival times of every attacking / defending player at every grid target.
att_sti_array = simple_time_to_intercept(vector_target,r_att,params)
def_sti_array = simple_time_to_intercept(vector_target,r_def,params)

time to intercept for the target position`[0,0]` :

In [None]:
# Row 0 belongs to the first grid target, [0, 0]; one entry per defending player.
def_sti_array[0]

array([19.196665, 12.638986, 16.549182, 8.481077, 10.075708, 15.154069,
       15.338226, 10.781220, 12.805268, 13.217456, 16.210209])

We initialize this for test purposes.

In [None]:
# Earliest arrival time per target for each team, turned into a column vector (one row per target).
tau_min_att_ = np.nanmin(att_sti_array,axis=1)[np.newaxis].T
tau_min_def_ = np.nanmin(def_sti_array,axis=1)[np.newaxis].T

## probability intercept ball
The probability that a player will be able to intercept the ball at a given location on the pitch within
some time, T.

$ {f}_{j}(t,\vec{r},T|s)= [1 + {e^{-{\pi \frac{T-{t}_{exp}(t , \vec{r})}{\sqrt 3 s}}} } ]^{-1}$

$ {t}_{exp}(t , \vec{r}) $ = time to intercept, $\vec{r}$ = reaction location, $T$ = time in seconds, $s$ = standard deviation of the arrival-time uncertainty (`tti_sigma`).

In [None]:
def probability_intercept_ball(simple_time_to_intercept_array,dt_array,params=params):
    """Logistic probability that a player has reached the target by each time.

    `dt_array` is indexed as (target, time) and `simple_time_to_intercept_array`
    as (target, player); the result is indexed as (target, time, player). The
    width of the logistic curve is set by `params["tti_sigma"]`.
    """
    steepness = np.pi/np.sqrt(3.0)/params["tti_sigma"]
    # time left over after the expected arrival, broadcast to (target, time, player)
    time_margin = dt_array[:,:, np.newaxis] - simple_time_to_intercept_array[:, np.newaxis]
    return 1/(1. + np.exp(-steepness * time_margin))

We compute the probability of intercepting the ball for each player, each dt and each target. The final shape is `(7140, 251, 11)`.

In [None]:
# Interception probabilities of the attacking players over the integration times built above.
p_att = probability_intercept_ball(att_sti_array,dT_array,params=params)

The probability of intercepting the ball for the attacking players at the first target `[0,0]` for the first dt.

In [None]:
# First target, first time step: one probability per attacking player.
p_att[0][0] 

array([0.000000, 0.000000, 0.000000, 0.000000, 0.000000, 0.000000,
       0.000000, 0.000000, 0.000000, 0.000000, 0.000000])

⚠️ All calculations above are approved.

## Calculate pitch control
Calculates the pitch control probability for the attacking and defending teams at a specified target position on the ball.

In [None]:
def solve_eq(att_sti,def_sti,ball_travel_time,tau_min_att,tau_min_def,params):
    """Integrate equation 3 of Spearman et al. for many target positions at once.

    Parameters
    ----------
    att_sti, def_sti : array-like, shape (n_targets, n_players)
        Time to intercept of every attacking / defending player at every target.
    ball_travel_time : array-like, shape (n_targets,)
        Time the ball needs to reach each target.
    tau_min_att, tau_min_def : array-like, shape (n_targets,) or (n_targets, 1)
        Earliest arrival time of each team at each target.
    params : dict
        Model parameters; reads 'time_to_control_att', 'time_to_control_def',
        'int_dt', 'max_int_time', 'lambda_att' and 'lambda_def'.

    Returns
    -------
    att_sti, def_sti : numpy.ndarray
        The input times with players that are too far away (in time) set to
        infinity, so that they never contribute.
    PPCFatt, PPCFdef : numpy.ndarray, shape (n_targets, n_steps)
        Cumulative control probability of each team at every integration step;
        the last column is the value at the end of the integration window.
    """
    att_sti = np.asarray(att_sti, dtype=float)
    def_sti = np.asarray(def_sti, dtype=float)
    tau_min_att = np.asarray(tau_min_att, dtype=float).reshape(-1, 1)
    tau_min_def = np.asarray(tau_min_def, dtype=float).reshape(-1, 1)
    ball_travel_time = np.asarray(ball_travel_time, dtype=float).reshape(-1, 1)

    # Exclude any player that is far (in time) from the target. An infinite
    # arrival time gives an interception probability of exactly zero, whereas a
    # time of zero would make the excluded player look like the first to arrive.
    att_sti = np.where(att_sti - tau_min_att < params['time_to_control_att'], att_sti, np.inf)
    def_sti = np.where(def_sti - tau_min_def < params['time_to_control_def'], def_sti, np.inf)

    # Integration times for all targets, shape (n_targets, n_steps): each row
    # starts one step before the ball arrives and spans 'max_int_time' seconds.
    dt = params['int_dt']
    n_steps = int(round(params['max_int_time'] / dt)) + 1
    dT_array = ball_travel_time - dt + dt * np.arange(n_steps)

    # probability to intercept the ball * lambda, shape (n_targets, n_steps, n_players)
    p_att_lambda = probability_intercept_ball(att_sti, dT_array, params=params) * params['lambda_att']
    p_def_lambda = probability_intercept_ball(def_sti, dT_array, params=params) * params['lambda_def']

    # cumulative control probability per team, shape (n_targets, n_steps)
    n_targets = dT_array.shape[0]
    PPCFatt = np.zeros((n_targets, n_steps))
    PPCFdef = np.zeros((n_targets, n_steps))

    # cumulative control probability per player, shape (n_targets, n_players)
    PPCFatt_players = np.zeros(att_sti.shape)
    PPCFdef_players = np.zeros(def_sti.shape)

    for i in range(1, n_steps):
        # Probability that nobody controls the ball yet; clipped at zero so an
        # overshoot of the explicit integration cannot produce negative increments.
        uncontrolled = np.clip(1 - PPCFatt[:, i-1] - PPCFdef[:, i-1], 0.0, None)[:, np.newaxis]

        # Only the slice of the current time step is needed for the update.
        PPCFatt_players += uncontrolled * p_att_lambda[:, i, :] * dt
        PPCFdef_players += uncontrolled * p_def_lambda[:, i, :] * dt

        PPCFatt[:, i] = PPCFatt_players.sum(axis=1)
        PPCFdef[:, i] = PPCFdef_players.sum(axis=1)

    return att_sti,def_sti,PPCFatt,PPCFdef

In [None]:
# Run the integration for every grid target using the arrays prepared above.
att_sti,def_sti,PPCFatt,PPCFdef = solve_eq(att_sti_array,def_sti_array,ball_travel_time_arr,tau_min_att_,tau_min_def_,params)

⚠️ Replacing the removed players' time to intercept with zeros gives wrong pitch control probabilities: once `probability_intercept_ball` is applied, those zero times no longer yield zero probabilities, so the removed players still contribute.

In [None]:
def calculate_pitch_control(target_vector,r_att,r_def, ball_start_pos, params):
    """Pitch control probability of both teams at every target position.

    Parameters
    ----------
    target_vector : array-like, shape (n_targets, 2)
        Target positions.
    r_att, r_def : array-like, shape (n_players, 2)
        Reaction locations of the attacking / defending players.
    ball_start_pos : array-like, shape (2,)
        Current ball position.
    params : dict
        Model parameters; reads 'average_ball_speed', 'time_to_control_att'
        and 'time_to_control_def' here, plus whatever the helpers read.

    Returns
    -------
    numpy.ndarray, shape (n_targets, 2)
        Column 0 is the attacking team's control probability, column 1 the
        defending team's.
    """
    target_vector = np.asarray(target_vector, dtype=float)
    ball_start_pos = np.asarray(ball_start_pos, dtype=float)
    n_targets = len(target_vector)

    # time to intercept for both teams, shape (n_targets, n_players)
    att_sti_arr = simple_time_to_intercept(target_vector,r_att,params)
    def_sti_arr = simple_time_to_intercept(target_vector,r_def,params)

    # ball travel time is the distance from the current ball position to the
    # target divided by the assumed average ball speed, shape (n_targets,)
    if np.any(np.isnan(ball_start_pos)):
        # no usable ball position: assume the ball is already at the target
        ball_travel_time_arr = np.zeros(n_targets)
    else:
        ball_travel_time_arr = np.linalg.norm(target_vector - ball_start_pos, axis=1)/params['average_ball_speed']

    # arrival time of the 'nearest' player of each team (nearest also depends
    # on current velocity), shape (n_targets,)
    tau_min_att_arr = np.nanmin(att_sti_arr,axis=1)
    tau_min_def_arr = np.nanmin(def_sti_arr,axis=1)

    # if the defending team can arrive significantly before the attacking team,
    # there is no need to solve the pitch control model
    def_first = (
        tau_min_att_arr - np.fmax(ball_travel_time_arr, tau_min_def_arr)
        >= params['time_to_control_def']
    )
    # same short-cut for the attacking team (checked second, as an 'elif')
    att_first = ~def_first & (
        tau_min_def_arr - np.fmax(ball_travel_time_arr, tau_min_att_arr)
        >= params['time_to_control_att']
    )
    contested = ~(def_first | att_first)

    ppcf_array = np.zeros((n_targets, 2))
    ppcf_array[def_first] = [0., 1.]
    ppcf_array[att_first] = [1., 0.]

    # everywhere else we need to solve equation 3
    if contested.any():
        _, _, PPCFatt, PPCFdef = solve_eq(
            att_sti_arr[contested],
            def_sti_arr[contested],
            ball_travel_time_arr[contested],
            tau_min_att_arr[contested][:, np.newaxis],
            tau_min_def_arr[contested][:, np.newaxis],
            params,
        )
        # the last column holds the value at the end of the integration window
        ppcf_array[contested, 0] = PPCFatt[:, -1]
        ppcf_array[contested, 1] = PPCFdef[:, -1]

    return ppcf_array
    

## generate pitch control

In [None]:
def generate_pitch_control_for_event(inputs, params, field_dimen = (105,68,)):
    """Evaluate the pitch control model on a grid of target positions.

    `inputs` supplies the per-team tracking arrays (`att_tracking`,
    `def_tracking`) and the ball position (`ball`). The grid has one target
    per integer (x, y) pair with x in range(field_dimen[0]) and y in
    range(field_dimen[1]). Returns whatever `calculate_pitch_control`
    returns for those targets.
    """
    # reaction locations of both teams
    att_reaction = reaction_location(inputs.att_tracking, params)
    def_reaction = reaction_location(inputs.def_tracking, params)

    ball_xy = np.array(inputs.ball)

    # break the pitch down into a grid, x varying slowest
    n_x, n_y = field_dimen[0], field_dimen[1]
    grid_targets = np.array([[gx, gy] for gx in range(n_x) for gy in range(n_y)])

    # pitch control model at each location on the pitch
    return calculate_pitch_control(grid_targets, att_reaction, def_reaction, ball_xy, params)

In [None]:
# NOTE(review): the call is wrapped in a string literal, so the function is not executed in this cell.
'''generate_pitch_control_for_event(inputs, params)'''

In [None]:
#| hide

import nbdev

# Write the exported cells to the library module.
# NOTE(review): in the cells visible here only the imports cell carries `#| export` — confirm the functions are meant to stay notebook-only.
nbdev.nbdev_export()