In [1]:
#This notebook runs an example of how to use xthreat from the socceraction module.
#The source code for xthreat is here:
#https://github.com/ML-KULeuven/socceraction/blob/master/socceraction/xthreat.py

# A very good resource for our efforts to replicate the model using hockey data

#again, just stealing code from: https://github.com/ML-KULeuven/socceraction/blob/master/public-notebooks/EXTRA-run-xT.ipynb

# easy to install socceraction module using pip install socceraction --user
# make sure to upgrade pandas to the latest version or else code won't work
# pip install --upgrade pandas

#libraries
import os
import tqdm
import pandas as pd
import numpy as np
import socceraction.vaep.features as fs
import socceraction.xthreat as xthreat

In [9]:
## Configure file and folder names; the input store must be in SPADL format.
datafolder = "data-fifa"
spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")
xT_h5 = os.path.join(datafolder, "xT.h5")  # path only; nothing in this cell reads or writes it

## Select games
# Only games from one competition are kept.
# NOTE(review): 43 is presumably the FIFA World Cup id (the folder is "data-fifa") - confirm.
COMPETITION_ID = 43
games = pd.read_hdf(spadl_h5, "games")
games = games[games.competition_id == COMPETITION_ID]
print("nb of games:", len(games))

# Lookup tables that are joined onto every game's actions below.
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")

## Read in all actions of the selected games
# One DataFrame per game is collected here and concatenated once at the end.
actions_per_game = []

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")
    # No `on=` is given, so each left merge joins on the column names the two
    # frames have in common (presumably the *_id columns - verify).
    # Rows keep the order in which they are stored; no re-sort is applied.
    actions = (
        actions.merge(actiontypes, how="left")
        .merge(results, how="left")
        .merge(bodyparts, how="left")
        .reset_index(drop=True)
    )
    # NOTE(review): judging by its name, this mirrors coordinates so every team
    # attacks in the same direction - confirm against the socceraction docs.
    [actions] = fs.play_left_to_right([actions], game.home_team_id)

    actions_per_game.append(actions)

if not actions_per_game:
    raise ValueError(
        f"no games found for competition_id {COMPETITION_ID} in {spadl_h5}"
    )

# ignore_index=True gives the combined frame one unique 0..N-1 index; without it
# every game would contribute its own 0..n-1 labels and they would collide.
A = pd.concat(actions_per_game, ignore_index=True)

  6%|█████▏                                                                             | 4/64 [00:00<00:01, 35.17it/s]

nb of games: 64


100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:01<00:00, 38.72it/s]


In [12]:
# Reference for the shape the input data has to take.
# For our own data we would need to add where each play ended up, which is
# essentially the coordinates of the next event (except when the play fails),
# so working out the correct end_x and end_y will take some programming.
# The binary fail/success variables are something we can calculate as well.
A.head(n=10)

Unnamed: 0,game_id,original_event_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,type_id,result_id,bodypart_id,action_id,type_name,result_name,bodypart_name
0,7581,ec5ba260-9bd3-4c5a-b7a5-9f9168ea905d,1,1.0,776,3043,52.941176,33.56962,31.764706,37.012658,0,1,0,0,pass,success,foot
1,7581,abb799d1-73d8-4119-b23f-40f79765f8f8,1,2.0,776,3027,31.764706,37.012658,32.647059,32.708861,21,1,0,1,dribble,success,foot
2,7581,9d3fc438-e85d-49fa-8ff4-21cb3bcc54b1,1,4.0,776,3027,32.647059,32.708861,84.705882,19.797468,0,0,0,2,pass,fail,foot
3,7581,5be94fcb-9612-4f00-82b6-3728f4f1b45c,1,7.0,785,5468,20.294118,48.202532,41.470588,40.455696,0,0,1,3,pass,fail,head
4,7581,0370171b-81a7-49e6-809c-3ade48258b5e,1,9.0,776,5527,63.529412,27.544304,59.117647,27.544304,0,1,1,4,pass,success,head
5,7581,,1,10.0,776,3959,59.117647,27.544304,65.294118,25.822785,21,1,0,5,dribble,success,foot
6,7581,6a5b1b10-3e93-4621-be9f-05820c0c31bb,1,11.0,776,3959,65.294118,25.822785,71.470588,23.240506,0,0,0,6,pass,fail,foot
7,7581,5325dad9-df7b-40e1-bc61-e67e5860fe7c,1,12.0,785,5469,33.529412,44.759494,45.882353,42.177215,0,0,1,7,pass,fail,head
8,7581,dc59bcaf-27f6-493f-9b01-312461b14d40,1,14.0,776,3959,59.117647,25.822785,67.058824,24.962025,0,1,0,8,pass,success,foot
9,7581,,1,14.5,776,3043,67.058824,24.962025,67.058824,21.518987,21,1,0,9,dribble,success,foot


In [28]:
# Fit the xT model. The implementation lives here:
# https://github.com/ML-KULeuven/socceraction/blob/master/socceraction/xthreat.py
## Train model
# l and w appear to set how many areas the pitch is split into, so the grid
# resolution is ours to choose (TODO confirm which argument maps to which axis).
xTModel = xthreat.ExpectedThreat(
    l=16,
    w=12,
)
xTModel.fit(A);

# Fitting took 45 iterations on this data and ran surprisingly fast.

# iterations:  45


In [50]:
# Predict xT for every action and show a small, easily interpretable table.

## Predict

# xT should only be used to value actions that move the ball
# and also keep the current team in possession of the ball
# The helper presumably returns a filtered selection of A (verify); take an
# explicit copy so that adding a column below is unambiguous, cannot trigger
# pandas' SettingWithCopyWarning, and never writes through to A.
mov_actions = xthreat.get_successful_move_actions(A).copy()
mov_actions["xT_value"] = xTModel.predict(mov_actions)

# Editorial notes, printed so they appear alongside the table in the output.
print(
'"xT should only be used to value actions that move the ball \nand also keep the current team in possession of the ball"'
)
print('\nThis statement is exactly my reservation with xT: \nit can only quantify for a series of succesful plays in a row by the same team')

print("\nHowever, we can present it as an extension of XGoals. Should be easy to sell")

# First ten valued actions, restricted to the columns needed to read the result.
mov_actions[
    ["result_name", "team_id", "type_name", "start_x", "start_y", "end_x", "end_y", "xT_value"]
].head(10)


"xT should only be used to value actions that move the ball 
and also keep the current team in possession of the ball"

This statement is exactly my reservation with xT: 
it can only quantify for a series of succesful plays in a row by the same team

However, we can present it as an extension of XGoals. Should be easy to sell


Unnamed: 0,result_name,team_id,type_name,start_x,start_y,end_x,end_y,xT_value
0,success,776,pass,52.941176,33.56962,31.764706,37.012658,-0.00384
1,success,776,dribble,31.764706,37.012658,32.647059,32.708861,-7e-05
4,success,776,pass,63.529412,27.544304,59.117647,27.544304,0.0
5,success,776,dribble,59.117647,27.544304,65.294118,25.822785,0.0
8,success,776,pass,59.117647,25.822785,67.058824,24.962025,0.00158
9,success,776,dribble,67.058824,24.962025,67.058824,21.518987,-0.000517
12,success,776,pass,68.823529,14.632911,77.647059,27.544304,0.003146
13,success,776,dribble,77.647059,27.544304,76.764706,22.379747,-0.000317
17,success,776,dribble,98.823529,34.43038,99.705882,36.151899,0.0
18,success,776,pass,99.705882,36.151899,99.705882,40.455696,-0.102173
