Run the following notebook before running this one:
- 1-load-and-convert-statsbomb-data.ipynb

In [1]:
%load_ext autoreload
%autoreload 2
import os;

import tqdm
import pandas as pd
import numpy as np
import socceraction.vaep.features as fs
import socceraction.xpoints as xpoints
import socceraction.xthreat as xthreat

In [2]:
## File and folder locations; the data is stored in SPADL format.
datafolder = "../data-fifa"
# Both HDF5 stores live directly inside the data folder.
spadl_h5, xT_h5 = (
    os.path.join(datafolder, filename)
    for filename in ("spadl-statsbomb.h5", "xT.h5")
)

In [3]:
## Select games
# Keep only the games whose competition name is "FIFA World Cup".
games = pd.read_hdf(spadl_h5, "games").loc[
    lambda df: df.competition_name == "FIFA World Cup"
]
print("nb of games:", len(games))

# Lookup tables read from the same SPADL store, one HDF key per table.
actiontypes, bodyparts, results, players = (
    pd.read_hdf(spadl_h5, key)
    for key in ("actiontypes", "bodyparts", "results", "players")
)

nb of games: 64


In [4]:
## Read in all actions of games
A = []

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")
    # Left-join each lookup table in turn (action types, results, body parts).
    for lookup in (actiontypes, results, bodyparts):
        actions = actions.merge(lookup, how="left")
    actions = actions.reset_index(drop=True)
    # play_left_to_right takes and returns a list of frames; unpack the single one.
    [actions] = fs.play_left_to_right([actions], game.home_team_id)
    A.append(actions)

# Stack the per-game frames into one frame (each game's own index is kept).
A = pd.concat(A)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:01<00:00, 38.80it/s]


In [6]:
## Fit the expected-points (xP) model
# `xpoints` is imported in the notebook's top import cell, so that every
# dependency is declared in one place and a fresh Restart & Run All fails
# early if the module is missing.
xPModel = xpoints.ExpectedPoints()
xPModel.fit(A, games)
# Display the table stored on the fitted model's `xpoints` attribute.
xPModel.xpoints

array([[0.5       , 1.        , 1.22222222, 2.10526316, 3.        ],
       [0.5       , 0.85714286, 1.24390244, 2.        , 3.        ],
       [0.6       , 0.83333333, 1.24285714, 1.875     , 2.8       ],
       [0.81818182, 0.25925926, 1.26923077, 2.18518519, 2.54545455],
       [0.47368421, 0.20833333, 1.30952381, 2.08333333, 2.63157895],
       [0.3       , 0.3       , 1.25      , 2.1       , 2.8       ],
       [0.3       , 0.32258065, 1.23076923, 2.09677419, 2.8       ],
       [0.3       , 0.32258065, 1.19230769, 2.12903226, 2.8       ]])

In [7]:
## Train model
# Grid parameters handed to ExpectedThreat.
# NOTE(review): presumably the number of grid cells along the pitch length (l)
# and width (w) -- confirm against the socceraction documentation.
xt_grid = {"l": 16, "w": 12}
xTModel = xthreat.ExpectedThreat(**xt_grid)
xTModel.fit(A)

# iterations:  45


<socceraction.xthreat.ExpectedThreat at 0x1df0fae7108>

In [None]:
## Predict

# xT should only be used to value actions that move the ball
# and also keep the current team in possession of the ball.
# Take an explicit copy: the "xT_value" column is added below, and writing to
# what may be a slice of A would trigger pandas' SettingWithCopyWarning.
mov_actions = xthreat.get_successful_move_actions(A).copy()

# xP (expected points) if a team would score at
# the current score and time of the action
diff = xpoints.get_diff(mov_actions, A)
xP = xPModel.predict(mov_actions, diff, diff + 1)

mov_actions["xT_value"] = xTModel.predict(mov_actions, xP=xP)

# Total xT per player, highest first.
players_xT = (
    mov_actions.groupby("player_id")
    .agg({"xT_value": "sum"})
    .sort_values("xT_value", ascending=False)
    .reset_index()
)

# Join on the player id only, against a single name per player, so that
# repeated player rows in `players` (if any) cannot duplicate ranking entries.
player_names = players[["player_id", "player_name"]].drop_duplicates("player_id")

# Top 10 players by summed xT.
(
    players_xT.merge(player_names, on="player_id", how="left")
    .loc[:, ["player_name", "xT_value"]]
    .head(10)
)

 20%|████████████████▋                                                                 | 13/64 [01:10<04:23,  5.17s/it]

In [229]:
# Distinct xP values and the number of times each one occurs.
xP_values, xP_counts = np.unique(xP, return_counts=True)
(xP_values, xP_counts)

(array([-0.55892256, -0.26535088,  0.        ,  0.02258065,  0.22222222,
         0.23333333,  0.35714286,  0.36026936,  0.38675958,  0.40952381,
         0.5       ,  0.54824561,  0.63214286,  0.67096774,  0.7       ,
         0.70322581,  0.75609756,  0.77380952,  0.85      ,  0.86600496,
         0.86972705,  0.88304094,  0.89473684,  0.90818859,  0.91595442,
         0.925     ,  0.93672457,  0.95      ,  1.        ,  1.00997151,
         1.10119048]),
 array([ 647,  597, 8483,  107, 2860,  833,  658, 2216, 3747, 3205,  538,
        2118, 9293,   33, 2035,  149, 8295, 8535, 9967,  760,  111, 9662,
        2240,  161, 8448, 2686,  695, 2855, 3097, 2909, 2445], dtype=int64))