Run the following notebook before running this one:
- 1-load-and-convert-statsbomb-data.ipynb

In [1]:
%load_ext autoreload
%autoreload 2
import os

import numpy as np
import pandas as pd
import tqdm

import socceraction.vaep.features as fs
import socceraction.xpoints as xpoints
import socceraction.xthreat as xthreat
from socceraction.grid import PolarGrid

In [2]:
## Configure file and folder names, use SPADL format.
# `datafolder` is the directory holding the converted data; `spadl_h5` is the
# HDF5 store inside it that the following cells read from.
datafolder = "../data-fifa"
spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")

In [3]:
## Select games
# Keep only the games whose competition name is "FIFA World Cup".
games = pd.read_hdf(spadl_h5, "games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

# Lookup tables stored next to the games in the same HDF5 file.
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")
players = pd.read_hdf(spadl_h5, "players")

# Display name: use the nickname when there is one, otherwise the full name.
# Vectorised on purpose: the previous row-wise `x[0] if x[0] else x[1]` relied on
# positional Series indexing (deprecated in recent pandas) and treated a NaN
# nickname as "present" because NaN is truthy, which produced NaN names.
players["player_name"] = players["player_nickname"].where(
    players["player_nickname"].notna() & players["player_nickname"].ne(""),
    players["player_name"],
)

nb of games: 64


In [4]:
## Read in all actions of games
# For every selected game: read its actions from the store, left-merge the three
# lookup tables on their shared columns, renumber the rows, and pass the frame
# through fs.play_left_to_right together with the game's home team id.
# The per-game frames are then stacked into one frame with a fresh index.
A = []

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")
    for lookup in (actiontypes, results, bodyparts):
        actions = actions.merge(lookup, how="left")
    # Optional ordering, currently disabled:
    # actions = actions.sort_values(["period_id", "time_seconds", "timestamp"])
    actions = actions.reset_index(drop=True)

    [actions] = fs.play_left_to_right([actions], game.home_team_id)
    A.append(actions)

A = pd.concat(A).reset_index(drop=True)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:01<00:00, 35.55it/s]


In [44]:
# Build the score progression from all actions and the selected games and
# display it (one row per score state, as shown in the output below).
# `xpoints` is imported in the import cell at the top of the notebook.
xpoints.ScoreProgression(A, games).score_progression

Unnamed: 0,game_id,home_team_id,away_team_id,period_id,time_seconds,home_score,away_score
0,7525,796,799,1,0.0,0,0
1,7525,796,799,1,692.0,1,0
2,7525,796,799,1,2531.0,2,0
3,7525,796,799,2,1550.0,3,0
4,7525,796,799,2,2746.0,4,0
...,...,...,...,...,...,...,...
228,8658,771,785,1,1673.0,1,1
229,8658,771,785,1,2276.0,2,1
230,8658,771,785,2,813.0,3,1
231,8658,771,785,2,1176.0,4,1


In [162]:
# Fit the expected-points model on all actions of the selected games.
# `xpoints` is imported once in the import cell at the top of the notebook.
xWinModel = xpoints.ExpectedPoints()
xWinModel.fit(A, games)

# Combine the fitted arrays as 3 * xwin + xdraw.
# NOTE(review): presumably expected points (3 per win, 1 per draw) - confirm
# the meaning of `xwin` / `xdraw` and of the array axes against ExpectedPoints.
3 * xWinModel.xwin + xWinModel.xdraw

array([[1.41111111, 2.15789474, 3.        ],
       [1.3902439 , 2.19047619, 3.        ],
       [1.37142857, 2.16666667, 3.        ],
       [1.30769231, 2.66666667, 2.72727273],
       [1.28571429, 2.66666667, 3.        ],
       [1.03571429, 3.        , 3.        ],
       [1.03846154, 2.93548387, 3.        ],
       [1.        , 3.        , 3.        ]])

In [186]:
# Query the fitted expected-points model for a single state.
# NOTE(review): the meaning of the positional arguments (7, 1) is not visible
# in this notebook - confirm against ExpectedPoints.predict.
xWinModel.predict(7, 1)

3.0

In [6]:
## Train model
# Fit an expected-threat model with its default settings on all loaded actions.
# Alternative configuration kept for reference (PolarGrid is imported in the
# import cell at the top of the notebook):
# xTModel = xthreat.ExpectedThreat(grid = PolarGrid(), use_interpolation = False)
xTModel = xthreat.ExpectedThreat()
xTModel.fit(A)

# iterations:  45


<socceraction.xthreat.ExpectedThreat at 0x24c72655c88>

In [28]:
# Split the actions into the subsets the model scores separately.

mov_actions = xthreat.get_move_actions(A)
succ_mov_actions = xthreat.get_successful(mov_actions)
fail_mov_actions = xthreat.get_failed(mov_actions)
def_actions = xthreat.get_defensive_actions(A)
succ_def_actions = xthreat.get_successful(def_actions)

# Score each subset and store the result as a column of A. Rows outside a
# subset end up without a value in that subset's column (see the NaN entries in
# the action tables further down).

xT_cols = ["xT_total", "xT_move_succ", "xT_move_fail", "xT_def_succ"]

predictions = [
    (xTModel.predict, A),
    (xTModel.predict_successful_move_actions, succ_mov_actions),
    (xTModel.predict_failed_move_actions, fail_mov_actions),
    (xTModel.predict_successful_def_actions, succ_def_actions),
]

# Same order as xT_cols: predict, then assign, one column at a time.
for column, (predict, subset) in zip(xT_cols, predictions):
    A[column] = predict(subset)

In [32]:
# Raise the row display limit to 64 (the number of selected games printed
# above) and list the id and team names of each game.
pd.set_option("display.max_rows", 64)
games.loc[:, ["game_id", "home_team_name", "away_team_name"]]

Unnamed: 0,game_id,home_team_name,away_team_name
0,7581,Croatia,Denmark
1,7549,Nigeria,Iceland
2,7555,Poland,Colombia
3,7529,Croatia,Nigeria
4,7548,Brazil,Costa Rica
5,7534,Germany,Mexico
6,7562,Australia,Peru
7,7565,Serbia,Brazil
8,7571,Senegal,Colombia
9,7569,Panama,Tunisia


In [29]:
# Actions of a single game ordered by ascending xT_move_fail (most negative
# first; rows without a value sort last). Shown: the first 20 and last 5 rows.

# Alternative ordering over all games, kept for reference:
# actions_xT = A.sort_values("xT_total", ascending=False).reset_index()

actions_xT = (
    A.loc[A.game_id == 7584]  # Belgium Japan
    .sort_values(by="xT_move_fail", ascending=True)
    .reset_index()
)

cols = [
    "player_name",
    "game_id",
    "period_id",
    "timestamp",
    "type_name",
    "result_name",
    *xT_cols,
]
# Left merge on the shared columns attaches the player names.
ranking = actions_xT.merge(players, how="left")[cols]
top, bottom = ranking.head(20), ranking.tail(5)
pd.concat([top, bottom])

Unnamed: 0,player_name,game_id,period_id,timestamp,type_name,result_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Takashi Inui,7584,1,00:41:34.560,pass,fail,-0.029733,,-0.029733,
1,Yannick Carrasco,7584,1,00:00:50.880,pass,fail,-0.025144,,-0.025144,
2,Genki Haraguchi,7584,1,00:19:34.440,pass,fail,-0.019819,,-0.019819,
3,Toby Alderweireld,7584,2,00:29:45.860,pass,fail,-0.018058,,-0.018058,
4,Gaku Shibasaki,7584,2,00:18:11.060,pass,fail,-0.014637,,-0.014637,
5,Gaku Shibasaki,7584,1,00:23:49.080,pass,fail,-0.013331,,-0.013331,
6,Jan Vertonghen,7584,1,00:00:16.200,pass,fail,-0.012756,,-0.012756,
7,Gen Shōji,7584,1,00:42:47.840,pass,fail,-0.01198,,-0.01198,
8,Romelu Lukaku,7584,1,00:09:10.853,pass,fail,-0.01163,,-0.01163,
9,Makoto Hasebe,7584,1,00:31:44.600,pass,fail,-0.011394,,-0.011394,


In [10]:
# Player ranking: sum every xT column per player and order by xT_total,
# highest first. Shown: the ten highest and the five lowest players.

players_xT = (
    A.groupby(["player_id"])
    .agg(dict.fromkeys(xT_cols, "sum"))
    .sort_values(by="xT_total", ascending=False)
    .reset_index()
)

cols = ["player_name", *xT_cols]
# Left merge on the shared columns attaches the player names.
ranking = players_xT.merge(players, how="left")[cols]
top, bottom = ranking.head(10), ranking.tail(5)
pd.concat([top, bottom])

Unnamed: 0,player_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Neymar,2.337695,2.861442,-0.583221,0.059475
1,Eden Hazard,1.65658,1.979697,-0.557754,0.234637
2,Lionel Messi,1.329859,1.660987,-0.375063,0.043936
3,Toni Kroos,1.272714,1.310106,-0.163964,0.126572
4,Philippe Coutinho,1.242738,1.49743,-0.404728,0.150037
5,Joshua Kimmich,1.12672,1.536462,-0.517976,0.108234
6,Kevin De Bruyne,1.079036,1.566445,-0.701016,0.213607
7,Paul Pogba,1.056786,1.306917,-0.531513,0.281382
8,N'Golo Kanté,0.985389,0.645915,-0.387111,0.726585
9,Thomas Meunier,0.974653,1.10178,-0.399118,0.271991


In [11]:
# Per action type: sum the xT columns per player and keep the five players
# with the highest xT_total. The per-type tables are stacked at the end.

type_names = ["pass", "dribble", "cross", "interception", "tackle"]
cols = ["player_name", "type_name", *xT_cols]

tops = []

for type_name in type_names:
    players_xT = (
        A.loc[A.type_name == type_name]
        .groupby(["player_id", "type_name"])
        .agg(dict.fromkeys(xT_cols, "sum"))
        .sort_values(by="xT_total", ascending=False)
        .reset_index()
    )

    # Left merge on the shared columns attaches the player names.
    ranking = players_xT.merge(players, how="left")[cols]
    tops.append(ranking.head(5))

pd.concat(tops)

Unnamed: 0,player_name,type_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Toni Kroos,pass,0.596821,0.760785,-0.163964,0.0
1,Jérôme Boateng,pass,0.400867,0.558163,-0.157296,0.0
2,Paul Pogba,pass,0.359823,0.87159,-0.511766,0.0
3,Sergio Ramos,pass,0.347909,0.574305,-0.226397,0.0
4,Neymar,pass,0.303795,0.791399,-0.487604,0.0
0,Neymar,dribble,1.498708,1.498708,0.0,0.0
1,Eden Hazard,dribble,1.166985,1.166985,0.0,0.0
2,Lionel Messi,dribble,1.025138,1.025138,0.0,0.0
3,Philippe Coutinho,dribble,0.811467,0.811467,0.0,0.0
4,Kylian Mbappé,dribble,0.791331,0.791331,0.0,0.0


In [545]:
# Team ranking: sum every xT column per team and order by xT_total, highest first.

teams_xT = (
    A
    .groupby(['team_id'])
    .agg({'xT_total': 'sum', 'xT_move_succ': 'sum', 'xT_move_fail': 'sum', 'xT_def_succ': 'sum'})
    .sort_values("xT_total", ascending=False)
    .reset_index()
)

# team_id -> team_name lookup. It is built from the home AND the away columns so
# that a team which never appears as the home side still gets a name instead of
# NaN. De-duplicating on team_id guarantees one row per team, so the left merge
# below cannot multiply ranking rows.
teams = (
    pd.concat(
        [
            games[[f"{side}_team_id", f"{side}_team_name"]].rename(
                columns={f"{side}_team_id": "team_id", f"{side}_team_name": "team_name"}
            )
            for side in ("home", "away")
        ],
        ignore_index=True,
    )
    .drop_duplicates(subset="team_id")
    .reset_index(drop=True)
)

cols = ["team_name"] + xT_cols
teams_xT.merge(teams, how="left")[cols]

Unnamed: 0,team_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Belgium,9.403559,11.7342,-4.877135,2.546494
1,Brazil,9.041579,10.757892,-4.018248,2.301936
2,Germany,6.232441,8.196901,-3.137165,1.172705
3,Spain,6.058288,8.287597,-3.339702,1.110393
4,France,5.648477,8.405249,-5.224302,2.46753
5,England,5.648437,9.388171,-5.53751,1.797777
6,Croatia,4.779132,9.700445,-7.156207,2.234893
7,Mexico,4.353459,4.954446,-2.595083,1.994096
8,Serbia,4.050763,5.153201,-2.444815,1.342377
9,Uruguay,3.84113,5.804762,-4.066866,2.103233
