Be sure to run the following notebook first before running this notebook:
- 1-load-and-convert-statsbomb-data.ipynb

In [190]:
%load_ext autoreload
%autoreload 2
import os;

import tqdm
import pandas as pd
import numpy as np
import socceraction.vaep.features as fs
import socceraction.xthreat as xthreat

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [95]:
## Configure file and folder names, use SPADL format.
datafolder = "../data-fifa"
spadl_h5 = os.path.join(datafolder,"spadl-statsbomb.h5")

In [96]:
## Select games
games = pd.read_hdf(spadl_h5,"games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")
players = pd.read_hdf(spadl_h5, "players")
players["player_name"] = players[["player_nickname","player_name"]].apply(lambda x: x[0] if x[0] else x[1],axis=1)

nb of games: 64


In [97]:
## Read in all actions of games
A = []

for game in tqdm.tqdm(list(games.itertuples())):
    actions = pd.read_hdf(spadl_h5,f"actions/game_{game.game_id}")
    actions = (
        actions.merge(actiontypes,how="left")
        .merge(results,how="left")
        .merge(bodyparts,how="left")
        #.sort_values(["period_id", "time_seconds", "timestamp"])
        .reset_index(drop=True)
    )
    [actions] = fs.play_left_to_right([actions],game.home_team_id)
        
    A.append(actions) 
    
A = pd.concat(A).reset_index(drop = True)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:01<00:00, 35.92it/s]


In [195]:
from socceraction.grid import PolarGrid
from socceraction.timeframe import QuarterTimeFrame

xTModel = xthreat.ExpectedThreat()
xTModel.fit(A)

xTModel2 = xthreat.ExpectedThreat(timeframe = QuarterTimeFrame())
xTModel2.fit(A)

# iterations:  45
# iterations:  47


<socceraction.xthreat.ExpectedThreat at 0x1838e36f348>

In [196]:
# split actions according to type

mov_actions = xthreat.get_move_actions(A)
succ_mov_actions = xthreat.get_successful(mov_actions)
# fail_mov_actions = xthreat.get_failed(mov_actions)
# def_actions = xthreat.get_defensive_actions(A)
# succ_def_actions = xthreat.get_successful(def_actions)

# calculate xT for various action types

A["xT"] = xTModel.predict(succ_mov_actions[:])
A["xT_2"] = xTModel2.predict(succ_mov_actions[:])
# A["xT_move_fail"] = xTModel.predict_failed_move_actions(fail_mov_actions)
# A["xT_def_succ"] = xTModel.predict_successful_def_actions(succ_def_actions)

# A["xT_total"] = A["xT_move_succ"].fillna(0) + A["xT_move_fail"].fillna(0) + A["xT_def_succ"].fillna(0)

xT_cols = ["xT", "xT_2"]

In [200]:
# Top 10 actions

actions_xT = mov_actions.sort_values("xT_2", ascending=False).reset_index()
    
cols = ["player_name", "period_id", "timestamp"] + xT_cols
ranking = actions_xT.merge(players, how="left")[cols]
top = ranking.head(20)
bottom = ranking.tail(5)
pd.concat([top,bottom])

Unnamed: 0,player_name,period_id,timestamp,xT,xT_2
0,Ivan Perišić,4,00:03:01.550,0.05621,0.604895
1,Oussama Haddadi,2,00:20:20.533,0.208363,0.286168
2,Joshua Kimmich,2,00:25:48.893,0.220631,0.285577
3,Mário Fernandes,2,00:26:27.883,0.212436,0.279876
4,Thomas Meunier,2,00:16:42.860,0.191703,0.248227
5,Kevin De Bruyne,2,00:23:10.133,0.179256,0.218262
6,André Carrillo,2,00:18:36.240,0.163417,0.211119
7,Ali Maâloul,2,00:03:59.547,0.172039,0.210119
8,Olivier Giroud,1,00:14:47.360,0.1139,0.208451
9,Youri Tielemans,2,00:44:15.280,0.137894,0.205848


In [198]:
# Top 10 players

players_xT = (
    A
    .groupby(['player_id']) 
    .agg({'xT': 'sum', 'xT_2': 'sum'}) 
    .sort_values("xT_2", ascending=False) 
    .reset_index()
)

cols = ["player_name"] + xT_cols  
ranking = players_xT.merge(players, how="left")[cols]
top = ranking.head(10)
bottom = ranking.tail(5)
pd.concat([top,bottom])

Unnamed: 0,player_name,xT,xT_2
0,Neymar,3.078027,3.125373
1,Eden Hazard,1.889653,2.122425
2,Joshua Kimmich,1.697262,1.739647
3,Kevin De Bruyne,1.702185,1.672054
4,Isco,1.495937,1.642239
5,Ivan Perišić,1.086776,1.517237
6,Philippe Coutinho,1.283439,1.441814
7,Toni Kroos,1.21717,1.421617
8,Ivan Rakitić,1.275952,1.383684
9,Thomas Meunier,1.357492,1.381121


In [17]:
# Top 10 players by action type

type_names = [ "tackle"]

tops = []

for type_name in type_names:
    players_xT = (
        A[A.type_name == type_name]
        .groupby(['player_id', 'type_name']) 
        .agg({'xT_total': 'sum', 'xT_move_succ': 'sum', 'xT_move_fail': 'sum', 'xT_def_succ': 'sum'}) 
        .sort_values("xT_total", ascending=False) 
        .reset_index()
    )

    cols = ["player_name", "type_name"] + xT_cols  
    ranking = players_xT.merge(players, how="left")[cols]
    tops.append(ranking.head(5))

pd.concat(tops)

Unnamed: 0,player_name,type_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Mario Mandžukić,tackle,0.405135,0.0,0.0,0.405135
1,N'Golo Kanté,tackle,0.329154,0.0,0.0,0.329154
2,Casemiro,tackle,0.291139,0.0,0.0,0.291139
3,Wílmar Barrios,tackle,0.28577,0.0,0.0,0.28577
4,Marouane Fellaini,tackle,0.252877,0.0,0.0,0.252877


In [246]:
# Top  10 teams

teams_xT = (
    A
    .groupby(['team_id']) 
    .agg({'xT_total': 'sum', 'xT_move_succ': 'sum', 'xT_move_fail': 'sum', 'xT_def_succ': 'sum'})  
    .sort_values("xT_total", ascending=False) 
    .reset_index()
)
    

teams = games[["home_team_id", "home_team_name"]] \
    .drop_duplicates().reset_index(drop=True) \
    .rename(columns={"home_team_id": "team_id", "home_team_name": "team_name"})

cols = ["team_name"] + xT_cols
teams_xT.merge(teams, how="left")[cols]

Unnamed: 0,team_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Belgium,10.827691,11.7342,-3.453003,2.546494
1,Brazil,10.460456,10.757892,-2.599371,2.301936
2,Germany,7.920912,8.196901,-1.448694,1.172705
3,England,7.567443,9.388171,-3.618504,1.797777
4,Spain,7.386202,8.287597,-2.011788,1.110393
5,France,6.810556,8.405249,-4.062223,2.46753
6,Croatia,6.39947,9.700445,-5.535869,2.234893
7,Uruguay,4.915307,5.804762,-2.992689,2.103233
8,Mexico,4.881698,4.954446,-2.066843,1.994096
9,Serbia,4.792927,5.153201,-1.702652,1.342377
