Be sure to run the following notebook before running this one:
- 1-load-and-convert-statsbomb-data.ipynb

In [187]:
%load_ext autoreload
%autoreload 2
import os;

import tqdm
import pandas as pd
import numpy as np
import socceraction.vaep.features as fs
import socceraction.xthreat as xthreat

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [188]:
## Paths: the data folder and the HDF5 store (SPADL format) inside it.
datafolder = "../data-fifa"
spadl_h5 = os.path.join(
    datafolder,
    "spadl-statsbomb.h5",
)

In [189]:
## Select games
# Read the games table from the store and keep only the FIFA World Cup games.
games = pd.read_hdf(spadl_h5, "games")
games = games[games.competition_name == "FIFA World Cup"]
print("nb of games:", len(games))

# Lookup tables stored next to the games; they are joined onto the actions later.
actiontypes = pd.read_hdf(spadl_h5, "actiontypes")
bodyparts = pd.read_hdf(spadl_h5, "bodyparts")
results = pd.read_hdf(spadl_h5, "results")
players = pd.read_hdf(spadl_h5, "players")

# Display name: use the nickname when there is one, otherwise keep the full name.
# A missing nickname may show up as None, NaN or an empty string. NaN is truthy,
# so a plain `if nickname` test would let it overwrite the name; the mask below
# treats all three as "no nickname". Selecting the columns by label also avoids
# positional `row[0]` access on a label-indexed Series.
has_nickname = players["player_nickname"].notna() & (players["player_nickname"] != "")
players["player_name"] = players["player_nickname"].where(
    has_nickname, players["player_name"]
)

nb of games: 64


In [190]:
## Load the actions of every selected game into a single DataFrame `A`
actions_per_game = []

for game in tqdm.tqdm(list(games.itertuples())):
    # Actions of this game, stored under the key "actions/game_<game_id>".
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")

    # Left-join each lookup table in turn; with no explicit key, pandas joins
    # on the columns the two frames have in common.
    for lookup in (actiontypes, results, bodyparts):
        actions = actions.merge(lookup, how="left")
    actions = actions.reset_index(drop=True)

    # play_left_to_right takes a list of frames and returns a list; exactly one
    # frame goes in, so exactly one is unpacked.
    (actions,) = fs.play_left_to_right([actions], game.home_team_id)

    actions_per_game.append(actions)

# Stack the per-game frames and renumber the rows from 0.
A = pd.concat(actions_per_game).reset_index(drop=True)

100%|██████████████████████████████████████████████████████████████████████████████████| 64/64 [00:04<00:00, 15.00it/s]


In [261]:
import socceraction.xpoints as xpoints

# Sample inputs passed to predict() below.
# NOTE(review): the meaning of `t` and `d` is not visible in this notebook — TODO confirm
# against the ExpectedPoints.predict documentation.
t = np.array([1,3])
d = np.array([0,-2])

# Create the expected-points model and fit it on all loaded actions and the selected games.
xP_model = xpoints.ExpectedPoints()
xP_model.fit(A, games)
# NOTE(review): the return value of predict() is neither stored nor displayed here;
# only the last expression of the cell (xwin) is shown as output.
xP_model.predict(t,d)
# Display the model's `xwin` attribute (the 8x3 array shown in the cell output).
xP_model.xwin

array([[0.41111111, 0.63157895, 1.        ],
       [0.3902439 , 0.66666667, 1.        ],
       [0.37142857, 0.66666667, 1.        ],
       [0.30769231, 0.85185185, 0.90909091],
       [0.28571429, 0.83333333, 1.        ],
       [0.03571429, 1.        , 1.        ],
       [0.03846154, 0.96774194, 1.        ],
       [0.        , 1.        , 1.        ]])

In [276]:
# Let numpy print up to 100 characters per line.
np.set_printoptions(linewidth=100)

xwin, xdraw = xP_model.xwin, xP_model.xdraw

# a: 3 * xwin + xdraw, element-wise over the fitted tables.
a = 3 * xwin + xdraw

# b: 3 * (1 - xwin) - 2 * xdraw, mirrored left-to-right, first two columns kept.
b = np.fliplr(3 * (1 - xwin) - 2 * xdraw)[:, :2]

# Side-by-side table: the two mirrored columns of b followed by all columns of a.
np.hstack((b, a))

array([[0.        , 0.57894737, 1.41111111, 2.15789474, 3.        ],
       [0.        , 0.61904762, 1.3902439 , 2.19047619, 3.        ],
       [0.        , 0.66666667, 1.37142857, 2.16666667, 3.        ],
       [0.27272727, 0.22222222, 1.30769231, 2.66666667, 2.72727273],
       [0.        , 0.16666667, 1.28571429, 2.66666667, 3.        ],
       [0.        , 0.        , 1.03571429, 3.        , 3.        ],
       [0.        , 0.03225806, 1.03846154, 2.93548387, 3.        ],
       [0.        , 0.        , 1.        , 3.        , 3.        ]])

In [248]:
# NOTE(review): PolarGrid is imported here but not used anywhere in this cell.
from socceraction.grid import PolarGrid

## Train model
# Build the expected-threat model on top of the fitted expected-points model
# (xP_model) and fit it on all loaded actions.
# NOTE(review): this cell carries execution count 248 while the cell that creates
# xP_model carries 261, so the saved outputs appear to come from an out-of-order
# session — re-run with Restart & Run All to confirm the results.
xTModel = xthreat.ExpectedThreat(expectedPoints = xP_model)
xTModel.fit(A)

# iterations:  45


<socceraction.xthreat.ExpectedThreat at 0x24c729b3548>

In [249]:
# Partition the actions: move actions (successful / failed) ...
mov_actions = xthreat.get_move_actions(A)
succ_mov_actions = xthreat.get_successful(mov_actions)
fail_mov_actions = xthreat.get_failed(mov_actions)

# ... and defensive actions (successful only).
def_actions = xthreat.get_defensive_actions(A)
succ_def_actions = xthreat.get_successful(def_actions)

# Names of the xT columns stored on A: the total first, then its three components.
xT_cols = ["xT_total", "xT_move_succ", "xT_move_fail", "xT_def_succ"]

# One xT column per subset of actions.
A["xT_move_succ"] = xTModel.predict_successful_move_actions(succ_mov_actions)
A["xT_move_fail"] = xTModel.predict_failed_move_actions(fail_mov_actions)
A["xT_def_succ"] = xTModel.predict_successful_def_actions(succ_def_actions)

# Total xT per action; a missing component counts as 0.
A["xT_total"] = (
    A["xT_move_succ"].fillna(0)
    .add(A["xT_move_fail"].fillna(0))
    .add(A["xT_def_succ"].fillna(0))
)

In [258]:
# Cap DataFrame displays at 30 rows, then list the id and both team names of each game.
pd.set_option("display.max_rows", 30)
games.loc[:, ["game_id", "home_team_name", "away_team_name"]]


Unnamed: 0,game_id,home_team_name,away_team_name
0,7581,Croatia,Denmark
1,7549,Nigeria,Iceland
2,7555,Poland,Colombia
3,7529,Croatia,Nigeria
4,7548,Brazil,Costa Rica
...,...,...,...
59,7541,Colombia,Japan
60,8655,France,Belgium
61,8657,Belgium,England
62,7540,Russia,Egypt


In [279]:
# Player xT ranking for a single game: top 20 and bottom 5 players

GAME_ID = 7584  # Belgium - Japan

# Sum every xT column per player over the actions of that game, best total first.
actions_xT = (
    A[A.game_id == GAME_ID]
    .groupby("player_id")[xT_cols]
    .sum()
    .sort_values("xT_total", ascending=False)
    .reset_index()
)

# Attach the player names; the join key is spelled out so that a column added
# to either frame later cannot silently become part of the key.
cols = ["player_name"] + xT_cols
ranking = actions_xT.merge(players, how="left", on="player_id")[cols]

top = ranking.head(20)
# Take the bottom rows only from what is left after `top`, so a game with fewer
# than 25 ranked players does not list anyone twice.
bottom = ranking.iloc[len(top):].tail(5)
pd.concat([top, bottom])

Unnamed: 0,player_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Thomas Meunier,0.703969,0.680196,0.025076,-0.001303
1,Nacer Chadli,0.676308,0.669701,0.006607,0.0
2,Eden Hazard,0.482967,0.443911,0.02542,0.013636
3,Kevin De Bruyne,0.386176,0.304659,0.038692,0.042825
4,Dries Mertens,0.327224,0.315618,0.011606,0.0
5,Shinji Kagawa,0.254373,0.233407,0.020965,0.0
6,Jan Vertonghen,0.252019,0.17036,0.054782,0.026877
7,Takashi Inui,0.236874,0.133251,0.053687,0.049936
8,Romelu Lukaku,0.232165,0.208038,0.024127,0.0
9,Hiroki Sakai,0.192084,0.051694,0.039569,0.100822


In [260]:
# Top 10 (and bottom 5) players by total xT over all loaded games

# Sum every xT column per player, best total first.
players_xT = (
    A
    .groupby("player_id")[xT_cols]
    .sum()
    .sort_values("xT_total", ascending=False)
    .reset_index()
)

# Attach the player names; the join key is spelled out so that a column added
# to either frame later cannot silently become part of the key.
cols = ["player_name"] + xT_cols
ranking = players_xT.merge(players, how="left", on="player_id")[cols]

top = ranking.head(10)
# Take the bottom rows only from what is left after `top`, so a short ranking
# (fewer than 15 players) does not list anyone twice.
bottom = ranking.iloc[len(top):].tail(5)
pd.concat([top, bottom])

Unnamed: 0,player_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Neymar,2.589092,2.378531,0.159748,0.050813
1,Joshua Kimmich,2.314212,2.105502,0.117738,0.090973
2,Luka Modrić,1.753982,1.211139,0.379799,0.163044
3,Isco,1.752717,1.499154,0.189371,0.064192
4,Toni Kroos,1.716071,1.553016,0.074465,0.08859
5,Šime Vrsaljko,1.62022,1.077826,0.376722,0.165671
6,Ivan Perišić,1.595821,1.177382,0.299971,0.118468
7,Ivan Rakitić,1.583309,1.102007,0.358496,0.122806
8,Eden Hazard,1.482944,1.159573,0.155787,0.167584
9,Thomas Meunier,1.38398,1.038039,0.188067,0.157874


In [245]:
# Top 5 players for each action type

type_names = ["pass", "dribble", "cross", "interception", "tackle"]

# Columns of every per-type ranking (identical for each type).
cols = ["player_name", "type_name"] + xT_cols

tops = []
for type_name in type_names:
    # Sum every xT column per player over the actions of this type, best total first.
    players_xT = (
        A[A["type_name"] == type_name]
        .groupby(["player_id", "type_name"])
        .agg(dict.fromkeys(xT_cols, "sum"))
        .sort_values("xT_total", ascending=False)
        .reset_index()
    )

    # Attach the player names and keep the five best players of this type.
    ranking = players_xT.merge(players, how="left")[cols]
    tops.append(ranking.head(5))

# One table with the per-type rankings stacked in the order of type_names.
pd.concat(tops)

Unnamed: 0,player_name,type_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Toni Kroos,pass,0.601001,0.699995,-0.098995,0.0
1,Neymar,pass,0.583249,0.761658,-0.178409,0.0
2,Philippe Coutinho,pass,0.456859,0.645764,-0.188905,0.0
3,Sergio Busquets,pass,0.450566,0.626823,-0.176257,0.0
4,Lionel Messi,pass,0.441154,0.601322,-0.160168,0.0
0,Neymar,dribble,1.527679,1.527679,0.0,0.0
1,Eden Hazard,dribble,1.066814,1.066814,0.0,0.0
2,Isco,dribble,0.756493,0.756493,0.0,0.0
3,Kylian Mbappé,dribble,0.664141,0.664141,0.0,0.0
4,Lionel Messi,dribble,0.61731,0.61731,0.0,0.0


In [246]:
# Team ranking by total xT

# Sum every xT column per team, best total first.
teams_xT = (
    A
    .groupby("team_id")[xT_cols]
    .sum()
    .sort_values("xT_total", ascending=False)
    .reset_index()
)

# team_id -> team_name lookup built from BOTH sides of every game, so a team
# that never appears as the home team still gets a name. De-duplicating on
# team_id alone guarantees one row per team, so the merge below cannot
# multiply rows.
# NOTE(review): assumes `games` has an `away_team_id` column next to
# `away_team_name` — confirm against the stored games table.
teams = (
    pd.concat(
        [
            games[[f"{side}_team_id", f"{side}_team_name"]].rename(
                columns={
                    f"{side}_team_id": "team_id",
                    f"{side}_team_name": "team_name",
                }
            )
            for side in ("home", "away")
        ]
    )
    .drop_duplicates(subset="team_id")
    .reset_index(drop=True)
)

cols = ["team_name"] + xT_cols
teams_xT.merge(teams, how="left", on="team_id")[cols]

Unnamed: 0,team_name,xT_total,xT_move_succ,xT_move_fail,xT_def_succ
0,Belgium,10.827691,11.7342,-3.453003,2.546494
1,Brazil,10.460456,10.757892,-2.599371,2.301936
2,Germany,7.920912,8.196901,-1.448694,1.172705
3,England,7.567443,9.388171,-3.618504,1.797777
4,Spain,7.386202,8.287597,-2.011788,1.110393
5,France,6.810556,8.405249,-4.062223,2.46753
6,Croatia,6.39947,9.700445,-5.535869,2.234893
7,Uruguay,4.915307,5.804762,-2.992689,2.103233
8,Mexico,4.881698,4.954446,-2.066843,1.994096
9,Serbia,4.792927,5.153201,-1.702652,1.342377
