In [1]:
import os
import warnings
import tqdm
import pandas as pd
# Silence pandas' PerformanceWarning for the whole session.
warnings.simplefilter("ignore", pd.errors.PerformanceWarning)

In [2]:
%load_ext autoreload
%autoreload 2
import socceraction.spadl as spadl
import socceraction.vaep.formula as vaepformula

## Select data

In [6]:
# Configure file and folder names.
# The data folder is looked up two directory levels above the current working
# directory; every path is built with os.path.join so separators stay consistent.
main_folder = os.path.dirname(os.path.dirname(os.getcwd()))
datafolder = os.path.join(main_folder, "data-fifa")
spadl_h5 = os.path.join(datafolder, "spadl-statsbomb.h5")
predictions_h5 = os.path.join(datafolder, "predictions.h5")

In [8]:
# Load the game, player and team tables from the SPADL store.
# mode="r": the store is only read here. pandas' default mode ("a") opens the
# file for appending and would create an empty file if the path were wrong.
with pd.HDFStore(spadl_h5, mode="r") as spadlstore:
    players = spadlstore["players"]
    teams = spadlstore["teams"]  # read once, reused for the home and away joins
    # Each merge has no explicit `on=`, so it joins on the columns both frames
    # share; prefixing the team columns makes them line up with the games
    # table's home_* / away_* columns.
    games = (
        spadlstore["games"]
        .merge(spadlstore["competitions"], how='left')
        .merge(teams.add_prefix('home_'), how='left')
        .merge(teams.add_prefix('away_'), how='left'))
print("nb of games:", len(games))

nb of games: 64


## Compute VAEP values

In [7]:
# Rate every action of every selected game.
# The per-game frames are collected under their own name so that `A` is only
# ever bound to the final DataFrame (never to a half-filled list if the loop
# is interrupted).
rated_games = []
for game in tqdm.tqdm(games.itertuples(), total=len(games), desc="Rating actions"):
    actions = pd.read_hdf(spadl_h5, f"actions/game_{game.game_id}")
    actions = (
        spadl.add_names(actions)
        .merge(players, how="left")
        .merge(teams, how="left")
        .sort_values(["game_id", "period_id", "action_id"])
        .reset_index(drop=True)
    )
    preds = pd.read_hdf(predictions_h5, f"game_{game.game_id}")
    values = vaepformula.value(actions, preds.scores, preds.concedes)
    # concat(axis=1) aligns rows on the index, which is why `actions` gets a
    # fresh 0..n-1 index above.
    # NOTE(review): assumes `preds` is indexed 0..n-1 in the same action order — confirm.
    rated_games.append(pd.concat([actions, preds, values], axis=1))

# One frame for the whole selection, in chronological order per game.
A = (
    pd.concat(rated_games)
    .sort_values(["game_id", "period_id", "time_seconds"])
    .reset_index(drop=True)
)
A.columns

Rating actions: 100%|████████████████████████████████████████████████████████████████████████████████| 64/64 [00:20<00:00,  3.10it/s]


Index(['game_id', 'original_event_id', 'period_id', 'time_seconds', 'team_id',
       'player_id', 'start_x', 'start_y', 'end_x', 'end_y', 'type_id',
       'result_id', 'bodypart_id', 'action_id', 'type_name', 'result_name',
       'bodypart_name', 'player_name', 'nickname', 'team_name', 'scores',
       'concedes', 'offensive_value', 'defensive_value', 'vaep_value'],
      dtype='object')

## Analyse VAEP ratings
### Most valuable players

In [8]:
A["count"] = 1  # one row per action, so summing this column counts actions

# Compute each player's number of actions and total VAEP values
value_columns = ["vaep_value", "offensive_value", "defensive_value", "count"]
playersR = A.groupby("player_id", as_index=False)[value_columns].sum()

# Add player names
playersR = playersR.merge(players[["player_id", "nickname", "player_name"]], how="left")
# Prefer the nickname and fall back to the full name when the nickname is
# missing or empty. A plain truthiness test would keep NaN (NaN is truthy),
# so missing values are checked explicitly.
has_nickname = playersR["nickname"].notna() & (playersR["nickname"] != "")
playersR["player_name"] = playersR["nickname"].where(has_nickname, playersR["player_name"])

# Show results
playersR = playersR[["player_id", "player_name", "vaep_value", "offensive_value", "defensive_value", "count"]]
playersR.sort_values("vaep_value", ascending=False).head(10)

Unnamed: 0,player_id,player_name,vaep_value,offensive_value,defensive_value,count
92,3621.0,Eden Hazard,3.079244,3.390954,-0.31171,687
152,5186.0,Denis Cheryshev,3.048902,3.771666,-0.722765,213
6,3009.0,Kylian Mbappé,2.949428,3.219361,-0.269934,489
71,3501.0,Philippe Coutinho,2.760361,2.727498,0.032863,696
352,5574.0,Toni Kroos,2.747693,2.82621,-0.078517,641
50,3308.0,Kieran Trippier,2.701755,3.266832,-0.565076,684
17,3089.0,Kevin De Bruyne,2.688326,3.196822,-0.508495,714
599,20004.0,Paul Pogba,2.684919,2.755724,-0.070805,673
121,4319.0,Edinson Cavani,2.533961,2.652842,-0.118881,224
36,3244.0,John Stones,2.483829,2.318821,0.165008,934


In [9]:
# Normalize for minutes played
# Total minutes per player, restricted to the selected games.
pg = pd.read_hdf(spadl_h5, "player_games").loc[lambda df: df.game_id.isin(games.game_id)]
mp = pg.groupby("player_id", as_index=False)[["minutes_played"]].sum()

# Keep players with more than 180 minutes (i.e. over two full 90-minute games)
# and express each total as a per-90-minutes rating.
stats = (
    playersR.merge(mp)
    .loc[lambda df: df.minutes_played > 180]
    .assign(
        vaep_rating=lambda df: df.vaep_value * 90 / df.minutes_played,
        offensive_rating=lambda df: df.offensive_value * 90 / df.minutes_played,
        defensive_rating=lambda df: df.defensive_value * 90 / df.minutes_played,
    )
)
stats.sort_values("vaep_rating", ascending=False).head(10)

Unnamed: 0,player_id,player_name,vaep_value,offensive_value,defensive_value,count,minutes_played,vaep_rating,offensive_rating,defensive_rating
152,5186.0,Denis Cheryshev,3.048902,3.771666,-0.722765,213,317,0.865619,1.07082,-0.205201
352,5574.0,Toni Kroos,2.747693,2.82621,-0.078517,641,295,0.838279,0.862234,-0.023954
251,5473.0,Ahmed Musa,1.794901,1.84283,-0.047929,161,224,0.721165,0.740423,-0.019257
121,4319.0,Edinson Cavani,2.533961,2.652842,-0.118881,224,362,0.62999,0.659546,-0.029556
525,6196.0,Yerry Mina,2.461998,2.42862,0.033377,324,374,0.592459,0.584427,0.008032
451,5674.0,Moussa Wagué,1.585941,1.584486,0.001455,169,257,0.555388,0.554878,0.000509
71,3501.0,Philippe Coutinho,2.760361,2.727498,0.032863,696,458,0.542429,0.535971,0.006458
16,3083.0,Son Heung-Min,1.701298,1.787061,-0.085764,227,294,0.520805,0.54706,-0.026254
75,3531.0,Mohamed Salah,1.127095,1.312928,-0.185834,121,195,0.520198,0.605967,-0.085769
92,3621.0,Eden Hazard,3.079244,3.390954,-0.31171,687,551,0.502962,0.553876,-0.050915


### (optional) inspect Belgium's top 10 most valuable non-shot actions

In [10]:
import matplotsoccer

# Rank all actions by VAEP value, then narrow down step by step:
# first to Belgium's actions, then to everything that is not a shot.
sorted_A = (
    A.sort_values("vaep_value", ascending=False)
    .loc[lambda df: df.team_name == "Belgium"]
    .loc[lambda df: ~df.type_name.str.contains("shot")]
)

def get_time(period_id, time_seconds):
    """Format an action's time as match-clock text, e.g. ``"46m5s"``.

    Parameters
    ----------
    period_id : int or float
        Period number of the action.
    time_seconds : int or float
        Seconds elapsed since the start of that period.

    Returns
    -------
    str
        ``"<minutes>m<seconds>s"``, with minutes counted from kick-off of the match.

    Notes
    -----
    Assumes the SPADL/StatsBomb numbering: periods 1-2 are the 45-minute
    halves, 3-4 the 15-minute extra-time halves and 5 the penalty shoot-out.
    Unknown period ids fall back to 45 minutes per preceding period.
    """
    # Minute on the match clock at which each period kicks off. Treating every
    # period as 45 minutes would put period 4 at 135' instead of 105'.
    period_start_minute = {1: 0, 2: 45, 3: 90, 4: 105, 5: 120}
    period = int(period_id)  # row-wise apply may hand the id over as a float
    start = period_start_minute.get(period, (period - 1) * 45)
    m = int(start + time_seconds // 60)
    s = int(time_seconds % 60)
    return f"{m}m{s}s"

# Minute on the match clock at which each period kicks off.
# NOTE(review): assumes periods 3-4 are 15-minute extra-time halves and 5 the shoot-out — confirm.
period_start_minute = {1: 0, 2: 45, 3: 90, 4: 105, 5: 120}

# Plot the (up to) ten highest-valued rows of sorted_A, each with its
# surrounding actions. head(10) also copes with fewer than ten rows.
for row in sorted_A.head(10).itertuples():
    i = row.Index
    # Context window: three preceding actions, the action itself and the next one.
    # A has a fresh 0..n-1 index, so the label i is also the row position.
    # The start is clamped because a negative bound would count from the end of A.
    a = A.iloc[max(i - 3, 0) : i + 2]
    # A holds all games back to back; drop neighbours that belong to another game.
    a = a[a.game_id == row.game_id].copy()

    # Prefer the nickname; fall back to the full name when it is missing or empty
    # (a truthiness test would keep NaN, which is truthy).
    has_nickname = a["nickname"].notna() & (a["nickname"] != "")
    a["player_name"] = a["nickname"].where(has_nickname, a["player_name"])

    # Look the game up from the rated action itself, not from the window's first row.
    g = next(games[games.game_id == row.game_id].itertuples())
    game_info = f"{g.game_date} {g.home_team_name} {g.home_score}-{g.away_score} {g.away_team_name}"
    period = int(row.period_id)
    minute = int(period_start_minute.get(period, (period - 1) * 45) + row.time_seconds // 60)
    print(f"{game_info} {minute}' {row.type_name} {row.player_name}")

    # Pre-format the numeric columns as text for the label table.
    a["scores"] = a.scores.apply(lambda x : "%.3f" % x )
    a["vaep_value"] = a.vaep_value.apply(lambda x : "%.3f" % x )
    a["time"] = a[["period_id", "time_seconds"]].apply(lambda x: get_time(*x),axis=1)
    cols = ["time", "type_name", "player_name", "team_name", "scores", "vaep_value"]
    matplotsoccer.actions(a[["start_x", "start_y", "end_x",  "end_y"]],
                a.type_name,
                team=a.team_name,
                result = a.result_name == "success",
                label=a[cols],
                labeltitle = cols,
                zoom=False)

ModuleNotFoundError: No module named 'matplotlib'