In [18]:
import json
import os
import pickle
import warnings
from io import BytesIO, StringIO

import pandas as pd
import tqdm
# NOTE: this rebinds the name `tqdm` from the module (line above) to the
# notebook progress-bar class, so it must stay after `import tqdm`.
from tqdm.notebook import tqdm
from xgboost import XGBClassifier

import socceraction.spadl as spadl
import socceraction.spadl.statsbomb as statsbomb
import socceraction.vaep.features as features
import socceraction.vaep.labels as labels
from socceraction.spadl.wyscout import convert_to_spadl
from socceraction.vaep.formula import value

# Lift pandas' display truncation: show every column and every row.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
# Silence pandas' PerformanceWarning for the rest of the session.
warnings.simplefilter("ignore", pd.errors.PerformanceWarning)

In [2]:
# Folder holding the Wyscout JSON dumps and every HDF file this notebook writes.
WYSCOUT_BASE_PATH = "../../wyscout"

# Complete file paths.
TEAMS_PATH = WYSCOUT_BASE_PATH + "/teams.json"
PLAYERS_PATH = WYSCOUT_BASE_PATH + "/players.json"

# Path prefixes: "<competition>.json" is appended when a file is read.
MATCHES_PATH = WYSCOUT_BASE_PATH + "/matches_"
EVENTS_PATH = WYSCOUT_BASE_PATH + "/events_"

In [3]:
#os.listdir(WYSCOUT_BASE_PATH)

In [4]:
def read_json_file(filename):
    """Return the content of ``filename`` as text.

    The file is read as raw bytes and decoded with the ``unicode_escape``
    codec, so backslash escape sequences in the file (e.g. ``\\u00e9``) are
    turned into the characters they denote before the text is handed to a
    JSON parser.
    NOTE(review): presumably needed because the Wyscout dumps contain
    escaped unicode -- confirm against the data.

    Parameters
    ----------
    filename : path of the file to read.

    Returns
    -------
    str : the decoded file content.
    """
    with open(filename, "rb") as handle:
        raw_bytes = handle.read()
    return raw_bytes.decode("unicode_escape")

## Teams to HDF

In [37]:
# Parse the decoded teams JSON into a DataFrame. The text is wrapped in
# StringIO because passing a literal JSON string to pd.read_json is
# deprecated (pandas >= 2.1); a file-like object works on every version.
df_teams = pd.read_json(StringIO(read_json_file(TEAMS_PATH)))

In [39]:
# mode "w" starts wyscout.h5 from scratch; the later cells append to it.
df_teams.to_hdf(f"{WYSCOUT_BASE_PATH}/wyscout.h5", key="teams", mode="w")

## Players to HDF

In [42]:
# Parse the decoded players JSON into a DataFrame. The text is wrapped in
# StringIO because passing a literal JSON string to pd.read_json is
# deprecated (pandas >= 2.1); a file-like object works on every version.
df_players = pd.read_json(StringIO(read_json_file(PLAYERS_PATH)))

In [43]:
# Append the players table to the existing wyscout.h5 (mode "a").
df_players.to_hdf(f"{WYSCOUT_BASE_PATH}/wyscout.h5", key="players", mode="a")

## Matches to HDF

In [27]:
# Full competition list, kept for reference:
#   England, France, Germany, Italy, Spain, European Championship, World Cup

# Only these two are loaded: the goal is to train on England and test on Italy.
competitions = [
    "Italy",
    "England",
]

In [46]:
# Load one matches file per competition, then stack them into one frame.
dfs_matches = list()

for comp in competitions:
    # File names use underscores where the competition name has spaces.
    comp_name = comp.replace(" ", "_")
    filename = f"{MATCHES_PATH}{comp_name}.json"
    # StringIO: passing a literal JSON string to pd.read_json is deprecated
    # (pandas >= 2.1); a file-like object works on every version.
    df_matches_comp = pd.read_json(StringIO(read_json_file(filename)))
    dfs_matches.append(df_matches_comp)

# `df_matches` is only ever the concatenated table; the per-competition frame
# has its own name so the two cannot be confused on a partial re-run.
df_matches = pd.concat(dfs_matches)

In [47]:
# Append the combined matches table to wyscout.h5 (mode "a").
df_matches.to_hdf(f"{WYSCOUT_BASE_PATH}/wyscout.h5", key="matches", mode="a")

## Events to HDF

In [49]:
# Store the events of every match under its own key "events/match_<matchId>".
for comp in competitions:
    # File names use underscores where the competition name has spaces.
    comp_name = comp.replace(" ", "_")
    filename = f"{EVENTS_PATH}{comp_name}.json"
    # StringIO: passing a literal JSON string to pd.read_json is deprecated
    # (pandas >= 2.1); a file-like object works on every version.
    df_events = pd.read_json(StringIO(read_json_file(filename)))
    df_events_matches = df_events.groupby("matchId", as_index = False)
    for match_id, df_events_match in df_events_matches:
        df_events_match.to_hdf(path_or_buf = f"{WYSCOUT_BASE_PATH}/wyscout.h5",
                               key = f"events/match_{match_id}", mode = "a")

## Writing the SPADL

In [50]:
# Convert the Wyscout store to SPADL. First argument: the HDF file written by
# the cells above; second argument: the output file that the "Reading the
# SPADL" cells load afterwards. Slow step -- the recorded run took ~23 minutes.
convert_to_spadl(f"{WYSCOUT_BASE_PATH}/wyscout.h5",
                 f"{WYSCOUT_BASE_PATH}/spadl.h5")

...Inserting actiontypes
...Inserting bodyparts
...Inserting results
...Converting games
...Converting players
...Converting teams
...Generating player_games


100%|██████████| 760/760 [00:18<00:00, 40.69game/s]
  0%|          | 0/760 [00:00<?, ?game/s]

...Converting events to actions


100%|██████████| 760/760 [23:24<00:00,  1.85s/game]


## Reading the SPADL

In [3]:
# Load the game list and the three id -> name lookup tables from the SPADL store.
spadl_h5 = f"{WYSCOUT_BASE_PATH}/spadl.h5"

df_games = pd.read_hdf(spadl_h5, key="games")
df_actiontypes = pd.read_hdf(spadl_h5, key="actiontypes")
df_bodyparts = pd.read_hdf(spadl_h5, key="bodyparts")
df_results = pd.read_hdf(spadl_h5, key="results")

In [4]:
df_games.iloc[0]

game_id                       2576335
competition_id                    524
season_id                      181248
game_date         2018-05-20 18:45:00
home_team_id                     3162
away_team_id                     3161
Name: 0, dtype: object

In [5]:
df_actiontypes.iloc[0]

type_id         0
type_name    pass
Name: 0, dtype: object

In [6]:
df_bodyparts.iloc[0]

bodypart_id         0
bodypart_name    foot
Name: 0, dtype: object

In [7]:
df_results.iloc[0]

result_id         0
result_name    fail
Name: 0, dtype: object

## Features

In [8]:
# Feature generators, grouped by theme. The order of the final list is the
# order in which the column groups appear in the concatenated feature table,
# so the groups must be kept in this sequence.
_onehot_features = [
    features.actiontype_onehot,
    features.bodypart_onehot,
    features.result_onehot,
]
_score_features = [features.goalscore]
_location_features = [
    features.startlocation,
    features.endlocation,
    features.movement,
    features.space_delta,
    features.startpolar,
    features.endpolar,
]
_sequence_features = [features.team, features.time_delta]

functions_features = [
    *_onehot_features,
    *_score_features,
    *_location_features,
    *_sequence_features,
]

## Example

In [64]:
# Use the first game as the worked example. `game` is reused further down in
# this section for its home_team_id. Plain indexing avoids starting a progress
# bar over all games only to abandon it after the first iteration.
game = df_games.iloc[0]
df_actions = pd.read_hdf(path_or_buf = f"{WYSCOUT_BASE_PATH}/spadl.h5",
                         key = f"actions/game_{game['game_id']}")

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))

In [65]:
df_actions.head()

Unnamed: 0,game_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,bodypart_id,type_id,result_id
0,2576335.0,1.0,2.41759,3161.0,3344.0,53.55,34.0,65.1,39.44,0,0,1
1,2576335.0,1.0,3.904412,3161.0,116349.0,65.1,39.44,66.15,61.88,0,0,1
2,2576335.0,1.0,6.484211,3161.0,135903.0,66.15,61.88,69.3,48.96,0,0,1
3,2576335.0,1.0,10.043835,3161.0,138408.0,69.3,48.96,67.2,9.52,0,0,1
4,2576335.0,1.0,14.03207,3161.0,21094.0,67.2,9.52,73.5,26.52,0,0,1


In [66]:
# Attach the readable type / result / bodypart names to every action. Each
# lookup table is left-joined on the column(s) it shares with the actions.
for df_lookup in (df_actiontypes, df_results, df_bodyparts):
    df_actions = df_actions.merge(df_lookup, how="left")
df_actions = df_actions.reset_index(drop=True)

In [73]:
len(df_actions)

1220

In [67]:
df_actions.head()

Unnamed: 0,game_id,period_id,time_seconds,team_id,player_id,start_x,start_y,end_x,end_y,bodypart_id,type_id,result_id,type_name,result_name,bodypart_name
0,2576335.0,1.0,2.41759,3161.0,3344.0,53.55,34.0,65.1,39.44,0,0,1,pass,success,foot
1,2576335.0,1.0,3.904412,3161.0,116349.0,65.1,39.44,66.15,61.88,0,0,1,pass,success,foot
2,2576335.0,1.0,6.484211,3161.0,135903.0,66.15,61.88,69.3,48.96,0,0,1,pass,success,foot
3,2576335.0,1.0,10.043835,3161.0,138408.0,69.3,48.96,67.2,9.52,0,0,1,pass,success,foot
4,2576335.0,1.0,14.03207,3161.0,21094.0,67.2,9.52,73.5,26.52,0,0,1,pass,success,foot


In [79]:
# Build the game states with nb_prev_actions = 5 (hence the _a0 ... _a4 column
# suffixes in the feature table below), then normalise the playing direction.
# NOTE(review): play_left_to_right presumably mirrors coordinates so both
# teams attack the same way -- confirm against the socceraction docs.
df_gamestates = features.play_left_to_right(
    gamestates=features.gamestates(actions=df_actions, nb_prev_actions=5),
    home_team_id=game["home_team_id"],
)

# Run every feature generator on the game states and put the resulting
# column groups side by side.
feature_blocks = [make_features(df_gamestates) for make_features in functions_features]
df_features = pd.concat(feature_blocks, axis=1)

In [80]:
df_features.head()

Unnamed: 0,type_pass_a0,type_cross_a0,type_throw_in_a0,type_freekick_crossed_a0,type_freekick_short_a0,type_corner_crossed_a0,type_corner_short_a0,type_take_on_a0,type_foul_a0,type_tackle_a0,type_interception_a0,type_shot_a0,type_shot_penalty_a0,type_shot_freekick_a0,type_keeper_save_a0,type_keeper_claim_a0,type_keeper_punch_a0,type_keeper_pick_up_a0,type_clearance_a0,type_bad_touch_a0,type_non_action_a0,type_dribble_a0,type_goalkick_a0,type_pass_a1,type_cross_a1,type_throw_in_a1,type_freekick_crossed_a1,type_freekick_short_a1,type_corner_crossed_a1,type_corner_short_a1,type_take_on_a1,type_foul_a1,type_tackle_a1,type_interception_a1,type_shot_a1,type_shot_penalty_a1,type_shot_freekick_a1,type_keeper_save_a1,type_keeper_claim_a1,type_keeper_punch_a1,type_keeper_pick_up_a1,type_clearance_a1,type_bad_touch_a1,type_non_action_a1,type_dribble_a1,type_goalkick_a1,type_pass_a2,type_cross_a2,type_throw_in_a2,type_freekick_crossed_a2,type_freekick_short_a2,type_corner_crossed_a2,type_corner_short_a2,type_take_on_a2,type_foul_a2,type_tackle_a2,type_interception_a2,type_shot_a2,type_shot_penalty_a2,type_shot_freekick_a2,type_keeper_save_a2,type_keeper_claim_a2,type_keeper_punch_a2,type_keeper_pick_up_a2,type_clearance_a2,type_bad_touch_a2,type_non_action_a2,type_dribble_a2,type_goalkick_a2,type_pass_a3,type_cross_a3,type_throw_in_a3,type_freekick_crossed_a3,type_freekick_short_a3,type_corner_crossed_a3,type_corner_short_a3,type_take_on_a3,type_foul_a3,type_tackle_a3,type_interception_a3,type_shot_a3,type_shot_penalty_a3,type_shot_freekick_a3,type_keeper_save_a3,type_keeper_claim_a3,type_keeper_punch_a3,type_keeper_pick_up_a3,type_clearance_a3,type_bad_touch_a3,type_non_action_a3,type_dribble_a3,type_goalkick_a3,type_pass_a4,type_cross_a4,type_throw_in_a4,type_freekick_crossed_a4,type_freekick_short_a4,type_corner_crossed_a4,type_corner_short_a4,type_take_on_a4,type_foul_a4,type_tackle_a4,type_interception_a4,type_shot_a4,type_shot_penalty_a4,type_shot_freekick_a4,type_keeper_
save_a4,type_keeper_claim_a4,type_keeper_punch_a4,type_keeper_pick_up_a4,type_clearance_a4,type_bad_touch_a4,type_non_action_a4,type_dribble_a4,type_goalkick_a4,bodypart_foot_a0,bodypart_head_a0,bodypart_other_a0,bodypart_foot_a1,bodypart_head_a1,bodypart_other_a1,bodypart_foot_a2,bodypart_head_a2,bodypart_other_a2,bodypart_foot_a3,bodypart_head_a3,bodypart_other_a3,bodypart_foot_a4,bodypart_head_a4,bodypart_other_a4,result_fail_a0,result_success_a0,result_offside_a0,result_owngoal_a0,result_yellow_card_a0,result_red_card_a0,result_fail_a1,result_success_a1,result_offside_a1,result_owngoal_a1,result_yellow_card_a1,result_red_card_a1,result_fail_a2,result_success_a2,result_offside_a2,result_owngoal_a2,result_yellow_card_a2,result_red_card_a2,result_fail_a3,result_success_a3,result_offside_a3,result_owngoal_a3,result_yellow_card_a3,result_red_card_a3,result_fail_a4,result_success_a4,result_offside_a4,result_owngoal_a4,result_yellow_card_a4,result_red_card_a4,goalscore_team,goalscore_opponent,goalscore_diff,start_x_a0,start_y_a0,start_x_a1,start_y_a1,start_x_a2,start_y_a2,start_x_a3,start_y_a3,start_x_a4,start_y_a4,end_x_a0,end_y_a0,end_x_a1,end_y_a1,end_x_a2,end_y_a2,end_x_a3,end_y_a3,end_x_a4,end_y_a4,dx_a0,dy_a0,movement_a0,dx_a1,dy_a1,movement_a1,dx_a2,dy_a2,movement_a2,dx_a3,dy_a3,movement_a3,dx_a4,dy_a4,movement_a4,dx_a01,dy_a01,mov_a01,dx_a02,dy_a02,mov_a02,dx_a03,dy_a03,mov_a03,dx_a04,dy_a04,mov_a04,start_dist_to_goal_a0,start_angle_to_goal_a0,start_dist_to_goal_a1,start_angle_to_goal_a1,start_dist_to_goal_a2,start_angle_to_goal_a2,start_dist_to_goal_a3,start_angle_to_goal_a3,start_dist_to_goal_a4,start_angle_to_goal_a4,end_dist_to_goal_a0,end_angle_to_goal_a0,end_dist_to_goal_a1,end_angle_to_goal_a1,end_dist_to_goal_a2,end_angle_to_goal_a2,end_dist_to_goal_a3,end_angle_to_goal_a3,end_dist_to_goal_a4,end_angle_to_goal_a4,team_1,team_2,team_3,team_4,time_delta_1,time_delta_2,time_delta_3,time_delta_4
0,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,0,0,0,53.55,34.0,53.55,34.0,53.55,34.0,53.55,34.0,53.55,34.0,65.1,39.44,65.1,39.44,65.1,39.44,65.1,39.44,65.1,39.44,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,51.45,0.0,51.45,0.0,51.45,0.0,51.45,0.0,51.45,0.0,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,True,True,True,True,0.0,0.0,0.0,0.0
1,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,0,0,0,65.1,39.44,53.55,34.0,53.55,34.0,53.55,34.0,53.55,34.0,66.15,61.88,65.1,39.44,65.1,39.44,65.1,39.44,65.1,39.44,1.05,22.44,22.464552,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,40.26914,0.135505,51.45,0.0,51.45,0.0,51.45,0.0,51.45,0.0,47.818583,0.622462,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,True,True,True,True,1.486822,1.486822,1.486822,1.486822
2,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,0,0,0,66.15,61.88,65.1,39.44,53.55,34.0,53.55,34.0,53.55,34.0,69.3,48.96,66.15,61.88,65.1,39.44,65.1,39.44,65.1,39.44,3.15,-12.92,13.298455,1.05,22.44,22.464552,11.55,5.44,12.766993,11.55,5.44,12.766993,11.55,5.44,12.766993,0.0,0.0,0.0,-1.05,-22.44,22.464552,-1.05,-22.44,22.464552,-1.05,-22.44,22.464552,47.818583,0.622462,40.26914,0.135505,51.45,0.0,51.45,0.0,51.45,0.0,38.707772,0.396818,47.818583,0.622462,40.26914,0.135505,40.26914,0.135505,40.26914,0.135505,True,True,True,True,2.579799,4.066621,4.066621,4.066621
3,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,0,0,0,69.3,48.96,66.15,61.88,65.1,39.44,53.55,34.0,53.55,34.0,67.2,9.52,69.3,48.96,66.15,61.88,65.1,39.44,65.1,39.44,-2.1,-39.44,39.495868,3.15,-12.92,13.298455,1.05,22.44,22.464552,11.55,5.44,12.766993,11.55,5.44,12.766993,0.0,0.0,0.0,-3.15,12.92,13.298455,-4.2,-9.52,10.405306,-4.2,-9.52,10.405306,38.707772,0.396818,47.818583,0.622462,40.26914,0.135505,51.45,0.0,51.45,0.0,45.034547,0.5747,38.707772,0.396818,47.818583,0.622462,40.26914,0.135505,40.26914,0.135505,True,True,True,True,3.559624,6.139423,7.626245,7.626245
4,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,True,False,False,True,False,False,True,False,False,True,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,False,True,False,False,False,False,0,0,0,67.2,9.52,69.3,48.96,66.15,61.88,65.1,39.44,53.55,34.0,73.5,26.52,67.2,9.52,69.3,48.96,66.15,61.88,65.1,39.44,6.3,17.0,18.12981,-2.1,-39.44,39.495868,3.15,-12.92,13.298455,1.05,22.44,22.464552,11.55,5.44,12.766993,0.0,0.0,0.0,2.1,39.44,39.495868,-1.05,52.36,52.370527,-2.1,29.92,29.993606,45.034547,0.5747,38.707772,0.396818,47.818583,0.622462,40.26914,0.135505,51.45,0.0,32.375923,0.233142,45.034547,0.5747,38.707772,0.396818,47.818583,0.622462,40.26914,0.135505,True,True,True,True,3.988235,7.547859,10.127658,11.61448


## Overall Features

In [9]:
# Compute the feature table of every game and store it in features.h5 under
# the key "game_<game_id>".
for __, game in tqdm(df_games.iterrows(), total=len(df_games)):
    game_id = game["game_id"]

    # Actions of this game, enriched with the readable lookup names.
    df_actions = pd.read_hdf(f"{WYSCOUT_BASE_PATH}/spadl.h5",
                             key=f"actions/game_{game_id}")
    for df_lookup in (df_actiontypes, df_results, df_bodyparts):
        df_actions = df_actions.merge(df_lookup, how="left")
    df_actions = df_actions.reset_index(drop=True)

    # Game states with nb_prev_actions = 5, normalised for playing direction.
    dfs_gamestates = features.play_left_to_right(
        gamestates=features.gamestates(actions=df_actions, nb_prev_actions=5),
        home_team_id=game["home_team_id"],
    )

    # One column group per feature generator, placed side by side.
    feature_blocks = [make_features(dfs_gamestates) for make_features in functions_features]
    df_features = pd.concat(feature_blocks, axis=1)
    df_features.to_hdf(f"{WYSCOUT_BASE_PATH}/features.h5", key=f"game_{game_id}")

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




## Labels

In [10]:
# Label generators; each one is called on a game's action table.
functions_labels = [labels.scores, labels.concedes]

In [11]:
# Compute the label table of every game and store it in labels.h5 under the
# key "game_<game_id>".
for __, game in tqdm(df_games.iterrows(), total=len(df_games)):
    game_id = game["game_id"]

    # Actions of this game, enriched with the readable lookup names.
    df_actions = pd.read_hdf(f"{WYSCOUT_BASE_PATH}/spadl.h5",
                             key=f"actions/game_{game_id}")
    for df_lookup in (df_actiontypes, df_results, df_bodyparts):
        df_actions = df_actions.merge(df_lookup, how="left")
    df_actions = df_actions.reset_index(drop=True)

    # One column group per label generator, placed side by side.
    label_blocks = [make_labels(df_actions) for make_labels in functions_labels]
    df_labels = pd.concat(label_blocks, axis=1)
    df_labels.to_hdf(f"{WYSCOUT_BASE_PATH}/labels.h5", key=f"game_{game_id}")

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




## Dataset

In [12]:
# Column names the feature generators produce; nb_prev_actions matches the
# value used when the features were computed (5).
columns_features = features.feature_column_names(functions_features, nb_prev_actions=5)

In [13]:
columns_features[:5]

['type_pass_a0',
 'type_cross_a0',
 'type_throw_in_a0',
 'type_freekick_crossed_a0',
 'type_freekick_short_a0']

In [14]:
# Read the stored feature table of every game (in df_games order), keep the
# expected columns and stack everything into one frame with a fresh index.
dfs_features = [
    pd.read_hdf(f"{WYSCOUT_BASE_PATH}/features.h5",
                key=f"game_{game['game_id']}")[columns_features]
    for __, game in tqdm(df_games.iterrows(), total=len(df_games))
]

df_features = pd.concat(dfs_features).reset_index(drop=True)

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




In [15]:
# Label columns; one model is trained per entry.
columns_labels = ["scores", "concedes"]

In [16]:
# Read the stored label table of every game (in df_games order), keep the
# label columns and stack everything into one frame with a fresh index.
dfs_labels = [
    pd.read_hdf(f"{WYSCOUT_BASE_PATH}/labels.h5",
                key=f"game_{game['game_id']}")[columns_labels]
    for __, game in tqdm(df_games.iterrows(), total=len(df_games))
]

df_labels = pd.concat(dfs_labels).reset_index(drop=True)

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




## Modeling

In [95]:
%%time
# Train one classifier per label column, keyed by the label name.
# NOTE(review): every model is fitted on all rows of df_features, and the
# Predict section scores the same df_features, so those probabilities are
# in-sample. The stated goal (train on England, test on Italy) would need a
# split by competition first.
models = {}

for column_labels in columns_labels:
    target = df_labels[column_labels]
    classifier = XGBClassifier(eval_metric="logloss", use_label_encoder=False)
    classifier.fit(df_features, target)
    models[column_labels] = classifier

Wall time: 12min 52s


In [96]:
# save
# `pickle` is imported in the notebook's first cell. The context manager
# closes the file handle, so the pickle is flushed to disk as soon as the
# cell finishes rather than whenever the handle happens to be collected.
with open(f"{WYSCOUT_BASE_PATH}/models.pkl", "wb") as models_file:
    pickle.dump(models, models_file)

In [19]:
# load
# NOTE: pickle.load can execute arbitrary code embedded in the file -- only
# load a models.pkl that this notebook wrote itself.
# The context manager closes the file handle once the models are read.
with open(f"{WYSCOUT_BASE_PATH}/models.pkl", "rb") as models_file:
    models = pickle.load(models_file)

## Predict

In [20]:
# For every label, keep column 1 of predict_proba (the probability of the
# second class) as a Series, then place the Series side by side -- one column
# per label, one row per row of df_features.
dfs_predictions = {
    column_labels: pd.Series(models[column_labels].predict_proba(df_features)[:, 1])
    for column_labels in columns_labels
}

df_predictions = pd.concat(dfs_predictions, axis=1)

In [21]:
df_predictions.head()

Unnamed: 0,scores,concedes
0,0.000388,0.000291
1,0.000276,0.000367
2,0.001752,0.00082
3,0.005734,0.000814
4,0.002291,0.000618


In [22]:
# Collect the game_id column of every game's action table (in df_games order)
# into one integer Series with a fresh 0..n-1 index.
dfs_game_ids = [
    pd.read_hdf(f"{WYSCOUT_BASE_PATH}/spadl.h5",
                key=f"actions/game_{game['game_id']}")["game_id"]
    for __, game in tqdm(df_games.iterrows(), total=len(df_games))
]

df_game_ids = (
    pd.concat(dfs_game_ids, axis=0)
    .astype("int")
    .reset_index(drop=True)
)

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




In [23]:
# Attach each action's game id to its predictions. Both objects carry a
# 0..n-1 index, so rows are paired by position; a length mismatch would
# silently produce NaN-padded rows, hence the explicit check.
assert len(df_predictions) == len(df_game_ids), \
    "predictions and game ids must have one row per action"
# Selecting the label columns first keeps the cell idempotent: re-running it
# does not stack a second "game_id" column onto the frame.
df_predictions = pd.concat([df_predictions[columns_labels], df_game_ids], axis=1)

In [24]:
df_predictions.head()

Unnamed: 0,scores,concedes,game_id
0,0.000388,0.000291,2576335
1,0.000276,0.000367,2576335
2,0.001752,0.00082,2576335
3,0.005734,0.000814,2576335
4,0.002291,0.000618,2576335


In [25]:
# Group the prediction rows by game so each game can be written out separately.
df_predictions_per_game = df_predictions.groupby("game_id")

In [26]:
# Write each game's predictions to predictions.h5 under "game_<game_id>",
# re-indexed from 0 so they line up with that game's action table.
# The loop variable has its own name so the notebook-wide `df_predictions`
# frame is not replaced by the last game's rows once the loop finishes.
for game_id, df_predictions_game in tqdm(df_predictions_per_game):
    df_predictions_game = df_predictions_game.reset_index(drop = True)
    df_predictions_game[columns_labels].to_hdf(f"{WYSCOUT_BASE_PATH}/predictions.h5",
                                               key = f"game_{game_id}")

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




## Values

In [27]:
# Reload players and teams from the SPADL store. This rebinds df_players and
# df_teams, which earlier held the raw Wyscout JSON tables.
spadl_h5 = f"{WYSCOUT_BASE_PATH}/spadl.h5"
df_players = pd.read_hdf(spadl_h5, key="players")
df_teams = pd.read_hdf(spadl_h5, key="teams")

In [28]:
# For every game: load its actions (with readable names, player and team
# info), load its stored predictions, compute the VAEP values and keep the
# three tables side by side.
dfs_values = list()

for __, game in tqdm(df_games.iterrows(), total=len(df_games)):
    game_id = game["game_id"]
    df_actions = pd.read_hdf(path_or_buf = f"{WYSCOUT_BASE_PATH}/spadl.h5",
                             key = f"actions/game_{game_id}")
    df_actions = (df_actions
        .merge(df_actiontypes, how = "left")
        .merge(df_results, how = "left")
        .merge(df_bodyparts, how = "left")
        .merge(df_players, how = "left")
        .merge(df_teams, how = "left")
        .reset_index(drop = True)
    )

    # Per-game names are used below so the notebook-wide `df_predictions` and
    # `df_values` frames are not rebound to a single game's rows.
    df_predictions_game = pd.read_hdf(path_or_buf = f"{WYSCOUT_BASE_PATH}/predictions.h5",
                                      key = f"game_{game_id}")

    # VAEP here
    df_values_game = value(df_actions,
                           df_predictions_game["scores"],
                           df_predictions_game["concedes"])

    # All three frames are indexed 0..n-1 within the game, so a column-wise
    # concat lines them up action by action.
    df_all = pd.concat([df_actions, df_predictions_game, df_values_game], axis = 1)
    dfs_values.append(df_all)

HBox(children=(FloatProgress(value=0.0, max=760.0), HTML(value='')))




In [29]:
# Stack the per-game tables and put all actions in chronological order.
df_values = pd.concat(dfs_values)
df_values = df_values.sort_values(by=["game_id", "period_id", "time_seconds"])
df_values = df_values.reset_index(drop=True)

In [30]:
df_values[["short_name", "scores", "concedes", "offensive_value", "defensive_value", "vaep_value"]].head(10)

Unnamed: 0,short_name,scores,concedes,offensive_value,defensive_value,vaep_value
0,A. Lacazette,0.000935,0.000174,0.0,-0.0,0.0
1,R. Holding,0.005021,0.000344,0.004086,-0.00017,0.003916
2,M. Özil,0.006465,0.000266,0.001444,7.8e-05,0.001522
3,Mohamed Elneny,0.004535,0.001139,-0.00193,-0.000873,-0.002803
4,Bellerín,0.010321,0.00108,0.005786,5.9e-05,0.005844
5,M. Özil,0.00765,0.002075,-0.002671,-0.000994,-0.003665
6,H. Maguire,0.006598,0.00176,0.004524,0.00589,0.010414
7,Bellerín,0.01192,0.002475,0.01016,0.004123,0.014284
8,Bellerín,0.013349,0.009845,0.001429,-0.007371,-0.005942
9,G. Xhaka,0.006072,0.00102,-0.007277,0.008826,0.001548


## Rate by total VAEP

In [31]:
# Per (player, team): number of valued actions and their summed VAEP value,
# best total first.
ranking_keys = ["player_id", "team_name", "short_name"]

df_ranking = (
    df_values
    .groupby(ranking_keys)["vaep_value"]
    .agg(vaep_count="count", vaep_sum="sum")
    .sort_values("vaep_sum", ascending=False)
    .reset_index()
)

In [32]:
df_ranking.head(10)

Unnamed: 0,player_id,team_name,short_name,vaep_count,vaep_sum
0,120353.0,Liverpool FC,Mohamed Salah,1568,24.374483
1,38021.0,Manchester City FC,K. De Bruyne,3528,19.892317
2,26150.0,Leicester City FC,R. Mahrez,2022,18.929947
3,3484.0,SS Lazio,Luis Alberto,2292,17.242476
4,25707.0,Chelsea FC,E. Hazard,1974,16.759575
5,8717.0,Tottenham Hotspur FC,H. Kane,1153,16.515976
6,54.0,Tottenham Hotspur FC,C. Eriksen,2959,16.348843
7,21384.0,SS Lazio,C. Immobile,1196,16.28487
8,89186.0,Juventus FC,P. Dybala,1782,15.745255
9,206314.0,FC Internazionale Milano,M. Icardi,709,15.043843


## Rate by Total VAEP per 90 minutes

In [34]:
# Player appearances, restricted to the games listed in df_games.
df_player_games = pd.read_hdf(f"{WYSCOUT_BASE_PATH}/spadl.h5", key="player_games")

is_loaded_game = df_player_games["game_id"].isin(df_games["game_id"])
df_player_games = df_player_games.loc[is_loaded_game]

In [35]:
# Total minutes played per player, summed over the player's appearances.
df_minutes_played = (
    df_player_games
    .groupby("player_id", as_index=False)["minutes_played"]
    .sum()
)

In [36]:
# Players need more than this many minutes to be rated (360 = 4 x 90), which
# keeps tiny samples out of the per-90 ranking.
MIN_MINUTES_PLAYED = 360

# VAEP per 90 minutes, best rating first. Built as a single chain so the new
# column is never assigned onto a filtered slice (which raises pandas'
# SettingWithCopyWarning), and with an explicit merge key so that shared
# column names added later cannot silently change the join.
df_ranking_p90 = (df_ranking
    .merge(df_minutes_played, on = "player_id")
    .loc[lambda df: df["minutes_played"] > MIN_MINUTES_PLAYED]
    .assign(vaep_rating = lambda df: df["vaep_sum"] * 90 / df["minutes_played"])
    .sort_values("vaep_rating", ascending = False))

In [37]:
df_ranking_p90.head(10)

Unnamed: 0,player_id,team_name,short_name,vaep_count,vaep_sum,minutes_played,vaep_rating
0,120353.0,Liverpool FC,Mohamed Salah,1568,24.374483,2996.529579,0.732081
360,7926.0,SS Lazio,Nani,275,3.364659,434.623,0.69674
65,3802.0,Liverpool FC,Philippe Coutinho,1014,8.721137,1134.538547,0.691825
4,25707.0,Chelsea FC,E. Hazard,1974,16.759575,2505.438518,0.602035
229,25601.0,Benevento Calcio,C. Diabaté,177,4.734082,713.032583,0.597543
8,89186.0,Juventus FC,P. Dybala,1782,15.745255,2451.475153,0.578049
3,3484.0,SS Lazio,Luis Alberto,2292,17.242476,2761.026859,0.562046
1,38021.0,Manchester City FC,K. De Bruyne,3528,19.892317,3190.369684,0.56116
2,26150.0,Leicester City FC,R. Mahrez,2022,18.929947,3063.065477,0.556206
286,8249.0,Manchester United FC,M. Fellaini,477,4.142814,693.521021,0.537624
