In [1]:
import numpy as np
import pandas as pd
from tqdm import tqdm
import socceraction.spadl as spd
from socceraction.vaep import features as ft
import socceraction.vaep.labels as lab

In [2]:
# Leagues processed by the per-league loops in this notebook; each name is
# also the stem of the CSV file that is read for that league.
LEAGUES: list[str] = [
    "Spain",
    "England",
    "Germany",
    "Italy",
    "France",
]

In [3]:
def spadl_transform(events, matches):
    """Turn an event table into one concatenated action table, game by game.

    For every distinct ``events.game_id`` (in order of first appearance) the
    game's events are passed through ``spd.wyscout.convert_to_actions``,
    ``spd.play_left_to_right`` and ``spd.add_names``.

    Parameters
    ----------
    events : pd.DataFrame
        Event rows; must expose a ``game_id`` column.
    matches : pd.DataFrame
        One row per (match, side); the columns ``matchId``, ``side`` and
        ``teamId`` are read to find each game's home team.

    Returns
    -------
    pd.DataFrame
        All per-game action frames stacked, with a fresh 0..n-1 index.

    Raises
    ------
    IndexError
        If ``matches`` has no row with ``side == "home"`` for a game.
    """
    per_game_actions = []
    for game_id in tqdm(events.game_id.unique().tolist()):
        # The home team id is needed by both the conversion and the
        # left-to-right step.
        is_home_row = (matches.matchId == game_id) & (matches.side == "home")
        home_team_id = matches.loc[is_home_row, "teamId"].values[0]

        game_events = events.loc[events.game_id == game_id]
        actions = spd.wyscout.convert_to_actions(events=game_events, home_team_id=home_team_id)
        actions = spd.play_left_to_right(actions=actions, home_team_id=home_team_id)
        per_game_actions.append(spd.add_names(actions))

    return pd.concat(per_game_actions).reset_index(drop=True)

In [4]:
# Read each league's pre-converted CSV from ../data/spadl_format into a dict
# keyed by league name.
spadl = {
    league: pd.read_csv(f"../data/spadl_format/{league}.csv")
    for league in LEAGUES
}

In [5]:
def features_transform(spadl):
    """Build the per-action feature table for every game in ``spadl``.

    Side effect: before any feature is computed, the passed frame itself is
    modified -- ``result_id`` values 2 and 3 become 0 and ``result_name``
    values "offside" / "owngoal" become "fail". Callers that reuse the same
    frame afterwards see the relabelled results.

    Parameters
    ----------
    spadl : pd.DataFrame
        Action rows with at least ``game_id``, ``result_id`` and
        ``result_name``, plus whatever the ``ft`` feature functions read.

    Returns
    -------
    pd.DataFrame
        Feature columns of all games stacked (games in sorted ``game_id``
        order), with a fresh 0..n-1 index.
    """
    # Relabel in place (see docstring). Ids 2/3 are presumably the ids of
    # "offside"/"owngoal" -- verify against the SPADL result config.
    collapsed_ids = spadl.result_id.isin([2, 3])
    spadl.loc[collapsed_ids, ["result_id"]] = 0
    collapsed_names = spadl.result_name.isin(["offside", "owngoal"])
    spadl.loc[collapsed_names, ["result_name"]] = "fail"

    feature_fns = (
        ft.actiontype_onehot,
        ft.bodypart_onehot,
        ft.result_onehot,
        ft.goalscore,
        ft.startlocation,
        ft.endlocation,
        ft.team,
        ft.time,
        ft.time_delta,
    )

    per_game = []
    for game_id in tqdm(np.unique(spadl.game_id).tolist()):
        # Each game gets its own 0-based index before game states are built.
        game_actions = spadl.loc[spadl.game_id == game_id].reset_index(drop=True)
        game_states = ft.gamestates(actions=game_actions)
        feature_blocks = [fn(game_states) for fn in feature_fns]
        per_game.append(pd.concat(feature_blocks, axis=1))

    return pd.concat(per_game).reset_index(drop=True)

In [6]:
# One feature table per league, keyed by league name.
features = {league: features_transform(spadl[league]) for league in LEAGUES}

100%|██████████| 380/380 [00:11<00:00, 32.45it/s]
100%|██████████| 380/380 [00:11<00:00, 32.65it/s]
100%|██████████| 306/306 [00:09<00:00, 33.67it/s]
100%|██████████| 380/380 [00:11<00:00, 33.34it/s]
100%|██████████| 380/380 [00:11<00:00, 33.58it/s]


In [7]:
def labels_transform(spadl):
    """Build the label table for every game in ``spadl``.

    ``lab.scores`` and ``lab.concedes`` are evaluated on each game's actions
    separately and their outputs are placed side by side.

    Parameters
    ----------
    spadl : pd.DataFrame
        Action rows with a ``game_id`` column, plus whatever the two label
        functions read.

    Returns
    -------
    pd.DataFrame
        Labels of all games stacked (games in sorted ``game_id`` order), with
        a fresh 0..n-1 index.
    """
    label_fns = (lab.scores, lab.concedes)

    per_game = []
    for game_id in tqdm(np.unique(spadl.game_id).tolist()):
        game_actions = spadl.loc[spadl.game_id == game_id].reset_index(drop=True)
        label_columns = [fn(actions=game_actions) for fn in label_fns]
        per_game.append(pd.concat(label_columns, axis=1))

    return pd.concat(per_game).reset_index(drop=True)

In [8]:
# One label table per league, keyed by league name.
labels = {league: labels_transform(spadl[league]) for league in LEAGUES}

100%|██████████| 380/380 [00:08<00:00, 44.96it/s]
100%|██████████| 380/380 [00:08<00:00, 45.00it/s]
100%|██████████| 306/306 [00:06<00:00, 45.52it/s]
100%|██████████| 380/380 [00:08<00:00, 44.67it/s]
100%|██████████| 380/380 [00:08<00:00, 45.50it/s]


In [9]:
import xgboost as xgb
import sklearn.metrics as mt

In [10]:
def train_vaep(X_train, y_train, X_test, y_test):
    """Fit one XGBoost classifier per label and print its normalised Brier score.

    For each of the labels "scores" and "concedes" a classifier is fitted on
    the training data, then its Brier score on the train and test sets is
    printed relative to a baseline that predicts the positive rate of that
    same set for every row (NBS below 1 means better than the baseline).

    Parameters
    ----------
    X_train, X_test : feature tables passed to ``fit`` / ``predict_proba``.
    y_train, y_test : mappings (e.g. DataFrames) with "scores" and
        "concedes" entries holding the binary labels.

    Returns
    -------
    dict
        ``{"scores": fitted_model, "concedes": fitted_model}``.
    """

    def normalized_brier(model, X, y_true):
        # Baseline: a constant prediction equal to the share of positives.
        base_rate = sum(y_true) / len(y_true)
        baseline = [base_rate] * len(y_true)
        y_prob = model.predict_proba(X)[:, 1]
        return mt.brier_score_loss(y_true, y_prob) / mt.brier_score_loss(y_true, baseline)

    models = {}
    for target in ("scores", "concedes"):
        clf = xgb.XGBClassifier(random_state=0, n_estimators=50, max_depth=3)
        models[target] = clf

        print(f"training {target} model")
        clf.fit(X_train, y_train[target])

        train_nbs = normalized_brier(clf, X_train, y_train[target])
        print(f"{target} Train NBS: {train_nbs!s}")
        print()

        test_nbs = normalized_brier(clf, X_test, y_test[target])
        print(f"{target} Test NBS: {test_nbs!s}")
        print()

        print("----------------------------------------")

    return models

In [11]:
# Fit on the England tables and report the test score on the Spain tables.
models = train_vaep(
    X_train=features["England"],
    y_train=labels["England"],
    X_test=features["Spain"],
    y_test=labels["Spain"],
)

training scores model
scores Train NBS: 0.8455802930197476

scores Test NBS: 0.8508400022275144

----------------------------------------
training concedes model
concedes Train NBS: 0.9661436865979788

concedes Test NBS: 0.9766386802230537

----------------------------------------


In [12]:
def generate_predictions(features, models):
    """Return the positive-class probability of both models for every row.

    Parameters
    ----------
    features : feature table accepted by each model's ``predict_proba``.
    models : mapping with "scores" and "concedes" entries, each exposing
        ``predict_proba``.

    Returns
    -------
    pd.DataFrame
        Columns "scores" and "concedes" (in that order), one row per input
        row, holding column 1 of the respective ``predict_proba`` output.
    """
    return pd.DataFrame(
        {
            target: models[target].predict_proba(features)[:, 1]
            for target in ("scores", "concedes")
        }
    )


In [13]:
# Score every England action with both fitted models; the bare last line
# displays the resulting frame. England is also the league passed as training
# data to train_vaep in this notebook, so these are in-sample predictions.
preds = {
    "England": generate_predictions(features=features["England"], models=models),
}
preds["England"]

Unnamed: 0,scores,concedes
0,0.001798,0.000578
1,0.002908,0.000537
2,0.002132,0.000503
3,0.002523,0.000378
4,0.006924,0.000335
...,...,...
483894,0.067468,0.001340
483895,0.026389,0.003738
483896,0.004810,0.044791
483897,0.063947,0.004458


In [14]:
import socceraction.vaep.formula as fm

In [15]:
def calculate_action_values(spadl, predictions):
    """Attach VAEP values to each action, computing them one game at a time.

    ``fm.value`` rates an action by the change in scoring / conceding
    probability relative to the preceding row. Calling it once on a frame
    that holds many games therefore rates the first action of each game
    against the last action of the previous game. Evaluating per ``game_id``
    avoids that.

    Parameters
    ----------
    spadl : pd.DataFrame
        Action rows of one or more games. Must contain ``game_id`` and the
        identifier / location / type / result columns selected below, plus
        whatever ``fm.value`` reads.
    predictions : pd.DataFrame
        Columns "scores" and "concedes"; rows are matched to ``spadl`` by
        index label (assumed unique and shared by both frames).

    Returns
    -------
    pd.DataFrame
        The selected action columns, the predictions renamed to
        ``Pscores`` / ``Pconcedes``, and the columns returned by ``fm.value``,
        in the row order of ``spadl``.
    """
    per_game_values = []
    # sort=False keeps games in order of first appearance; rows inside a
    # group keep their original order and index labels.
    for _, game_actions in spadl.groupby("game_id", sort=False):
        game_preds = predictions.reindex(game_actions.index)
        per_game_values.append(
            fm.value(
                actions=game_actions,
                Pscores=game_preds["scores"],
                Pconcedes=game_preds["concedes"],
            )
        )

    if per_game_values:
        # Restore the caller's row order, in case a game's rows were not
        # contiguous in the input.
        action_values = pd.concat(per_game_values).reindex(spadl.index)
    else:
        # No games at all: contribute no value columns rather than fail.
        action_values = pd.DataFrame(index=spadl.index)

    return pd.concat([
        spadl[["original_event_id", "player_id", "action_id", "game_id", "start_x", "start_y", "end_x", "end_y", "type_name", "result_name"]],
        predictions.rename(columns={"scores": "Pscores", "concedes": "Pconcedes"}),
        action_values,
    ], axis=1)


In [16]:
# Value every England action from its predictions; the bare last line
# displays the resulting frame.
action_values = {
    "England": calculate_action_values(spadl=spadl["England"], predictions=preds["England"]),
}
action_values["England"]

Unnamed: 0,original_event_id,player_id,action_id,game_id,start_x,start_y,end_x,end_y,type_name,result_name,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
0,177959171.0,25413,0,2499719,51.45,34.68,32.55,14.96,pass,success,0.001798,0.000578,0.000000,-0.000000,0.000000
1,177959172.0,370224,1,2499719,32.55,14.96,53.55,17.00,pass,success,0.002908,0.000537,0.001110,0.000040,0.001151
2,177959173.0,3319,2,2499719,53.55,17.00,36.75,19.72,pass,success,0.002132,0.000503,-0.000776,0.000035,-0.000741
3,177959174.0,120339,3,2499719,36.75,19.72,43.05,3.40,pass,success,0.002523,0.000378,0.000391,0.000124,0.000515
4,177959175.0,167145,4,2499719,43.05,3.40,75.60,8.16,pass,success,0.006924,0.000335,0.004402,0.000043,0.004445
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
483894,251596226.0,20620,1136,2500098,55.65,7.48,103.95,19.04,pass,success,0.067468,0.001340,0.057554,0.000622,0.058176
483895,251596229.0,14703,1137,2500098,103.95,19.04,103.95,19.04,cross,fail,0.026389,0.003738,-0.041080,-0.002399,-0.043478
483896,251596408.0,8239,1138,2500098,2.10,46.92,0.00,46.24,interception,success,0.004810,0.044791,0.001072,-0.018402,-0.017330
483897,251596232.0,70965,1139,2500098,105.00,0.00,92.40,36.04,corner_crossed,success,0.063947,0.004458,0.017447,-0.004458,0.012989


In [17]:
# Show the first ten England actions whose predicted scoring probability
# exceeds 0.95.
(
    action_values["England"]
    .query("Pscores > 0.95")
    .head(10)
)

Unnamed: 0,original_event_id,player_id,action_id,game_id,start_x,start_y,end_x,end_y,type_name,result_name,Pscores,Pconcedes,offensive_value,defensive_value,vaep_value
34,177959212.0,25413,34,2499719,92.4,40.12,105.0,37.4,shot,success,0.980806,0.001848,0.901991,-0.000466,0.901525
58,177959280.0,14763,58,2499719,100.8,32.64,105.0,34.0,shot,success,0.979002,0.00439,0.867999,0.000935,0.868933
420,177959759.0,12829,420,2499719,98.7,31.28,105.0,34.0,shot,success,0.984362,0.002772,0.870623,-0.001097,0.869525
681,177960130.0,7945,681,2499719,96.6,34.0,105.0,34.0,shot,success,0.98658,0.004721,0.861667,-0.001124,0.860543
826,177960379.0,12829,826,2499719,96.6,31.28,105.0,37.4,shot,success,0.985016,0.002038,0.899583,0.001432,0.901015
1166,177960849.0,7870,1166,2499719,98.7,25.16,105.0,37.4,shot,success,0.986452,0.00376,0.887672,0.000341,0.888013
1196,177960902.0,26010,1196,2499719,95.55,38.08,105.0,37.4,shot,success,0.986235,0.00183,0.893829,0.001537,0.895367
2274,178148575.0,8325,976,2499720,93.45,30.6,105.0,34.0,shot,success,0.984907,0.002739,0.898128,-0.000546,0.897582
2895,178122511.0,9127,314,2499721,91.35,32.64,105.0,37.4,shot,success,0.983174,0.002764,0.912362,-0.000642,0.91172
3167,178122911.0,8433,586,2499721,95.55,51.68,105.0,30.6,shot,success,0.985816,0.001578,0.913176,0.00021,0.913386
