In [1]:
import os
import sys
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

# Repository root = parent of the directory the notebook is launched from.
# It is put on sys.path so the project-local `express` package can be imported.
base_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
print(f"base_path: {base_path}")
# Guarded so that re-running this cell does not pile up duplicate sys.path entries.
if base_path not in sys.path:
    sys.path.append(base_path)

base_path: /home/uoscisai/Experiments/Football/sr-press


In [2]:
from functools import partial
from xgboost import XGBClassifier
from gplearn.genetic import SymbolicClassifier

In [3]:
from express.databases import SQLiteDatabase
from express.datasets import PressingDataset
from express.components import press
from express.visualization import plot_action
from express.utils import add_names

In [4]:
# SQLite files holding the train / test splits, resolved against the repository root.
TRAIN_DB_PATH = os.path.join(base_path, "stores/train_database.sqlite")
TEST_DB_PATH = os.path.join(base_path, "stores/test_database.sqlite")

# Open one database handle per split (train first, then test).
train_db, test_db = SQLiteDatabase(TRAIN_DB_PATH), SQLiteDatabase(TEST_DB_PATH)

# Echo both handles as a quick sanity check.
for handle_name, handle in (("train_db:", train_db), ("test_db:", test_db)):
    print(handle_name, handle)

train_db: <express.databases.sqlite.SQLiteDatabase object at 0x75914586b4f0>
test_db: <express.databases.sqlite.SQLiteDatabase object at 0x75914586a9e0>


In [5]:
# Dataset factories: PressingDataset with only `path` pre-bound, so the remaining
# constructor arguments can be supplied later by whoever calls the factory.
datasets_root = os.path.join(base_path, "stores", "datasets")
dataset_train = partial(PressingDataset, path=os.path.join(datasets_root, "train"))
dataset_test = partial(PressingDataset, path=os.path.join(datasets_root, "test"))

for factory_name, factory in (("dataset_train:", dataset_train), ("dataset_test:", dataset_test)):
    print(factory_name, factory)

dataset_train: functools.partial(<class 'express.datasets.PressingDataset'>, path='/home/uoscisai/Experiments/Football/sr-press/stores/datasets/train')
dataset_test: functools.partial(<class 'express.datasets.PressingDataset'>, path='/home/uoscisai/Experiments/Football/sr-press/stores/datasets/test')


In [6]:
# Feature configuration used for the xgboost / symbolic_regression models.
# (The soccermap variant previously sketched here used
#  xfns=["startlocation", "freeze_frame_360"] with nb_prev_actions=1 on the test split.)
def _load_pressing_split(split):
    """Load the cached PressingDataset stored under stores/datasets/<split>."""
    return PressingDataset(
        path=os.path.join(base_path, "stores", "datasets", split),
        xfns=["startlocation", "closest_11_players", 'actiontype', 'get_xp_to_player'],
        yfns=["counterpress"],
        load_cached=True,
        nb_prev_actions=3,
    )


train_dataset = _load_pressing_split("train")
test_dataset = _load_pressing_split("test")

# Report the size and label balance of the test split only.
test_shape = test_dataset.features.shape
test_label_counts = test_dataset.labels.value_counts().to_dict()
print(f"Test DataSet: {test_shape}")
print(f"Test Dataset: {test_label_counts}")

Test DataSet: (12406, 207)
Test Dataset: {(False,): 9954, (True,): 2452}


In [11]:
# Trailing digit of the column suffix to keep: current (0), previous (1), second previous (2).
nb_prev_actions_lst = ["0", "1", "2"]
selected_features = ["startlocation", "closest_11_players", 'actiontype', 'get_xp_to_player']
label = ["counterpress"]

# For every selected feature generator, keep the columns whose LAST CHARACTER is one
# of the digits above.
# NOTE(review): only the final character is inspected, so a suffix such as "_a10"
# would also match "0" - fine while at most three actions are loaded; confirm if that grows.
features = {
    xfn.__name__: [column for column in columns if column[-1:] in nb_prev_actions_lst]
    for xfn, columns in train_dataset.xfns.items()
    if xfn.__name__ in selected_features
}

features

{'startlocation': ['start_x_a0',
  'start_y_a0',
  'start_x_a1',
  'start_y_a1',
  'start_x_a2',
  'start_y_a2'],
 'closest_11_players': ['teammate_1_x_a0',
  'teammate_1_y_a0',
  'teammate_1_distance_a0',
  'teammate_2_x_a0',
  'teammate_2_y_a0',
  'teammate_2_distance_a0',
  'teammate_3_x_a0',
  'teammate_3_y_a0',
  'teammate_3_distance_a0',
  'teammate_4_x_a0',
  'teammate_4_y_a0',
  'teammate_4_distance_a0',
  'teammate_5_x_a0',
  'teammate_5_y_a0',
  'teammate_5_distance_a0',
  'teammate_6_x_a0',
  'teammate_6_y_a0',
  'teammate_6_distance_a0',
  'teammate_7_x_a0',
  'teammate_7_y_a0',
  'teammate_7_distance_a0',
  'teammate_8_x_a0',
  'teammate_8_y_a0',
  'teammate_8_distance_a0',
  'teammate_9_x_a0',
  'teammate_9_y_a0',
  'teammate_9_distance_a0',
  'teammate_10_x_a0',
  'teammate_10_y_a0',
  'teammate_10_distance_a0',
  'opponent_1_x_a0',
  'opponent_1_y_a0',
  'opponent_1_distance_a0',
  'opponent_2_x_a0',
  'opponent_2_y_a0',
  'opponent_2_distance_a0',
  'opponent_3_x_a0',


In [12]:
import json

In [13]:
# Key of the params.json entry whose hyperparameters are loaded below; it also
# names the sub-directory used as the save path.
model = 'scikit'

# params.json sits in the repository root. Anchoring the path to base_path (instead
# of the cwd-relative "../params.json") matches how every other path in this
# notebook is built.
with open(os.path.join(base_path, "params.json"), 'r', encoding="utf-8") as f:
    all_params = json.load(f)

# Fall back to an empty configuration when the file has no entry for this model.
params = all_params.get(model, {})

params['save_path'] = os.path.join(base_path, "stores", "model", model)

In [17]:
# Gradient-boosted classifier evaluated with log-loss.
# An earlier XGBClassifier(objective="binary:logistic", eval_metric='logloss') was
# created here and immediately overwritten; only this configuration was ever used.
# You probably want to do some hyperparameter tuning here to get a good model.
xgb = XGBClassifier(
    n_estimators=200, max_depth=5, n_jobs=-1, verbosity=0, eval_metric="logloss"
)

# Wrap the estimator in the project's component together with the selected
# feature columns, the label and the parameters loaded from params.json.
model = press.XGBoostComponent(
    model=xgb,
    features=features,
    label=label,
    params=params,
)

model.train(dataset_train)

[0]	validation_0-logloss:0.43253
[1]	validation_0-logloss:0.40233
[2]	validation_0-logloss:0.38154
[3]	validation_0-logloss:0.36584
[4]	validation_0-logloss:0.35626
[5]	validation_0-logloss:0.34722
[6]	validation_0-logloss:0.34167
[7]	validation_0-logloss:0.33653
[8]	validation_0-logloss:0.33173
[9]	validation_0-logloss:0.32899
[10]	validation_0-logloss:0.32652
[11]	validation_0-logloss:0.32462
[12]	validation_0-logloss:0.32130
[13]	validation_0-logloss:0.31966
[14]	validation_0-logloss:0.31844
[15]	validation_0-logloss:0.31679
[16]	validation_0-logloss:0.31427
[17]	validation_0-logloss:0.31310
[18]	validation_0-logloss:0.31133
[19]	validation_0-logloss:0.31026
[20]	validation_0-logloss:0.30953
[21]	validation_0-logloss:0.30879
[22]	validation_0-logloss:0.30773
[23]	validation_0-logloss:0.30748
[24]	validation_0-logloss:0.30706
[25]	validation_0-logloss:0.30625
[26]	validation_0-logloss:0.30536
[27]	validation_0-logloss:0.30469
[28]	validation_0-logloss:0.30439
[29]	validation_0-loglos

In [18]:
# Evaluate the trained component on each split and print the returned metrics,
# train first, then test.
train_metrics = model.test(dataset_train)
print(f"########## Train Metrics ##########\n{train_metrics}\n")
test_metrics = model.test(dataset_test)
print(f"########## Test Metrics ##########\n{test_metrics}\n")

########## Train Metrics ##########
{'precision': 0.9416966426858513, 'recall': 0.8443757559467814, 'f1': 0.8903847516474173, 'log_loss': 0.13419505048888722, 'brier': 0.03477561292684078, 'roc_auc': 0.9820885667456322}

########## Test Metrics ##########
{'precision': 0.7444678609062171, 'recall': 0.5762642740619902, 'f1': 0.6496551724137931, 'log_loss': 0.30638883105257786, 'brier': 0.09022691284821938, 'roc_auc': 0.9024240503051393}



In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC

In [None]:
# Baseline scikit-learn classifiers, each constructed with the library defaults.
logreg, nn, nb, knn, svc = (
    LogisticRegression(),
    MLPClassifier(),
    GaussianNB(),
    KNeighborsClassifier(),
    SVC(),
)

In [None]:
# Train the logistic-regression baseline through the project's scikit-learn
# component, using the same features, label and params as the XGBoost run.
model = press.ScikitComponent(model=logreg, features=features, label=label, params=params)
model.train(dataset_train)

# Report metrics on each split, train first, then test.
train_metrics = model.test(dataset_train)
print(f"########## Train Metrics ##########\n{train_metrics}\n")
test_metrics = model.test(dataset_test)
print(f"########## Test Metrics ##########\n{test_metrics}\n")

In [None]:
import torch
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_curve, roc_auc_score
from torchmetrics.classification import BinaryCalibrationError

def plot_calibration_curves(y_true, y_pred, ax, name=None):
    """Draw a calibration curve for the predictions on ``ax``.

    The curve comes from ``calibration_curve`` (10 bins) and the legend entry
    reports the L1 ``BinaryCalibrationError`` (10 bins) of the same predictions.
    A dashed grey diagonal marks perfect calibration.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (presumably binary 0/1 or bool - confirm with caller).
    y_pred : array-like
        Predicted scores, aligned with ``y_true``.
    ax : matplotlib Axes
        Axes to draw on; labels, title and legend are set on it.
    name : optional
        Text used in the legend entry. When omitted, the notebook-global
        ``model`` is used, which is what this function always did before.
    """
    if name is None:
        # Backward-compatible default: label the curve with the global `model`.
        name = model

    bce_l1 = BinaryCalibrationError(n_bins=10, norm='l1')
    ece = bce_l1(torch.Tensor(y_pred), torch.Tensor(y_true))

    prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=10)
    ax.plot(prob_pred, prob_true, marker='o', label=f'{name} (ECE = {ece:.4f})')

    # Reference diagonal: predicted probability equals observed frequency.
    ax.plot([0, 1], [0, 1], linestyle='--', color='grey', label='Perfect Calibration')
    ax.set_xlabel('Predicted Probability')
    ax.set_ylabel('True Probability')
    ax.set_title('Calibration Plot')
    ax.legend()

def plot_roc_curves(y_true, y_pred, ax, name=None):
    """Draw the ROC curve for the predictions on ``ax``.

    The legend entry reports the ROC AUC computed by ``roc_auc_score``; a dashed
    grey diagonal is drawn from (0, 0) to (1, 1) for reference.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels (presumably binary 0/1 or bool - confirm with caller).
    y_pred : array-like
        Predicted scores, aligned with ``y_true``.
    ax : matplotlib Axes
        Axes to draw on; labels, title and legend are set on it.
    name : optional
        Text used in the legend entry. When omitted, the notebook-global
        ``model`` is used, which is what this function always did before.
    """
    if name is None:
        # Backward-compatible default: label the curve with the global `model`.
        name = model

    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auc_score = roc_auc_score(y_true, y_pred)
    ax.plot(fpr, tpr, label=f'{name} (AUC = {auc_score:.4f})')

    # Reference diagonal.
    ax.plot([0, 1], [0, 1], linestyle='--', color='grey')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc='lower right')


# Score the test split with the most recently trained component.
# `component` was never defined in this notebook (NameError on a fresh kernel);
# the trained component lives in `model`, which is also the object the plot
# helpers use for their legend labels.
y_pred = model.predict(dataset_test).values
y_true = test_dataset.labels["counterpress"].values

# Calibration on the left, ROC on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 6))
plot_calibration_curves(y_true, y_pred, axes[0])
plot_roc_curves(y_true, y_pred, axes[1])
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

### XGBoost feature importances

In [None]:
k = 20  # number of top-ranked features to display

# `component` was never defined in this notebook; use the trained component held
# in `model`. Only estimators exposing `feature_importances_` (e.g. the XGBoost
# model) can be plotted, so re-run the XGBoost training cell first if `model`
# currently holds another estimator.
estimator = model.model

if not hasattr(estimator, "feature_importances_"):
    print(
        f"Skipping feature-importance plot: {type(estimator).__name__} "
        "does not expose feature_importances_."
    )
else:
    # Flatten the component's feature mapping into one list of column names.
    # A separate name is used so the `features` dict needed by the training
    # cells is not overwritten.
    feature_names = [col for cols in model.features.values() for col in cols]
    importances = estimator.feature_importances_

    # Never ask for more bars than there are features.
    top_k = min(k, len(importances))
    indices = np.argsort(importances)[-top_k:]

    fig, ax = plt.subplots(figsize=(max(10, top_k * 0.5), 10))
    ax.barh(range(top_k), importances[indices], align='center')
    ax.set_yticks(range(top_k))
    ax.set_yticklabels([feature_names[i] for i in indices])
    ax.set_xlabel('Feature Importance')
    ax.set_title('Top Feature Importances')
    fig.tight_layout()
    plt.show()


In [None]:
# Match whose actions are inspected in the visualization below.
game_id = 3788741

# Fetch the match's actions from the training database, attach readable names,
# and move the existing index into regular columns.
raw_actions = train_db.actions(game_id)
df_actions = add_names(raw_actions).reset_index()
df_actions.head()

### Visualization

In [None]:
# Row label of the sixth "pressing" action in the match (position 5 among them).
pressure_idx = df_actions[df_actions["type_name"] == "pressing"].index[5]

# Use the notebook's `game_id` rather than repeating the literal, so the team
# lookup always refers to the same match as `df_actions`.
home_team_id, away_team_id = train_db.get_home_away_team_id(game_id=game_id)

# Plot the two actions before the press, the press itself and the one after,
# clamped to the table bounds (df_actions has a 0..n-1 index after reset_index()).
window_start = max(pressure_idx - 2, 0)
window_stop = min(pressure_idx + 2, len(df_actions))
for idx in range(window_start, window_stop):
    action = df_actions.loc[idx]
    if action["freeze_frame_360"] is None:
        print("Skip action due to missing freeze frame")
        continue
    plot_action(action, home_team_id=home_team_id)