In [1]:
import os
import sys
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm

base_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
print(f"base_path: {base_path}")
sys.path.append(base_path)

base_path: /home/uoscisai/Experiments/Football/sr-press


In [2]:
from functools import partial
from xgboost import XGBClassifier
from gplearn.genetic import SymbolicClassifier

In [3]:
from express.databases import SQLiteDatabase
from express.datasets import PressingDataset
from express.components import press
from express.visualization import plot_action
from express.utils import add_names

In [4]:
TRAIN_DB_PATH = os.path.join(base_path, "stores/train_database.sqlite")
TEST_DB_PATH = os.path.join(base_path, "stores/test_database.sqlite")

train_db = SQLiteDatabase(TRAIN_DB_PATH)
test_db = SQLiteDatabase(TEST_DB_PATH)

print("train_db:", train_db)
print("test_db:", test_db)

train_db: <express.databases.sqlite.SQLiteDatabase object at 0x73f06b0679a0>
test_db: <express.databases.sqlite.SQLiteDatabase object at 0x73f06b0677c0>


In [5]:
dataset_train = partial(PressingDataset, path=os.path.join(base_path, "stores", "datasets", "train"))
dataset_test = partial(PressingDataset, path=os.path.join(base_path, "stores", "datasets", "test"))

print("dataset_train:", dataset_train)
print("dataset_test:", dataset_test)

dataset_train: functools.partial(<class 'express.datasets.PressingDataset'>, path='/home/uoscisai/Experiments/Football/sr-press/stores/datasets/train')
dataset_test: functools.partial(<class 'express.datasets.PressingDataset'>, path='/home/uoscisai/Experiments/Football/sr-press/stores/datasets/test')


In [6]:
# xgboost, symbolic_regression
test_dataset = PressingDataset(
    path= os.path.join(base_path, "stores", "datasets", "test"),
    xfns=["startlocation", "closest_11_players"],
    yfns=["counterpress"],
    load_cached=True,
    nb_prev_actions=3
)

# soccermap
# test_dataset = PressingDataset(
#     path= os.path.join(base_path, "stores", "datasets", "test"),
#     xfns=["startlocation", "freeze_frame_360"],
#     yfns=["counterpress"],
#     load_cached=True,
#     nb_prev_actions=1
# )

print(f"Test DataSet: {test_dataset.features.shape}")
print(f"Test Dataset: {test_dataset.labels.value_counts().to_dict()}")

Test DataSet: (19453, 195)
Test Dataset: {(False,): 9954, (True,): 2452}


In [7]:
model = "xgboost" #"soccermap"
trial = 0
save_path = os.path.join(base_path, "stores", "model", model, f"{trial:03d}", "component.pkl")
save_path

'/home/uoscisai/Experiments/Football/sr-press/stores/model/xgboost/000/component.pkl'

In [10]:
from express.components.base import exPressComponent
import pickle
from pathlib import Path

path = Path(save_path)
#component = pickle.load(path.open(mode="rb"))
#component

In [11]:
component.test(dataset_train)

NameError: name 'component' is not defined

In [None]:
component.test(dataset_test)

In [12]:
import torch
import matplotlib.pyplot as plt
from sklearn.calibration import calibration_curve
from sklearn.metrics import roc_curve, roc_auc_score
from torchmetrics.classification import BinaryCalibrationError

def plot_calibration_curves(y_true, y_pred, ax):
    bce_l1 = BinaryCalibrationError(n_bins=10, norm='l1')
    ece = bce_l1(torch.Tensor(y_pred), torch.Tensor(y_true))

    prob_true, prob_pred = calibration_curve(y_true, y_pred, n_bins=10)
    ax.plot(prob_pred, prob_true, marker='o', label=f'{model} (ECE = {ece:.4f})')

    ax.plot([0, 1], [0, 1], linestyle='--', color='grey', label='Perfect Calibration')
    ax.set_xlabel('Predicted Probability')
    ax.set_ylabel('True Probability')
    ax.set_title('Calibration Plot')
    ax.legend()

def plot_roc_curves(y_true, y_pred, ax):
    fpr, tpr, _ = roc_curve(y_true, y_pred)
    auc_score = roc_auc_score(y_true, y_pred)
    ax.plot(fpr, tpr, label=f'{model} (AUC = {auc_score:.4f})')

    ax.plot([0, 1], [0, 1], linestyle='--', color='grey')
    ax.set_xlabel('False Positive Rate')
    ax.set_ylabel('True Positive Rate')
    ax.set_title('ROC Curve')
    ax.legend(loc='lower right')


y_pred = component.predict(dataset_test).values
y_true = test_dataset.labels["counterpress"].values

fig, axes = plt.subplots(1, 2, figsize=(14, 6))
plot_calibration_curves(y_true, y_pred, axes[0])
plot_roc_curves(y_true, y_pred, axes[1])
plt.tight_layout(rect=[0, 0.03, 1, 0.95])
plt.show()

NameError: name 'component' is not defined

### XGBoost feature importances

In [None]:
k = 20  # 원하는 k 값을 설정하세요 (상위 k개)

features = [col for _, cols in component.features.items() for col in cols]
importances = component.model.feature_importances_
indices = np.argsort(importances)[-k:]

plt.figure(figsize=(max(10, k * 0.5), 10))  
plt.barh(range(k), importances[indices], align='center')
plt.yticks(range(k), [features[i] for i in indices])
plt.xlabel('Feature Importance')
plt.title('Top Feature Importances')
plt.tight_layout()
plt.show()


In [None]:
game_id = 3788741

df_actions = add_names(train_db.actions(game_id)).reset_index()
df_actions.head()

### Visualization

In [None]:
pressure_idx = df_actions[df_actions["type_name"] == "pressing"].index[5]
home_team_id, away_team_id = train_db.get_home_away_team_id(game_id = 3788741)
for idx in range(pressure_idx-2, pressure_idx+2):
    if df_actions.loc[idx]["freeze_frame_360"] is None:
        print("Skip action due to missing freeze frame")
        continue
    plot_action(df_actions.loc[idx], home_team_id=home_team_id)