In [1]:
import os
import sys
import warnings
import pandas as pd
from tqdm.notebook import tqdm

# Resolve the parent of the notebook's working directory and expose it on
# sys.path; the later `express` imports presumably rely on this — confirm.
base_path = os.path.abspath(os.path.join(os.getcwd(), ".."))
print(f"base_path: {base_path}")
# Guard the append so that re-running this cell does not pile duplicate
# entries onto sys.path.
if base_path not in sys.path:
    sys.path.append(base_path)

base_path: /home/uoscisai/Experiments/Football/sr-press


In [2]:
from express.databases import SQLiteDatabase
from express.datasets import PressingDataset

from express import features as fs
from express import labels as ls

In [3]:
# Absolute locations of the train/test SQLite files under base_path.
TRAIN_DB_PATH, TEST_DB_PATH = (
    os.path.join(base_path, relative_db)
    for relative_db in (
        "stores/train_database.sqlite",
        "stores/test_database.sqlite",
    )
)

# Open one database handle per split (train first, then test).
train_db, test_db = SQLiteDatabase(TRAIN_DB_PATH), SQLiteDatabase(TEST_DB_PATH)

# Show the handles so the reader can see both objects were created.
print("train_db:", train_db)
print("test_db:", test_db)

train_db: <express.databases.sqlite.SQLiteDatabase object at 0x7b3af7c89240>
test_db: <express.databases.sqlite.SQLiteDatabase object at 0x7b3bf998b040>


In [4]:
# Explicit positional slice: rows 16 through 31 of the test games table.
test_db.games().iloc[16:32]

Unnamed: 0_level_0,season_id,competition_id,game_day,game_date,home_team_id,away_team_id,competition_stage,home_score,away_score,venue,referee
game_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
3869117,106,43,4,2022-12-03 17:00:00,941,1839,Round of 16,3,1,Sheikh Khalifa International Stadium,Wilton Pereira Sampaio
3869118,106,43,4,2022-12-04 21:00:00,768,787,Round of 16,3,0,Al Bayt Stadium,Ivan Arcides Barton Cisneros
3869151,106,43,4,2022-12-03 21:00:00,779,792,Round of 16,2,1,Ahmad bin Ali Stadium,Szymon Marciniak
3869152,106,43,4,2022-12-04 17:00:00,771,789,Round of 16,3,1,Al Thumama Stadium,Jesús Valenzuela Sáez
3869219,106,43,4,2022-12-05 17:00:00,778,785,Round of 16,1,1,Al Janoub Stadium,Ismail Elfath
3869220,106,43,4,2022-12-06 17:00:00,788,772,Round of 16,0,0,Education City Stadium,Fernando Andrés Rapallini
3869253,106,43,4,2022-12-05 21:00:00,781,791,Round of 16,4,1,Stadium 974,Clément Turpin
3869254,106,43,4,2022-12-06 21:00:00,780,773,Round of 16,6,1,Lusail Stadium,César Arturo Ramos Palazuelos
3869321,106,43,5,2022-12-09 21:00:00,941,779,Quarter-finals,2,2,Lusail Stadium,Antonio Miguel Mateu Lahoz
3869354,106,43,5,2022-12-10 21:00:00,768,771,Quarter-finals,1,2,Al Bayt Stadium,Wilton Pereira Sampaio


In [5]:
# Print the shape of the games table for the train and test databases, in that order.
print(*(db.games().shape for db in (train_db, test_db)))

(151, 11) (49, 11)


In [6]:
# Names of every callable listed in express.features.all_features.
all_features = [feature_fn.__name__ for feature_fn in fs.all_features]
# Names of every callable listed in express.labels.all_labels.
all_labels = [label_fn.__name__ for label_fn in ls.all_labels]

print("Features:", all_features)
print("Labels:", all_labels)

Features: ['actiontype', 'actiontype_onehot', 'result', 'result_onehot', 'bodypart', 'bodypart_onehot', 'time', 'startlocation', 'relative_startlocation', 'endlocation', 'relative_endlocation', 'startpolar', 'endpolar', 'movement', 'team', 'time_delta', 'space_delta', 'goalscore', 'angle', 'under_pressure', 'packing_rate', 'ball_height_onehot', 'speed', 'freeze_frame_360', 'nb_opp_in_path', 'dist_opponent', 'defenders_in_3m_radius', 'closest_3_players', 'closest_11_players', 'expected_3_receiver_and_presser_by_distance']
Labels: ['concede_shots', 'counterpress']


In [17]:
def _make_pressing_dataset(split):
    """Construct the PressingDataset stored under ``stores/datasets/<split>``.

    Both splits use the same feature functions, label function and
    ``nb_prev_actions``; only the storage directory differs. New argument
    lists are created on every call, so the two datasets share no list objects.

    Args:
        split: Name of the sub-directory below ``stores/datasets``.

    Returns:
        The newly constructed ``PressingDataset``.
    """
    return PressingDataset(
        path=os.path.join(base_path, "stores", "datasets", split),
        xfns=["bodypart", "time", "goalscore"],
        yfns=["counterpress"],
        # NOTE(review): presumably disables reuse of previously stored data — confirm.
        load_cached=False,
        nb_prev_actions=3,
    )


train_dataset = _make_pressing_dataset("train")
test_dataset = _make_pressing_dataset("test")

In [18]:
# Populate both datasets from their databases (train first, then test).
# In the recorded run each call emitted two progress bars (151 items for
# train, 49 for test); the slower pass took about 20 min for train and
# about 7 min for test, so expect this cell to be long-running.
train_dataset.create(train_db)
test_dataset.create(test_db)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 151/151 [20:19<00:00,  8.08s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 151/151 [00:46<00:00,  3.22it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [06:53<00:00,  8.44s/it]
100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 49/49 [00:15<00:00,  3.11it/s]


In [None]:
# Display the features held by the training dataset.
train_dataset.features

In [None]:
# Show the first entry of the 'freeze_frame_360_a0' feature column of the test dataset.
# NOTE(review): 'freeze_frame_360' is not among the xfns requested when the
# datasets were constructed (["bodypart", "time", "goalscore"]), so this column
# may be absent and the lookup may fail — confirm before relying on this cell.
test_dataset.features['freeze_frame_360_a0'].iloc[0]

In [None]:
# Display the labels held by the training dataset.
train_dataset.labels

In [None]:
# Display the labels held by the test dataset.
test_dataset.labels

In [None]:
# Tally how often each "counterpress" label value occurs in the training dataset.
train_dataset.labels["counterpress"].value_counts()

In [None]:
# Tally how often each "counterpress" label value occurs in the test dataset.
test_dataset.labels["counterpress"].value_counts()