In [1]:
%load_ext autoreload
%autoreload 2

from kloppy import sportec
%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from mplsoccer import Pitch
import numpy as np
import pandas as pd
import polars as pl
from tqdm import tqdm
pd.set_option('display.max_columns', None)  # 모든 열 출력
from unravel.soccer import KloppyPolarsDataset

import os 
import pickle
import sys

sys.path.append(os.path.abspath(".."))
from pressing_intensity import CustomPressingIntensity
from visualization import plot_single_frame_positions, plot_window_frame_positions
from config import TEAMNAME2ID, TEAMID2NAME

coordinates = "secondspectrum"

2025-06-11 02:44:28.126367: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-11 02:44:28.128683: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-06-11 02:44:28.156309: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-06-11 02:44:28.156340: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-06-11 02:44:28.156360: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to regi

In [2]:
data_path = "/data/MHL/bepro/processed/"
results = []

if os.path.exists(data_path):
    match_id_lst = os.listdir(data_path)
    total_dict = {match_id : {} for match_id in match_id_lst}
    for match_id in match_id_lst:
        print(f"Load {match_id}")
        with open(f"{data_path}/{match_id}/{match_id}_processed_dict.pkl", "rb") as f:
            match_dict = pickle.load(f)
        tracking_df = match_dict['tracking_df']
        event_df = match_dict['event_df']
        meta_data = match_dict['meta_data']
        teams_dict = match_dict['teams']
        with open(f"{data_path}/{match_id}/{match_id}_presing_intensity.pkl", "rb") as f:
            pressing_df = pickle.load(f)

        total_df = pd.merge(tracking_df, pressing_df, on=['game_id', 'period_id', 'timestamp', 'frame_id'], how='left')
        # total_df = _normalize_coordinate_direction(total_df, teams_dict['Home']['pID'].iloc[0])
        total_dict[match_id] = {}
        total_dict[match_id]['tracking_df'] = total_df
        total_dict[match_id]['event_df'] = event_df
        total_dict[match_id]['meta_data'] = meta_data
        total_dict[match_id]['Home'] = match_dict['teams']['Home']
        total_dict[match_id]['Away'] = match_dict['teams']['Away']


Load 126293


Load 126298
Load 126306
Load 126309
Load 126315
Load 126319
Load 126325
Load 126332
Load 126341
Load 126348
Load 126350
Load 126356
Load 126364
Load 126367
Load 126378
Load 126380
Load 126386
Load 126391
Load 126401
Load 126408
Load 126411
Load 126418
Load 126424
Load 126429
Load 126433
Load 126444
Load 126448
Load 126455
Load 126458
Load 126466
Load 126473
Load 126476
Load 153364
Load 153373
Load 153379
Load 153381
Load 153385
Load 153387
Load 153390
Load 126285


In [3]:
match_id = "126315"

total_df = total_dict[match_id]['tracking_df'].copy()
event_df = total_dict[match_id]['event_df'].copy()
meta_data = total_dict[match_id]['meta_data'].copy()
home_team = total_dict[match_id]['Home'].copy()
away_team = total_dict[match_id]['Away'].copy()
teams_dict = {'Home' : home_team, 'Away' : away_team}

In [4]:
# Event data parser

event_df["period_order"] = event_df['period_order'] + 1
event_df["event_time"] = event_df["event_time"] / 1000
event_df['event_time'] = (event_df['event_time'] / 0.04).round() * 0.04
event_df['event_time'] = pd.to_timedelta(event_df['event_time'], unit='s')
event_df.rename(columns={
    'period_order' : 'period_id',
    'event_time' : 'timestamp'},
    inplace=True)

teams_df = pd.concat([home_team, away_team])
teams_df.reset_index(drop=True, inplace=True)
teams_df['player_code'] = teams_df.apply(lambda row : row['team'][0] + str(row['xID']).zfill(2), axis=1)
teams_df.head()

event_df = event_df.merge(
    teams_df,
    how='left',
    left_on=['player_name', 'player_shirt_number'],
    right_on=['player', 'jID']
)

event_df = event_df.drop(['player', 'team', 'jID'], axis=1)
event_df["pID"] = event_df["pID"].astype(int)

In [5]:
def get_event_name(x):
    event_name_lst = []
    for val in x:
        if "event_name" in val:
            event_name_lst.append(val['event_name'])
        elif "name" in val:
            event_name_lst.append(val['name'])
    return event_name_lst

event_df['events_name'] = event_df['events'].apply(get_event_name)

In [6]:
event_df.head()

Unnamed: 0,period_type,period_name,period_id,period_duration,period_start_time,timestamp,team_name,player_shirt_number,player_name,events,x,y,to_x,to_y,attack_direction,position,pID,tID,xID,player_code,events_name
0,Half,1st Half,1,2700000,0,0 days 00:00:02.160000,Gwangju FC,24,Kang Hyun Lee,"[{'event_name': 'Passes', 'property': {'Outcom...",0.5008,0.5271,0.6576,0.6895,LEFT,CM,360389,4648,6,A06,[Passes]
1,Half,1st Half,1,2700000,0,0 days 00:00:03,Gimcheon Sangmu,18,Kanghyun Yu,"[{'name': 'VHIR', 'property': {'duration': 200...",0.533551,0.654105,0.637543,0.683009,RIGHT,CF,500583,2353,10,H10,[VHIR]
2,Half,1st Half,1,2700000,0,0 days 00:00:03,Gwangju FC,20,Kunhee Lee,"[{'name': 'VHIR', 'property': {'duration': 250...",0.482749,0.274083,0.354669,0.275258,LEFT,CF,250102,4648,10,A10,[VHIR]
3,Half,1st Half,1,2700000,0,0 days 00:00:03.520000,Gwangju FC,11,Gabriel,"[{'name': 'VHIR', 'property': {'duration': 150...",0.467462,0.754444,0.398143,0.716478,LEFT,RM,500135,4648,5,A05,[VHIR]
4,Half,1st Half,1,2700000,0,0 days 00:00:03.960000,Gwangju FC,6,Youngkyu Ahn,"[{'event_name': 'Passes Received', 'property':...",0.6576,0.6895,,,LEFT,CB,62365,4648,3,A03,[Passes Received]


In [None]:
event_names = []
for event_name in event_df['events_name'].values:
    event_names += event_name
set(event_names)

{'Aerial Control',
 'Assists',
 'Blocks',
 'Clearances',
 'Crosses',
 'Crosses Received',
 'Defensive Line Supports',
 'Duels',
 'Fouls',
 'Goals Conceded',
 'HIR',
 'Interceptions',
 'Key Passes',
 'MAX_SPEED',
 'Mistakes',
 'Offsides',
 'Own Goals',
 'Passes',
 'Passes Received',
 'Recoveries',
 'SPRINT',
 'Saves',
 'Set Piece Defence',
 'Set Pieces',
 'Shots & Goals',
 'Step-in',
 'Tackles',
 'Take-on',
 'Turnover',
 'VHIR'}

In [12]:
# ball carrier에 대해 pressing intensity가 0.7보다 큰 경우 pressed_df 구성
pressed_dict = {}
ball_carrier_df = total_df[total_df['is_ball_carrier'] == True].copy()
ball_carrier_df.sort_values(['period_id', 'frame_id'], inplace=True)

for idx, row in tqdm(ball_carrier_df.iterrows(), desc= "Get Pressing Intensity", miniters=len(ball_carrier_df)//10):
    if len(np.where(row['rows'] == row['id'])[0]) != 0:
        pressed_axis = 'rows'
        presser_axis = 'columns'
        id_loc = np.where(row[pressed_axis] == row['id'])[0]
        # 다중 list nested 구조로 되어 있을 수 있으므로 tolist()를 두 번 적용
        pressing_values = row['probability_to_intercept'][id_loc].tolist()[0].tolist()
    elif len(np.where(row['columns'] == row['id'])[0]) != 0:
        pressed_axis = 'columns'
        presser_axis = 'rows'
        id_loc = np.where(row[pressed_axis] == row['id'])[0]
        pressing_values = [x[id_loc] for x in row['probability_to_intercept']]
    else:
        continue
    if max(pressing_values) > 0.9:
        pressed_dict[idx] = {}
        pressed_dict[idx]['pressing_value'] = max(pressing_values)
        max_idx = pressing_values.index(max(pressing_values))
        pressed_dict[idx]['pressing_player'] = row[presser_axis][max_idx]

pressed_df = ball_carrier_df.loc[list(pressed_dict.keys())].copy()
pressed_df['pressing_values'] = [d['pressing_value'] for d in pressed_dict.values()]
pressed_df['pressing_player'] = [d.get('pressing_player') for d in pressed_dict.values()]            

Get Pressing Intensity: 0it [00:00, ?it/s]

Get Pressing Intensity: 129663it [00:04, 26198.48it/s]


In [13]:
period_list = []
for period_id in pressed_df['period_id'].unique():
    period_df = pressed_df[pressed_df['period_id']==period_id].copy()
    # frame_id 차이가 50 프레임 이상인 경우 새로운 시퀀스로 판단 (즉, 연속된 pressed 행이 아닐 경우)
    period_df['frame_diff'] = period_df['frame_id'].diff()
    period_df['sequence_id'] = (period_df['frame_diff'] > 50).cumsum()

    # 각 시퀀스별 첫 번째 frame을 기준으로 X와 Y를 설정하기 위해 마지막 frame 정보를 구함
    first_frames = period_df.groupby('sequence_id', as_index=False)[['timestamp', 'period_id', 'frame_id', 'id', 'team_id', 'pressing_player']].first()

    # total_df 에서 필요한 컬럼만 추출
    lookup = total_df[['period_id', 'frame_id', 'id', 'v']]

    # pressing_player 컬럼은 total_df.id 와 매칭되어 있으므로,
    # 컬럼명을 맞춰서 merge
    first_frames = first_frames.merge(
        lookup.rename(columns={'id': 'pressing_player'}),
        on=['period_id', 'frame_id', 'pressing_player'],
        how='left'
    )

    # 압박하는 선수의 속도가 2.0 m/s 인 경우만 압박으로 간주
    first_frames = first_frames[first_frames['v'] >= 2.0]
    period_list.append(first_frames)

first_frames_df = pd.concat(period_list, axis=0, ignore_index=True)        

In [14]:
def check_pressing_success(row, event_df, teams_dict):
    possession_gained_events = ['Aerial Control', 'Duels', 'Interceptions', 'Crosses','Crossess Received',
                             'Passes', 'Passes Received', 'Recoveries', 'Shots & Goals'] 
    
    if row['team_id'] == teams_dict['Home']['tID'].unique()[0]:
        pressing_team = teams_dict['Away']['tID'].unique()[0]

    elif row['team_id'] == teams_dict['Away']['tID'].unique()[0]:
        pressing_team = teams_dict['Home']['tID'].unique()[0]

    check_timegap = pd.Timedelta(seconds=5)
    window_events = event_df[(event_df['timestamp'] >= row['timestamp']) & (event_df['timestamp'] <= row['timestamp'] + check_timegap)]
    event_teams = window_events['team_name'].unique()
    event_team_ids = [TEAMNAME2ID[x] for x in event_teams]

    if pressing_team in event_team_ids:
        pressing_team_events = window_events[window_events['team_name'] == TEAMID2NAME[pressing_team]]
        for _, row in pressing_team_events.iterrows():
            if row['events_name'][0] in possession_gained_events:
                if row['events_name'][0] in ["Interceptions", "Crosses", 'Crossess Received', "Recoveries", "Shots & Goals"]:
                    return True
                else:
                    result = row['events'][0].get('property', None)
                    if result == 'Succeeded' or result == "Tackle Succeeded: No Possession":
                        return True
        return False
    else:
        return False

first_frames_df['ball_ownership_changed'] = first_frames_df.apply(check_pressing_success, axis=1, event_df=event_df, teams_dict=teams_dict) # window=150은 예시

In [11]:
first_frames_df['ball_ownership_changed'].value_counts()

ball_ownership_changed
False    311
True      69
Name: count, dtype: int64

In [15]:
first_frames_df['ball_ownership_changed'].value_counts()

ball_ownership_changed
False    311
True      87
Name: count, dtype: int64

In [13]:
first_frames_df.head()

Unnamed: 0,sequence_id,timestamp,period_id,frame_id,id,team_id,pressing_player,v,ball_ownership_changed
0,0,0 days 00:00:04.160000,1.0,104,62365,4648,500583,6.407665,False
1,1,0 days 00:00:09.720000,1.0,243,500133,4648,500583,3.60254,False
2,2,0 days 00:00:12.960000,1.0,324,77414,4648,356616,5.300504,True
3,3,0 days 00:00:38.440000,1.0,961,500135,4648,356609,3.966142,True
4,4,0 days 00:00:42.640000,1.0,1066,356626,2353,250102,4.798982,False


In [111]:
time_window = pd.Timedelta(seconds=5)
ref_period_id = first_frames_df['period_id'].iloc[1]
ref_timestamp = first_frames_df['timestamp'].iloc[1]

start_time = ref_timestamp - time_window
end_time = ref_timestamp + time_window

event_df[
    (event_df['period_id'] == ref_period_id) &
    (event_df['timestamp'] >= start_time) &
    (event_df['timestamp'] <= ref_timestamp)
    ]

Unnamed: 0,period_type,period_name,period_id,period_duration,period_start_time,timestamp,team_name,player_shirt_number,player_name,events,x,y,to_x,to_y,attack_direction,position,pID,tID,xID,player_code,events_name
6,Half,1st Half,1,2700000,0,0 days 00:00:06,Gwangju FC,4,Alexandar Popovic,"[{'event_name': 'Passes Received', 'property':...",0.6653,0.4573,,,LEFT,CB,500133,4648,2,A02,[Passes Received]
7,Half,1st Half,1,2700000,0,0 days 00:00:07.520000,Gwangju FC,30,Kyongrok Choi,"[{'name': 'HIR', 'property': {'duration': 2000...",0.460443,0.631463,0.422659,0.510062,LEFT,CM,500139,4648,7,A07,[HIR]


In [112]:
event_df[
    (event_df['period_id'] == ref_period_id) &
    (event_df['timestamp'] >= ref_timestamp) &
    (event_df['timestamp'] <= end_time)
    ]

Unnamed: 0,period_type,period_name,period_id,period_duration,period_start_time,timestamp,team_name,player_shirt_number,player_name,events,x,y,to_x,to_y,attack_direction,position,pID,tID,xID,player_code,events_name
8,Half,1st Half,1,2700000,0,0 days 00:00:10.680000,Gwangju FC,4,Alexandar Popovic,"[{'event_name': 'Passes', 'property': {'Outcom...",0.5473,0.4396,0.5684,0.2348,LEFT,CB,500133,4648,2,A02,[Passes]
9,Half,1st Half,1,2700000,0,0 days 00:00:12.440000,Gwangju FC,3,Mingi Lee,"[{'event_name': 'Passes', 'property': {'Outcom...",0.5684,0.2348,0.3989,0.2107,LEFT,LB,145701,4648,1,A01,"[Passes, Passes Received]"
10,Half,1st Half,1,2700000,0,0 days 00:00:12.480000,Gimcheon Sangmu,5,Dong Hyun Kim,"[{'name': 'VHIR', 'property': {'duration': 200...",0.402639,0.296187,0.383216,0.159432,RIGHT,CM,356616,2353,6,H06,[VHIR]
11,Half,1st Half,1,2700000,0,0 days 00:00:13.520000,Gwangju FC,10,Huigyun Lee,"[{'event_name': 'Mistakes', 'property': {}}, {...",0.3989,0.2107,,,LEFT,CF,77414,4648,9,A09,"[Mistakes, Passes Received]"
12,Half,1st Half,1,2700000,0,0 days 00:00:14.480000,Gwangju FC,20,Kunhee Lee,"[{'event_name': 'Duels', 'property': {'Type': ...",0.3339,0.2127,,,LEFT,CF,250102,4648,10,A10,"[Duels, Recoveries]"
13,Half,1st Half,1,2700000,0,0 days 00:00:14.520000,Gimcheon Sangmu,25,Seunguk Park,"[{'event_name': 'Duels', 'property': {'Type': ...",0.3348,0.2109,,,RIGHT,CB,500576,2353,1,H01,[Duels]
