In [1]:
import os, sys
from tqdm import tqdm
sys.path.append(os.path.abspath(os.path.dirname(os.getcwd())))

import numpy as np
import pandas as pd
import ast
import matplotlib.pyplot as plt

from src.features import intended_receiver, extract_player_pos, extract_pass
from src.visualization import plot_action
from src.preprocess_data import make_freeze_frame
from src.labels import get_intended_receiver

# this is very useful as it makes sure that always all columns and rows of a data frame are displayed
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)

In [2]:
match1 = pd.read_csv('../metrica-data/preprocess-data/tracking-data/match1.csv')
match2 = pd.read_csv('../metrica-data/preprocess-data/tracking-data/match2.csv')
match3 = pd.read_csv('../metrica-data/preprocess-data/tracking-data/match3.csv')
all_events = pd.read_csv('../metrica-data/EPV-data/labelling-all-match.csv')
all_events['freeze_frame'] = all_events['freeze_frame'].apply(ast.literal_eval)
# all_events['Intended_Receiver'] = all_events['Baseline Intended_Receiver'].apply(ast.literal_eval)

In [3]:
len(all_events)

7300

In [4]:
all_events['eventName'].value_counts()

eventName
Pass              3314
CARRY             1395
CHALLENGE          879
RECOVERY           820
BALL LOST          372
SET PIECE          247
BALL OUT           140
SHOT                68
FAULT RECEIVED      54
CARD                11
Name: count, dtype: int64

#### Dribble 라벨링

In [5]:
carry_event = all_events[all_events['eventName']=='CARRY']

In [6]:
len(carry_event)

1395

In [7]:
carry_event = all_events[all_events['eventName']=='CARRY'][['eventName', 'start_frame', 'end_frame', 'from', 'event_id', 'game_id']].reset_index(drop=True)

accurate = []
for idx, row in carry_event.iterrows():
    #event로 알아내기
    df_carry = all_events[(all_events['start_frame'] >= row['start_frame'])
                             &(all_events['start_frame'] <= row['end_frame'])
                             &(all_events['game_id'] == row['game_id'])]
    event_list = list(df_carry['eventName'].values)
    event_list_num = [1 for event in event_list if event in ["BALL LOST", "BALL OUT"]]
    if sum(event_list_num) >= 1:
            accurate.append("fail")
    else:
            accurate.append("success")

carry_event['result'] = accurate
carry_event.head()

Unnamed: 0,eventName,start_frame,end_frame,from,event_id,game_id,result
0,CARRY,377,384,A07,3682,3,success
1,CARRY,426,465,A08,3684,3,success
2,CARRY,507,530,A02,3686,3,success
3,CARRY,580,598,A03,3688,3,success
4,CARRY,628,652,A04,3690,3,success


In [8]:
carry_event['result'].value_counts()

result
success    1179
fail        216
Name: count, dtype: int64

#### Shot 라벨링

In [9]:
shot_event = all_events[all_events['eventName']=='SHOT']

In [10]:
shot_event[(shot_event['goal']==1)&(shot_event['ownGoal']==1)]

Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase,goal,ownGoal,teamId,eventName,freeze_frame,accurate,value_label,Baseline Intended-Receiver,game_id,event_id,True Intended-receiver,no pass


In [11]:
shot_event = all_events[all_events['eventName']=='SHOT']
shot_event['result'] = shot_event['goal']
shot_event['result'] =  shot_event['result'].replace({0:'fail', 1:'success'})
shot_event.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shot_event['result'] = shot_event['goal']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  shot_event['result'] =  shot_event['result'].replace({0:'fail', 1:'success'})


Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase,goal,ownGoal,teamId,eventName,freeze_frame,accurate,value_label,Baseline Intended-Receiver,game_id,event_id,True Intended-receiver,no pass,result
34,Home,SHOT,HEAD-ON TARGET-GOAL,1,2289,91.56,2309,92.36,A09,,99.36,33.84,109.08,39.6,1,1,0,1,SHOT,"{'A11': {'teammate': True, 'actor': False, 'ba...",,0,{},1,34,,,success
79,Home,SHOT,OFF TARGET-OUT,1,5923,236.92,5953,238.12,A10,,90.72,19.44,112.32,43.2,1,0,0,1,SHOT,"{'A11': {'teammate': True, 'actor': False, 'ba...",,0,{},1,79,,,fail
110,Away,SHOT,OFF TARGET-OUT,1,7753,310.12,7789,311.56,B21,,21.6,21.6,-4.32,46.8,1,0,0,2,SHOT,"{'B25': {'teammate': True, 'actor': False, 'ba...",,0,{},1,110,,,fail
135,Home,SHOT,ON TARGET-SAVED,1,9628,385.12,9632,385.28,A08,,103.68,26.64,105.84,33.12,1,0,0,1,SHOT,"{'A11': {'teammate': True, 'actor': False, 'ba...",,0,{},1,135,,,fail
239,Home,SHOT,HEAD-OFF TARGET-OUT,1,18270,730.8,18301,732.04,A08,,101.52,39.6,111.24,43.92,1,0,0,1,SHOT,"{'A11': {'teammate': True, 'actor': False, 'ba...",,0,{},1,239,,,fail


#### Cross 라벨링

- Cross / Cross-Interception 모두 Cross를 지칭한다. (영상을 통해 확인 완료)
- eventName에 Pass로 분류되어 있으며, accurate도 들어가있다.

In [71]:
all_events.loc[all_events['subtype'].str.contains('CROSS'), 'eventName'] = 'CROSS'

In [73]:
cross_event = all_events[all_events['eventName']=='CROSS']
cross_event['result'] = cross_event['accurate']
cross_event['result'] =  cross_event['result'].replace({0:'fail', 1:'success'})
cross_event.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cross_event['result'] = cross_event['accurate']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  cross_event['result'] =  cross_event['result'].replace({0:'fail', 1:'success'})


Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase,goal,ownGoal,teamId,eventName,freeze_frame,accurate,value_label,Baseline Intended-Receiver,game_id,event_id,True Intended-receiver,no pass,result
33,Home,PASS,CROSS,1,2263,90.52,2289,91.56,A10,A09,96.12,10.08,99.36,33.84,1,0,0,1,CROSS,"{'A11': {'teammate': True, 'actor': False, 'ba...",1.0,1,"{'dist': {'ID': 'A09', 'end_x': 98.46468, 'end...",1,33,,,success
89,Away,BALL LOST,CROSS-INTERCEPTION,1,6987,279.48,7043,281.72,B22,,41.04,60.48,8.64,28.8,1,0,0,2,CROSS,"{'B25': {'teammate': True, 'actor': False, 'ba...",0.0,0,"{'dist': {'ID': 'B24', 'end_x': 29.20644, 'end...",1,89,B23,,fail
162,Away,BALL LOST,CROSS-INTERCEPTION,1,11987,479.48,12032,481.28,B18,,23.76,8.64,3.24,43.92,1,0,0,2,CROSS,"{'B25': {'teammate': True, 'actor': False, 'ba...",0.0,0,"{'dist': {'ID': 'B23', 'end_x': 15.55524, 'end...",1,162,B23,,fail
393,Away,BALL LOST,CROSS-INTERCEPTION,1,29016,1160.64,29061,1162.44,B21,,17.28,69.12,9.72,31.68,1,0,0,2,CROSS,"{'B25': {'teammate': True, 'actor': False, 'ba...",0.0,0,"{'dist': {'ID': 'B24', 'end_x': 15.13404, 'end...",1,393,B24,,fail
459,Away,BALL LOST,CROSS-INTERCEPTION,1,34619,1384.76,34644,1385.76,B21,,7.56,59.04,4.32,40.32,1,0,0,2,CROSS,"{'B25': {'teammate': True, 'actor': False, 'ba...",0.0,0,"{'dist': {'ID': 'B24', 'end_x': 9.828, 'end_y'...",1,459,B24,,fail


In [74]:
len(cross_event)

43

In [75]:
cross_event['subtype'].value_counts()

subtype
CROSS-INTERCEPTION    22
CROSS                 21
Name: count, dtype: int64

In [76]:
cross_event['accurate'].value_counts()

accurate
0.0    31
1.0    12
Name: count, dtype: int64

#### 패스 라벨링

In [77]:
# get all passes
pass_event = all_events[
                (all_events["eventName"] == "Pass")
                & (all_events["start_frame"] < all_events["end_frame"])
            ].copy()
pass_event['result'] = pass_event['accurate']
pass_event['result'] =  pass_event['result'].replace({0:'fail', 1:'success'})
pass_event.head()

Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase,goal,ownGoal,teamId,eventName,freeze_frame,accurate,value_label,Baseline Intended-Receiver,game_id,event_id,True Intended-receiver,no pass,result
1,Away,PASS,PASS,1,1,0.04,3,0.12,B19,B21,48.6,28.08,59.4,30.96,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B21', 'end_x': 59.66244, 'end...",1,1,,,success
2,Away,PASS,PASS,1,3,0.12,17,0.68,B21,B15,59.4,30.96,62.64,15.12,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B15', 'end_x': 63.06444, 'end...",1,2,,,success
3,Away,PASS,PASS,1,45,1.8,61,2.44,B15,B19,59.4,13.68,48.6,22.32,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B19', 'end_x': 48.33756, 'end...",1,3,,,success
4,Away,PASS,PASS,1,77,3.08,96,3.84,B19,B21,48.6,23.04,52.92,33.84,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B21', 'end_x': 52.67376, 'end...",1,4,,,success
5,Away,PASS,PASS,1,191,7.64,217,8.68,B21,B22,43.2,52.56,34.56,70.56,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B22', 'end_x': 33.16356, 'end...",1,5,,,success


In [78]:
pass_event['accurate'].value_counts()

accurate
1.0    2870
0.0     381
Name: count, dtype: int64

In [79]:
pass_event['result'].value_counts()

result
success    2870
fail        381
Name: count, dtype: int64

#### 최종 통합

In [80]:
all_events_final = all_events.copy()
pass_carry_shot = pd.concat([pass_event, carry_event, shot_event, cross_event], axis=0)

In [81]:
pass_carry_shot['result'].value_counts()

result
success    4069
fail        688
Name: count, dtype: int64

In [82]:
pass_carry_shot['eventName'].value_counts()

eventName
Pass     3251
CARRY    1395
SHOT       68
CROSS      43
Name: count, dtype: int64

In [86]:
all_events_f = pd.merge(all_events_final, pass_carry_shot[['event_id', 'result']], how='left', on='event_id')

In [87]:
all_events_f.head()

Unnamed: 0,team,type,subtype,session,start_frame,start_time,end_frame,end_time,from,to,start_x,start_y,end_x,end_y,phase,goal,ownGoal,teamId,eventName,freeze_frame,accurate,value_label,Baseline Intended-Receiver,game_id,event_id,True Intended-receiver,no pass,result
0,Away,SET PIECE,KICK OFF,1,1,0.04,0,0.0,B19,,,,,,1,0,0,2,SET PIECE,{},,0,{},1,0,,,
1,Away,PASS,PASS,1,1,0.04,3,0.12,B19,B21,48.6,28.08,59.4,30.96,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B21', 'end_x': 59.66244, 'end...",1,1,,,success
2,Away,PASS,PASS,1,3,0.12,17,0.68,B21,B15,59.4,30.96,62.64,15.12,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B15', 'end_x': 63.06444, 'end...",1,2,,,success
3,Away,PASS,PASS,1,45,1.8,61,2.44,B15,B19,59.4,13.68,48.6,22.32,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B19', 'end_x': 48.33756, 'end...",1,3,,,success
4,Away,PASS,PASS,1,77,3.08,96,3.84,B19,B21,48.6,23.04,52.92,33.84,1,0,0,2,Pass,"{'B25': {'teammate': True, 'actor': False, 'ba...",1.0,0,"{'dist': {'ID': 'B21', 'end_x': 52.67376, 'end...",1,4,,,success


In [88]:
all_events_f['eventName'].value_counts()

eventName
Pass              3271
CARRY             1395
CHALLENGE          879
RECOVERY           820
BALL LOST          372
SET PIECE          247
BALL OUT           140
SHOT                68
FAULT RECEIVED      54
CROSS               43
CARD                11
Name: count, dtype: int64

In [89]:
len(all_events_f)

7300

In [91]:
all_events_f.to_csv('all_events_final.csv')