In [23]:
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from scipy.optimize import minimize
import pandas as pd
import numpy as np
import seaborn as sns
import os
import json
import math
import jsonlines
import pyarrow.parquet as pq
from utils import load_json_file, load_jsonl_file, euclidean_distance, check_target_area, get_receive_frame, explode_data, find_ball
import plotly.graph_objects as go
import soccerfield

import warnings

# Suppress all warnings for the rest of the session.
warnings.filterwarnings(action='ignore')

# Remove pandas' display limits so frames render without row, column or cell-width truncation.
for display_option in ('display.max_rows', 'display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)
# Load IPython's autoreload extension and use mode 2, so edited modules
# are re-imported automatically before each cell is executed.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
# Load the short-name mapping through the project's JSON helper.
short_name_map = load_json_file(
    'data/short_name_map.json',
)

In [31]:
# Read the event data from its parquet file into a DataFrame.
events_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(path=events_path)

In [32]:
# Drop the event types that are excluded from the rest of the notebook.
excluded_event_types = ['duel', 'touch', 'acceleration']
is_excluded_event = events_df['type_primary'].isin(excluded_event_types)
events_df = events_df[~is_excluded_event]

In [33]:
# Number of remaining events per primary type.
events_df['type_primary'].value_counts()

type_primary
pass                 192449
interception          20976
game_interruption     18627
throw_in              11903
shot                   5470
free_kick              5402
infraction             4697
clearance              3669
goal_kick              3657
corner                 2444
shot_against           2333
offside                 896
goalkeeper_exit         733
penalty                  58
fairplay                 44
own_goal                 24
Name: count, dtype: Int64

In [34]:
# List the entries of the pass-data directory and show how many there are.
pass_files = os.listdir(path='data/pass_data_2')
len(pass_files)

102

In [35]:
# Load one match's pass file and preview its first five rows.
pass_df = pd.read_csv(filepath_or_buffer='data/pass_data_2/1133199_pass.csv')
pass_df.head(n=5)

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,team_name,possession_team_name,possession_team_name_src,possession_types,period,receive_frame,receive_x,receive_y,target_area
0,1133199,558,0,D. Spence,C. Bizet Ildhusøy,501647,RCMF3,57095,56,pass,['lateral_pass' 'short_or_medium_pass'],70,65,True,Tottenham Hotspur,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_low'],1.0,567.0,26.96,-22.12,True
1,1133199,899,1,C. Bizet Ildhusøy,M. Bartrip,134320,LCB,62787,-157,pass,['back_pass' 'short_or_medium_pass'],89,88,True,Tottenham Hotspur,Tottenham Hotspur,Tottenham Hotspur,['corner' 'set_piece_attack' 'attack'],1.0,923.0,18.99,-22.19,True
2,1133199,1788,2,J. Rytting Kaneryd,J. Rytting Kaneryd,248585,RWB,63556,175,pass,['back_pass' 'carry' 'short_or_medium_pass'],83,96,True,Chelsea,Chelsea,Chelsea,['set_piece_attack' 'attack' 'free_kick'],1.0,1788.0,-37.5,32.77,True
3,1133199,1911,3,J. Carter,N. Charles,401061,LWB,57093,-66,pass,['short_or_medium_pass'],73,38,True,Chelsea,Chelsea,Chelsea,['attack' 'throw_in'],1.0,1926.0,-32.45,-28.15,True
4,1133199,2449,3,D. Spence,A. James,11163,RB,57095,60,pass,['lateral_pass' 'pass_to_final_third' 'short_or_medium_pass'],63,55,True,Tottenham Hotspur,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_medium'],1.0,2462.0,17.64,-22.19,True


In [14]:
# follow = os.listdir('data/follow_data')
# match_ids = [f.split('_')[0] for f in follow]

In [36]:
# Build one "follow" CSV per match.
#
# For every pass row, slot 0 holds the ball position at the pass's receive frame and
# slots 1-3 hold the ball position and acting player of the next three events, for as
# long as the possession team stays the same as at the pass. Slots that cannot be
# filled are left as None.
#
# Every row starts from the same all-None template, so each output file has the same
# follow columns in the same order (including `player_0`, which is never populated).
# NOTE(review): find_ball comes from utils; it is called as find_ball(match_id, frame)
# and its result is unpacked as an (x, y) pair — confirm how it handles a NaN frame.
N_FOLLOW_SLOTS = 4  # slot 0 = reception, slots 1..3 = the three following events
follow_columns = [
    f'{field}_{slot}'
    for slot in range(N_FOLLOW_SLOTS)
    for field in ('ball_x', 'ball_y', 'player')
]

# Make sure the output directory exists before the first file is written.
os.makedirs('data/follow_data', exist_ok=True)

for f in pass_files:
    # Ignore stray directory entries (e.g. notebook checkpoint folders) whose name
    # would make the int() conversion below fail.
    if not f.endswith('.csv'):
        continue
    match_id = int(f.split('_')[0])
    pass_df = pd.read_csv(f'data/pass_data_2/{f}')

    match_event_df = events_df[events_df['sk_match_id'] == match_id].reset_index(drop = True)
    follow_data = []
    for idx, row in pass_df.iterrows():
        pass_frame = row['frame']
        start_possession = row['possession_team_name']

        # Uniform template: every key present, every value None until proven otherwise.
        follow_dict = {column: None for column in follow_columns}

        # Positions (index was reset above) of the events recorded at the pass frame.
        matching_events = match_event_df.index[match_event_df['frame'] == pass_frame].tolist()

        # A row gets follow data only when its event exists and is followed by at least
        # three more events; otherwise it keeps the all-None template instead of
        # raising IndexError on a missing event.
        if matching_events and matching_events[0] <= len(match_event_df) - N_FOLLOW_SLOTS:
            event_idx = matching_events[0]
            receive_frame = row['receive_frame']
            follow_dict['ball_x_0'], follow_dict['ball_y_0'] = find_ball(match_id, receive_frame)
            for i in range(1, N_FOLLOW_SLOTS):
                action = match_event_df.iloc[event_idx + i]
                possession = action['possession_team_name']
                player_id = action['sk_player_id']
                if possession is pd.NA or not (possession == start_possession):
                    # Possession changed or is unknown: the remaining slots stay None.
                    break
                frame = action['frame']
                ball_x, ball_y = find_ball(match_id, frame)
                follow_dict[f'ball_x_{i}'] = ball_x
                follow_dict[f'ball_y_{i}'] = ball_y
                follow_dict[f'player_{i}'] = player_id
        follow_data.append(follow_dict)

    # Passing `columns` fixes the schema even when a match has no pass rows at all.
    follow_df = pd.DataFrame(follow_data, columns=follow_columns)
    pass_df = pd.concat([pass_df, follow_df], axis=1)
    pass_df.to_csv(f'data/follow_data/{match_id}_follow.csv', index = False)

In [38]:
# Preview only the first rows: display.max_rows is set to None earlier in the
# notebook, so a bare `pass_df` would render every row of the frame.
pass_df.head(10)

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,team_name,possession_team_name_src,possession_types,period,receive_frame,receive_x,receive_y,target_area,ball_x_1,ball_y_1,ball_x_2,ball_y_2,ball_x_3,ball_y_3,ball_x_0,ball_y_0,ball_x_1.1,ball_y_1.1,ball_x_2.1,ball_y_2.1,ball_x_3.1,ball_y_3.1,ball_x_0.1,ball_y_0.1,ball_x_1.2,ball_y_1.2,ball_x_2.2,ball_y_2.2,ball_x_3.2,ball_y_3.2,ball_x_4,ball_y_4
0,1133199,558,0,D. Spence,C. Bizet Ildhusøy,501647,RCMF3,57095,56,pass,['lateral_pass' 'short_or_medium_pass'],70,65,True,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_low'],1.0,567.0,26.96,-22.12,True,47.2,-12.22,,,,,18.98,-15.39,47.2,-12.22,,,,,18.98,-15.39,47.2,-12.22,,,,,,
1,1133199,899,1,C. Bizet Ildhusøy,M. Bartrip,134320,LCB,62787,-157,pass,['back_pass' 'short_or_medium_pass'],89,88,True,Tottenham Hotspur,Tottenham Hotspur,['corner' 'set_piece_attack' 'attack'],1.0,923.0,18.99,-22.19,True,,,,,,,37.27,-29.96,,,,,,,37.27,-29.96,,,,,,,,
2,1133199,1788,2,J. Rytting Kaneryd,J. Rytting Kaneryd,248585,RWB,63556,175,pass,['back_pass' 'carry' 'short_or_medium_pass'],83,96,True,Chelsea,Chelsea,['set_piece_attack' 'attack' 'free_kick'],1.0,1788.0,-37.5,32.77,True,,,,,,,-36.61,32.94,,,,,,,-36.61,32.94,,,,,,,,
3,1133199,1911,3,J. Carter,N. Charles,401061,LWB,57093,-66,pass,['short_or_medium_pass'],73,38,True,Chelsea,Chelsea,['attack' 'throw_in'],1.0,1926.0,-32.45,-28.15,True,,,,,,,-21.2,-11.14,-35.55,-18.61,,,,,-21.2,-11.14,-35.55,-18.61,,,,,,
4,1133199,2449,3,D. Spence,A. James,11163,RB,57095,60,pass,['lateral_pass' 'pass_to_final_third' 'short_or_medium_pass'],63,55,True,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_medium'],1.0,2462.0,17.64,-22.19,True,,,,,,,13.69,-7.74,21.88,-22.03,,,,,13.69,-7.74,21.88,-22.03,,,,,,
5,1133199,2618,4,A. Neville,G. Clinton,684474,LWF,68736,4,pass,['forward_pass' 'pass_to_final_third' 'progressive_pass'\n 'short_or_medium_pass'],59,6,True,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_medium'],1.0,2626.0,23.65,28.73,True,,,,,,,,,,,,,,,14.8,24.97,21.94,28.19,12.2,24.43,-5.98,21.79,-25.37,-7.67
6,1133199,5555,9,L. James,N. Charles,401061,LWB,146967,12,pass,['forward_pass' 'pass_to_final_third' 'progressive_pass'\n 'short_or_medium_pass'],63,5,True,Chelsea,Chelsea,['attack'],1.0,5560.0,-21.53,-31.89,True,,,,,,,,,,,,,,,-15.45,-32.76,-30.38,-27.07,-30.81,-18.74,-35.69,-7.98,,
7,1133199,7105,11,D. Spence,G. Clinton,684474,LWF,57095,0,pass,['forward_pass' 'linkup_play' 'pass_to_final_third' 'short_or_medium_pass'],64,12,True,Tottenham Hotspur,Tottenham Hotspur,['attack' 'transition_low'],1.0,7113.0,25.91,30.43,True,,,,,,,,,,,,,,,19.5,25.19,27.59,21.26,26.8,17.0,21.39,14.65,,
8,1133199,8871,14,M. Thomas,C. Bizet Ildhusøy,501647,RCMF3,62874,37,pass,['forward_pass' 'linkup_play' 'pass_to_final_third' 'short_or_medium_pass'],56,67,True,Tottenham Hotspur,Tottenham Hotspur,['set_piece_attack' 'attack' 'free_kick'],1.0,8882.0,18.85,-33.35,True,,,,,,,,,,,,,,,6.91,-22.28,,,,,,,,
9,1133199,12428,20,D. Spence,C. Bizet Ildhusøy,501647,RCMF3,57095,5,pass,['forward_pass' 'linkup_play' 'pass_to_final_third' 'progressive_pass'\n 'short_or_medium_pass'],44,78,True,Tottenham Hotspur,Tottenham Hotspur,['transition_low'],1.0,12446.0,19.63,-28.65,True,,,,,,,,,,,,,,,1.53,-20.48,,,,,,,,


In [12]:
# NOTE(review): `action_1` is not assigned in any cell visible in this part of the
# notebook, and this cell's execution count (12) is lower than the cells before it —
# presumably leftover kernel state; confirm it survives Restart Kernel -> Run All.
action_1

sk_match_id                                                      1133199
wy_match_id                                                      5509649
frame                                                                614
is_matched                                                          True
frame_tracking_data_available                                       True
is_matched_applicable                                               True
aerialduel_firsttouch                                               <NA>
aerialduel_height                                                   <NA>
aerialduel_opponent_height                                          <NA>
aerialduel_opponent_id                                              <NA>
aerialduel_opponent_name                                            <NA>
aerialduel_opponent_position                                        <NA>
aerialduel_relatedduelid                                            <NA>
carry_endlocation_x                                