In [1]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import math
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq
from utils import load_json_file, load_jsonl_file, euclidean_distance, check_target_area, get_receive_frame, explode_data
%load_ext autoreload
%autoreload 2

import warnings

# Suppress every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Lift pandas' display limits on row count, column count and cell text width.
for display_option in ('display.max_rows', 'display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [2]:
# Work out which matches have both a tracking file and event rows, then
# restrict the events table to exactly those matches.
events_path = 'data/wyscout_events.parquet'

# Only the '.jsonl' entries of the tracking folder are considered.
tracking_files = [name for name in os.listdir('data/FA/tracking') if name.endswith('.jsonl')]
events_df = pd.read_parquet(events_path)

# The part of each file name before the first '.' is parsed as the match id.
tracking_ids = [int(name.partition('.')[0]) for name in tracking_files]
event_match_ids = list(set(events_df['sk_match_id']))
match_ids = [tracking_id for tracking_id in tracking_ids if tracking_id in event_match_ids]

events_df = events_df.loc[events_df['sk_match_id'].isin(match_ids)]

In [3]:
# Columns carried forward from the events table; everything else is dropped.
cols = [
    'sk_match_id',
    'frame',
    'minute',
    'player_name',
    'pass_recipient_name',
    'pass_recipient_id',
    'pass_recipient_position',
    'sk_player_id',
    'pass_angle',
    'type_primary',
    'type_secondary',
    'location_x',
    'location_y',
    'pass_accurate',
    'team_name',
    'possession_team_name_src',
    'possession_types',
]
events_df = events_df.loc[:, cols]

In [4]:
# Distinct, non-null pass recipient names found in the events table.
recipient_names = set(events_df['pass_recipient_name'].dropna())

In [5]:
# Map each player's short name to an id, taken from the match files.
# When a short name appears in several match files, the first id seen is kept.
short_name_map = {}
for match_id in match_ids:
    match_path = f'data/FA/match/{match_id}.json'
    match_data = load_json_file(match_path)
    # Team short names are read here but not used further in this cell.
    home_team = match_data['home_team']['short_name']
    away_team = match_data['away_team']['short_name']
    player_data = match_data['players']
    for player in player_data:
        short_name = player['short_name']
        if short_name in short_name_map:
            continue
        short_name_map[short_name] = player['id']

In [6]:
# Number of distinct short names collected from the match files.
len(short_name_map)

323

In [7]:
# Recipient names from the events table that have no entry in short_name_map.
missing = [name for name in recipient_names if name not in short_name_map]
missing

['A. James', 'J. Olme', 'S. Fearne', 'Geyse Ferreira']

In [8]:
# Hand-entered ids for the recipient names that the match files did not cover.
# NOTE(review): the source of these ids is not recorded in the notebook — confirm.
short_name_map.update({
    'Geyse Ferreira': 22555,
    'A. James': 64167,
    'J. Olme': 22800,
    'S. Fearne': 809180,
})

In [9]:
# Keep only accurate passes that name a recipient.
is_pass = events_df['type_primary'].eq('pass')
is_accurate = events_df['pass_accurate'].eq(True)
has_recipient = events_df['pass_recipient_name'].notna()
events_df_pass = events_df[is_pass & is_accurate & has_recipient]

In [10]:
# Spot-check the filtered passes of a single match.
events_df_pass.loc[events_df_pass['sk_match_id'].eq(1133200)]

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,team_name,possession_team_name_src,possession_types
110457,1133200,40518,66,Laia Aleixandri,C. Kelly,290005,RWF,64095,11,pass,"[forward_pass, progressive_pass, short_or_medium_pass]",42,87,True,Manchester City,Manchester City,"[attack, transition_low]"
110458,1133200,40537,66,C. Kelly,Laia Aleixandri,369453,RB,57068,-168,pass,"[back_pass, short_or_medium_pass]",62,92,True,Manchester City,Manchester City,"[attack, transition_low]"
110459,1133200,40560,66,Laia Aleixandri,A. Kennedy,61834,RCB,64095,-149,pass,"[back_pass, short_or_medium_pass]",40,86,True,Manchester City,Manchester City,"[attack, transition_low]"
110460,1133200,40618,66,A. Kennedy,K. Keating,730306,GK,22569,-108,pass,"[lateral_pass, short_or_medium_pass]",26,72,True,Manchester City,Manchester City,"[attack, transition_low]"
110461,1133200,40693,66,K. Keating,Y. Hasegawa,238894,DMF,68732,-8,pass,"[forward_pass, short_or_medium_pass]",21,50,True,Manchester City,Manchester City,"[attack, transition_low]"
110462,1133200,40720,66,Y. Hasegawa,A. Greenwood,11186,LCB,22684,-119,pass,"[lateral_pass, short_or_medium_pass]",28,48,True,Manchester City,Manchester City,"[attack, transition_low]"
110463,1133200,40747,66,A. Greenwood,A. Kennedy,61834,RCB,22604,83,pass,"[lateral_pass, short_or_medium_pass]",22,32,True,Manchester City,Manchester City,"[attack, transition_low]"
110465,1133200,40795,67,A. Kennedy,Laia Aleixandri,369453,RB,22569,28,pass,"[forward_pass, short_or_medium_pass]",41,76,True,Manchester City,Manchester City,"[attack, transition_low]"
110466,1133200,40847,67,Laia Aleixandri,A. Kennedy,61834,RCB,64095,-149,pass,"[back_pass, short_or_medium_pass]",61,92,True,Manchester City,Manchester City,"[attack, transition_low]"
110467,1133200,40895,67,A. Kennedy,A. Greenwood,11186,LCB,22569,-137,pass,"[back_pass, short_or_medium_pass]",44,77,True,Manchester City,Manchester City,"[attack, transition_low]"


In [15]:
# Files already written to the pass-data folder; the text before the first
# underscore of each file name is taken as the match id (kept as a string).
pass_files = os.listdir('data/pass_data')
existing_ids = [filename.partition('_')[0] for filename in pass_files]

In [16]:
# Matches that do not yet have a file in data/pass_data (ids compared as strings).
left_ids = [match_id for match_id in match_ids if str(match_id) not in existing_ids]

In [17]:
# Display the match ids that still have no pass file.
left_ids

[1453349, 1454436, 1454967, 1454968, 1454969, 1454970, 1618208]

In [11]:
# Number of matches that have both a tracking file and event rows.
len(match_ids)

102

In [18]:
%%time
# Build one '<match_id>_pass.csv' per match in left_ids.
#
# For every accurate pass of a match, get_receive_frame is asked to look through
# the tracking file between the pass frame and the next pass frame (never more
# than max_search_frames ahead). Its five return values are attached to the pass
# row, and only rows whose target_area is True are written out.

# Run-wide constants, hoisted out of the match loop because they never change.
box_width = 40.3          # not read anywhere in this cell
distance_threshold = 2    # forwarded unchanged to get_receive_frame
max_search_frames = 100   # upper bound on the search window after a pass

# Fail fast: every recipient must resolve to a player id. Checking up front
# avoids hitting a KeyError part-way through a run that can take about an hour.
pending_recipients = set(
    events_df_pass.loc[events_df_pass['sk_match_id'].isin(left_ids), 'pass_recipient_name']
)
unmapped_recipients = sorted(name for name in pending_recipients if name not in short_name_map)
if unmapped_recipients:
    raise KeyError(f'No player id in short_name_map for recipients: {unmapped_recipients}')

for match_id in left_ids:
    print(match_id)
    match_path = f'data/FA/match/{match_id}.json'
    match_data = load_json_file(match_path)
    tracking_path = f'data/FA/tracking/{match_id}.jsonl'

    # Squad membership is used to label the recipient's side as home or away.
    home_id = match_data['home_team']['id']
    away_id = match_data['away_team']['id']
    home_p_ids = {p['id'] for p in match_data['players'] if p['team_id'] == home_id}
    away_p_ids = {p['id'] for p in match_data['players'] if p['team_id'] == away_id}
    home_start = match_data['home_team_side'][0]
    pitch_length = match_data['pitch_length']

    # Fresh 0..n-1 index so the results can be attached positionally below.
    match_event_pass = events_df_pass[events_df_pass['sk_match_id'] == match_id].reset_index(drop=True)
    passing_frames = match_event_pass['frame'].to_numpy()
    recipients = match_event_pass['pass_recipient_name'].to_numpy()

    results = []  # one get_receive_frame result per pass, in row order

    for i in range(len(passing_frames)):
        start_frame = passing_frames[i]
        # The window ends at the next pass, capped at max_search_frames; the last
        # pass of the match simply uses the cap.
        end_frame = start_frame + max_search_frames
        if i < len(passing_frames) - 1:
            end_frame = min(passing_frames[i + 1], end_frame)

        recipient = recipients[i]
        target_player_id = short_name_map[recipient]

        # team stays None when the id is in neither squad list.
        # NOTE(review): how get_receive_frame treats team=None is not visible here — confirm.
        if target_player_id in home_p_ids:
            team = 'home'
        elif target_player_id in away_p_ids:
            team = 'away'
        else:
            team = None

        result = get_receive_frame(
            tracking_path, team, home_start, pitch_length, start_frame, end_frame, target_player_id, distance_threshold
        )
        results.append(result)

    # Attach the reception details column-wise; both frames share the same
    # 0..n-1 index, so rows line up one-to-one.
    results_df = pd.DataFrame(
        results, columns=['period', 'receive_frame', 'receive_x', 'receive_y', 'target_area']
    )
    match_event_pass = pd.concat([match_event_pass, results_df], axis=1)
    # Explicit comparison so anything other than True is filtered out.
    match_event_pass = match_event_pass[match_event_pass['target_area'] == True]

    match_event_pass.to_csv(f'data/pass_data/{match_id}_pass.csv', index=False)

1453349
1454436
1454967
1454968
1454969
1454970
1618208
CPU times: total: 1h 1min 18s
Wall time: 1h 1min 37s


In [53]:
# Row count of the results_df currently held in the kernel.
len(results_df)

456

In [63]:
# Display the match_event_pass frame currently held in the kernel.
match_event_pass

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,possession_team_name,possession_types,period,receive_frame,receive_x,receive_y,target_area
0,1145391,127,0,S. Blackstenius,J. Beattie,11167,CB,22795.0,175,pass,"[back_pass, short_or_medium_pass]",51,51,True,Arsenal,,1.0,142.0,23.3,1.13,False
1,1145391,153,0,J. Beattie,Laia Codina,579381,LCB3,22629.0,-83,pass,"[lateral_pass, short_or_medium_pass]",30,55,True,Arsenal,,1.0,162.0,22.7,-14.99,False
2,1145391,179,0,Laia Codina,A. Ilestedt,52105,RCB3,70097.0,87,pass,"[lateral_pass, short_or_medium_pass]",32,30,True,Arsenal,,1.0,197.0,23.76,17.48,False
3,1145391,400,0,M. Turner,M. Earps,11384,GK,63767.0,110,pass,"[lateral_pass, short_or_medium_pass]",22,20,True,Manchester United,"[set_piece_attack, free_kick]",1.0,414.0,-41.66,10.17,False
4,1145391,506,0,M. Earps,M. Le Tissier,559233,RCB,22622.0,80,pass,[short_or_medium_pass],14,53,True,Manchester United,"[set_piece_attack, free_kick]",1.0,513.0,-36.2,-14.28,False
5,1145391,522,0,M. Le Tissier,M. Earps,11384,GK,68697.0,-115,pass,"[lateral_pass, short_or_medium_pass]",16,69,True,Manchester United,"[set_piece_attack, free_kick]",1.0,534.0,-42.7,-3.62,False
6,1145391,573,0,M. Earps,M. Le Tissier,559233,RCB,22622.0,77,pass,"[lateral_pass, short_or_medium_pass]",9,44,True,Manchester United,"[set_piece_attack, free_kick]",1.0,582.0,-41.79,-15.07,False
7,1145391,597,0,M. Le Tissier,M. Earps,11384,GK,68697.0,-110,pass,"[lateral_pass, short_or_medium_pass]",12,69,True,Manchester United,"[set_piece_attack, free_kick]",1.0,615.0,-44.83,-1.3,False
8,1145391,633,0,M. Earps,M. Turner,258929,LCB,22622.0,-58,pass,[short_or_medium_pass],8,48,True,Manchester United,"[set_piece_attack, free_kick]",1.0,650.0,-39.53,17.83,False
9,1145391,696,1,M. Turner,M. Le Tissier,559233,RCB,63767.0,108,pass,"[lateral_pass, short_or_medium_pass]",14,33,True,Manchester United,"[set_piece_attack, free_kick]",1.0,711.0,-43.1,-11.35,False


In [64]:
# Display the results_df frame currently held in the kernel.
results_df

Unnamed: 0,period,receive_frame,receive_x,receive_y,target_area
0,1.0,142.0,23.3,1.13,False
1,1.0,162.0,22.7,-14.99,False
2,1.0,197.0,23.76,17.48,False
3,1.0,414.0,-41.66,10.17,False
4,1.0,513.0,-36.2,-14.28,False
5,1.0,534.0,-42.7,-3.62,False
6,1.0,582.0,-41.79,-15.07,False
7,1.0,615.0,-44.83,-1.3,False
8,1.0,650.0,-39.53,17.83,False
9,1.0,711.0,-43.1,-11.35,False


In [56]:
# Rows flagged as target-area receptions. This needs match_event_pass to already
# carry the 'target_area' column that the processing loop attaches; running it
# on a frame without that column raises KeyError.
match_event_pass.loc[match_event_pass['target_area'].eq(True)]

KeyError: 'target_area'