In [1]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import math
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
def load_json_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [3]:
def load_jsonl_file(file_path):
    with jsonlines.open(file_path, 'r') as file:
        data = [line for line in file if line['timestamp'] is not None]
    return data

In [4]:
def explode_data(row):
    # Extract player data into a DataFrame
    players = pd.DataFrame(row['player_data'])
    players['object'] = 'player'
    
    # Extract ball data into a DataFrame
    ball = pd.DataFrame([row['ball_data']])
    ball['object'] = 'ball'
    ball['player_id'] = 0  
   
    # Combine player and ball data
    combined = pd.concat([players, ball], ignore_index=True)
    
    # Add other columns from the original row
    for col in row.index:
        if col not in ['player_data', 'ball_data']:
            combined[col] = row[col]
    
    return combined

In [29]:
def euclidean_distance(x1, y1, x2, y2):
    """Calculate the Euclidean distance between two points."""
    return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

In [5]:
match_id = 1133203
tracking_path = f'data/FA/tracking/{match_id}.jsonl'

file_path = f'data/FA/match/{match_id}.json'
match_data = load_json_file(file_path)
file_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(file_path)
match_event_df = events_df[events_df['sk_match_id'] == match_id]

In [25]:
id_map = {player['short_name']: player['id'] for player in match_data['players']}

In [6]:
cols = ['frame', 'minute', 'player_name', 'pass_recipient_name', 'pass_recipient_id',
       'pass_recipient_position', 'sk_player_id', 'pass_angle','type_primary', 'type_secondary', 'location_x', 'location_y', 'pass_accurate',
       'pass_endlocation_x', 'pass_endlocation_y', 'player_position', 'sk_match_id', 
       'possession_team_name', 'possession_types']
match_event_df = match_event_df[cols]

In [38]:
match_event_pass = match_event_df[match_event_df['type_primary'] == 'pass'].reset_index(drop = True)

In [39]:
match_event_pass.head()

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_team_name,possession_types
0,107,0,H. Ladd,K. Zelem,134338,DMF,64166,-153,pass,"[back_pass, short_or_medium_pass]",51,50,True,38,40,RCMF,1133203,Manchester United,
1,126,0,K. Zelem,G. George,289914,LB,68743,-104,pass,"[lateral_pass, short_or_medium_pass]",38,40,True,33,10,DMF,1133203,Manchester United,
2,158,0,G. George,M. Turner,258929,LCB,68681,104,pass,"[lateral_pass, short_or_medium_pass]",33,10,True,29,39,LB,1133203,Manchester United,
3,191,0,M. Turner,M. Le Tissier,559233,RCB,63767,93,pass,"[lateral_pass, short_or_medium_pass]",29,39,True,28,70,LCB,1133203,Manchester United,
4,222,0,M. Le Tissier,M. Turner,258929,LCB,68697,-88,pass,"[lateral_pass, short_or_medium_pass]",28,70,True,29,37,RCB,1133203,Manchester United,


In [45]:
match_event_pass[match_event_pass['pass_recipient_name'].isna()]

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_team_name,possession_types
6,295,0,M. Le Tissier,,0,,68697,26,pass,"[forward_pass, long_pass, pass_to_final_third]",33,74,False,69,100,RCB,1133203,Manchester United,
39,2750,4,H. Blundell,,0,,57096,56,pass,"[progressive_pass, short_or_medium_pass, under_pressure]",32,96,False,34,100,RB,1133203,Manchester United,[throw_in]
79,4573,7,K. Zelem,,0,,68743,68,pass,"[cross, cross_blocked]",78,17,False,100,100,DMF,1133203,Manchester United,"[attack, transition_high]"
100,6034,9,R. Corsie,,0,,22628,-20,pass,"[forward_pass, long_pass, progressive_pass]",29,21,False,66,0,LCB,1133203,Aston Villa,
121,7272,11,R. Daly,,0,,22618,-49,pass,"[lateral_pass, pass_to_final_third, short_or_medium_pass]",57,20,False,69,0,CF,1133203,Aston Villa,
139,9150,15,L. Staniforth,,0,,22624,27,pass,"[forward_pass, long_pass, pass_to_final_third, progressive_pass]",30,45,True,84,87,LDMF,1133203,Aston Villa,[attack]
140,9211,15,A. Lehmann,,0,,64130,-148,pass,"[cross, cross_blocked]",91,90,False,0,0,RAMF,1133203,Aston Villa,[attack]
180,11484,18,Geyse Ferreira,,0,,22555,-72,pass,"[cross, deep_completed_cross, lateral_pass, pass_to_penalty_area, progressive_pass]",88,93,True,98,43,CF,1133203,Manchester United,"[attack, transition_medium]"
192,12195,20,S. Mayling,,0,,68672,180,pass,"[back_pass, short_or_medium_pass]",70,89,True,67,89,RB,1133203,Aston Villa,"[set_piece_attack, attack, free_kick]"
217,13154,21,Lucía García,,0,,22457,38,pass,"[cross, cross_blocked]",91,89,False,100,100,RW,1133203,Manchester United,"[attack, transition_medium]"


In [37]:
passing_frames = match_event_pass.frame.to_list()

In [44]:
for i in range(len(passing_frames)):
    start_frame = passing_frames[i]
    end_frame = passing_frames[i+1]
    row = match_event_pass.iloc[i]
    target_player_id = id_map[row['pass_recipient_name']]
    print(row['pass_recipient_name'])
    print(target_player_id)

K. Zelem
68743
G. George
68681
M. Turner
63767
M. Le Tissier
68697
M. Turner
63767
M. Le Tissier
68697


KeyError: <NA>

In [34]:
start_frame = 158  # Example frame number
end_frame = 191
target_player_id = id_map['M. Turner']  # Target player_id
distance_threshold = 2

In [27]:
target_player_id

68743

In [35]:
matching_line = None

with jsonlines.open(tracking_path, 'r') as file:
    start_checking = False  # Flag to start processing lines after the starting frame
    
    for line in file:
        frame = line.get('frame')
        
        # Stop processing if end_frame is reached
        if frame and frame >= end_frame:
            break
        
        # Check if we've reached the starting frame
        if not start_checking:
            if frame == start_frame:
                start_checking = True
                print(frame)
            continue

        # Process lines after the starting frame
        player_data = line.get('player_data', [])
        ball_data = line.get('ball_data', None)

        ball_x, ball_y = ball_data['x'], ball_data['y']
        
        for player in player_data:
            # Ensure the player_id matches
            if player.get('player_id') == target_player_id:
                # Extract player's coordinates
                player_x, player_y = player['x'], player['y']
                
                # Check the distance condition
                dist = euclidean_distance(player_x, player_y, ball_x, ball_y)
                print(dist)
                if dist <= distance_threshold:
                    matching_line = line
                    break

        if matching_line:
            break

158
11.464065596462712
10.113317952086742
8.714883820223882
7.405356169692313
6.143850584120679
4.961370778323267
3.8657082145449086
2.912902332725902
2.0163581031156137
1.260158720161869


In [36]:
matching_line

{'frame': 168,
 'timestamp': '00:00:06.80',
 'period': 1,
 'ball_data': {'x': 25.09, 'y': -15.82, 'z': 0.04, 'is_detected': True},
 'possession': {'player_id': 63767, 'group': 'away team'},
 'image_corners_projection': {'x_top_left': 6.67,
  'y_top_left': 35.11,
  'x_bottom_left': 1.42,
  'y_bottom_left': -27.5,
  'x_bottom_right': 20.55,
  'y_bottom_right': -33.53,
  'x_top_right': 53.26,
  'y_top_right': 20.97},
 'player_data': [{'x': -37.66,
   'y': -2.31,
   'player_id': 63552,
   'is_detected': False},
  {'x': -10.57, 'y': -11.08, 'player_id': 68719, 'is_detected': False},
  {'x': -10.75, 'y': -1.59, 'player_id': 22628, 'is_detected': False},
  {'x': -10.2, 'y': -21.86, 'player_id': 68672, 'is_detected': False},
  {'x': -12.12, 'y': 11.49, 'player_id': 68680, 'is_detected': False},
  {'x': 0.4, 'y': -5.01, 'player_id': 22624, 'is_detected': False},
  {'x': -0.34, 'y': -14.84, 'player_id': 68661, 'is_detected': False},
  {'x': 8.82, 'y': -12.65, 'player_id': 68675, 'is_detected': T