In [1]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import math
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
def load_json_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [3]:
def load_jsonl_file(file_path):
    with jsonlines.open(file_path, 'r') as file:
        data = [line for line in file if line['timestamp'] is not None]
    return data

In [4]:
def explode_data(row):
    # Extract player data into a DataFrame
    players = pd.DataFrame(row['player_data'])
    players['object'] = 'player'
    
    # Extract ball data into a DataFrame
    ball = pd.DataFrame([row['ball_data']])
    ball['object'] = 'ball'
    ball['player_id'] = 0  
   
    # Combine player and ball data
    combined = pd.concat([players, ball], ignore_index=True)
    
    # Add other columns from the original row
    for col in row.index:
        if col not in ['player_data', 'ball_data']:
            combined[col] = row[col]
    
    return combined

In [5]:
def euclidean_distance(x1, y1, x2, y2):
    """Calculate the Euclidean distance between two points."""
    return math.sqrt((x1 - x2)**2 + (y1 - y2)**2)

In [6]:
match_id = 1133203
tracking_path = f'data/FA/tracking/{match_id}.jsonl'

file_path = f'data/FA/match/{match_id}.json'
match_data = load_json_file(file_path)
file_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(file_path)
match_event_df = events_df[events_df['sk_match_id'] == match_id]

In [36]:
match_data

{'id': 1133203,
 'home_team_score': 1,
 'away_team_score': 2,
 'date_time': '2023-10-01T11:30:00Z',
 'stadium': {'id': 488,
  'name': 'Villa Park',
  'city': 'Birmingham',
  'capacity': 42682},
 'home_team': {'id': 2029,
  'name': 'Aston Villa, Women',
  'short_name': 'Aston Villa',
  'acronym': 'AVL'},
 'home_team_kit': {'id': 5450,
  'team_id': 2029,
  'season': {'id': 6,
   'start_year': 2019,
   'end_year': 2020,
   'name': '2019/2020'},
  'name': 'Home',
  'jersey_color': '#a50521',
  'number_color': '#ffffff'},
 'away_team': {'id': 2027,
  'name': 'Manchester United, Women',
  'short_name': 'Man Utd, W',
  'acronym': 'MUN'},
 'away_team_kit': {'id': 6537,
  'team_id': 2027,
  'season': {'id': 8,
   'start_year': 2021,
   'end_year': 2022,
   'name': '2021/2022'},
  'name': 'Away',
  'jersey_color': '#ffffff',
  'number_color': '#ff0000'},
 'home_team_coach': None,
 'away_team_coach': None,
 'competition_edition': {'id': 574,
  'competition': {'id': 127,
   'area': 'ENG',
   'name

In [35]:
home_id = match_data['home_team']['id']
away_id = match_data['away_team']['id']
home_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == home_id]
away_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == away_id]

In [7]:
id_map = {player['short_name']: player['id'] for player in match_data['players']}

In [8]:
cols = ['frame', 'minute', 'player_name', 'pass_recipient_name', 'pass_recipient_id',
       'pass_recipient_position', 'sk_player_id', 'pass_angle','type_primary', 'type_secondary', 'location_x', 'location_y', 'pass_accurate',
       'pass_endlocation_x', 'pass_endlocation_y', 'player_position', 'sk_match_id', 
       'possession_team_name', 'possession_types']
match_event_df = match_event_df[cols]

In [9]:
match_event_pass = match_event_df[match_event_df['type_primary'] == 'pass'].reset_index(drop = True)

In [10]:
match_event_pass.head()

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_team_name,possession_types
0,107,0,H. Ladd,K. Zelem,134338,DMF,64166,-153,pass,"[back_pass, short_or_medium_pass]",51,50,True,38,40,RCMF,1133203,Manchester United,
1,126,0,K. Zelem,G. George,289914,LB,68743,-104,pass,"[lateral_pass, short_or_medium_pass]",38,40,True,33,10,DMF,1133203,Manchester United,
2,158,0,G. George,M. Turner,258929,LCB,68681,104,pass,"[lateral_pass, short_or_medium_pass]",33,10,True,29,39,LB,1133203,Manchester United,
3,191,0,M. Turner,M. Le Tissier,559233,RCB,63767,93,pass,"[lateral_pass, short_or_medium_pass]",29,39,True,28,70,LCB,1133203,Manchester United,
4,222,0,M. Le Tissier,M. Turner,258929,LCB,68697,-88,pass,"[lateral_pass, short_or_medium_pass]",28,70,True,29,37,RCB,1133203,Manchester United,


In [11]:
#match_event_pass[match_event_pass['pass_recipient_name'].isna()]

In [12]:
passing_frames = match_event_pass.frame.to_list()

In [34]:
pitch_length = 105  # Length of the pitch
pitch_width = 68    # Width of the pitch
box_width = 40.3  

In [45]:
def check_target_area(pitch_length, period, team, x, y, box_width = 40.3):
    third_boundary = pitch_length / 6  # Boundary for attacking third
    wide_channel_limit = box_width/2  # Wide channel boundary on y-axis

    if team == "home":
        if period ==1:
            return (x > third_boundary) and (
                abs(y) > wide_channel_limit)
        elif period ==2:
            return (x < -third_boundary) and (
                abs(y) > wide_channel_limit)  
    elif team == 'away':
        if period ==1:
            return (x < -third_boundary) and (
                abs(y) > wide_channel_limit)  
        elif period ==2:
            return (x > third_boundary) and (
                abs(y) > wide_channel_limit)         

In [43]:
def get_receive_frame(tracking_path, team, pitch_length, start_frame, end_frame, target_player_id, distance_threshold = 2):
    matching_line = None
    
    with jsonlines.open(tracking_path, 'r') as file:
        start_checking = False  # Flag to start processing lines after the starting frame
        
        for line in file:
            frame = line.get('frame')
            
            # Stop processing if end_frame is reached
            if frame and frame >= end_frame:
                break
            
            # Check if we've reached the starting frame
            if not start_checking:
                if frame == start_frame:
                    start_checking = True
                continue
    
            # Process lines after the starting frame
            period = line['period']
            player_data = line.get('player_data', [])
            if len(player_data) == 0:
                continue
            ball_data = line.get('ball_data', None)
    
            ball_x, ball_y = ball_data['x'], ball_data['y']
            if not ball_x or not ball_y:
                continue
            
            for player in player_data:
                if player.get('player_id') == target_player_id:
                    player_x, player_y = player['x'], player['y']
                    
                    dist = euclidean_distance(player_x, player_y, ball_x, ball_y)
                    if dist <= distance_threshold:
                        matching_line = line
                        break
            
            target_area = check_target_area(pitch_length, period, team, player_x, player_y)
                
            if matching_line:
                break    
    if matching_line:
        return period, matching_line['frame'], player_x, player_y, target_area
    else: return None, None, None, None, None

In [14]:
id_map['Geyse Ferreira'] = 22555

In [48]:
%%time
distance_threshold = 2
for i in range(len(passing_frames)-1):
    start_frame = passing_frames[i]
    end_frame = passing_frames[i+1]
    row = match_event_pass.iloc[i]
    recipient = row['pass_recipient_name']
    if pd.notna(recipient):
        target_player_id = id_map[recipient]
        team = 'home' if target_player_id in home_p_ids else 'away' if target_player_id in away_p_ids else None        
        period, receive_frame, player_x, player_y, target_area = get_receive_frame(tracking_path, team, pitch_length, start_frame, end_frame, target_player_id)
        match_event_pass.at[i, 'period'] = period
        match_event_pass.at[i, 'receive_frame'] = receive_frame
        match_event_pass.at[i, 'receive_x'] = player_x
        match_event_pass.at[i, 'receive_y'] = player_y      
        match_event_pass.at[i, 'target_area'] = target_area           


CPU times: total: 8min 28s
Wall time: 8min 28s


In [18]:
match_event_pass.columns

Index(['frame', 'minute', 'player_name', 'pass_recipient_name',
       'pass_recipient_id', 'pass_recipient_position', 'sk_player_id',
       'pass_angle', 'type_primary', 'type_secondary', 'location_x',
       'location_y', 'pass_accurate', 'pass_endlocation_x',
       'pass_endlocation_y', 'player_position', 'sk_match_id',
       'possession_team_name', 'possession_types', 'receive_frame'],
      dtype='object')

In [18]:
cols_n = ['frame', 'receive_frame', 'minute', 'player_name', 'pass_recipient_name',
       'pass_recipient_id', 'pass_recipient_position', 'sk_player_id',
       'pass_angle', 'type_primary', 'type_secondary', 'location_x',
       'location_y', 'pass_accurate', 'pass_endlocation_x',
       'pass_endlocation_y', 'player_position', 'sk_match_id',
       'possession_team_name', 'possession_types']

In [50]:
match_event_pass_target = match_event_pass[match_event_pass['target_area'] == True]

In [52]:
match_event_pass_target.shape

(97, 24)

In [53]:
match_event_pass_target[cols_n].tail(20)

Unnamed: 0,frame,receive_frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_team_name,possession_types
585,42634,42642.0,69,S. Mayling,R. Daly,11461,CF,68672,-149,pass,"[back_pass, short_or_medium_pass]",82,95,True,72,87,RB,1133203,Aston Villa,"[attack, throw_in]"
594,43772,43799.0,71,R. Williams,Lucía García,369460,LW,68734,-60,pass,"[lateral_pass, pass_to_final_third, recovery, short_or_medium_pass]",65,35,True,71,17,CF,1133203,Manchester United,"[attack, transition_medium]"
601,44112,44115.0,72,A. Leon,Lucía García,369460,LW,22735,-90,pass,"[loss, progressive_pass, short_or_medium_pass]",21,94,False,21,90,RAMF,1133203,Manchester United,"[attack, throw_in]"
613,45044,45047.0,73,H. Ladd,N. Parris,11203,RW,64166,0,pass,"[forward_pass, head_pass, recovery, counterpressing_recovery, short_or_medium_pass]",64,80,True,65,80,RCMF,1133203,Manchester United,
614,45837,45860.0,74,E. Toone,N. Parris,11203,RW,62411,40,pass,"[forward_pass, long_pass]",71,55,True,94,84,LCMF,1133203,Manchester United,"[set_piece_attack, attack, free_kick]"
618,46374,46386.0,75,L. Parker,A. Leon,138911,RW,68661,15,pass,"[forward_pass, pass_to_penalty_area, second_assist, short_or_medium_pass]",77,75,True,88,79,RCMF,1133203,Aston Villa,[attack]
634,49034,49043.0,80,E. Toone,Lucía García,369460,LW,62411,-37,pass,"[forward_pass, pass_to_final_third, short_or_medium_pass]",64,16,True,75,3,LCMF,1133203,Manchester United,[attack]
645,51065,51085.0,83,G. George,Lucía García,369460,LW,68681,19,pass,"[deep_completion, forward_pass, long_pass, pass_to_final_third, pass_to_penalty_area, progressive_pass]",40,5,True,90,31,LB,1133203,Manchester United,"[attack, transition_medium]"
662,51863,51894.0,85,E. Toone,G. George,289914,LB,62411,-50,pass,"[lateral_pass, short_or_medium_pass]",73,35,True,89,7,LCMF,1133203,Manchester United,[attack]
673,52816,52840.0,86,K. Zelem,Lucía García,369460,LW,68743,-48,pass,"[lateral_pass, long_pass, pass_to_final_third]",63,42,True,83,9,DMF,1133203,Manchester United,"[attack, throw_in]"


In [24]:
match_event_merge = pd.merge(match_event_df, match_event_pass[['frame', 'receive_frame']], on = 'frame', how = 'outer')

In [25]:
match_event_merge[cols_n].head(30)

Unnamed: 0,frame,receive_frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_team_name,possession_types
0,107,115.0,0,H. Ladd,K. Zelem,134338.0,DMF,64166.0,-153.0,pass,"[back_pass, short_or_medium_pass]",51,50,True,38.0,40.0,RCMF,1133203,Manchester United,
1,126,145.0,0,K. Zelem,G. George,289914.0,LB,68743.0,-104.0,pass,"[lateral_pass, short_or_medium_pass]",38,40,True,33.0,10.0,DMF,1133203,Manchester United,
2,158,168.0,0,G. George,M. Turner,258929.0,LCB,68681.0,104.0,pass,"[lateral_pass, short_or_medium_pass]",33,10,True,29.0,39.0,LB,1133203,Manchester United,
3,191,208.0,0,M. Turner,M. Le Tissier,559233.0,RCB,63767.0,93.0,pass,"[lateral_pass, short_or_medium_pass]",29,39,True,28.0,70.0,LCB,1133203,Manchester United,
4,222,234.0,0,M. Le Tissier,M. Turner,258929.0,LCB,68697.0,-88.0,pass,"[lateral_pass, short_or_medium_pass]",28,70,True,29.0,37.0,RCB,1133203,Manchester United,
5,262,276.0,0,M. Turner,M. Le Tissier,559233.0,RCB,63767.0,79.0,pass,"[lateral_pass, short_or_medium_pass]",29,37,True,33.0,74.0,LCB,1133203,Manchester United,
6,295,,0,M. Le Tissier,,0.0,,68697.0,26.0,pass,"[forward_pass, long_pass, pass_to_final_third]",33,74,False,69.0,100.0,RCB,1133203,Manchester United,
7,329,,0,,,,,,,game_interruption,[ball_out],31,0,,,,,1133203,Manchester United,
8,409,,0,D. Turner,L. Staniforth,11471.0,LDMF,68680.0,67.0,throw_in,,32,0,True,35.0,10.0,LB,1133203,Aston Villa,[throw_in]
9,438,,0,L. Staniforth,D. Turner,212012.0,LB,22624.0,-144.0,pass,"[back_pass, short_or_medium_pass]",35,10,True,29.0,3.0,LDMF,1133203,Aston Villa,[throw_in]


In [18]:
start_frame = 191  # Example frame number
end_frame = 222
target_player_id = id_map['M. Le Tissier']  # Target player_id
distance_threshold = 2

In [19]:
f = get_receive_frame(tracking_path, start_frame, end_frame, target_player_id)

191
21.615929311505443
20.10224116858615
18.365037435300806
16.73526814843431
15.110013236261576
13.51719275589425
11.969214677663693
10.525991639745873
9.208800138997479
7.97295428307475
6.791178101036667
5.64825636811928
4.578995523037777
3.61311499955371
2.7985174646587443
2.1658485634965357
1.7262676501632066


In [20]:
f

208

In [35]:
matching_line = None

with jsonlines.open(tracking_path, 'r') as file:
    start_checking = False  # Flag to start processing lines after the starting frame
    
    for line in file:
        frame = line.get('frame')
        
        # Stop processing if end_frame is reached
        if frame and frame >= end_frame:
            break
        
        # Check if we've reached the starting frame
        if not start_checking:
            if frame == start_frame:
                start_checking = True
                print(frame)
            continue

        # Process lines after the starting frame
        player_data = line.get('player_data', [])
        ball_data = line.get('ball_data', None)

        ball_x, ball_y = ball_data['x'], ball_data['y']
        
        for player in player_data:
            # Ensure the player_id matches
            if player.get('player_id') == target_player_id:
                # Extract player's coordinates
                player_x, player_y = player['x'], player['y']
                
                # Check the distance condition
                dist = euclidean_distance(player_x, player_y, ball_x, ball_y)
                print(dist)
                if dist <= distance_threshold:
                    matching_line = line
                    break

        if matching_line:
            break

158
11.464065596462712
10.113317952086742
8.714883820223882
7.405356169692313
6.143850584120679
4.961370778323267
3.8657082145449086
2.912902332725902
2.0163581031156137
1.260158720161869


In [36]:
matching_line

{'frame': 168,
 'timestamp': '00:00:06.80',
 'period': 1,
 'ball_data': {'x': 25.09, 'y': -15.82, 'z': 0.04, 'is_detected': True},
 'possession': {'player_id': 63767, 'group': 'away team'},
 'image_corners_projection': {'x_top_left': 6.67,
  'y_top_left': 35.11,
  'x_bottom_left': 1.42,
  'y_bottom_left': -27.5,
  'x_bottom_right': 20.55,
  'y_bottom_right': -33.53,
  'x_top_right': 53.26,
  'y_top_right': 20.97},
 'player_data': [{'x': -37.66,
   'y': -2.31,
   'player_id': 63552,
   'is_detected': False},
  {'x': -10.57, 'y': -11.08, 'player_id': 68719, 'is_detected': False},
  {'x': -10.75, 'y': -1.59, 'player_id': 22628, 'is_detected': False},
  {'x': -10.2, 'y': -21.86, 'player_id': 68672, 'is_detected': False},
  {'x': -12.12, 'y': 11.49, 'player_id': 68680, 'is_detected': False},
  {'x': 0.4, 'y': -5.01, 'player_id': 22624, 'is_detected': False},
  {'x': -0.34, 'y': -14.84, 'player_id': 68661, 'is_detected': False},
  {'x': 8.82, 'y': -12.65, 'player_id': 68675, 'is_detected': T