In [69]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [3]:
load_dotenv()
username = os.getenv('U')
password = os.getenv('P')
client = SkillcornerClient(username=username, password=password)

In [31]:
def load_json_file(file_path):
    with open(file_path, 'r') as file:
        data = json.load(file)
    return data

In [29]:
def load_jsonl_file(file_path):
    with jsonlines.open(file_path, 'r') as file:
        data = [line for line in file if line['timestamp'] is not None]
    return data

In [4]:
FA_matches = client.get_matches(params={'competition_edition': 574})
FA_match_ids = [m['id'] for m in FA_matches]

In [5]:
NWSL_matches = client.get_matches(params={'competition_edition': 800})
NWSL_match_ids = [m['id'] for m in NWSL_matches]

In [25]:
# Inspect the schema of the Parquet file
file_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(file_path)

In [7]:
event_match_ids = list(set(events_df.sk_match_id))

In [16]:
FA_in_event = [i for i in FA_match_ids if i in event_match_ids]
len(FA_in_event)

102

In [11]:
FA_not_in_event = [i for i in FA_match_ids if i not in event_match_ids]
len(FA_not_in_event)

30

In [12]:
NWSL_in_event = [i for i in NWSL_match_ids if i in event_match_ids]
len(NWSL_in_event)

0

In [13]:
NWSL_not_in_event = [i for i in NWSL_match_ids if i not in event_match_ids]
len(NWSL_not_in_event)

189

In [99]:
cols = ['sk_match_id', 'frame', 'location_x', 'location_y', 'type_primary',
       'type_secondary', 'pass_angle',
       'pass_endlocation_x', 'pass_endlocation_y','pass_recipient_name',
       'pass_recipient_position', 'player_name','sk_player_id',
       'player_position']

In [17]:
FA_match_ids = FA_in_event

In [100]:
events_df = events_df[events_df['sk_match_id'].isin(FA_match_ids)][cols]

In [47]:
events_df.head()

Unnamed: 0,sk_match_id,frame,location_x,location_y,matchtimestamp,carry_endlocation_x,type_primary,type_secondary,pass_accurate,pass_angle,pass_endlocation_x,pass_endlocation_y,pass_height,pass_length,pass_recipient_id,pass_recipient_name,pass_recipient_position,player_name,sk_player_id,player_position,controlled_phase_name,carry_endlocation_y,carry_progression
106932,1133203,107,51,50,00:00:00.862,,pass,"[back_pass, short_or_medium_pass]",True,-153,38,40,,15,134338,K. Zelem,DMF,H. Ladd,64166,RCMF,Phase 1,,
106933,1133203,126,38,40,00:00:02.903,,pass,"[lateral_pass, short_or_medium_pass]",True,-104,33,10,,21,289914,G. George,LB,K. Zelem,68743,DMF,Phase 1,,
106934,1133203,158,33,10,00:00:05.945,,pass,"[lateral_pass, short_or_medium_pass]",True,104,29,39,,20,258929,M. Turner,LCB,G. George,68681,LB,Phase 1,,
106935,1133203,191,29,39,00:00:09.187,,pass,"[lateral_pass, short_or_medium_pass]",True,93,28,70,,21,559233,M. Le Tissier,RCB,M. Turner,63767,LCB,Phase 1,,
106936,1133203,222,28,70,00:00:12.663,,pass,"[lateral_pass, short_or_medium_pass]",True,-88,29,37,,22,258929,M. Turner,LCB,M. Le Tissier,68697,RCB,Phase 1,,


In [114]:
match_id = 1133203

In [115]:
match_data = load_json_file(rf'data/FA/match/{match_id}.json')

In [116]:
home_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == home_id]
away_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == away_id]

In [132]:
match_event_df = events_df[(events_df['sk_match_id'] == match_id) & (events_df['type_primary'] == 'pass')]

In [133]:
match_event_df.head()

Unnamed: 0,sk_match_id,frame,location_x,location_y,type_primary,type_secondary,pass_angle,pass_endlocation_x,pass_endlocation_y,pass_recipient_name,pass_recipient_position,player_name,sk_player_id,player_position
106932,1133203,107,51,50,pass,"[back_pass, short_or_medium_pass]",-153,38,40,K. Zelem,DMF,H. Ladd,64166,RCMF
106933,1133203,126,38,40,pass,"[lateral_pass, short_or_medium_pass]",-104,33,10,G. George,LB,K. Zelem,68743,DMF
106934,1133203,158,33,10,pass,"[lateral_pass, short_or_medium_pass]",104,29,39,M. Turner,LCB,G. George,68681,LB
106935,1133203,191,29,39,pass,"[lateral_pass, short_or_medium_pass]",93,28,70,M. Le Tissier,RCB,M. Turner,63767,LCB
106936,1133203,222,28,70,pass,"[lateral_pass, short_or_medium_pass]",-88,29,37,M. Turner,LCB,M. Le Tissier,68697,RCB


In [119]:
field_length = 105
field_width = 68

# Adjust coordinates to center [0, 0]
match_event_df['pass_endlocation_x'] = match_event_df['pass_endlocation_x'] - field_length / 2
match_event_df['pass_endlocation_y'] = match_event_df['pass_endlocation_y'] - field_width / 2
match_event_df['location_x'] = match_event_df['location_x'] - field_length / 2
match_event_df['location_y'] = match_event_df['location_y'] - field_width / 2

In [120]:
match_event_df['team'] = match_event_df['sk_player_id'].apply(lambda x: 'home' if x in home_p_ids else
                                                                  'away' if x in away_p_ids else None)

In [121]:
tracking_path = f'data/FA/tracking/{match_id}.jsonl'

In [122]:
with jsonlines.open(tracking_path, 'r') as file:
    frame_to_period = {line['frame']: line['period'] for line in file}

# Map the period to each row using the pre-built dictionary
match_event_df['period'] = match_event_df['frame'].map(frame_to_period)

In [123]:
match_event_df.tail(20)

Unnamed: 0,sk_match_id,frame,location_x,location_y,type_primary,type_secondary,pass_angle,pass_endlocation_x,pass_endlocation_y,pass_recipient_name,pass_recipient_position,player_name,sk_player_id,player_position,team,period
310067,1133203,58791,-5.5,57.0,pass,"[back_pass, short_or_medium_pass]",180,-8.5,57.0,A. Patten,RCB,S. Mayling,68672,RB,home,2.0
310068,1133203,58800,-8.5,57.0,pass,"[forward_pass, long_pass, loss]",-28,11.5,41.0,M. Turner,LCB,A. Patten,68719,RCB,home,2.0
310069,1133203,58823,-16.5,-9.0,pass,"[head_pass, progressive_pass, short_or_medium_pass]",-90,-16.5,-13.0,J. Nobbs,RCMF,M. Turner,63767,LCB,away,2.0
310074,1133203,58879,2.5,48.0,pass,"[back_pass, short_or_medium_pass]",-174,-6.5,47.0,A. Patten,RCB,A. Leon,22735,RW,home,2.0
310075,1133203,58892,-6.5,47.0,pass,"[forward_pass, long_pass, loss, pass_to_final_third]",-4,18.5,44.0,M. Turner,LCB,A. Patten,68719,RCB,home,2.0
310076,1133203,58915,-23.5,-12.0,pass,"[forward_pass, head_pass, loss, progressive_pass, short_or_medium_pass]",-8,-17.5,-13.0,J. Nobbs,RCMF,M. Turner,63767,LCB,away,2.0
310077,1133203,58930,12.5,45.0,pass,"[forward_pass, head_pass, pass_to_final_third, recovery, counterpressing_recovery, short_or_medium_pass]",32,22.5,56.0,E. Salmon,LW,J. Nobbs,63765,RCMF,home,2.0
310078,1133203,58939,22.5,56.0,pass,[short_or_medium_pass],-80,24.5,39.0,A. Leon,RW,E. Salmon,62646,LW,home,2.0
310080,1133203,58958,-25.5,-6.0,pass,"[loss, progressive_pass, short_or_medium_pass]",83,-24.5,5.0,J. Nobbs,RCMF,M. Turner,63767,LCB,away,2.0
310081,1133203,58993,19.5,27.0,pass,"[back_pass, recovery, counterpressing_recovery, short_or_medium_pass]",-137,7.5,9.0,R. Corsie,LCB,J. Nobbs,63765,RCMF,home,2.0


In [124]:
home_id = match_data['home_team']['id']
away_id = match_data['away_team']['id']

In [125]:
wt_json =[p for p in match_info['players'] if p['player_role']['position_group'] == 'Wide Attacker']

In [126]:
wt_info = [p['first_name']+ ' '+ p['last_name'] for p in wt_json]

In [45]:
wt_ids = [p['id'] for p in wt_json]
wt_ids

[22608, 64066, 22735, 68744, 22457, 64130]

In [128]:
pitch_length = 105  # Length of the pitch
pitch_width = 68    # Width of the pitch
box_width = 40.3 

def is_wide_channel(row):
    third_boundary = pitch_length / 6  # Boundary for attacking third
    half_width = pitch_width / 2
    wide_channel_limit = half_width - box_width  # Wide channel boundary on y-axis
    
    if row['team'] == 'home':
        if row['period'] == 1:  # Home team attacking right
            return (row['pass_endlocation_x'] > third_boundary) and (
                abs(row['pass_endlocation_y']) > wide_channel_limit)
        elif row['period'] == 2:  # Home team attacking left
            return (row['pass_endlocation_x'] < -third_boundary) and (
                abs(row['pass_endlocation_y']) > wide_channel_limit)
    elif row['team'] == 'away':
        if row['period'] == 1:  # Away team attacking left
            return (row['pass_endlocation_x'] < -third_boundary) and (
                abs(row['pass_endlocation_y']) > wide_channel_limit)
        elif row['period'] == 2:  # Away team attacking right
            return (row['pass_endlocation_x'] > third_boundary) and (
                abs(row['pass_endlocation_y']) > wide_channel_limit)
    return False

In [129]:
match_event_df['in_wide_channel'] = match_event_df.apply(is_wide_channel, axis=1)

In [130]:
match_event_df.in_wide_channel.value_counts()

in_wide_channel
False    546
True     229
Name: count, dtype: int64

In [131]:
match_event_df[match_event_df['in_wide_channel'] == True]

Unnamed: 0,sk_match_id,frame,location_x,location_y,type_primary,type_secondary,pass_angle,pass_endlocation_x,pass_endlocation_y,pass_recipient_name,pass_recipient_position,player_name,sk_player_id,player_position,team,period,in_wide_channel
106933,1133203,126,-14.5,6.0,pass,"[lateral_pass, short_or_medium_pass]",-104,-19.5,-24.0,G. George,LB,K. Zelem,68743,DMF,away,1.0,True
106934,1133203,158,-19.5,-24.0,pass,"[lateral_pass, short_or_medium_pass]",104,-23.5,5.0,M. Turner,LCB,G. George,68681,LB,away,1.0,True
106935,1133203,191,-23.5,5.0,pass,"[lateral_pass, short_or_medium_pass]",93,-24.5,36.0,M. Le Tissier,RCB,M. Turner,63767,LCB,away,1.0,True
106936,1133203,222,-24.5,36.0,pass,"[lateral_pass, short_or_medium_pass]",-88,-23.5,3.0,M. Turner,LCB,M. Le Tissier,68697,RCB,away,1.0,True
106937,1133203,262,-23.5,3.0,pass,"[lateral_pass, short_or_medium_pass]",79,-19.5,40.0,M. Le Tissier,RCB,M. Turner,63767,LCB,away,1.0,True
106971,1133203,1055,-15.5,49.0,pass,"[lateral_pass, short_or_medium_pass]",-125,-25.5,25.0,M. Earps,GK,M. Le Tissier,68697,RCB,away,1.0,True
106972,1133203,1088,-25.5,25.0,pass,[short_or_medium_pass],-74,-23.5,15.0,M. Turner,LCB,M. Earps,22622,GK,away,1.0,True
106973,1133203,1115,-23.5,15.0,pass,[short_or_medium_pass],61,-18.5,30.0,M. Le Tissier,RCB,M. Turner,63767,LCB,away,1.0,True
106985,1133203,1580,34.5,-8.0,pass,"[cross, deep_completed_cross, key_pass, lateral_pass, touch_in_box]",90,34.5,23.0,A. Lehmann,RAMF,K. Hanson,64066,LAMF,home,1.0,True
106990,1133203,1621,-43.5,28.0,pass,"[forward_pass, long_pass, loss, progressive_pass, under_pressure]",-22,-27.5,17.0,L. Staniforth,LDMF,H. Blundell,57096,RB,away,1.0,True
