In [2]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq

import warnings
# Silence every warning for the rest of the session.
warnings.filterwarnings('ignore')

# Remove pandas' display limits on row count, column count and cell width.
for display_option in ('display.max_rows', 'display.max_columns', 'display.max_colwidth'):
    pd.set_option(display_option, None)

In [3]:
# Load variables from a .env file into the process environment before the
# lookups below.
load_dotenv()

# Credentials come from the environment variables 'U' and 'P'; os.getenv
# yields None for a variable that is not set.
username, password = os.getenv('U'), os.getenv('P')
client = SkillcornerClient(username=username, password=password)

In [4]:
def load_json_file(file_path):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The value ``json.load`` produces for the file's top-level element
        (dict, list, str, number, bool or None).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Pin UTF-8 so decoding does not depend on the platform's locale default
    # encoding (which is what open() falls back to when none is given).
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [5]:
def load_jsonl_file(file_path):
    """Return the records of a JSON Lines file whose 'timestamp' is set.

    The file is read through ``jsonlines``; a record is kept only when its
    'timestamp' entry is not None. The entry is accessed by subscript, so a
    record without that key is not silently skipped.
    """
    kept_records = []
    with jsonlines.open(file_path, 'r') as reader:
        for record in reader:
            if record['timestamp'] is None:
                continue
            kept_records.append(record)
    return kept_records

In [6]:
# Request the matches of competition edition 574 and keep only their ids.
FA_matches = client.get_matches(params={'competition_edition': 574})
FA_match_ids = [match['id'] for match in FA_matches]

In [7]:
# Request the matches of competition edition 800 and keep only their ids.
NWSL_matches = client.get_matches(params={'competition_edition': 800})
NWSL_match_ids = [match['id'] for match in NWSL_matches]

In [8]:
# Load the event data from the Parquet file into a DataFrame (this reads the
# file's contents, not just its schema).
file_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(path=file_path)

In [9]:
# Distinct match ids that occur in the event data (order is not meaningful).
event_match_ids = list(set(events_df['sk_match_id']))

In [10]:
# FA match ids that are present in the event data, followed by their count.
known_event_ids = set(event_match_ids)
FA_in_event = [fa_id for fa_id in FA_match_ids if fa_id in known_event_ids]
len(FA_in_event)

102

In [11]:
# FA match ids that are absent from the event data, followed by their count.
known_event_ids = set(event_match_ids)
FA_not_in_event = [fa_id for fa_id in FA_match_ids if fa_id not in known_event_ids]
len(FA_not_in_event)

30

In [12]:
# NWSL match ids that are present in the event data, followed by their count.
known_event_ids = set(event_match_ids)
NWSL_in_event = [nwsl_id for nwsl_id in NWSL_match_ids if nwsl_id in known_event_ids]
len(NWSL_in_event)

0

In [13]:
# NWSL match ids that are absent from the event data, followed by their count.
known_event_ids = set(event_match_ids)
NWSL_not_in_event = [nwsl_id for nwsl_id in NWSL_match_ids if nwsl_id not in known_event_ids]
len(NWSL_not_in_event)

189

In [14]:
# Event columns retained for the analysis, in display order.
cols = [
    'sk_match_id',
    'frame',
    'minute',
    'player_name',
    'pass_recipient_name',
    'pass_recipient_id',
    'pass_recipient_position',
    'sk_player_id',
    'pass_angle',
    'type_primary',
    'location_x',
    'location_y',
    'pass_accurate',
    'pass_endlocation_x',
    'pass_endlocation_y',
    'type_secondary',
    'player_position',
]

In [15]:
# Rebind FA_match_ids to the subset that was found in the event data. This is
# an alias of FA_in_event (no copy is made), and the full id list fetched from
# the API is no longer reachable under this name once this cell has run.
FA_match_ids = FA_in_event

In [16]:
# Keep only the rows of the retained FA matches, restricted to `cols`.
is_fa_match = events_df['sk_match_id'].isin(FA_match_ids)
events_df = events_df.loc[is_fa_match, cols]

In [17]:
# Id of the single match examined in the cells that follow.
match_id = 1133203

In [18]:
# Read this match's metadata JSON with the load_json_file helper.
match_path = f'data/FA/match/{match_id}.json'
match_data = load_json_file(match_path)

In [19]:
# Display the whole match metadata structure that was just loaded.
match_data

{'id': 1133203,
 'home_team_score': 1,
 'away_team_score': 2,
 'date_time': '2023-10-01T11:30:00Z',
 'stadium': {'id': 488,
  'name': 'Villa Park',
  'city': 'Birmingham',
  'capacity': 42682},
 'home_team': {'id': 2029,
  'name': 'Aston Villa, Women',
  'short_name': 'Aston Villa',
  'acronym': 'AVL'},
 'home_team_kit': {'id': 5450,
  'team_id': 2029,
  'season': {'id': 6,
   'start_year': 2019,
   'end_year': 2020,
   'name': '2019/2020'},
  'name': 'Home',
  'jersey_color': '#a50521',
  'number_color': '#ffffff'},
 'away_team': {'id': 2027,
  'name': 'Manchester United, Women',
  'short_name': 'Man Utd, W',
  'acronym': 'MUN'},
 'away_team_kit': {'id': 6537,
  'team_id': 2027,
  'season': {'id': 8,
   'start_year': 2021,
   'end_year': 2022,
   'name': '2021/2022'},
  'name': 'Away',
  'jersey_color': '#ffffff',
  'number_color': '#ff0000'},
 'home_team_coach': None,
 'away_team_coach': None,
 'competition_edition': {'id': 574,
  'competition': {'id': 127,
   'area': 'ENG',
   'name

In [20]:
# Team ids of both sides, taken from the match metadata.
home_team, away_team = match_data['home_team'], match_data['away_team']
home_id, away_id = home_team['id'], away_team['id']

In [21]:
# Split the player ids in the match metadata by the team they are listed under.
home_p_ids, away_p_ids = [], []
for player in match_data['players']:
    if player['team_id'] == home_id:
        home_p_ids.append(player['id'])
    if player['team_id'] == away_id:
        away_p_ids.append(player['id'])

In [22]:
# Pass events of the selected match.
# `.copy()` makes this an independent frame: later cells add columns to it,
# and doing that on a filtered selection of `events_df` raises pandas'
# SettingWithCopyWarning and leaves it unclear whether the write lands on a
# temporary object.
is_match_pass = (events_df['sk_match_id'] == match_id) & (events_df['type_primary'] == 'pass')
match_event_df = events_df[is_match_pass].copy()

In [23]:
# field_length = 105
# field_width = 68

# # Adjust coordinates to center [0, 0]
# match_event_df['pass_endlocation_x'] =  match_event_df['pass_endlocation_x'] * field_length / 100 - field_length / 2
# match_event_df['pass_endlocation_y'] =  field_width / 2 - match_event_df['pass_endlocation_y'] * field_width / 100

# match_event_df['location_x'] =  match_event_df['location_x'] * field_length / 100 - field_length / 2
# match_event_df['location_y'] =  field_width / 2 - match_event_df['location_y'] * field_width / 100

In [24]:
# Preview the first rows of the selected match's pass events.
match_event_df.head()

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,type_secondary,player_position
106932,1133203,107,0,H. Ladd,K. Zelem,134338,DMF,64166,-153,pass,51,50,True,38,40,"[back_pass, short_or_medium_pass]",RCMF
106933,1133203,126,0,K. Zelem,G. George,289914,LB,68743,-104,pass,38,40,True,33,10,"[lateral_pass, short_or_medium_pass]",DMF
106934,1133203,158,0,G. George,M. Turner,258929,LCB,68681,104,pass,33,10,True,29,39,"[lateral_pass, short_or_medium_pass]",LB
106935,1133203,191,0,M. Turner,M. Le Tissier,559233,RCB,63767,93,pass,29,39,True,28,70,"[lateral_pass, short_or_medium_pass]",LCB
106936,1133203,222,0,M. Le Tissier,M. Turner,258929,LCB,68697,-88,pass,28,70,True,29,37,"[lateral_pass, short_or_medium_pass]",RCB


In [25]:
def _team_label(player_id):
    """Return 'home' or 'away' for a listed player id, otherwise None."""
    if player_id in home_p_ids:
        return 'home'
    if player_id in away_p_ids:
        return 'away'
    return None


# Label each pass with the side of the player who made it.
match_event_df['team'] = match_event_df['sk_player_id'].apply(_team_label)

In [26]:
# Location of the JSON Lines tracking file for the selected match.
tracking_path = f'data/FA/tracking/{match_id}.jsonl'

In [27]:
# Build a frame -> period lookup from the tracking file, one record per line.
# A frame that occurs more than once keeps the period of its last record.
frame_to_period = {}
with jsonlines.open(tracking_path, 'r') as reader:
    for record in reader:
        frame, period = record['frame'], record['period']
        frame_to_period[frame] = period

# Attach the period to every event row through its frame number; frames that
# are missing from the lookup come out as NaN.
match_event_df['period'] = match_event_df['frame'].map(frame_to_period)

In [28]:
# wt_json =[p for p in match_info['players'] if p['player_role']['position_group'] == 'Wide Attacker']

In [29]:
# wt_info = [p['first_name']+ ' '+ p['last_name'] for p in wt_json]

In [30]:
# wt_ids = [p['id'] for p in wt_json]
# wt_ids

In [31]:
# Thresholds on the pass end location that delimit the wide channels.
# NOTE(review): coordinates appear to be on a 0-100 scale — confirm.
WIDE_CHANNEL_MIN_X = 66
WIDE_CHANNEL_LOW_Y = 19
WIDE_CHANNEL_HIGH_Y = 81

end_x = match_event_df['pass_endlocation_x']
end_y = match_event_df['pass_endlocation_y']

# A pass is flagged when it ends at or beyond the x threshold, in either outer
# y band, and has a non-zero recipient id. Whole-column comparisons replace
# the row-wise apply: no Python call per row, and a frame with no rows simply
# yields an empty boolean column.
match_event_df['in_wide_channel'] = (
    (end_x >= WIDE_CHANNEL_MIN_X)
    & ((end_y >= WIDE_CHANNEL_HIGH_Y) | (end_y <= WIDE_CHANNEL_LOW_Y))
    & (match_event_df['pass_recipient_id'] != 0)
)

In [32]:
# Count how many passes are flagged True vs False for the wide channel.
match_event_df.in_wide_channel.value_counts()

in_wide_channel
False    675
True     100
Name: count, dtype: int64

In [173]:
# Show only the passes flagged as ending in a wide channel.
is_wide_channel = match_event_df['in_wide_channel'].eq(True)
match_event_df.loc[is_wide_channel]

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,type_secondary,player_position,team,period,in_wide_channel
106975,1133203,1199,1,G. George,S. Mayling,230789,RB,68681.0,-4,pass,35,12,False,72,8,"[forward_pass, long_pass, loss, pass_to_final_third]",LB,away,1.0,True
106977,1133203,1245,1,L. Galton,E. Toone,401062,LCMF,68744.0,-61,pass,71,20,True,77,4,"[lateral_pass, recovery, counterpressing_recovery, short_or_medium_pass]",LW,away,1.0,True
106978,1133203,1274,1,E. Toone,L. Galton,134321,LW,62411.0,25,pass,77,4,True,93,16,"[forward_pass, progressive_pass, short_or_medium_pass]",LCMF,away,1.0,True
107051,1133203,3688,5,M. Le Tissier,D. Turner,212012,LB,68697.0,14,pass,42,68,False,76,81,"[forward_pass, long_pass, loss, pass_to_final_third]",RCB,away,1.0,True
107067,1133203,4214,6,L. Galton,G. George,289914,LB,68744.0,-45,pass,70,19,True,78,8,"[forward_pass, short_or_medium_pass]",LW,away,1.0,True
107070,1133203,4249,6,G. George,L. Galton,134321,LW,68681.0,180,pass,77,8,True,69,7,"[back_pass, short_or_medium_pass]",LB,away,1.0,True
107079,1133203,4453,7,L. Galton,E. Toone,401062,LCMF,68744.0,12,pass,65,2,True,87,9,"[forward_pass, pass_to_final_third, progressive_pass, short_or_medium_pass, under_pressure]",LW,away,1.0,True
107129,1133203,6514,10,K. Zelem,L. Galton,134321,LW,68743.0,-79,pass,65,39,True,68,18,"[lateral_pass, pass_to_final_third, short_or_medium_pass]",DMF,away,1.0,True
107133,1133203,6784,11,R. Daly,L. Blindkilde Brown,772422,AMF,22618.0,-3,pass,36,13,False,72,10,"[forward_pass, long_pass, pass_to_final_third, progressive_pass]",CF,home,1.0,True
107167,1133203,7810,12,E. Toone,L. Galton,134321,LW,62411.0,-39,pass,62,26,True,77,7,"[forward_pass, pass_to_final_third, short_or_medium_pass, under_pressure]",LCMF,away,1.0,True


In [33]:
# Look up the pass event(s) recorded at frame 21900.
at_frame = match_event_df['frame'].eq(21900)
match_event_df.loc[at_frame]

Unnamed: 0,sk_match_id,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,type_secondary,player_position,team,period,in_wide_channel
309103,1133203,21900,36,L. Galton,K. Zelem,134338,DMF,68744,180,pass,72,15,True,67,15,"[back_pass, short_or_medium_pass]",LW,away,1.0,True
