In [1]:
from mplsoccer.pitch import Pitch
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import pandas as pd
import numpy as np
import os
import json
import jsonlines
from skillcorner.client import SkillcornerClient
import pyarrow.parquet as pq

import warnings
# NOTE(review): this silences every warning category for the whole session,
# including deprecation/pandas warnings that may point at real problems.
warnings.filterwarnings('ignore')
# Remove pandas' display limits so frames render without row, column or
# cell-width truncation.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

In [2]:
# Load variables from a .env file (if one is found) into the environment.
load_dotenv()
# Credentials come from the environment variables 'U' and 'P'; os.getenv
# returns None when a variable is not set.
username = os.getenv('U')
password = os.getenv('P')
# NOTE(review): how SkillcornerClient treats None credentials is not visible
# here — confirm against the client library.
client = SkillcornerClient(username=username, password=password)

In [3]:
def load_json_file(file_path):
    """Read a JSON document from disk and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file content
        (typically a dict or a list).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 rather than the platform default encoding so
    # non-ASCII text loads identically on every operating system.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

In [4]:
def load_jsonl_file(file_path):
    """Load a JSON Lines file, keeping only records with a non-null timestamp.

    Each record is indexed with the ``'timestamp'`` key; records whose value
    for that key is ``None`` are discarded.

    Args:
        file_path: Path of the ``.jsonl`` file to read.

    Returns:
        list: The retained records, in file order.
    """
    kept = []
    with jsonlines.open(file_path, 'r') as reader:
        for record in reader:
            if record['timestamp'] is None:
                continue
            kept.append(record)
    return kept

In [5]:
# Ask the SkillCorner client for the matches of competition edition 574 and
# keep only their ids.
FA_matches = client.get_matches(params={'competition_edition': 574})
FA_match_ids = [m['id'] for m in FA_matches]

In [7]:
# Load the event data from the Parquet file into a DataFrame.
file_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(file_path)

In [8]:
# Distinct match ids present in the event data (set() discards any ordering).
event_match_ids = list(set(events_df.sk_match_id))

In [9]:
# Keep the competition's match ids that also appear in the event data,
# preserving the order of FA_match_ids. Membership is checked against a set
# so each lookup is constant-time instead of a scan of the id list.
_event_match_id_set = set(event_match_ids)
FA_in_event = [i for i in FA_match_ids if i in _event_match_id_set]
len(FA_in_event)

102

In [10]:
# Event columns kept for the pass analysis, grouped by what they describe.
cols = [
    # when / who
    'frame',
    'minute',
    'player_name',
    # recipient
    'pass_recipient_name',
    'pass_recipient_id',
    'pass_recipient_position',
    # passer id and pass description
    'sk_player_id',
    'pass_angle',
    'type_primary',
    'type_secondary',
    # start location and outcome
    'location_x',
    'location_y',
    'pass_accurate',
    # end location
    'pass_endlocation_x',
    'pass_endlocation_y',
    # context
    'player_position',
    'sk_match_id',
    'possession_team_name',
    'possession_types',
]

In [11]:
# Rebind FA_match_ids to the subset of matches that have event data.
# NOTE(review): this overwrites the list fetched from the API, so every cell
# executed afterwards sees only the filtered ids.
FA_match_ids = FA_in_event

In [12]:
# Restrict the events to the selected matches and to the columns of interest
# in a single .loc selection.
in_selected_matches = events_df['sk_match_id'].isin(FA_match_ids)
events_df = events_df.loc[in_selected_matches, cols]

In [107]:
events_df.head()

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,possession_id,possession_team_id,possession_team_name,possession_types
106932,107,0,H. Ladd,K. Zelem,134338,DMF,64166,-153,pass,"[back_pass, short_or_medium_pass]",51,50,True,38,40,RCMF,1133203,1783543568,63195,Manchester United,
106933,126,0,K. Zelem,G. George,289914,LB,68743,-104,pass,"[lateral_pass, short_or_medium_pass]",38,40,True,33,10,DMF,1133203,1783543568,63195,Manchester United,
106934,158,0,G. George,M. Turner,258929,LCB,68681,104,pass,"[lateral_pass, short_or_medium_pass]",33,10,True,29,39,LB,1133203,1783543568,63195,Manchester United,
106935,191,0,M. Turner,M. Le Tissier,559233,RCB,63767,93,pass,"[lateral_pass, short_or_medium_pass]",29,39,True,28,70,LCB,1133203,1783543568,63195,Manchester United,
106936,222,0,M. Le Tissier,M. Turner,258929,LCB,68697,-88,pass,"[lateral_pass, short_or_medium_pass]",28,70,True,29,37,RCB,1133203,1783543568,63195,Manchester United,


In [13]:
# Id of the single match examined in the following cells; it is compared
# against the sk_match_id column and used to build the data file paths.
match_id = 1133203

In [14]:
# Load this match's metadata from its JSON file.
match_data = load_json_file(rf'data/FA/match/{match_id}.json')

In [38]:
match_data

{'id': 1133203,
 'home_team_score': 1,
 'away_team_score': 2,
 'date_time': '2023-10-01T11:30:00Z',
 'stadium': {'id': 488,
  'name': 'Villa Park',
  'city': 'Birmingham',
  'capacity': 42682},
 'home_team': {'id': 2029,
  'name': 'Aston Villa, Women',
  'short_name': 'Aston Villa',
  'acronym': 'AVL'},
 'home_team_kit': {'id': 5450,
  'team_id': 2029,
  'season': {'id': 6,
   'start_year': 2019,
   'end_year': 2020,
   'name': '2019/2020'},
  'name': 'Home',
  'jersey_color': '#a50521',
  'number_color': '#ffffff'},
 'away_team': {'id': 2027,
  'name': 'Manchester United, Women',
  'short_name': 'Man Utd, W',
  'acronym': 'MUN'},
 'away_team_kit': {'id': 6537,
  'team_id': 2027,
  'season': {'id': 8,
   'start_year': 2021,
   'end_year': 2022,
   'name': '2021/2022'},
  'name': 'Away',
  'jersey_color': '#ffffff',
  'number_color': '#ff0000'},
 'home_team_coach': None,
 'away_team_coach': None,
 'competition_edition': {'id': 574,
  'competition': {'id': 127,
   'area': 'ENG',
   'name

In [15]:
# Team ids of the two sides, taken from the match metadata.
home_id = match_data['home_team']['id']
away_id = match_data['away_team']['id']

In [16]:
# Ids of the players listed in the match metadata, split by their team_id.
home_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == home_id]
away_p_ids = [p['id'] for p in match_data['players'] if p['team_id'] == away_id]

In [17]:
# Passes of the selected match only, re-indexed 0..n-1 so that consecutive
# rows have consecutive index labels.
is_match_pass = events_df['sk_match_id'].eq(match_id) & events_df['type_primary'].eq('pass')
match_event_df = events_df.loc[is_match_pass].reset_index(drop=True)

In [88]:
# field_length = 105
# field_width = 68

# # Adjust coordinates to center [0, 0]
# match_event_df['pass_endlocation_x'] =  match_event_df['pass_endlocation_x'] * field_length / 100 - field_length / 2
# match_event_df['pass_endlocation_y'] =  field_width / 2 - match_event_df['pass_endlocation_y'] * field_width / 100

# match_event_df['location_x'] =  match_event_df['location_x'] * field_length / 100 - field_length / 2
# match_event_df['location_y'] =  field_width / 2 - match_event_df['location_y'] * field_width / 100

In [89]:
match_event_df.head()

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id
0,107,0,H. Ladd,K. Zelem,134338,DMF,64166,-153,pass,"[back_pass, short_or_medium_pass]",51,50,True,38,40,RCMF,1133203
1,126,0,K. Zelem,G. George,289914,LB,68743,-104,pass,"[lateral_pass, short_or_medium_pass]",38,40,True,33,10,DMF,1133203
2,158,0,G. George,M. Turner,258929,LCB,68681,104,pass,"[lateral_pass, short_or_medium_pass]",33,10,True,29,39,LB,1133203
3,191,0,M. Turner,M. Le Tissier,559233,RCB,63767,93,pass,"[lateral_pass, short_or_medium_pass]",29,39,True,28,70,LCB,1133203
4,222,0,M. Le Tissier,M. Turner,258929,LCB,68697,-88,pass,"[lateral_pass, short_or_medium_pass]",28,70,True,29,37,RCB,1133203


In [18]:
# Label every pass with the passer's side: 'home', 'away', or None when the
# player id is in neither squad list. The id lists are converted to sets once
# so each row costs a constant-time lookup instead of scanning both lists.
_home_ids = set(home_p_ids)
_away_ids = set(away_p_ids)
match_event_df['team'] = match_event_df['sk_player_id'].apply(
    lambda x: 'home' if x in _home_ids else 'away' if x in _away_ids else None)

In [19]:
# Path of the JSON Lines tracking file for this match.
tracking_path = f'data/FA/tracking/{match_id}.jsonl'

In [20]:
# Scan the tracking file once and record the period of every frame.
# Every line is indexed with 'frame' and 'period', so a line missing either
# key raises KeyError.
with jsonlines.open(tracking_path, 'r') as file:
    frame_to_period = {line['frame']: line['period'] for line in file}

# Map the period to each row using the pre-built dictionary; event frames that
# are absent from the dictionary end up as NaN.
match_event_df['period'] = match_event_df['frame'].map(frame_to_period)

In [93]:
# wt_json =[p for p in match_info['players'] if p['player_role']['position_group'] == 'Wide Attacker']

In [94]:
# wt_info = [p['first_name']+ ' '+ p['last_name'] for p in wt_json]

In [95]:
# wt_ids = [p['id'] for p in wt_json]
# wt_ids

Box length = 40.3, width = 16.5
L = 103.125, W = 65

In [122]:
# Dimensions used by the calculations below: W = 68 and L = 105.
# NOTE(review): presumably pitch width and length in metres — confirm.
W, L = 68, 105

In [115]:
# Fraction of W between one side and the nearer edge of a 40.3-long span
# centred on W (presumably the penalty box from the note above — confirm).
((W - 40.3)/2)/W

0.20367647058823532

In [117]:
# Fraction of W between the same side and the farther edge of that centred
# 40.3-long span (the complement of the previous value).
(40.3/2+W/2)/W

0.7963235294117647

In [125]:
# Flag passes whose end point is far up the pitch and close to either side,
# and whose recipient id is not 0.
# Computed with column-wise comparisons instead of a row-by-row apply: the
# result is the same boolean column, it is much faster, and it also behaves on
# an empty frame.
# NOTE(review): the 66 / 19 / 81 thresholds look like percentages of the pitch
# (coordinates in the sample rows are on a 0-100 scale) — confirm, and consider
# naming them as constants.
_end_x = match_event_df['pass_endlocation_x']
_end_y = match_event_df['pass_endlocation_y']
match_event_df['in_wide_channel'] = (
    (_end_x >= 66)
    & ((_end_y > 81) | (_end_y < 19))
    & (match_event_df['pass_recipient_id'] != 0)
)

In [126]:
match_event_df.in_wide_channel.value_counts()

in_wide_channel
False    677
True      98
Name: count, dtype: int64

In [127]:
# Show the columns of the pass table. This cell used to read
# result_df.columns, but result_df is only created further down, so a fresh
# top-to-bottom run raised NameError here. result_df is a row subset of
# match_event_df, so the column list is identical.
match_event_df.columns

Index(['frame', 'minute', 'player_name', 'pass_recipient_name',
       'pass_recipient_id', 'pass_recipient_position', 'sk_player_id',
       'pass_angle', 'type_primary', 'type_secondary', 'location_x',
       'location_y', 'pass_accurate', 'pass_endlocation_x',
       'pass_endlocation_y', 'player_position', 'sk_match_id', 'team',
       'period', 'in_wide_channel'],
      dtype='object')

In [128]:
W*0.82-W/2, L*0.72-L/2

(21.759999999999998, 23.099999999999994)

In [129]:
# Select every wide-channel pass together with the pass recorded immediately
# after it. The "next row" is found by shifting the boolean flag one position
# down, so the selection no longer relies on index labels being 0..n-1, and
# the last row needs no special bounds check.
_is_wide = match_event_df['in_wide_channel'] == True
_follows_wide = _is_wide.shift(1, fill_value=False)
result_df = match_event_df[_is_wide | _follows_wide]

# Kept so any later cell that reads these names still works.
true_indices = match_event_df.index[_is_wide]
all_indices = result_df.index.tolist()

result_df[['frame', 'minute', 'player_name', 'pass_recipient_name','location_x','location_y',
    'pass_endlocation_x', 'pass_endlocation_y']]

Unnamed: 0,frame,minute,player_name,pass_recipient_name,location_x,location_y,pass_endlocation_x,pass_endlocation_y
26,1199,1,G. George,S. Mayling,35,12,72,8
27,1245,1,L. Galton,E. Toone,71,20,77,4
28,1274,1,E. Toone,L. Galton,77,4,93,16
29,1297,1,L. Galton,R. Daly,93,16,62,74
68,4214,6,L. Galton,G. George,70,19,78,8
69,4249,6,G. George,L. Galton,77,8,69,7
70,4265,6,L. Galton,K. Zelem,69,7,63,28
77,4453,7,L. Galton,E. Toone,65,2,87,9
78,4545,7,E. Toone,K. Zelem,95,18,78,24
108,6514,10,K. Zelem,L. Galton,65,39,68,18


In [124]:
match_event_df[match_event_df['frame'] == 11013]

Unnamed: 0,frame,minute,player_name,pass_recipient_name,pass_recipient_id,pass_recipient_position,sk_player_id,pass_angle,type_primary,type_secondary,location_x,location_y,pass_accurate,pass_endlocation_x,pass_endlocation_y,player_position,sk_match_id,team,period,in_wide_channel
166,11013,18,K. Zelem,H. Ladd,11164,RCMF,68743,141,pass,"[back_pass, short_or_medium_pass]",70,81,True,66,87,DMF,1133203,away,1.0,False
