In [1]:
import matplotlib.pyplot as plt
from dotenv import load_dotenv
from scipy.optimize import minimize
import pandas as pd
import numpy as np
import seaborn as sns
import os
import json
import math
import jsonlines
import pyarrow.parquet as pq
from utils import *
import plotly.graph_objects as go
import soccerfield

import warnings
# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation / dtype warnings raised by
# the cells below — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')
# Remove pandas' display truncation: all rows, all columns, full cell contents.
# NOTE(review): with max_rows=None, displaying a large frame prints every row.
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
# Load IPython's autoreload extension and set it to mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
# Read data/short_name_map.json into `short_name_map`.
# `load_json_file` is not defined in this notebook — presumably it comes from
# the `from utils import *` star import; verify.
# NOTE(review): no visible cell uses `short_name_map` afterwards.
short_name_map = load_json_file('data/short_name_map.json')

In [3]:
# Load the event table from parquet. Later cells read the columns
# `type_primary`, `sk_match_id`, `frame`, `possession_team_name` and
# `sk_player_id` from it.
events_path = 'data/wyscout_events.parquet'
events_df = pd.read_parquet(events_path)

In [4]:
# Drop the event types that are not wanted in the event sequence; every other
# row (including rows whose type is missing) is kept.
excluded_event_types = ['duel', 'touch', 'acceleration']
is_excluded_event = events_df['type_primary'].isin(excluded_event_types)
events_df = events_df.loc[~is_excluded_event]

In [5]:
# Frequency of each remaining primary event type, most common first.
events_df['type_primary'].value_counts()

type_primary
pass                 192449
interception          20976
game_interruption     18627
throw_in              11903
shot                   5470
free_kick              5402
infraction             4697
clearance              3669
goal_kick              3657
corner                 2444
shot_against           2333
offside                 896
goalkeeper_exit         733
penalty                  58
fairplay                 44
own_goal                 24
Name: count, dtype: Int64

In [6]:
# Names of every entry in data/pass_data_2 (os.listdir returns them in
# arbitrary order and does not filter by type or extension).
pass_files = os.listdir('data/pass_data_2')
# Display how many entries were found.
len(pass_files)

102

In [8]:
# For every per-match pass file, look at the events that directly follow each
# pass and record where the ball and the acting player were, as long as the
# same team keeps the ball and the event is a pass or a shot. The result is
# written to data/follow_data_2/<match_id>_follow.csv as the original pass
# columns plus:
#   ball_x_0 / ball_y_0            ball position at the pass's receive frame
#   ball_x_k / ball_y_k            ball position at the k-th follow-up event
#   player_x_k / player_y_k        acting player's position at that event
#   player_k                       acting player's id
# for k = 1..N_FOLLOW_ACTIONS. Once the chain breaks, all remaining steps are
# None.

# Number of events after each pass that are inspected as follow-up actions.
N_FOLLOW_ACTIONS = 3
# Event types that keep a follow-up chain going.
FOLLOW_ACTION_TYPES = ('pass', 'shot')
FOLLOW_DATA_DIR = 'data/follow_data_2'


def _blank_follow_fields(first_step, last_step=N_FOLLOW_ACTIONS):
    """Return the follow-up columns for steps first_step..last_step, all set to None."""
    blank = {}
    for j in range(first_step, last_step + 1):
        blank[f'ball_x_{j}'] = None
        blank[f'ball_y_{j}'] = None
        blank[f'player_x_{j}'] = None
        blank[f'player_y_{j}'] = None
        blank[f'player_{j}'] = None
    return blank


# DataFrame.to_csv does not create missing directories.
os.makedirs(FOLLOW_DATA_DIR, exist_ok=True)

for f in pass_files:
    # os.listdir also returns stray entries (e.g. .ipynb_checkpoints); anything
    # whose prefix is not a match id is not a pass file, so skip it.
    try:
        match_id = int(f.split('_')[0])
    except ValueError:
        continue
    tracking_data = load_jsonl_file(f'data/FA/tracking/{match_id}.jsonl')
    pass_df = pd.read_csv(f'data/pass_data_2/{f}')

    match_event_df = events_df[events_df['sk_match_id'] == match_id].reset_index(drop=True)
    # Last event position that still has N_FOLLOW_ACTIONS events after it.
    last_usable_idx = len(match_event_df) - 1 - N_FOLLOW_ACTIONS
    follow_data = []
    unmatched_passes = 0
    for idx, row in pass_df.iterrows():
        pass_frame = row['frame']
        start_possession = row['possession_team_name']
        matching_idx = match_event_df.index[match_event_df['frame'] == pass_frame]
        if len(matching_idx) == 0:
            # No event shares this pass's frame (previously an IndexError).
            unmatched_passes += 1

        if len(matching_idx) == 0 or matching_idx[0] > last_usable_idx:
            # Either the pass cannot be located in the event stream or fewer
            # than N_FOLLOW_ACTIONS events follow it: emit an all-None row so
            # follow_data stays aligned with pass_df row for row.
            follow_dict = {'ball_x_0': None, 'ball_y_0': None}
            follow_dict.update(_blank_follow_fields(1))
            follow_data.append(follow_dict)
            continue

        # When several events share the frame, the first one is used.
        event_idx = matching_idx[0]
        receive_frame = row['receive_frame']
        follow_dict = {}
        follow_dict['ball_x_0'], follow_dict['ball_y_0'] = find_ball(tracking_data, receive_frame)
        for i in range(1, N_FOLLOW_ACTIONS + 1):
            action = match_event_df.iloc[event_idx + i]
            possession = action['possession_team_name']
            type_primary = action['type_primary']
            player_id = action['sk_player_id']
            # pd.notna covers pd.NA, None and NaN. The guards must come first:
            # comparing pd.NA yields pd.NA, whose truth value raises TypeError.
            continues_chain = (
                pd.notna(possession)
                and possession == start_possession
                and pd.notna(player_id)
                and pd.notna(type_primary)
                and type_primary in FOLLOW_ACTION_TYPES
            )
            if not continues_chain:
                # Chain broken: this step and every later one stay empty.
                follow_dict.update(_blank_follow_fields(i))
                break

            frame = action['frame']
            ball_x, ball_y = find_ball(tracking_data, frame)
            player_x, player_y = find_player(tracking_data, frame, player_id)
            follow_dict[f'ball_x_{i}'] = ball_x
            follow_dict[f'ball_y_{i}'] = ball_y
            follow_dict[f'player_x_{i}'] = player_x
            follow_dict[f'player_y_{i}'] = player_y
            follow_dict[f'player_{i}'] = player_id
        follow_data.append(follow_dict)

    if unmatched_passes:
        print(f'match {match_id}: {unmatched_passes} pass(es) had no event at the same frame; '
              'follow-up columns left empty')

    # One follow row per pass row is required for the column-wise concat below.
    assert len(follow_data) == len(pass_df)
    follow_df = pd.DataFrame(follow_data)
    pass_df = pd.concat([pass_df, follow_df], axis=1)
    pass_df.to_csv(f'{FOLLOW_DATA_DIR}/{match_id}_follow.csv', index=False)

In [43]:
# Debug cell: show the current values of the globals `frame` and `player_id`,
# which are assigned inside the follow-data loop in an earlier cell.
# NOTE(review): depends on leftover kernel state and was executed out of order;
# remove before sharing.
frame, player_id

(59324, 64025)

In [42]:
# Debug cell: list every tracking record whose 'frame' equals the global
# `frame`. Both `tracking_data` and `frame` are whatever an earlier cell last
# assigned, so the result depends on kernel state.
list(filter(lambda record: record['frame'] == frame, tracking_data))

[{'frame': 59324,
  'timestamp': '01:36:20.40',
  'period': 2,
  'ball_data': {'x': -24.99, 'y': -27.44, 'z': 0.47, 'is_detected': True},
  'possession': {'player_id': 68716, 'group': 'home team'},
  'image_corners_projection': {'x_top_left': -113.01,
   'y_top_left': 39.0,
   'x_bottom_left': -31.57,
   'y_bottom_left': -38.01,
   'x_bottom_right': -13.45,
   'y_bottom_right': -26.85,
   'x_top_right': -35.75,
   'y_top_right': 39.0},
  'player_data': [{'x': -49.46,
    'y': -1.54,
    'player_id': 261931,
    'is_detected': True},
   {'x': -38.47, 'y': -2.83, 'player_id': 57127, 'is_detected': True},
   {'x': -38.89, 'y': -4.89, 'player_id': 68534, 'is_detected': True},
   {'x': -37.07, 'y': 1.56, 'player_id': 62648, 'is_detected': True},
   {'x': -35.18, 'y': -16.45, 'player_id': 63773, 'is_detected': True},
   {'x': -36.54, 'y': 7.23, 'player_id': 185330, 'is_detected': True},
   {'x': -30.54, 'y': -8.52, 'player_id': 75087, 'is_detected': True},
   {'x': -26.06, 'y': -24.35, 'play

In [24]:
# Debug cell: list the entries of `player_data` whose 'player_id' equals the
# global `player_id`.
# NOTE(review): `player_data` is not assigned in any visible cell, so this
# fails on a fresh kernel — confirm where it came from or delete the cell.
list(filter(lambda entry: entry['player_id'] == player_id, player_data))

'd'

In [30]:
# Debug cell: show the current value of the global `player_id`, which is
# assigned inside the follow-data loop in an earlier cell.
# NOTE(review): depends on leftover kernel state; remove before sharing.
player_id

<NA>