In [6]:
import pandas as pd
import numpy as np
import os
from tqdm import tqdm
import json

In [7]:
def load_all_events_data(dataset_path='../raw_data', sub_dir='sub', verbose=True):
    '''
    Load every per-match events JSON file of a directory into one DataFrame.

    Each file is parsed with json.load, flattened with pd.json_normalize
    (nested keys joined with "_") and tagged with a ``match_id`` column that
    holds the name of the file it was read from (extension included).

    :param dataset_path: root directory of the dataset
    :param sub_dir: sub-directory of dataset_path that holds the event files
    :param verbose: if True, print start/end messages and show a progress bar
    :return: DataFrame with the rows of all files concatenated in file-name
             order; each file's rows keep their own index, so labels repeat
    :raises ValueError: if the directory contains no event files
    '''
    if verbose:
        print('\nLoading all events data')

    events_dir = os.path.join(dataset_path, sub_dir)
    # os.listdir() returns entries in arbitrary order and also lists
    # sub-directories and hidden entries (e.g. .ipynb_checkpoints, .DS_Store).
    # Keep only visible regular files and sort them for a reproducible row order.
    file_names = sorted(
        name for name in os.listdir(events_dir)
        if not name.startswith('.') and os.path.isfile(os.path.join(events_dir, name))
    )
    if not file_names:
        # pd.concat([]) would fail with an opaque "No objects to concatenate".
        raise ValueError(f'No event files found in {events_dir!r}')

    frames = []
    for file_name in tqdm(file_names, disable=not verbose):
        # Read as UTF-8 explicitly instead of relying on the platform default
        # encoding, which would garble or reject non-ASCII text on some systems.
        with open(os.path.join(events_dir, file_name), encoding='utf-8') as data_file:
            events = json.load(data_file)
        frames.append(pd.json_normalize(events, sep="_").assign(match_id=file_name))

    if verbose:
        print(' - COMPLETED\n')
    return pd.concat(frames)

# Event types excluded from the working frame `df`.
EXCLUDED_EVENT_TYPES = [
    "Starting XI", "Half Start", "Pressure", "Camera On", "Camera off",
    "Tactical Shift", "Offside", "Substitution", "Injury Stoppage",
    "Referee Ball-Drop", "Player On", "Player Off",
]

data = load_all_events_data()
# One vectorised filter instead of twelve successive passes over the frame.
# Rows whose type_name is missing are kept, exactly as with `!=` comparisons.
# .copy() gives `df` its own data, so later column assignments never touch `data`.
df = data[~data['type_name'].isin(EXCLUDED_EVENT_TYPES)].copy()


Loading all events data


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 20/20 [00:04<00:00,  4.92it/s]


 - COMPLETED



In [10]:
# List the column labels of the `df` events frame.
df.columns

Index(['id', 'index', 'period', 'timestamp', 'minute', 'second', 'possession',
       'duration', 'type_id', 'type_name',
       ...
       'shot_open_goal', 'shot_aerial_won', 'miscontrol_aerial_won',
       'foul_committed_card_id', 'foul_committed_card_name',
       'pass_miscommunication', 'block_save_block', 'foul_committed_penalty',
       'foul_won_penalty', 'shot_redirect'],
      dtype='object', length=121)

In [11]:
# Preview the first rows of the `df` events frame.
df.head()

Unnamed: 0,id,index,period,timestamp,minute,second,possession,duration,type_id,type_name,...,shot_open_goal,shot_aerial_won,miscontrol_aerial_won,foul_committed_card_id,foul_committed_card_name,pass_miscommunication,block_save_block,foul_committed_penalty,foul_won_penalty,shot_redirect
4,60d35b7c-3b85-42da-9af8-a74a21d8f7ca,5,1,00:00:00.100,0,0,2,0.0,30,Pass,...,,,,,,,,,,
5,23fcb90e-16ec-46af-b513-97750d74d58a,6,1,00:00:00.100,0,0,2,,42,Ball Receipt*,...,,,,,,,,,,
6,eb965fcc-3962-4f91-8768-3756a4f4bba7,7,1,00:00:00.100,0,0,2,0.4,43,Carry,...,,,,,,,,,,
8,bbdcd0fe-1943-4b01-8b03-4eb9a22c7991,9,1,00:00:00.500,0,0,2,1.64,30,Pass,...,,,,,,,,,,
9,2ca32e1b-1e10-4a21-9266-5871a12bac57,10,1,00:00:02.140,0,2,2,,42,Ball Receipt*,...,,,,,,,,,,


In [31]:
# Show the 'location' column (displayed values are two-element lists, NaN where absent).
df['location']

4        [61.0, 40.0]
5        [63.0, 37.0]
6        [63.0, 37.0]
8        [69.0, 33.0]
9         [84.0, 5.0]
            ...      
3023    [108.0, 20.0]
3024     [10.0, 61.0]
3025    [106.0, 21.0]
3026              NaN
3027              NaN
Name: location, Length: 56563, dtype: object

In [35]:
# Show the 'position_name' column (NaN where the event has no position).
df['position_name']

4               Center Midfield
5           Left Center Forward
6           Left Center Forward
8           Left Center Forward
9                 Left Midfield
                 ...           
3023    Left Defensive Midfield
3024      Right Center Midfield
3025    Left Defensive Midfield
3026                        NaN
3027                        NaN
Name: position_name, Length: 56563, dtype: object

In [36]:
def get_location_bin(x, y, pitch_dimensions=(120.0, 80.0), output='bin_rel',
                     num_x_bins: int = 5, num_y_bins: int = 5, rel_coordinates=True):
    '''
    Map a pitch coordinate to the cell of a regular num_x_bins x num_y_bins grid.

    :param x: float, x value [-pitch_dimensions[0] / 2, pitch_dimensions[0] / 2]
              (or [0, pitch_dimensions[0]] when rel_coordinates is False)
    :param y: float, y value [-pitch_dimensions[1] / 2, pitch_dimensions[1] / 2]
              (or [0, pitch_dimensions[1]] when rel_coordinates is False)
    :param pitch_dimensions: (x, y) of pitch dimensions
    :param num_x_bins: number of bins to split the length of the pitch (along pitch_dimensions[0])
    :param num_y_bins: number of bins to split the width of the pitch (along pitch_dimensions[1])
    :param rel_coordinates: if True, coordinates assumed to be relative to pitch center
    :param output: 'bin_name', 'bin_rel', or 'bin_ix'; any other value is treated as 'bin_name'
    :return: None when x or y is missing (None / NaN); otherwise
             'bin_ix'   -> (bin_x, bin_y) as 0-based ints,
             'bin_rel'  -> string such as '(3/5, 3/5)' (1-based index / number of bins),
             'bin_name' -> (x_label, y_label)
    :raises KeyError: with 'bin_name' output when no labels exist for the requested
                      number of bins (labels are defined for 3, 4 and 5 bins per axis)
    '''
    # Events without a location carry NaN; they cannot be assigned to a cell.
    if x is None or y is None or np.isnan(x) or np.isnan(y):
        return None

    bin_names = {'x': {3: ['back', 'med', 'fwd'],
                       4: ['back', 'mback', 'mfwd', 'fwd'],
                       5: ['back', 'mback', 'med', 'mfwd', 'fwd']},
                 'y': {3: ['left', 'center', 'right'],
                       4: ['left', 'mleft', 'mright', 'right'],
                       5: ['left', 'mleft', 'center', 'mright', 'right']}}

    if rel_coordinates:
        # Shift the origin from the pitch center to the pitch corner.
        x, y = x + pitch_dimensions[0] / 2, y + pitch_dimensions[1] / 2

    # Equal-width bins: index = floor(position * n_bins / pitch_size).
    # (Rounding the bin width up instead would make bins unequal and could leave
    # the last bins unreachable when the pitch size is not divisible by n_bins.)
    # Clamp to [0, n_bins - 1] so the far edge (position == pitch_size) lands in
    # the last bin and out-of-range coordinates land in the nearest edge bin.
    bin_x = int(min(max(np.floor(x * num_x_bins / pitch_dimensions[0]), 0), num_x_bins - 1))
    bin_y = int(min(max(np.floor(y * num_y_bins / pitch_dimensions[1]), 0), num_y_bins - 1))

    if output == 'bin_ix':
        return bin_x, bin_y
    if output == 'bin_rel':
        return f"({bin_x + 1}/{num_x_bins}, {bin_y + 1}/{num_y_bins})"

    x_labels, y_labels = bin_names['x'][num_x_bins], bin_names['y'][num_y_bins]
    return x_labels[bin_x], y_labels[bin_y]

In [45]:
# Sanity check: the origin of the center-relative coordinate system
# should fall in the middle cell of the default 5 x 5 grid.
x, y = 0, 0

get_location_bin(x, y)

'(3/5, 3/5)'

In [None]:
# NOTE: this cell redefines get_location_bin from an earlier cell and, once run,
# shadows that definition. Keep a single definition in the finished notebook.
def get_location_bin(x, y, pitch_dimensions=(120.0, 80.0), output='bin_rel',
                     num_x_bins: int = 5, num_y_bins: int = 5, rel_coordinates=True):
    '''
    Map a pitch coordinate to the cell of a regular num_x_bins x num_y_bins grid.

    :param x: float, x value [-pitch_dimensions[0] / 2, pitch_dimensions[0] / 2]
              (or [0, pitch_dimensions[0]] when rel_coordinates is False)
    :param y: float, y value [-pitch_dimensions[1] / 2, pitch_dimensions[1] / 2]
              (or [0, pitch_dimensions[1]] when rel_coordinates is False)
    :param pitch_dimensions: (x, y) of pitch dimensions
    :param num_x_bins: number of bins to split the length of the pitch (along pitch_dimensions[0])
    :param num_y_bins: number of bins to split the width of the pitch (along pitch_dimensions[1])
    :param rel_coordinates: if True, coordinates assumed to be relative to pitch center
    :param output: 'bin_name', 'bin_rel', or 'bin_ix'; any other value is treated as 'bin_name'
    :return: None when x or y is missing (None / NaN); otherwise
             'bin_ix'   -> (bin_x, bin_y) as 0-based ints,
             'bin_rel'  -> string such as '(3/5, 3/5)' (1-based index / number of bins),
             'bin_name' -> (x_label, y_label)
    :raises KeyError: with 'bin_name' output when no labels exist for the requested
                      number of bins (labels are defined for 3, 4 and 5 bins per axis)
    '''
    # Events without a location carry NaN; they cannot be assigned to a cell.
    if x is None or y is None or np.isnan(x) or np.isnan(y):
        return None

    bin_names = {'x': {3: ['back', 'med', 'fwd'],
                       4: ['back', 'mback', 'mfwd', 'fwd'],
                       5: ['back', 'mback', 'med', 'mfwd', 'fwd']},
                 'y': {3: ['left', 'center', 'right'],
                       4: ['left', 'mleft', 'mright', 'right'],
                       5: ['left', 'mleft', 'center', 'mright', 'right']}}

    if rel_coordinates:
        # Shift the origin from the pitch center to the pitch corner.
        x, y = x + pitch_dimensions[0] / 2, y + pitch_dimensions[1] / 2

    # Equal-width bins: index = floor(position * n_bins / pitch_size).
    # With the defaults this means x cells of 120 / 5 = 24 and y cells of 80 / 5 = 16.
    # Clamp to [0, n_bins - 1] so the far edge (position == pitch_size) lands in
    # the last bin and out-of-range coordinates land in the nearest edge bin.
    bin_x = int(min(max(np.floor(x * num_x_bins / pitch_dimensions[0]), 0), num_x_bins - 1))
    bin_y = int(min(max(np.floor(y * num_y_bins / pitch_dimensions[1]), 0), num_y_bins - 1))

    if output == 'bin_ix':
        return bin_x, bin_y
    if output == 'bin_rel':
        return f"({bin_x + 1}/{num_x_bins}, {bin_y + 1}/{num_y_bins})"

    x_labels, y_labels = bin_names['x'][num_x_bins], bin_names['y'][num_y_bins]
    return x_labels[bin_x], y_labels[bin_y]