In [6]:
def high_pressing(match_id, min_duration, min_players, max_seconds_to_shot=15):
    """Print the episode numbers of a match that meet the "high pressing" criteria.

    An episode is reported when all of the following hold:

    * it contains a ``SHOT`` event whose ``episodeCumDuration`` is at most
      ``max_seconds_to_shot``;
    * its first event (``episodePosition == 1``) lies in the attacking third
      of the team performing it and is not a standard situation (kick-off,
      free kick, corner kick, throw-in or goal kick);
    * the event immediately before that first event belongs to a different
      team and has ``episodeCumDuration > min_duration`` and
      ``playersInEpisode > min_players`` (both comparisons are strict).

    Parameters
    ----------
    match_id
        Identifier interpolated into the path ``../data/{match_id}.json``.
    min_duration
        The event preceding the episode must have an ``episodeCumDuration``
        strictly greater than this value.
    min_players
        The event preceding the episode must have a ``playersInEpisode``
        strictly greater than this value.
    max_seconds_to_shot
        Upper bound (inclusive) on the ``episodeCumDuration`` of the shot.
        Defaults to 15, the value that used to be hard-coded.

    Returns
    -------
    None
        The result is printed, one entry per qualifying ``SHOT`` event.

    Notes
    -----
    * Rows are assumed to be in chronological order so that "the previous row"
      is the previous event -- TODO confirm against the data export.
    * Calling this function sets the global pandas option
      ``display.max_columns`` to ``None``.
    """
    import pandas as pd
    import numpy as np
    pd.set_option('display.max_columns', None)

    events = pd.read_json(f'../data/{match_id}.json', orient='records')

    # Work out which team attacks in which direction during each interval.
    kick_off1_id = events.loc[1, 'teamId']
    kick_off2_id = events.loc[events['episode'] == 2, 'teamId'].iloc[0]

    # The sign of the x displacement between events 0 and 2 decides the labels.
    if (float(events.loc[0, 'coordinates']['x']) - float(events.loc[2, 'coordinates']['x'])) > 0:
        attacking_right1_left2 = kick_off1_id
        attacking_left1_right2 = kick_off2_id
    else:
        attacking_left1_right2 = kick_off1_id
        attacking_right1_left2 = kick_off2_id

    # Events taking place in the acting team's attacking third.
    # NOTE(review): 70 / 35 presumably are thirds of a 105-unit pitch length -- confirm.
    first_interval = events['interval'] == 1
    second_interval = events['interval'] == 2
    right_then_left = events['teamId'] == attacking_right1_left2
    left_then_right = events['teamId'] == attacking_left1_right2
    high_x = events['x_coord'] >= 70
    low_x = events['x_coord'] <= 35
    in_attacking_third = (
        (first_interval & right_then_left & high_x)
        | (first_interval & left_then_right & low_x)
        | (second_interval & left_then_right & high_x)
        | (second_interval & right_then_left & low_x)
    )

    # Events that start from a 'standard situation' (single pass over the column).
    standard_situations = ['KICKOFF', 'FREE_KICK', 'CORNER_KICK', 'THROW_IN', 'GOAL_KICK']
    has_standard_qualifier = np.array(
        [any(situation in qualifiers for situation in standard_situations)
         for qualifiers in events['qualifiers']],
        dtype=bool,
    )
    is_standard = (events['eventType'] == 'KICKOFF') | has_standard_qualifier

    # First event of every episode that contains a shot.
    shot_episodes = events.loc[events['eventType'] == 'SHOT', 'episode']
    starts_shot_episode = events['episode'].isin(shot_episodes) & (events['episodePosition'] == 1)

    # The previous event must belong to the other team and exceed both thresholds.
    # Row 0 has no predecessor, so it is left False instead of looking up row -1.
    team = events['teamId'].to_numpy()
    duration = events['episodeCumDuration'].to_numpy()
    players = events['playersInEpisode'].to_numpy()
    follows_opposition_episode = np.zeros(len(events), dtype=bool)
    follows_opposition_episode[1:] = (
        (team[1:] != team[:-1])
        & (duration[:-1] > min_duration)
        & (players[:-1] > min_players)
    )

    # Combine the criteria that apply to the first event of the episode.
    pressing_start = (
        starts_shot_episode
        & in_attacking_third
        & ~is_standard
        & follows_opposition_episode
    )
    pressing_episode_numbers = events.loc[pressing_start, 'episode']

    # Keep the shots of those episodes that came soon enough after the ball was won.
    quick_shot = (
        events['episode'].isin(pressing_episode_numbers)
        & (events['eventType'] == 'SHOT')
        & (events['episodeCumDuration'] <= max_seconds_to_shot)
    )
    final_high_pressing_episodes_numbers = events.loc[quick_shot, 'episode'].tolist()

    print(f'Match ID = {match_id}')
    print(f'Minimum duration = {min_duration} seconds, Minimum number of players involved = {min_players}')
    print(f'"High pressing" episode numbers: {final_high_pressing_episodes_numbers}')

In [7]:
# Run the detection for match 7672: previous episode longer than 5 and involving more than 2 players.
high_pressing(match_id=7672, min_duration=5, min_players=2)

Match ID = 7672
Minimum duration = 5 seconds, Minimum number of players involved = 2
"High pressing" episode numbers: [275.0]
