In [5]:
def gegenpressing(match_id, min_duration, max_duration, max_players, max_shot_delay=15):
    """Find, print and return the "gegenpressing" episodes of one match.

    Loads ``../data/{match_id}.json`` and keeps every episode for which all of
    the following hold:

    * the episode contains a ``SHOT`` event whose ``episodeCumDuration`` is at
      most ``max_shot_delay``;
    * the episode's first event (``episodePosition == 1``) lies in the
      attacking third of the team on the ball and is not a standard
      situation (kick-off, free kick, corner kick, throw-in, goal kick);
    * the event directly before that first event belongs to the other team,
      has ``game_control == 'c'``, an ``episodeCumDuration`` between
      ``min_duration`` and ``max_duration`` (inclusive) and a
      ``playersInEpisode`` of at most ``max_players``;
    * the event directly before the first event of that opposition episode
      has a known ``teamId`` that differs from the opposition's, i.e. the
      opposition spell is "sandwiched" between two spells of the other team.

    The event list is assumed to be in chronological order, so that "the
    event before an episode's first event" is the last event of the previous
    episode -- TODO confirm against the data export.

    Args:
        match_id: Identifier used to build the input path
            ``../data/{match_id}.json``.
        min_duration: Lower bound (inclusive) for the opposition episode's
            ``episodeCumDuration`` (reported as seconds in the printout).
        max_duration: Upper bound (inclusive) for the same value.
        max_players: Largest allowed ``playersInEpisode`` for the opposition
            episode.
        max_shot_delay: Largest allowed ``episodeCumDuration`` of the shot
            inside the gegenpressing episode. Defaults to 15, the value that
            was previously hard-coded.

    Returns:
        list: Episode numbers of the matching episodes, in event order and
        without duplicates. The same list is printed.

    Raises:
        IndexError: If the data contains no event with ``episode == 2``.
        KeyError: If an expected column or one of the first three events is
            missing.
    """
    import pandas as pd
    import numpy as np
    # Kept from the original implementation: session-wide pandas display option.
    pd.set_option('display.max_columns', None)

    # orient='records' produces a default 0..n-1 index, so row positions and
    # .loc labels coincide; the "index - 1" arithmetic below relies on that.
    events = pd.read_json(f'../data/{match_id}.json', orient='records')

    # Calculate direction each team is attacking...
    kick_off1_id = events.loc[1, 'teamId']
    kick_off2_id = events.loc[events['episode'] == 2, 'teamId'].iloc[0]

    x_shift = float(events.loc[0, 'coordinates']['x']) - float(events.loc[2, 'coordinates']['x'])
    if x_shift > 0:
        attacking_right1_left2 = kick_off1_id
        attacking_left1_right2 = kick_off2_id
    else:
        attacking_left1_right2 = kick_off1_id
        attacking_right1_left2 = kick_off2_id

    # Events taking place in the attacking third (x >= 70 when attacking
    # towards high x, x <= 35 when attacking towards low x); the two teams
    # swap direction between interval 1 and interval 2.
    team = events['teamId']
    interval = events['interval']
    high_x = events['x_coord'] >= 70
    low_x = events['x_coord'] <= 35
    in_attacking_third = (
        ((interval == 1) & (team == attacking_right1_left2) & high_x)
        | ((interval == 1) & (team == attacking_left1_right2) & low_x)
        | ((interval == 2) & (team == attacking_left1_right2) & high_x)
        | ((interval == 2) & (team == attacking_right1_left2) & low_x)
    )

    # Events that are a 'standard situation': flagged through their
    # qualifiers or being a KICKOFF event themselves.
    standard_situations = ('KICKOFF', 'FREE_KICK', 'CORNER_KICK', 'THROW_IN', 'GOAL_KICK')

    def has_standard_qualifier(qualifiers):
        # A missing value (NaN/None) carries no qualifiers at all.
        if not isinstance(qualifiers, (str, list, tuple, set, dict)):
            return False
        return any(situation in qualifiers for situation in standard_situations)

    is_standard = (
        events['qualifiers'].apply(has_standard_qualifier).astype(bool)
        | (events['eventType'] == 'KICKOFF')
    )

    # Events that happen in the attacking third AND are not a 'standard situation'
    valid_start = (in_attacking_third & ~is_standard).to_numpy()

    # First events of the episodes that contain a 'SHOT'
    is_shot = events['eventType'] == 'SHOT'
    is_episode_start = events['episodePosition'] == 1
    shot_episodes = events.loc[is_shot, 'episode'].dropna()
    shot_episode_starts = np.flatnonzero(
        (events['episode'].isin(shot_episodes) & is_episode_start).to_numpy()
    )

    gegenpressing_episode_numbers = []
    for start_event in shot_episode_starts:
        prev_event = start_event - 1
        if prev_event < 0:
            # The shot episode opens the match: there is no opposition spell before it.
            continue

        # Previous episode must belong to the opposition ...
        if events.loc[start_event, 'teamId'] == events.loc[prev_event, 'teamId']:
            continue
        # ... be 'controlled' ...
        if events.loc[prev_event, 'game_control'] != 'c':
            continue
        # ... last between min_duration and max_duration ...
        if not (min_duration <= events.loc[prev_event, 'episodeCumDuration'] <= max_duration):
            continue
        # ... and involve no more than max_players players.
        if not (events.loc[prev_event, 'playersInEpisode'] <= max_players):
            continue

        # Locate the first event of that opposition episode. This is done per
        # candidate so a missing start row can never shift the bookkeeping of
        # the other candidates.
        opposition_episode = events.loc[prev_event, 'episode']
        opposition_starts = np.flatnonzero(
            ((events['episode'] == opposition_episode) & is_episode_start).to_numpy()
        )
        if opposition_starts.size == 0 or opposition_starts[0] == 0:
            # Start unknown, or nothing precedes it: the "sandwich" cannot be confirmed.
            continue
        opposition_start = opposition_starts[0]

        # The opposition episode must itself be preceded by the other team.
        team_before = events.loc[opposition_start - 1, 'teamId']
        if pd.isna(team_before) or team_before == events.loc[opposition_start, 'teamId']:
            continue

        # Combine with the attacking-third / non-standard criterion.
        if valid_start[start_event]:
            gegenpressing_episode_numbers.append(events.loc[start_event, 'episode'])

    # Episodes where the shot happened within max_shot_delay of the ball claim
    shot_in_time = (
        events['episode'].isin(gegenpressing_episode_numbers)
        & is_shot
        & (events['episodeCumDuration'] <= max_shot_delay)
    )
    # dict.fromkeys drops repeats (several shots in one episode) but keeps event order.
    final_gegenpressing_episodes_numbers = list(dict.fromkeys(events.loc[shot_in_time, 'episode']))

    print(f'Match ID = {match_id}')
    print(f'Minimum duration = {min_duration} seconds, Maximum duration = {max_duration} seconds, Maximum number of players involved = {max_players}')
    print(f'"Gegenpressing" episode numbers: {final_gegenpressing_episodes_numbers}')

    return final_gegenpressing_episodes_numbers


In [6]:
# Analyse match 7672: opposition spell of 3 to 6 seconds involving at most 3 players.
gegenpressing(match_id=7672, min_duration=3, max_duration=6, max_players=3)

Match ID = 7672
Minimum duration = 3 seconds, Maximum duration = 6 seconds, Maximum number of players involved = 3
"Gegenpressing" episode numbers: [336.0, 483.0]
