In [6]:
def pull_match_data(match_id):
    """Download all events of a match, enrich them and save them as JSON.

    Steps:
      1. Page through the events API until a short page is returned.
      2. Left-join the local ``../data/game_control.csv`` onto the events
         (``events.id`` == ``game_control.event_id``).
      3. Copy the coordinates of the first event onto every PASS that
         directly follows a KICKOFF.
      4. Expand the ``coordinates`` column into ``*_coord`` columns.
      5. Add the running per-episode columns ``episodePosition``,
         ``episodeCumDuration`` and ``playersInEpisode``.
      6. Write the result to ``../data/{match_id}.json`` (records layout).

    Args:
        match_id: Identifier of the match; sent to the API as a string and
            used in the output file name.

    Returns:
        None. The result is written to disk.

    Raises:
        requests.HTTPError: If the API answers with an error status.
    """
    import pandas as pd

    # Global display option kept from the original notebook cell.
    pd.set_option('display.max_columns', None)

    # Pull all match events from API
    events = _fetch_match_events(match_id)

    # Tidy dataframe and merge with 'game_control' data
    events = events.rename(columns={"index": "event_index"})
    game_control = pd.read_csv('../data/game_control.csv')
    # Drop the last CSV column and name the two that remain.
    game_control = game_control.iloc[:, :-1]
    game_control.columns = ['game_control', 'event_id']
    game_control['event_id'] = game_control['event_id'].astype(str)
    events = events.merge(game_control, how='left', left_on='id', right_on='event_id')
    events = events.drop(columns=['event_id'])

    # Define the centre circle coordinates and enter into DF for every pass
    # made directly from kick-off
    centre_circle = events.loc[0, 'coordinates']
    # Stop one row early: the last event has no successor, so looking at
    # row i + 1 there would raise a KeyError.
    for i in events.index[:-1]:
        if events.loc[i, 'eventType'] == 'KICKOFF' and events.loc[i + 1, 'eventType'] == 'PASS':
            events.loc[i + 1, 'coordinates'] = [centre_circle]

    # Expand 'coordinates' column and concat to main DF
    expand_coord = events['coordinates'].apply(pd.Series).add_suffix('_coord')
    # '0_coord' presumably only appears when some rows have no coordinate
    # mapping; tolerate its absence instead of failing.
    expand_coord = expand_coord.drop(columns=['0_coord'], errors='ignore')
    events = pd.concat([events, expand_coord], axis=1)
    events = events.astype({'x_coord': 'float64', 'y_coord': 'float64',
                            'xSource_coord': 'float64', 'ySource_coord': 'float64'})

    _add_episode_columns(events)

    events.to_json(f'../data/{match_id}.json', orient='records')


def _fetch_match_events(match_id, page_size=500):
    """Return every event of *match_id* from the paginated API as one DataFrame."""
    import pandas as pd
    import requests

    url = 'https://api.readthegame.com/v1/events'
    # NOTE(review): hard-coded credential committed to source. Kept so the
    # function keeps working, but it should be moved to an environment
    # variable / secret store and rotated.
    headers = {'x-api-key': "dpEYeSq7dWQ2KIkv5vrY98yPqTJLaU82tQqAaYPg"}

    frames = []
    page = 0
    while True:
        # Every request uses the same page size; the stop condition below
        # only works if the requested size and the tested size agree.
        querystring = {'match': str(match_id),
                       'page': page,
                       'pageSize': str(page_size)}
        response = requests.request("GET", url, headers=headers, params=querystring)
        # Fail loudly on an HTTP error instead of on a missing 'data' key.
        response.raise_for_status()
        batch = response.json()['data']

        # Keep the first page even when empty so there is always a frame to
        # concatenate; skip later empty pages.
        if batch or not frames:
            frames.append(pd.DataFrame(batch))

        # A page that is not exactly full is the last one.
        if len(batch) != page_size:
            break
        page += 1

    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat(frames, ignore_index=True)


def _add_episode_columns(events):
    """Add running per-episode position, duration and player count, in place.

    Rows are walked once in order. Rows whose ``episode`` is missing get NaN
    in all three columns and do not interrupt the running episode. Any
    change of the ``episode`` value starts a new run; the actual value is
    tracked, so episode ids need not start at 1 or increase by exactly 1.
    """
    import numpy as np
    import pandas as pd

    positions = []
    cumulative_durations = []
    player_counts = []

    current_episode = None
    position = 0
    elapsed = 0
    players = set()

    rows = zip(events['episode'], events['duration'], events['playerId'])
    for episode, duration, player_id in rows:
        if pd.isna(episode):
            positions.append(np.nan)
            cumulative_durations.append(np.nan)
            player_counts.append(np.nan)
            continue

        if episode != current_episode:
            # New episode: restart all running counters.
            current_episode = episode
            position = 0
            elapsed = 0
            players = set()

        position += 1
        elapsed += duration
        players.add(player_id)

        positions.append(position)
        cumulative_durations.append(elapsed)
        player_counts.append(len(players))

    events['episodePosition'] = positions
    events['episodeCumDuration'] = cumulative_durations
    events['playersInEpisode'] = player_counts

In [7]:
# Run the pipeline for match 7672; the output is written to ../data/7672.json.
pull_match_data(7672)