## 2021 Roland Garros

In [1]:
import pandas as pd
import numpy as np
import json
import requests
import os

# --> Import functions from "process" script
import sys
sys.path.append('../src')

from process import categorise_serve_direction, get_point_level_info, get_match_point_level_info, add_server_and_returner_scores, add_cum_games_and_sets, collect_serve_return_locations


In [3]:
# Save JSON Data

def _save_court_vision_matches(year, num_matches, id_prefix, file_prefix, verbose, timeout = 30):
    '''
    Shared download loop behind the two public save_tracking_data_* functions.

    Requests the Court Vision payload for a run of match ids and writes every
    payload that contains data to a JSON file in the current working directory.

    Args:
    -----
    year [int]: tournament year; used in the API url and in the file name
    num_matches [int]: exclusive upper bound of the match numbers to request
                       (numbers 1 .. num_matches - 1 are tried)
    id_prefix [str]: letters placed in front of the zero-padded match number
    file_prefix [str]: text placed in front of 'year_' in the output file name
    verbose [bool]: print a message for each match that is skipped
    timeout [int | float]: seconds passed to requests.get as its timeout

    Raises:
    -------
    Exceptions raised by requests.get (connection errors, timeouts) are not
    caught here and stop the loop.
    '''

    for i in range(1, num_matches):
        # 'SM' + 7 -> 'SM007', 'SM' + 42 -> 'SM042', 'SM' + 112 -> 'SM112'
        match_id = id_prefix + str(i).zfill(3)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'

        # Without a timeout a single unresponsive request would block forever
        get_json_file = requests.get(api_url, timeout = timeout)

        try:
            tracking_data_json = get_json_file.json()

        except ValueError:
            # Response body is not JSON: treat the match id as non-existent
            if verbose:
                print('The Match {0} does not exist!'.format(match_id))

            continue

        # A payload that is not a dict, or has no / empty 'courtVisionData',
        # carries nothing worth saving. Using .get keeps an unexpected payload
        # (e.g. an error message) from aborting the whole download.
        has_data = isinstance(tracking_data_json, dict) and tracking_data_json.get('courtVisionData')
        if not has_data:
            if verbose:
                print('The Match {0} is empty!'.format(match_id))

            continue

        file_name = file_prefix + 'year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)


def save_tracking_data_roland_garros_2021(year, num_matches = 20):
    '''
    Save the Court Vision JSON of the 'SM' match-id series (presumably the
    men's singles draw -- TODO confirm) as
    'year_<year>_<match_id>_tracking_data.json' in the working directory.
    Matches without a JSON response or without data are skipped silently.

    Args:
    -----
    year: [int] tournament year
    num_matches: [int] exclusive upper bound of the match numbers to request;
                 the default of 20 tries SM001 .. SM019
    '''

    _save_court_vision_matches(year, num_matches, id_prefix = 'SM', file_prefix = '', verbose = False)


def save_tracking_data_roland_garros_wta_2021(year, num_matches = 20):
    '''
    Save the Court Vision JSON of the 'SD' match-id series (presumably the
    women's singles draw -- TODO confirm) as
    'wta_year_<year>_<match_id>_tracking_data.json' in the working directory.
    A message is printed for every match that is skipped.

    Args:
    -----
    year: [int] tournament year
    num_matches: [int] exclusive upper bound of the match numbers to request;
                 the default of 20 tries SD001 .. SD019
    '''

    _save_court_vision_matches(year, num_matches, id_prefix = 'SD', file_prefix = 'wta_', verbose = True)

In [63]:
%%capture 
# Download the 2021 tracking JSON for both match-id series ('SM...' and 'SD...').
# num_matches is an exclusive bound in both functions, so match numbers 1-199 are requested.
# NOTE(review): the files are written to the current working directory, while the
# loading cells read from ./rg_2021_json_data -- presumably they are moved by hand; confirm.
save_tracking_data_roland_garros_2021(year = 2021, num_matches=200)
save_tracking_data_roland_garros_wta_2021(year = 2021, num_matches=200)

In [2]:
%%capture 
# Suppresses printed messages

# Folder with the saved Court Vision JSON files. The same location is used for
# listing and for opening, so the cell does not depend on a machine-specific
# absolute path.
rg_2021_json_dir = "./rg_2021_json_data"

# Gather one table per match and concatenate once at the end: growing a
# DataFrame inside the loop is quadratic, and DataFrame.append no longer
# exists in pandas >= 2.0.
match_frames = []

# os.listdir returns names in arbitrary order; sorting keeps the row order reproducible
for match_filename in sorted(os.listdir(rg_2021_json_dir)):

    if not match_filename.endswith(".json"):
        continue

    with open(os.path.join(rg_2021_json_dir, match_filename)) as json_file:
        tracking_data_json = json.load(json_file)

    # Progress message (print the file name rather than the file handle)
    print(match_filename)

    # assumes get_match_point_level_info returns a DataFrame with one row per
    # serve/point -- TODO confirm against ../src/process.py
    data_to_add = get_match_point_level_info(tracking_data_json)
    data_to_add['match_id'] = match_filename

    # The year is the first purely numeric '_'-separated token of the file
    # name, e.g. 'wta_year_2021_SD121_tracking_data.json' -> 2021
    year = [int(s) for s in match_filename.split('_') if s.isdigit()][0]
    is_wta = match_filename.startswith('wta')
    data_to_add['year'] = year
    data_to_add['is_wta'] = is_wta
    match_frames.append(data_to_add)

# pd.concat refuses an empty list, so fall back to an empty frame when no
# JSON file was found
rg_2021_data_frame = pd.concat(match_frames) if match_frames else pd.DataFrame([])

In [3]:
# Persist the combined point-level table; index=False leaves the row index out of the CSV.
rg_2021_data_frame.to_csv('roland_garros_2021.csv', index=False)

In [55]:
# PLAYER IDS
def get_match_level_info(tracking_data_json, year):
    '''
    Flatten the basic facts of one match (player names, ids, countries,
    seeds, court, event type, status) into a single dict.

    Args:
    -----
    tracking_data_json [json]: parsed Court Vision payload of one match
    year: match year, copied into the result unchanged

    Returns:
    --------
    dict of row to append into a dataframe; keys are inserted in the order
    year, player1, player2, player1_*, player2_*, then the match-level fields
    '''

    # Only the first 'courtVisionData' entry is read
    match_payload = tracking_data_json['courtVisionData'][0]

    # -----------------------------
    # Player information
    # -----------------------------
    # Only the first entry of each team list is used below
    team_one = match_payload['playersData']['playerTeam']
    team_two = match_payload['playersData']['opponentTeam']

    match_row = {'year': year}
    match_row['player1'] = team_one[0]['name']
    match_row['player2'] = team_two[0]['name']

    # (output column, team list, key inside the player record)
    player_columns = (
        ('player1_id', team_one, 'id'),
        ('player1_country', team_one, 'country'),
        ('player1_seed', team_one, 'seed'),
        ('player2_id', team_two, 'id'),
        ('player2_country', team_two, 'country'),
        ('player2_seed', team_two, 'seed'),
    )
    for column, team, field in player_columns:
        match_row[column] = team[0][field]

    # -------------------------------
    # Match-level fields, copied as they appear in the payload
    # NOTE(review): the meaning of 'pointId' at match level is unclear -- confirm
    # -------------------------------
    # (output column, key in the payload)
    match_columns = (
        ('point_id', 'pointId'),
        ('court_name', 'courtName'),
        ('court_id', 'courtId'),
        ('num_sets_completed', 'setsCompleted'),
        # Mens/Womens Singles
        ('match_type', 'eventType'),
        # Complete status?
        ('match_status', 'matchStatus'),
    )
    for column, payload_key in match_columns:
        match_row[column] = match_payload[payload_key]

    return match_row

In [68]:
### Loop through all json files in directory
# Folder with the saved Court Vision JSON files. The same location is used for
# listing and for opening, so the cell does not depend on a machine-specific
# absolute path.
rg_2021_json_dir = './rg_2021_json_data'

data_list = []

# os.listdir returns names in arbitrary order; sorting keeps the result reproducible
for filename in sorted(os.listdir(rg_2021_json_dir)):
    if not filename.endswith(".json"):
        continue

    with open(os.path.join(rg_2021_json_dir, filename)) as json_file:
        tracking_data_json = json.load(json_file)

    # The year is the first purely numeric '_'-separated token of the file name
    file_year = [int(s) for s in filename.split('_') if s.isdigit()]
    match_info_to_add = get_match_level_info(tracking_data_json, year = file_year[0])
    match_info_to_add['filename'] = filename
    data_list.append(match_info_to_add)


# One row per match file, ordered by year and event type, then written to disk
available_matches = pd.DataFrame(data_list).sort_values(by=['year', 'match_type'])
available_matches.to_csv('rg_2021_matches_in_repo.csv', index = False)
#available_matches[available_matches['match_type'] == "Men's Singles"].head()

In [59]:
# Show the point-level rows of one match; 'match_id' holds the name of the source JSON file.
rg_2021_data_frame[rg_2021_data_frame['match_id'] == 'year_2021_SM112_tracking_data.json']

Unnamed: 0,point_ID,set_num,game_num,point_num,serve_num,server_id,returner_id,point_winner_id,court_side,serve_speed_kph,...,returner_score,player1,player2,p1_cum_games,p2_cum_games,p1_cum_sets,p2_cum_sets,match_id,year,is_wta
0,1_1_1_1,1.0,1.0,1.0,1.0,11493,35398,35398,DeuceCourt,193 KPH,...,0.0,11493,35398,0.0,0.0,0.0,0.0,year_2021_SM112_tracking_data.json,2021,False
1,1_1_2_1,1.0,1.0,2.0,1.0,11493,35398,11493,AdCourt,203 KPH,...,1.0,11493,35398,0.0,0.0,0.0,0.0,year_2021_SM112_tracking_data.json,2021,False
2,1_1_3_1,1.0,1.0,3.0,1.0,11493,35398,11493,DeuceCourt,212 KPH,...,1.0,11493,35398,0.0,0.0,0.0,0.0,year_2021_SM112_tracking_data.json,2021,False
3,1_1_4_1,1.0,1.0,4.0,1.0,11493,35398,35398,AdCourt,210.4 KPH,...,1.0,11493,35398,0.0,0.0,0.0,0.0,year_2021_SM112_tracking_data.json,2021,False
4,1_1_4_2,1.0,1.0,4.0,2.0,11493,35398,35398,AdCourt,135.84 KPH,...,1.0,11493,35398,0.0,0.0,0.0,0.0,year_2021_SM112_tracking_data.json,2021,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
238,3_7_1_1,3.0,7.0,1.0,1.0,11493,35398,35398,DeuceCourt,177 KPH,...,0.0,11493,35398,1.0,5.0,0.0,2.0,year_2021_SM112_tracking_data.json,2021,False
239,3_7_2_1,3.0,7.0,2.0,1.0,11493,35398,35398,AdCourt,204.94 KPH,...,1.0,11493,35398,1.0,5.0,0.0,2.0,year_2021_SM112_tracking_data.json,2021,False
240,3_7_2_2,3.0,7.0,2.0,2.0,11493,35398,35398,AdCourt,159.79 KPH,...,1.0,11493,35398,1.0,5.0,0.0,2.0,year_2021_SM112_tracking_data.json,2021,False
241,3_7_3_1,3.0,7.0,3.0,1.0,11493,35398,35398,DeuceCourt,193.28 KPH,...,2.0,11493,35398,1.0,5.0,0.0,2.0,year_2021_SM112_tracking_data.json,2021,False


In [47]:
# List the keys of 'pointsData' for the match currently held in `tracking_data_json`.
# That variable is whatever the most recently run loading loop left behind, so the
# match inspected here depends on execution order.
# The keys look like '<set>_<game>_<point>_<serve>' ids -- TODO confirm.
all_tracking_data_dict = tracking_data_json['courtVisionData'][0]['pointsData']
all_tracking_data_dict.keys()

dict_keys(['1_1_4_1', '1_3_3_1', '1_3_5_1', '2_8_3_1', '3_6_3_1', '1_8_5_2', '2_7_6_2', '3_1_6_2', '3_3_6_1', '1_5_5_1', '1_5_5_2', '2_4_5_1', '2_4_5_2', '1_1_1_1', '1_1_3_1', '1_3_2_1', '1_5_2_1', '1_5_3_1', '1_5_4_1', '1_5_6_1', '1_7_3_1', '1_7_4_1', '1_9_1_1', '1_9_4_1', '2_2_2_1', '2_2_3_1', '2_2_4_1', '2_4_1_1', '2_4_3_1', '2_4_4_1', '2_4_6_1', '2_6_2_1', '2_6_3_1', '2_6_4_1', '2_6_5_1', '2_6_6_1', '2_6_8_1', '2_8_1_1', '2_8_2_1', '2_8_4_1', '2_10_1_1', '2_10_3_1', '2_10_4_1', '2_12_2_1', '2_12_4_1', '2_12_6_1', '3_2_1_1', '3_2_2_1', '3_2_5_1', '3_2_6_1', '3_2_7_1', '3_2_8_1', '3_2_11_1', '3_2_12_1', '3_4_1_1', '3_6_1_1', '3_6_2_1', '3_6_4_1', '3_6_5_1', '3_6_8_1', '1_2_4_2', '1_7_2_2', '3_1_5_2', '3_6_7_2', '1_1_2_2', '1_2_1_2', '1_3_1_2', '1_3_4_2', '1_4_3_2', '1_5_1_2', '1_6_2_1', '1_6_4_1', '1_6_6_1', '1_6_7_1', '1_7_1_2', '1_8_1_1', '1_8_3_1', '1_8_4_2', '1_9_2_2', '1_9_3_2', '2_1_1_1', '2_1_3_2', '2_1_4_2', '2_2_1_2', '2_5_1_1', '2_5_2_1', '2_5_4_2', '2_7_1_1', '2_7_3_1', '2

In [47]:
# NOTE(review): this cell is repeated verbatim elsewhere in the notebook.
# Lists the keys of 'pointsData' for the match currently held in `tracking_data_json`,
# i.e. whatever the most recently run loading loop left behind.
all_tracking_data_dict = tracking_data_json['courtVisionData'][0]['pointsData']
all_tracking_data_dict.keys()

dict_keys(['1_1_4_1', '1_3_3_1', '1_3_5_1', '2_8_3_1', '3_6_3_1', '1_8_5_2', '2_7_6_2', '3_1_6_2', '3_3_6_1', '1_5_5_1', '1_5_5_2', '2_4_5_1', '2_4_5_2', '1_1_1_1', '1_1_3_1', '1_3_2_1', '1_5_2_1', '1_5_3_1', '1_5_4_1', '1_5_6_1', '1_7_3_1', '1_7_4_1', '1_9_1_1', '1_9_4_1', '2_2_2_1', '2_2_3_1', '2_2_4_1', '2_4_1_1', '2_4_3_1', '2_4_4_1', '2_4_6_1', '2_6_2_1', '2_6_3_1', '2_6_4_1', '2_6_5_1', '2_6_6_1', '2_6_8_1', '2_8_1_1', '2_8_2_1', '2_8_4_1', '2_10_1_1', '2_10_3_1', '2_10_4_1', '2_12_2_1', '2_12_4_1', '2_12_6_1', '3_2_1_1', '3_2_2_1', '3_2_5_1', '3_2_6_1', '3_2_7_1', '3_2_8_1', '3_2_11_1', '3_2_12_1', '3_4_1_1', '3_6_1_1', '3_6_2_1', '3_6_4_1', '3_6_5_1', '3_6_8_1', '1_2_4_2', '1_7_2_2', '3_1_5_2', '3_6_7_2', '1_1_2_2', '1_2_1_2', '1_3_1_2', '1_3_4_2', '1_4_3_2', '1_5_1_2', '1_6_2_1', '1_6_4_1', '1_6_6_1', '1_6_7_1', '1_7_1_2', '1_8_1_1', '1_8_3_1', '1_8_4_2', '1_9_2_2', '1_9_3_2', '2_1_1_1', '2_1_3_2', '2_1_4_2', '2_2_1_2', '2_5_1_1', '2_5_2_1', '2_5_4_2', '2_7_1_1', '2_7_3_1', '2

In [47]:
# NOTE(review): this cell is repeated verbatim elsewhere in the notebook.
# Lists the keys of 'pointsData' for the match currently held in `tracking_data_json`,
# i.e. whatever the most recently run loading loop left behind.
all_tracking_data_dict = tracking_data_json['courtVisionData'][0]['pointsData']
all_tracking_data_dict.keys()

dict_keys(['1_1_4_1', '1_3_3_1', '1_3_5_1', '2_8_3_1', '3_6_3_1', '1_8_5_2', '2_7_6_2', '3_1_6_2', '3_3_6_1', '1_5_5_1', '1_5_5_2', '2_4_5_1', '2_4_5_2', '1_1_1_1', '1_1_3_1', '1_3_2_1', '1_5_2_1', '1_5_3_1', '1_5_4_1', '1_5_6_1', '1_7_3_1', '1_7_4_1', '1_9_1_1', '1_9_4_1', '2_2_2_1', '2_2_3_1', '2_2_4_1', '2_4_1_1', '2_4_3_1', '2_4_4_1', '2_4_6_1', '2_6_2_1', '2_6_3_1', '2_6_4_1', '2_6_5_1', '2_6_6_1', '2_6_8_1', '2_8_1_1', '2_8_2_1', '2_8_4_1', '2_10_1_1', '2_10_3_1', '2_10_4_1', '2_12_2_1', '2_12_4_1', '2_12_6_1', '3_2_1_1', '3_2_2_1', '3_2_5_1', '3_2_6_1', '3_2_7_1', '3_2_8_1', '3_2_11_1', '3_2_12_1', '3_4_1_1', '3_6_1_1', '3_6_2_1', '3_6_4_1', '3_6_5_1', '3_6_8_1', '1_2_4_2', '1_7_2_2', '3_1_5_2', '3_6_7_2', '1_1_2_2', '1_2_1_2', '1_3_1_2', '1_3_4_2', '1_4_3_2', '1_5_1_2', '1_6_2_1', '1_6_4_1', '1_6_6_1', '1_6_7_1', '1_7_1_2', '1_8_1_1', '1_8_3_1', '1_8_4_2', '1_9_2_2', '1_9_3_2', '2_1_1_1', '2_1_3_2', '2_1_4_2', '2_2_1_2', '2_5_1_1', '2_5_2_1', '2_5_4_2', '2_7_1_1', '2_7_3_1', '2

In [58]:
# Preview the first rows of the combined point-level table.
rg_2021_data_frame.head()

Unnamed: 0,point_ID,set_num,game_num,point_num,serve_num,server_id,returner_id,point_winner_id,court_side,serve_speed_kph,...,returner_score,player1,player2,p1_cum_games,p2_cum_games,p1_cum_sets,p2_cum_sets,match_id,year,is_wta
0,1_1_1_1,1.0,1.0,1.0,1.0,23726,9086,23726,DeuceCourt,155 KPH,...,0.0,23726,9086,0.0,0.0,0.0,0.0,wta_year_2021_SD121_tracking_data.json,2021,True
1,1_1_2_1,1.0,1.0,2.0,1.0,23726,9086,23726,AdCourt,159.76 KPH,...,0.0,23726,9086,0.0,0.0,0.0,0.0,wta_year_2021_SD121_tracking_data.json,2021,True
2,1_1_2_2,1.0,1.0,2.0,2.0,23726,9086,23726,AdCourt,128 KPH,...,0.0,23726,9086,0.0,0.0,0.0,0.0,wta_year_2021_SD121_tracking_data.json,2021,True
3,1_1_3_1,1.0,1.0,3.0,1.0,23726,9086,23726,DeuceCourt,168.31 KPH,...,0.0,23726,9086,0.0,0.0,0.0,0.0,wta_year_2021_SD121_tracking_data.json,2021,True
4,1_1_3_2,1.0,1.0,3.0,2.0,23726,9086,23726,DeuceCourt,133 KPH,...,0.0,23726,9086,0.0,0.0,0.0,0.0,wta_year_2021_SD121_tracking_data.json,2021,True
