## 2021 Roland Garros

In [1]:
import pandas as pd
import numpy as np
import json
import requests
import os

pd.set_option('display.max_rows', 500)

# --> Import functions from "process" script
import sys
sys.path.append('../src')

from process import categorise_serve_direction, get_point_level_info, get_match_point_level_info, add_server_and_returner_scores, add_cum_games_and_sets, collect_serve_return_locations, collect_serve_plus1_locations


In [2]:
# Save JSON Data

def save_tracking_data_roland_garros_2021(year, num_matches = 20):
    '''
    Download Court Vision tracking data for 'SM' matches and save one JSON
    file per match in the current working directory.

    Match ids are 'SM' followed by the match number zero-padded to three
    digits (SM001, SM002, ..., SM099, SM100, ...). A match is skipped
    silently when the response body is not valid JSON, or when its
    'courtVisionData' entry is missing or empty.

    Args:
    -----
    year [int]: tournament year; inserted into the API url and the file name
    num_matches [int]: exclusive upper bound on the match number, i.e. match
        numbers 1 .. num_matches - 1 are requested

    Returns:
    --------
    None. Writes 'year_<year>_<match_id>_tracking_data.json' for every match
    that has tracking data.

    Raises:
    -------
    requests.exceptions.RequestException if a request fails at the network
    level (including the timeout below); such errors are not swallowed.
    '''

    for i in range(1, num_matches):
        # zfill(3) reproduces the SM00x / SM0xx / SMxxx id scheme in one step
        match_id = 'SM' + str(i).zfill(3)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'

        # Without a timeout a single unresponsive request would block the
        # whole download loop indefinitely.
        get_json_file = requests.get(api_url, timeout=30)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            continue

        # Use .get so a JSON body without 'courtVisionData' (e.g. an error
        # payload) is skipped like an empty match instead of raising KeyError.
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            continue

        file_name = 'year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)
            
            
def save_tracking_data_roland_garros_wta_2021(year, num_matches = 20):
    '''
    Download Court Vision tracking data for 'SD' matches and save one JSON
    file per match in the current working directory.

    Match ids are 'SD' followed by the match number zero-padded to three
    digits (SD001, SD002, ..., SD099, SD100, ...). A match is skipped, with
    a printed message, when the response body is not valid JSON, or when its
    'courtVisionData' entry is missing or empty.

    Example url:
    https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/2021/eventId/520/matchId/SD103/pointId/0_0_0

    Args:
    -----
    year [int]: tournament year; inserted into the API url and the file name
    num_matches [int]: exclusive upper bound on the match number, i.e. match
        numbers 1 .. num_matches - 1 are requested

    Returns:
    --------
    None. Writes 'wta_year_<year>_<match_id>_tracking_data.json' for every
    match that has tracking data.

    Raises:
    -------
    requests.exceptions.RequestException if a request fails at the network
    level (including the timeout below); such errors are not swallowed.
    '''

    for i in range(1, num_matches):
        # zfill(3) reproduces the SD00x / SD0xx / SDxxx id scheme in one step
        match_id = 'SD' + str(i).zfill(3)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'

        # Without a timeout a single unresponsive request would block the
        # whole download loop indefinitely.
        get_json_file = requests.get(api_url, timeout=30)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            print('The Match {0} does not exist!'.format(match_id))
            continue

        # Use .get so a JSON body without 'courtVisionData' (e.g. an error
        # payload) is reported as empty instead of raising KeyError.
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            print('The Match {0} is empty!'.format(match_id))
            continue

        file_name = 'wta_year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)

In [3]:
%%capture 
# Download a small sample of 'SM' matches: with num_matches=5 the function
# requests match numbers 1-4 (its range() upper bound is exclusive).
# Larger run, kept for reference:
#save_tracking_data_roland_garros_2021(year = 2021, num_matches=200)
save_tracking_data_roland_garros_2021(year = 2021, num_matches=5)

In [4]:
%%capture 
# Download a small sample of 'SD' matches: with num_matches=5 the function
# requests match numbers 1-4 (its range() upper bound is exclusive).
# Larger run, kept for reference:
#save_tracking_data_roland_garros_wta_2021(year = 2021, num_matches=200)
save_tracking_data_roland_garros_wta_2021(year = 2021, num_matches=5)

In [5]:
%%capture 
# Suppresses print messages

# Build one point-level frame covering every match file in the data directory.

# Read from the same relative directory that is listed below, rather than an
# absolute path that only exists on one machine.
json_data_dir = "./rg_2021_json_data"

# Collect the per-match frames and concatenate once at the end:
# DataFrame.append copies the whole frame on every iteration and no longer
# exists in pandas 2.0.
match_frames = []
for match_filename in os.listdir(json_data_dir):

    if not match_filename.endswith(".json"):
        continue

    with open(os.path.join(json_data_dir, match_filename)) as json_file:
        tracking_data_json = json.load(json_file)

    # Report the file being processed (the file name, not the closed handle)
    print(match_filename)

    # NOTE(review): assumes get_match_point_level_info returns a DataFrame
    # with one row per serve/point - confirm against ../src/process.py
    data_to_add = get_match_point_level_info(tracking_data_json)
    data_to_add['match_id'] = match_filename

    # File names look like '[wta_]year_<year>_<match id>_tracking_data.json':
    # the first all-digit token is the year, a leading 'wta' marks WTA files.
    year = [int(s) for s in match_filename.split('_') if s.isdigit()][0]
    is_wta = match_filename.startswith('wta')
    data_to_add['year'] = year
    data_to_add['is_wta'] = is_wta
    match_frames.append(data_to_add)

# Keep the original result (an empty frame) when the directory has no json files
rg_2021_data_frame = pd.concat(match_frames) if match_frames else pd.DataFrame([])

In [6]:
# Write the combined point-level frame to a csv file, leaving out the row index.
rg_2021_data_frame.to_csv('roland_garros_2021.csv', index=False)

In [7]:
# PLAYER IDS
def get_match_level_info(tracking_data_json, year):
    '''
    Summarise one match (players, seeds, court, status) as a flat dict.

    Args:
    -----
    tracking_data_json [dict]: parsed match file; only the first entry of
        its 'courtVisionData' list is read
    year: match year, copied into the result unchanged

    Returns:
    --------
    dict holding a single row that can be appended into a dataframe
    '''

    # Everything of interest sits in the first 'courtVisionData' entry
    match_payload = tracking_data_json['courtVisionData'][0]

    # Both sides of the match; the first member of each team is the one reported
    team_one = match_payload['playersData']['playerTeam']
    team_two = match_payload['playersData']['opponentTeam']

    match_dict = {
        # --- players ---
        'year': year,
        'player1': team_one[0]['name'],
        'player2': team_two[0]['name'],
        'player1_id': team_one[0]['id'],
        'player1_country': team_one[0]['country'],
        'player1_seed': team_one[0]['seed'],
        'player2_id': team_two[0]['id'],
        'player2_country': team_two[0]['country'],
        'player2_seed': team_two[0]['seed'],
        # --- match-level fields copied straight from the payload ---
        'point_id': match_payload['pointId'],
        'court_name': match_payload['courtName'],
        'court_id': match_payload['courtId'],
        'num_sets_completed': match_payload['setsCompleted'],
        # Event label, e.g. "Women's Singles"
        'match_type': match_payload['eventType'],
        # Status code as delivered in the payload
        'match_status': match_payload['matchStatus'],
    }

    return match_dict

In [8]:
### Loop through all json files in directory and build one row per match

# Read from the same relative directory that is listed below, rather than an
# absolute path that only exists on one machine.
json_data_dir = './rg_2021_json_data'

data_list = []

for filename in os.listdir(json_data_dir):
    if not filename.endswith(".json"):
        continue

    with open(os.path.join(json_data_dir, filename)) as json_file:
        tracking_data_json = json.load(json_file)

    # The first all-digit token of the file name is the year
    file_year = [int(s) for s in filename.split('_') if s.isdigit()]
    match_info_to_add = get_match_level_info(tracking_data_json, year = file_year[0])
    match_info_to_add['filename'] = filename
    data_list.append(match_info_to_add)

# Sort without inplace so re-running the cell always starts from fresh rows
available_matches = pd.DataFrame(data_list).sort_values(by=['year', 'match_type'])
available_matches.to_csv('rg_2021_matches_in_repo.csv', index = False)

In [9]:
# Display the match table ordered by file name. sort_values returns a new
# frame here, so available_matches itself is left unchanged.
available_matches.sort_values(by = ['filename'])

Unnamed: 0,year,player1,player2,player1_id,player1_country,player1_seed,player2_id,player2_country,player2_seed,point_id,court_name,court_id,num_sets_completed,match_type,match_status,filename
50,2021,B.KREJCIKOVA,M.SAKKARI,26436,CZE,,28771,GRE,17.0,3_16_10,Court Philippe CHATRIER,1,3,Women's Singles,C,wta_year_2021_SD002_tracking_data.json
61,2021,A.PAVLYUCHENKOVA,T.ZIDANSEK,15108,RUS,31.0,33787,SLO,,2_9_5,Court Philippe CHATRIER,1,2,Women's Singles,C,wta_year_2021_SD003_tracking_data.json
7,2021,C.GAUFF,B.KREJCIKOVA,44185,USA,24.0,26436,CZE,,2_9_4,Court Philippe CHATRIER,1,2,Women's Singles,C,wta_year_2021_SD004_tracking_data.json
98,2021,M.SAKKARI,I.SWIATEK,28771,GRE,17.0,40613,POL,8.0,2_10_6,Court Philippe CHATRIER,1,2,Women's Singles,C,wta_year_2021_SD005_tracking_data.json
95,2021,E.RYBAKINA,A.PAVLYUCHENKOVA,38569,KAZ,21.0,15108,RUS,31.0,3_16_5,Court Philippe CHATRIER,1,2,Women's Singles,I,wta_year_2021_SD006_tracking_data.json
23,2021,T.ZIDANSEK,P.BADOSA,33787,SLO,,32919,ESP,33.0,3_14_5,Court Philippe CHATRIER,1,2,Women's Singles,I,wta_year_2021_SD007_tracking_data.json
97,2021,O.JABEUR,C.GAUFF,23770,TUN,25.0,44185,USA,24.0,2_7_9,Court Philippe CHATRIER,1,1,Women's Singles,I,wta_year_2021_SD008_tracking_data.json
6,2021,S.STEPHENS,B.KREJCIKOVA,21500,USA,,26436,CZE,,2_6_9,Court Suzanne LENGLEN,2,1,Women's Singles,I,wta_year_2021_SD009_tracking_data.json
66,2021,S.KENIN,M.SAKKARI,35854,USA,4.0,28771,GRE,17.0,2_9_7,Court Suzanne LENGLEN,2,1,Women's Singles,I,wta_year_2021_SD010_tracking_data.json
47,2021,S.WILLIAMS,E.RYBAKINA,3370,USA,7.0,38569,KAZ,21.0,2_12_5,Court Philippe CHATRIER,1,2,Women's Singles,C,wta_year_2021_SD012_tracking_data.json


In [18]:
# Inspect the point-level rows of a single match; 'match_id' holds the name
# of the json file each row was read from.
rg_2021_data_frame[rg_2021_data_frame['match_id'] == 'year_2021_SM010_tracking_data.json']

Unnamed: 0,point_ID,set_num,game_num,point_num,serve_num,server_id,returner_id,point_winner_id,court_side,serve_speed_kph,...,returner_score,player1,player2,p1_cum_games,p2_cum_games,p1_cum_sets,p2_cum_sets,match_id,year,is_wta
0,1_1_1_1,1.0,1.0,1.0,1.0,7792,39723,7792.0,DeuceCourt,189 KPH,...,0.0,7792,39723,0.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
1,1_1_2_1,1.0,1.0,2.0,1.0,7792,39723,7792.0,AdCourt,185 KPH,...,0.0,7792,39723,0.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
2,1_1_3_1,1.0,1.0,3.0,1.0,7792,39723,7792.0,DeuceCourt,192 KPH,...,0.0,7792,39723,0.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
3,1_1_4_1,1.0,1.0,4.0,1.0,7792,39723,7792.0,AdCourt,194 KPH,...,0.0,7792,39723,0.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
4,1_2_1_1,1.0,2.0,1.0,1.0,39723,7792,,DeuceCourt,179.24 KPH,...,0.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
5,1_2_1_2,1.0,2.0,1.0,2.0,39723,7792,7792.0,DeuceCourt,143 KPH,...,0.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
6,1_2_2_1,1.0,2.0,2.0,1.0,39723,7792,39723.0,AdCourt,177 KPH,...,1.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
7,1_2_3_1,1.0,2.0,3.0,1.0,39723,7792,7792.0,DeuceCourt,192 KPH,...,1.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
8,1_2_4_1,1.0,2.0,4.0,1.0,39723,7792,7792.0,AdCourt,199.41 KPH,...,2.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
9,1_2_4_2,1.0,2.0,4.0,2.0,39723,7792,39723.0,AdCourt,133 KPH,...,2.0,7792,39723,1.0,0.0,0.0,0.0,year_2021_SM010_tracking_data.json,2021,False
