## Script for scraping Court Vision data from Roland Garros and the Australian Open

In [2]:
import pandas as pd
import numpy as np
import json
import requests
import os

# Let pandas display up to 500 rows before truncating the output
pd.set_option('display.max_rows', 500)

# --> Import functions from "process" script
# The script lives in ../src, so that directory is added to the module search path first
import sys
sys.path.append('../src')

from process import (
    categorise_serve_direction,
    get_point_level_info,
    get_match_point_level_info,
    add_server_and_returner_scores,
    add_cum_games_and_sets,
    collect_serve_return_locations,
    collect_serve_plus1_locations,
)


### Scrape Roland Garros

In [None]:
### Scrape Roland Garros
# Save JSON Data

def save_tracking_data_roland_garros(year, num_matches = 20):
    '''
    Download Roland Garros men's singles Court Vision tracking data and save
    every available match as a JSON file in the current working directory.

    Match ids 'SM001', 'SM002', ... are requested one at a time. A match id is
    skipped silently when the response body is not valid JSON, or when its
    'courtVisionData' entry is missing or empty.

    Args:
    -----
    year: [int] tournament year, used in the request url and in the file name
    num_matches: [int] exclusive upper bound on the match number, i.e. match
                 numbers 1, ..., num_matches - 1 are requested

    Returns:
    --------
    None. One file 'atp_year_<year>_<match_id>_tracking_data.json' is written
    per match that has data.
    '''

    for i in range(1, num_matches):
        # Zero-pad the match number to at least three digits: SM001, SM042, SM128
        match_id = 'SM{:03d}'.format(i)

        #print(match_id)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'
        get_json_file = requests.get(api_url)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            #print('The Match {0} does not exist!'.format(match_id))
            continue

        # A response without a usable 'courtVisionData' entry (missing key, empty
        # value, or a non-object body) is skipped instead of aborting the whole run
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            #print('The Match {0} is empty!'.format(match_id))
            continue

        file_name = 'atp_year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)
            
            
def save_tracking_data_roland_garros_wta(year, num_matches = 20):
    '''
    Download Roland Garros women's singles Court Vision tracking data and save
    every available match as a JSON file in the current working directory.

    Match ids 'SD001', 'SD002', ... are requested one at a time. A match id is
    skipped (with a printed message) when the response body is not valid JSON,
    or when its 'courtVisionData' entry is missing or empty.

    Args:
    -----
    year: [int] tournament year, used in the request url and in the file name
    num_matches: [int] exclusive upper bound on the match number, i.e. match
                 numbers 1, ..., num_matches - 1 are requested

    Returns:
    --------
    None. One file 'wta_year_<year>_<match_id>_tracking_data.json' is written
    per match that has data.
    '''

    for i in range(1, num_matches):
        # Zero-pad the match number to at least three digits: SD001, SD042, SD128
        match_id = 'SD{:03d}'.format(i)

        #print(match_id)
        #https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/2021/eventId/520/matchId/SD103/pointId/0_0_0

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'
        get_json_file = requests.get(api_url)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            print('The Match {0} does not exist!'.format(match_id))
            continue

        # A response without a usable 'courtVisionData' entry (missing key, empty
        # value, or a non-object body) is skipped instead of aborting the whole run
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            print('The Match {0} is empty!'.format(match_id))
            continue

        file_name = 'wta_year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)

In [None]:
# -- Save Men's Singles matches
%%capture 
#save_tracking_data_roland_garros(year = 2021, num_matches=200)
#save_tracking_data_roland_garros(year = 2021, num_matches=500)
#save_tracking_data_roland_garros(year = 2020, num_matches=500)
#save_tracking_data_roland_garros(year = 2019, num_matches=500)

In [None]:
# -- Save Women's Singles matches
%%capture 
#save_tracking_data_roland_garros_wta(year = 2021, num_matches=200)
#save_tracking_data_roland_garros_wta(year = 2021, num_matches=500)
#save_tracking_data_roland_garros_wta(year = 2020, num_matches=500)
#save_tracking_data_roland_garros_wta(year = 2019, num_matches=500)

In [3]:
# -- Save Men's Qualifying matches
def save_tracking_data_roland_garros_qual_men(year, num_matches = 20):
    '''
    Download Roland Garros men's qualifying Court Vision tracking data and save
    every available match as a JSON file in the current working directory.

    Match ids 'QM001', 'QM002', ... are requested one at a time. A match id is
    skipped silently when the response body is not valid JSON, or when its
    'courtVisionData' entry is missing or empty.

    Args:
    -----
    year: [int] tournament year, used in the request url and in the file name
    num_matches: [int] exclusive upper bound on the match number, i.e. match
                 numbers 1, ..., num_matches - 1 are requested

    Returns:
    --------
    None. One file 'atp_qual_year_<year>_<match_id>_tracking_data.json' is
    written per match that has data.
    '''

    for i in range(1, num_matches):
        # Zero-pad the match number to at least three digits: QM001, QM042, QM128
        match_id = 'QM{:03d}'.format(i)

        #print(match_id)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'
        get_json_file = requests.get(api_url)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            #print('The Match {0} does not exist!'.format(match_id))
            continue

        # A response without a usable 'courtVisionData' entry (missing key, empty
        # value, or a non-object body) is skipped instead of aborting the whole run
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            #print('The Match {0} is empty!'.format(match_id))
            continue

        file_name = 'atp_qual_year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)
            
            
            

def save_tracking_data_roland_garros_qual_women(year, num_matches = 20):
    '''
    Download Roland Garros women's qualifying Court Vision tracking data and
    save every available match as a JSON file in the current working directory.

    Match ids 'QD001', 'QD002', ... are requested one at a time. A match id is
    skipped silently when the response body is not valid JSON, or when its
    'courtVisionData' entry is missing or empty.

    Args:
    -----
    year: [int] tournament year, used in the request url and in the file name
    num_matches: [int] exclusive upper bound on the match number, i.e. match
                 numbers 1, ..., num_matches - 1 are requested

    Returns:
    --------
    None. One file 'wta_qual_year_<year>_<match_id>_tracking_data.json' is
    written per match that has data.
    '''

    for i in range(1, num_matches):
        # Zero-pad the match number to at least three digits: QD001, QD042, QD128
        match_id = 'QD{:03d}'.format(i)

        #print(match_id)

        api_url = 'https://itp-rg-sls.infosys-platforms.com/prod/api/court-vision/year/' + str(year) + '/eventId/520/matchId/' + match_id +'/pointId/0_0_0'
        get_json_file = requests.get(api_url)

        try:
            tracking_data_json = get_json_file.json()
        except ValueError:
            # Body is not JSON: treated as "this match does not exist"
            #print('The Match {0} does not exist!'.format(match_id))
            continue

        # A response without a usable 'courtVisionData' entry (missing key, empty
        # value, or a non-object body) is skipped instead of aborting the whole run
        if not isinstance(tracking_data_json, dict) or not tracking_data_json.get('courtVisionData'):
            #print('The Match {0} is empty!'.format(match_id))
            continue

        file_name = 'wta_qual_year_' + str(year)  + '_' + match_id + '_tracking_data.json'

        with open(file_name, 'w') as json_file:
            json.dump(tracking_data_json, json_file)


In [4]:
# Scrape the men's qualifying draw for each season, most recent first
for season in (2021, 2020, 2019):
    save_tracking_data_roland_garros_qual_men(year = season, num_matches=100)