# Functions to extract information from json files

In [20]:
import json
import pandas as pd
import os

# Show every column and every row when a DataFrame is displayed.
# Fully-qualified option keys are used on purpose: pandas resolves abbreviated
# keys by pattern matching, and a short pattern such as "max_rows" can match
# more than one registered option (which raises OptionError).
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [2]:
def get_match_level_info(tracking_data_json):
    '''
    Build one match-level summary row from a parsed tracking JSON.

    Args:
    -----
    tracking_data_json [dict]: Parsed JSON content; only the first entry of
                               its 'courtVisionData' list is read

    Returns:
    --------
    dict of row to append into a dataframe (player, court and match status fields)
    '''

    # Everything of interest sits under the first 'courtVisionData' entry
    match_data = tracking_data_json['courtVisionData'][0]

    # The 'isMatchComplete' field is also present but is not extracted here

    # -----------------------------
    # Player information
    # -----------------------------
    team_one = match_data['playersData']['playerTeam']
    team_two = match_data['playersData']['opponentTeam']

    # Only the first listed member of each team is read
    return {
        'player1': team_one[0]['name'],
        'player2': team_two[0]['name'],
        'player1_id': team_one[0]['id'],
        'player1_country': team_one[0]['country'],
        'player1_seed': team_one[0]['seed'],
        'player2_id': team_two[0]['id'],
        'player2_country': team_two[0]['country'],
        'player2_seed': team_two[0]['seed'],

        # -------------------------------
        # - Meaning of these fields is not yet confirmed
        # -------------------------------
        'point_id': match_data['pointId'],
        'court_name': match_data['courtName'],
        'court_id': match_data['courtId'],
        'num_sets_completed': match_data['setsCompleted'],

        # Mens/Womens Singles/ Doubles
        'match_type': match_data['eventType'],

        # Complete status?
        'match_status': match_data['matchStatus'],
    }
    
    
    

In [15]:
### Loop through all json files in directory
JSON_DATA_DIR = './json_data/'

# One match-level row (dict) per tracking file
data_list = []

# os.listdir returns names in arbitrary order; sorting keeps the row order
# of the resulting dataframe reproducible between runs and machines.
for filename in sorted(os.listdir(JSON_DATA_DIR)):
    # Anything that is not a .json file is ignored
    if not filename.endswith(".json"):
        continue

    with open(os.path.join(JSON_DATA_DIR, filename)) as json_file:
        tracking_data_json = json.load(json_file)

    data_list.append(get_match_level_info(tracking_data_json))

# Last expression of the cell, so the dataframe is displayed
pd.DataFrame(data_list)


Unnamed: 0,player1,player2,player1_id,player1_country,player1_seed,player2_id,player2_country,player2_seed,point_id,court_name,court_id,num_sets_completed,match_type,match_status
0,I.SWIATEK,S.KENIN,40613,POL,,35854,USA,4,2_7_6,Court Philippe CHATRIER,1,2,Women's Singles,C
1,N.DJOKOVIC,R.NADAL,9801,SRB,1.0,7792,ESP,2,3_9_9,Court Philippe CHATRIER,1,3,Men's Singles,C


### Notes:
-----

* I suspect `point_id` is the ID of the last point played?
    * Ex: `2_7_6` would be | Set 2 | Game 7 | Point 6 ? |
    * Ex: `3_9_9` would be | Set 3 | Game 9 ? (but Nadal vs Djokovic was game 12...) | ... |

### Rough Notes

In [2]:
# Load a single tracking json file to explore its structure
with open('json_data/year_2020_SM001_tracking_data.json') as filename:
    atp_tracking_data_json = json.load(filename)

In [3]:
# Take the first entry of the 'courtVisionData' list and list its top-level keys
atp_tracking_data_dict = atp_tracking_data_json['courtVisionData'][0]
atp_tracking_data_dict.keys()

dict_keys(['pointsData', 'eventType', 'matchStatus', 'playersData', 'pointId', 'isMatchComplete', 'statsData', 'courtName', 'courtId', 'setsCompleted'])

In [19]:
# *********************************************************************************
#                Scrape match stats (anything that isn't point-by-point)
# *********************************************************************************
# Scratch exploration of the match-level fields of `atp_tracking_data_dict`.
# The same lookups are packaged as a reusable function in get_match_level_info.
#
# NOTE: the three bare lookups below are not the last expression of the cell,
# so nothing is displayed for them; they only fail if a key is missing.

# Mens/Womens Singles/ Doubles
atp_tracking_data_dict['eventType']

# Complete status?
atp_tracking_data_dict['matchStatus']
atp_tracking_data_dict['isMatchComplete']


# -----------------------------
# Player information
# -----------------------------
# Each team is a list; only its first entry is read below
player1_info = atp_tracking_data_dict['playersData']['playerTeam']
player2_info = atp_tracking_data_dict['playersData']['opponentTeam']

player1 = player1_info[0]['name']
player1_id = player1_info[0]['id']
player1_country = player1_info[0]['country']
player1_seed = player1_info[0]['seed']

player2 = player2_info[0]['name']
player2_id = player2_info[0]['id']
player2_country = player2_info[0]['country']
player2_seed = player2_info[0]['seed']

# -------------------------------
# - I have no idea what this is
# -------------------------------
point_id = atp_tracking_data_dict['pointId']

court_name = atp_tracking_data_dict['courtName']

court_id = atp_tracking_data_dict['courtId']

num_sets_completed = atp_tracking_data_dict['setsCompleted']
# Last expression of the cell, so its value is displayed
num_sets_completed

3

### Play-by-Play processing 

In [65]:
def get_point_level_info(one_point_sequence):
    '''
    Flatten one point's tracking record into a single row.

    Args:
    -----
    one_point_sequence [dict]: Dictionary describing one point
                               (one entry of 'pointsData')

    Returns:
    --------
    dict of row to append into a dataframe

    Raises:
    -------
    KeyError if an expected key is missing from one_point_sequence

    Notes:
    ------
    Don't convert them to integers...yet
    (values are passed through exactly as found in the record)
    '''

    # Values of the ball-speed fields that are treated as "no reading".
    # NOTE(review): sample output in this notebook also shows values such as
    # '0 KPH' -- confirm whether those should trigger the fallback as well.
    missing_speed_markers = ('0', 'NA')

    # A membership test is used instead of `(x == '0') | x == 'NA'`: `|` binds
    # tighter than `==`, so that expression evaluated `((x == '0') | x) == 'NA'`
    # and raised TypeError (bool | str) instead of checking either marker.
    serve_speed_kph = one_point_sequence['ballSpeedFrench']
    if serve_speed_kph in missing_speed_markers:
        # Fall back to the return-speed field when the ball speed is missing
        serve_speed_kph = one_point_sequence['returnSpeedFrench']

    serve_speed_v2 = one_point_sequence['ballSpeed']
    if serve_speed_v2 in missing_speed_markers:
        serve_speed_v2 = one_point_sequence['returnSpeed']

    point_dict = dict(
        # Match situation information
        #point_ID_v2 = one_point_sequence['id'],
        point_ID = one_point_sequence['pointId'],
        set_num = one_point_sequence['set'],
        #set_num_v2 = one_point_sequence['setNumber'],
        game_num = one_point_sequence['game'],
        point_number = one_point_sequence['point'],
        #point_number_v2 = one_point_sequence['pointNumber'],
        serve_number = one_point_sequence['serve'],

        # players involved
        server_id = one_point_sequence['serverId'],
        returner_id = one_point_sequence['receiverId'],
        point_winner_id = one_point_sequence['scorerId'],
        court_side = one_point_sequence['court'],

        # Serve Stats
        serve_speed_kph = serve_speed_kph,
        serve_speed_v2 = serve_speed_v2,
        serve_type = one_point_sequence['serveType'],
        return_placement = one_point_sequence['returnPlacement'],

        # How point ended
        rally_length = one_point_sequence['rallyLength'],
        point_end_type = one_point_sequence['pointEndType'],
        error_type = one_point_sequence['errorType'],
        trapped_by_net = one_point_sequence['trappedByNet'],
        distanceOutsideCourt = one_point_sequence['distanceOutsideCourt'],
        distanceOutsideCourt_fr = one_point_sequence['distanceOutsideCourtFrench'],
        strokeType = one_point_sequence['strokeType'],
        hand = one_point_sequence['hand'],

        last_stroke_net_clearance_ft = one_point_sequence['heightAboveNet'],
        last_stroke_net_clearance_m = one_point_sequence['heightAboveNetFrench'],
        winner_placement = one_point_sequence['winnerPlacement'],
        unforcedErrorPlacement = one_point_sequence['unforcedErrorPlacement'],
        is_break_point = one_point_sequence['breakPoint'],
        # Column name keeps its historical spelling so existing consumers still work
        is_break_point_connverted = one_point_sequence['breakPointConverted'],
        runAroundForeHand = one_point_sequence['runAroundForeHand'],

        # unknowns
        spin = one_point_sequence['spin'],
        cruciality = one_point_sequence['cruciality'],

        # Tracking info
        serveBounceCordinate_x = one_point_sequence['serveBounceCordinate']['x'],
        serveBounceCordinate_y = one_point_sequence['serveBounceCordinate']['y'],
        serveBounceCordinate_z = one_point_sequence['serveBounceCordinate']['z'],

        # (initial) Ball coordinate on last shot
        ballHitCordinate_x = one_point_sequence['ballHitCordinate']['x'],
        ballHitCordinate_y = one_point_sequence['ballHitCordinate']['y'],
        ballHitCordinate_z = one_point_sequence['ballHitCordinate']['z'],

        # Ball coordinate on its last bounce of rally
        ballBounceCordinate_x = one_point_sequence['ballBounceCordinate']['x'],
        ballBounceCordinate_y = one_point_sequence['ballBounceCordinate']['y'],
        ballBounceCordinate_z = one_point_sequence['ballBounceCordinate']['z']
    )

    return point_dict

In [68]:
# Open one tracking json file
with open('json_data/year_2020_SM001_tracking_data.json') as filename:
  atp_tracking_data_json = json.load(filename)

atp_tracking_data_dict = atp_tracking_data_json['courtVisionData'][0]
point_id = '1_1_6_1'
one_point_sequence = atp_tracking_data_dict['pointsData'][point_id]


get_point_level_info(one_point_sequence)

{'point_ID': '1_1_6_1',
 'set_num': '1',
 'set_num_v2': '1',
 'game_num': '1',
 'point_number': '6',
 'serve_number': '1',
 'server_id': '9801',
 'returner_id': '7792',
 'point_winner_id': '7792',
 'court_side': 'AdCourt',
 'serve_speed_kph': '190 KPH',
 'serve_speed_v2': '190 KPH',
 'serve_type': 'Flat',
 'return_speed': '190 KPH',
 'return_placement': 3.5097500000000004,
 'spin': '3162.74',
 'distanceOutsideCourt': 'NA',
 'distanceOutsideCourt_fr': 'NA',
 'hand': 'BackHand',
 'cruciality': 'false',
 'unforcedErrorPlacement': 'NA',
 'strokeType': 'Ground',
 'rally_length': 10,
 'is_break_point': False,
 'is_break_point_connverted': False,
 'point_end_type': 'Winner',
 'error_type': 'NA',
 'winner_placement': 'Cross Court',
 'trapped_by_net': False,
 'runAroundForeHand': False,
 'last_stroke_net_clearance_ft': '5.01 Feet',
 'last_stroke_net_clearance_m': '1.53 Metre',
 'serveBounceCordinate_x': 5.643,
 'serveBounceCordinate_y': -0.478,
 'serveBounceCordinate_z': 0.039,
 'ballHitCordina

In [28]:
def _point_id_sort_key(point_id):
    '''Sort key ordering ids such as '1_10_2_1' by their numeric parts, not as text.'''
    try:
        return (0, tuple(int(part) for part in point_id.split('_')), point_id)
    except (AttributeError, ValueError):
        # Ids that are not underscore-separated integers sort after the
        # well-formed ones, in plain string order
        return (1, (), str(point_id))


def get_match_point_level_info(raw_json_file):
    '''
    Build the point-by-point dataframe for one match.

    Args:
    -----
    raw_json_file [dict]: Parsed tracking JSON; the 'pointsData' mapping of
                          its first 'courtVisionData' entry is read

    Returns:
    --------
    pandas DataFrame with one row per entry of 'pointsData', as produced by
    get_point_level_info, ordered by point id
    '''
    all_tracking_data_dict = raw_json_file['courtVisionData'][0]['pointsData']

    # Point ids look like '1_1_6_1'. A plain string sort would put '1_10_...'
    # before '1_2_...', so the ids are ordered by their integer components.
    ordered_point_ids = sorted(all_tracking_data_dict, key=_point_id_sort_key)

    data_list = [
        get_point_level_info(all_tracking_data_dict[point_id_key])
        for point_id_key in ordered_point_ids
    ]

    match_point_df = pd.DataFrame(data_list)
    return match_point_df



In [29]:
# Build the point-level dataframe for the match loaded into atp_tracking_data_json
test_match = get_match_point_level_info(atp_tracking_data_json)

In [64]:
# Display the point-level dataframe (last expression of the cell)
test_match
# Alternative view kept for reference: a single column only
#test_match[['runAroundForeHand']]


Unnamed: 0,point_ID,point_ID_v2,set_num,game_num,point_number,serve_number,server_id,returner_id,point_winner_id,court_side,serve_speed_kph,serve_speed_v2,serve_type,serve_net_clearance_ft,serve_net_clearance_m,return_speed,return_placement,spin,distanceOutsideCourt,distanceOutsideCourt_fr,hand,cruciality,unforcedErrorPlacement,strokeType,rally_length,is_break_point,is_break_point_connverted,point_end_type,error_type,winner_placement,trapped_by_net,runAroundForeHand,serveBounceCordinate_x,serveBounceCordinate_y,serveBounceCordinate_z,ballHitCordinate_x,ballHitCordinate_y,ballHitCordinate_z,ballBounceCordinate_x,ballBounceCordinate_y,ballBounceCordinate_z
0,1,1_1_1_1,1,1,1,1,9801,7792,,DeuceCourt,0 KPH,,,,0 Metre,,,0.0,,,BackHand,False,,,0,False,False,Faulty Serve,,,False,False,,,,,,,,,
1,1,1_1_1_2,1,1,1,2,9801,7792,9801.0,DeuceCourt,136 KPH,136 KPH,,,0 Metre,,,0.0,,,ForeHand,False,,,3,False,False,Unforced Error,,,False,False,,,,,,,,,
2,2,1_1_2_1,1,1,2,1,9801,7792,9801.0,DeuceCourt,0 KPH,,,,0 Metre,,,0.0,,,BackHand,False,,,0,False,False,Faulty Serve,,,False,False,,,,,,,,,
3,2,1_1_2_2,1,1,2,2,9801,7792,7792.0,AdCourt,0 KPH,,,,0 Metre,,,0.0,,,BackHand,False,,,6,False,False,Forced Error,,,False,False,,,,,,,,,
4,3,1_1_3_1,1,1,3,1,9801,7792,7792.0,DeuceCourt,0 KPH,,,,0 Metre,,,0.0,,,BackHand,False,,,0,False,False,Faulty Serve,,,False,False,,,,,,,,,
5,1,1_1_3_2,1,1,3,2,9801,7792,9801.0,DeuceCourt,137 KPH,137 KPH,,,0 Metre,,,0.0,,,BackHand,False,,,5,False,False,Forced Error,,,False,False,,,,,,,,,
6,2,1_1_4_2,1,1,4,2,9801,7792,9801.0,AdCourt,153 KPH,153 KPH,,,0 Metre,,,0.0,,,BackHand,False,,,11,False,False,Winner,,,False,False,,,,,,,,,
7,1,1_1_5_1,1,1,5,1,9801,7792,7792.0,DeuceCourt,,,,,,194 KPH,,0.0,,,BackHand,False,,,2,False,False,Unforced Error,,,False,False,,,,,,,,,
8,2,1_1_6_1,1,1,6,1,9801,7792,7792.0,AdCourt,190 KPH,190 KPH,Flat,5.01 Feet,1.53 Metre,190 KPH,3.50975,3162.74,,,BackHand,False,,Ground,10,False,False,Winner,,Cross Court,False,False,5.643,-0.478,0.039,11.172,2.161,1.04,-9.23,-4.023,0.033
9,3,1_1_7_1,1,1,7,1,9801,7792,7792.0,DeuceCourt,174 KPH,174 KPH,Unclassified,,,174 KPH,4.3955,1214.72,,,ForeHand,False,,Ground,10,False,False,Forced Error,,,False,False,5.36,3.922,0.038,-15.478,-2.848,0.98,11.928,3.185,0.041
