In [103]:
import json
import os
import random
import time
from datetime import datetime

import pandas as pd
import requests
from IPython.display import display, clear_output

In [6]:
def get_raw_match_payloads(event_id, timeout=30):
    """
    Fetch the raw official-result payload for one event.

    Sends a GET request to the World Table Tennis ``GetOfficialResult``
    endpoint with ``DocumentCode="TTE"`` and returns the decoded JSON body.
    Callers turn that body into a DataFrame themselves; this function does
    not build one.

    Parameters
    ----------
    event_id : int or str
        Value sent as the ``EventId`` query parameter.
    timeout : float or tuple, optional
        Timeout in seconds handed to ``requests.get``. Without one, a stalled
        connection would block the caller indefinitely. Defaults to 30.

    Returns
    -------
    object or None
        The decoded JSON response, or ``None`` when the request fails, the
        server answers with an HTTP error status, or the body is not valid
        JSON. Failures are printed, not raised.
    """
    url = "https://liveeventsapi.worldtabletennis.com/api/cms/GetOfficialResult"

    params = {
        'EventId': event_id,
        "DocumentCode": "TTE"
    }

    # Browser-like headers copied verbatim from the original request.
    headers = {
        'accept': 'application/json, text/plain, */*',
        'accept-language': 'en-GB,en;q=0.9,es;q=0.8',
        'cache-control': 'no-cache',
        'dnt': '1',
        'origin': 'https://www.worldtabletennis.com',
        'pragma': 'no-cache',
        'priority': 'u=1, i',
        'referer': 'https://www.worldtabletennis.com/',
        'sec-ch-ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'cors',
        'sec-fetch-site': 'same-site',
        'user-agent': 'Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36'
    }

    try:
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error occurred: {err}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred: {err}")
    except ValueError as err:
        # Older versions of requests raise a plain ValueError for a non-JSON body.
        print(f"Could not decode JSON response: {err}")

    return None






In [123]:
# Download the raw match payloads for every event listed in the events CSV
# and write one CSV per event into output_dir.
events_file = "Data/Processed/Events/smash_events.csv"
output_dir = "Data/Raw/Match_payloads"

os.makedirs(output_dir, exist_ok=True)

events_df = pd.read_csv(events_file)
event_total = len(events_df)
successful_event_ids = []

# Announce the run once instead of re-printing the banner on every iteration.
print(f"---🟢Commencing obtaining raw match payloads from {events_file}---🟢 \n ------")

# start=1 gives the 1-based position used in the progress messages.
for number, event_id in enumerate(events_df["EventId"], start=1):

    raw_payloads = get_raw_match_payloads(event_id)
    if not raw_payloads:
        # None (request failed) and an empty payload are both treated as "nothing to save".
        print(f"No match codes found for event_{event_id} ⚪({number}/{event_total}) - Skipping.\n---")
        # Pause here as well, so a failed event is not followed by an immediate request.
        time.sleep(random.uniform(0.3, 1.3))
        continue

    payloads_df = pd.DataFrame(raw_payloads)

    successful_event_ids.append(event_id)

    filename = f"{output_dir}/{event_id}_match_payloads.csv"
    payloads_df.to_csv(filename, index=False)

    sleep_duration = random.uniform(0.3, 1.3)
    display(f"{len(payloads_df)} match codes obtained for event ID:{event_id} 🔄({number}/{event_total}) - now pausing for {sleep_duration:.1f}s to give the API a break \n ---")
    # wait=True defers the clear until the next output arrives, so the
    # progress line stays visible during the pause.
    clear_output(wait=True)
    time.sleep(sleep_duration)

print(f"✅ Finished! \nMatch payloads found for {len(successful_event_ids)}/{event_total} events.")
print(f"Successful Event IDs: {successful_event_ids}")

✅ Finished! 
Match payloads found for 9/9 events.
Successful Event IDs: [2536, 2629, 2904, 2932, 2942, 3085, 3082, 3128, 3098]


In [94]:
# Hard-coded event IDs. This assignment replaces whatever list the download
# loop left in `successful_event_ids`.
# NOTE(review): presumably collected from several earlier runs — confirm, and
# consider deriving it from the files on disk so the notebook re-runs cleanly.
successful_event_ids = [2345, 2603, 2593, 2619, 2737, 2728, 2705, 2899, 2941, 2983, 2940, 2877, 3086, 3087, 3094, 3097, 2410, 2487, 2502, 2503, 2504, 2531, 2532, 2539, 2574, 2568, 2605, 2606, 2589, 2699, 2698, 2697, 2693, 2691, 2692, 2721, 2722, 2739, 2775, 2742, 2794, 2861, 2867, 2865, 2934, 2869, 2868, 2871, 2872, 2873, 2997, 3084, 3088, 3089, 3090, 3092, 3121, 3175, 3096, 2519, 2521, 2522, 2534, 2569, 2570, 2540, 2592, 2541, 2542, 2658, 2630, 2701, 2700, 2702, 2703, 2704, 2750, 2724, 2723, 2740, 2738, 2734, 2733, 2735, 2755, 2732, 2878, 2879, 2893, 2894, 2880, 2882, 2881, 2883, 2911, 2884, 2965, 2885, 2886, 2887, 2889, 2964, 2912, 2890, 2891, 3026, 3020, 3030, 3024, 3025, 3069, 3023, 3058, 3177, 3178, 3131, 3027, 3028, 3199, 3031, 2516, 2627, 2776, 2866, 2947, 2536, 2629, 2904, 2932, 2942, 3085, 3082, 3128, 3098, 2411, 2489, 2533, 2591, 2696, 2695, 2694, 2777, 2862, 2863, 2870, 2900, 3083, 3133, 3091, 3093, 2346, 2660, 3108, 2937, 3109]

In [95]:
# Every EventId from the events table, as a plain Python list.
all_event_ids = events_df["EventId"].tolist()

In [100]:
# Events present in the events table but absent from the successful list,
# in the order they appear in all_event_ids.
# The set is built once so each membership test is a hash lookup rather than
# a scan of the whole list.
_successful_id_lookup = set(successful_event_ids)
unsuccessful_event_ids = [
    event_id for event_id in all_event_ids if event_id not in _successful_id_lookup
]

In [101]:
# Show the IDs that have no downloaded payloads.
print(unsuccessful_event_ids)

[3099, 3100, 3066, 3191, 3065, 3059, 3112, 3110, 3176, 2263, 2265]


In [107]:
# Rows of the events table whose EventId is in the unsuccessful list.
is_unsuccessful_event = events_df["EventId"].isin(unsuccessful_event_ids)
unsuccessful_events_df = events_df.loc[is_unsuccessful_event]

In [110]:
# Display the unsuccessful events ordered by start date (returns a sorted
# copy; unsuccessful_events_df itself is left as-is).
unsuccessful_events_df.sort_values("StartDateTime")

Unnamed: 0,PageLink,EventName,EventType,EventTypeId,Country,City,ContinentCode,Subcontinent,StartDateTime,EndDateTime,...,Comments,EventDateChangeId,EventId,FromStartDate,FromEndDate,ToStartDate,ToEndDate,ShowInCalendar,Type,Event_Tier_Name
162,,Dishang 2020 ITTF Women's World Cup,World Cup,29,China,Weihai,asia,east asia,2020-11-08T00:00:00,2020-11-10T00:00:00,...,,,2263,,,,,,,
163,,Dishang 2020 ITTF Men's World Cup,World Cup,29,China,Weihai,asia,east asia,2020-11-13T00:00:00,2020-11-15T00:00:00,...,,,2265,,,,,,,
157,,WTT Star Contender London 2025,WTT Star Contender,35,England,London,europe,northern europe,2025-10-21T00:00:00,2025-10-26T00:00:00,...,,,3110,,,,,,,WTT Series
16,,WTT Champions Montpellier 2025,WTT Champions,65,France,Montpellier,europe,western europe,2025-10-28T00:00:00,2025-11-02T00:00:00,...,,,3099,,,,,,,WTT Series
17,,WTT Champions Frankfurt 2025,WTT Champions,65,Germany,Frankfurt,europe,western europe,2025-11-04T00:00:00,2025-11-09T00:00:00,...,,,3100,,,,,,,WTT Series
122,,WTT Feeder Vila Nova de Gaia 2025,WTT Feeder,81,Portugal,Vila Nova de Gaia,europe,southern europe,2025-11-05T00:00:00,2025-11-09T00:00:00,...,,,3066,,,,,,,WTT Feeder Series
123,,WTT Feeder Gdansk 2025,WTT Feeder,81,Poland,Gdansk,europe,eastern europe,2025-11-10T00:00:00,2025-11-14T00:00:00,...,,,3191,,,,,,,WTT Feeder Series
158,,WTT Star Contender Muscat 2025,WTT Star Contender,35,Oman,Muscat,asia,west asia,2025-11-17T00:00:00,2025-11-22T00:00:00,...,,,3176,,,,,,,WTT Series
124,,WTT Feeder Düsseldorf II 2025,WTT Feeder,81,Germany,Düsseldorf,europe,western europe,2025-11-24T00:00:00,2025-11-27T00:00:00,...,,,3065,,,,,,,WTT Feeder Series
125,,WTT Feeder Parma 2025,WTT Feeder,81,Italy,Parma,europe,southern europe,2025-11-29T00:00:00,2025-12-03T00:00:00,...,,,3059,,,,,,,WTT Feeder Series


In [None]:
def filter_singles_match_payloads(match_payloads):
    """
    Keep only the match payloads whose ``subEventType`` contains "Singles".

    Parameters
    ----------
    match_payloads : iterable of dict or None
        Match payloads to filter. ``None`` is treated as "no payloads", which
        is what ``get_raw_match_payloads`` returns after a failed request.

    Returns
    -------
    list
        The payloads whose ``subEventType`` value contains the substring
        "Singles", in their original order. Payloads with a missing or null
        ``subEventType`` are left out rather than raising.
    """
    if match_payloads is None:
        return []

    return [
        match_payload
        for match_payload in match_payloads
        # `or ""` covers both a missing key and an explicit null value.
        if "Singles" in (match_payload.get("subEventType") or "")
    ]

In [7]:
def get_singles_match_payloads(event_id):
    """
    Fetch an event's match payloads and keep only the singles matches.

    Parameters
    ----------
    event_id : int or str
        Event identifier passed to ``get_raw_match_payloads``.

    Returns
    -------
    list
        The payloads kept by ``filter_singles_match_payloads``. An empty list
        is returned when the fetch fails or yields nothing.
    """
    match_payloads = get_raw_match_payloads(event_id)
    if not match_payloads:
        # The fetch returned None (request failed) or an empty payload:
        # there is nothing to filter.
        print("0 singles matches found")
        return []
    print(f"✅ Obtained {len(match_payloads)} match payloads")

    singles_match_payloads = filter_singles_match_payloads(match_payloads)
    if singles_match_payloads:
        print(f"✅ Obtained {len(singles_match_payloads)} singles match payloads")
    else:
        print(f"{len(singles_match_payloads)} singles matches found")
    return singles_match_payloads

In [120]:
def get_event_ids(event_file):
    """
    Read an events CSV and return its ``EventId`` column.

    Parameters
    ----------
    event_file : str or path-like
        Path of the CSV file to read with ``pd.read_csv``.

    Returns
    -------
    pandas.Series
        The ``EventId`` column of the loaded table.
    """
    return pd.read_csv(event_file)["EventId"]

def get_event_match_payloads(event_file, output_dir="./Data/Raw/Smash_events"):
    """
    Download the singles match payloads for every event in an events CSV.

    For each ``EventId`` in ``event_file`` the raw payloads are fetched,
    reduced to singles matches, and written to
    ``<output_dir>/event_<event_id>_match_codes.csv``. A random pause follows
    every request.

    Parameters
    ----------
    event_file : str or path-like
        CSV file containing an ``EventId`` column.
    output_dir : str, optional
        Directory for the per-event CSV files; created if it does not exist.

    Returns
    -------
    None
    """
    event_ids = get_event_ids(event_file)
    event_count = len(event_ids)

    # to_csv does not create missing directories, so make sure it exists first.
    os.makedirs(output_dir, exist_ok=True)

    # enumerate yields (index, value) pairs; start=1 makes the counter 1-based.
    for number, event_id in enumerate(event_ids, start=1):

        print(f"obtaining match codes for event {event_id} ({number}/{event_count})")

        raw_payloads = get_raw_match_payloads(event_id)
        # Guard against the None returned after a failed request before filtering.
        singles_payloads = filter_singles_match_payloads(raw_payloads) if raw_payloads else []

        sleep_duration = random.uniform(0.5, 1.5)
        if singles_payloads:
            match_payloads = pd.DataFrame(singles_payloads)
            file_name = f"event_{event_id}_match_codes.csv"
            match_payloads.to_csv(f"{output_dir}/{file_name}", index=False)
            print(f"Match codes obtained - now pausing for {sleep_duration:.1f}s to give the API a break \n --- {event_id} ")
        else:
            print(f"No singles match codes found for event {event_id} - skipping \n --- {event_id} ")
        time.sleep(sleep_duration)

    return None
        
        
        
    

                        

In [189]:
import os

# Named match_codes_dir rather than `dir`, which would shadow the builtin
# dir() for the rest of the kernel session.
match_codes_dir = "Data/Raw/Match_codes"

# Number of entries (files and sub-directories) in the match-codes folder.
len(os.listdir(match_codes_dir))

167

In [119]:
# Probe the endpoint with event ID 1000 and display what comes back.
get_raw_match_payloads(event_id=1000)

[]