In [13]:
import json
import os
import random
import re
import time
from datetime import datetime
from typing import Dict, Union

import pandas as pd
import requests
from IPython.display import clear_output, display

In [14]:
def get_raw_match_payloads(event_id: Union[int, str], timeout: float = 30.0) -> Union[Dict, list, None]:
    """
    Fetches the raw JSON containing all "match payloads" (match metadata)
    for a given WTT event ID using a GET request.

    Match codes contained in the payload are required for the subsequent API
    call that retrieves full match details.

    Reverse engineered from WTT events pages such as:
    https://www.worldtabletennis.com/eventInfo?eventId=3085&selectedTab=Matches

    Args:
        event_id (int or str): The unique identifier for the WTT event.
        timeout (float): Seconds to wait for the server before giving up.
            Without a timeout, ``requests`` may block indefinitely.

    Returns:
        dict, list or None: The decoded JSON body exactly as returned by the
        API, or None when the request fails or the body is not valid JSON.
        Callers should treat an empty result (e.g. ``[]``) as "no matches".
    """
    url = "https://liveeventsapi.worldtabletennis.com/api/cms/GetOfficialResult"

    params = {
        'EventId': str(event_id),  # Ensure it is a string for the query
        "DocumentCode": "TTE"
    }

    # Browser-like headers, mirroring what the WTT web page sends.
    headers = {
        'Accept': 'application/json, text/plain, */*',
        'Referer': 'https://www.worldtabletennis.com/',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36'
    }

    try:
        # 'params' automatically builds the query string.
        response = requests.get(url, params=params, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(f"❌ HTTP Error for Event {event_id}: {err}")
        return None
    except requests.exceptions.RequestException as err:
        print(f"❌ Connection Error for Event {event_id}: {err}")
        return None

    # Decode separately from the network call: depending on the installed
    # requests version, a bad body raises either a plain ValueError subclass or
    # a RequestException subclass, and it must not be reported as a
    # connection problem. Every variant is a ValueError.
    try:
        return response.json()
    except ValueError:
        print(f"❌ JSON Decode Error for Event {event_id}: Invalid response.")
        return None

In [15]:
# Download the raw match payloads for every event listed in the events CSV and
# store one CSV per event in `output_dir`.
events_file = "Data/Processed/Events/smash_events.csv"
output_dir = "Data/Raw/Smash_match_payloads"

os.makedirs(output_dir, exist_ok=True)

events_df = pd.read_csv(events_file)
event_total = len(events_df)
successful_event_ids = []

# Announce the run once, up front, rather than on every iteration.
print(f"---🟢Commencing obtaining raw match payloads from {events_file}---🟢 \n ------")

for number, event_id in enumerate(events_df["EventId"], start=1):

    raw_payloads = get_raw_match_payloads(event_id)
    if not raw_payloads:
        # Covers both a failed request (None) and an empty result.
        print(f"No match codes found for event_{event_id} ⚪({number}/{event_total}) - Skipping.\n---")
        continue

    payloads_df = pd.DataFrame(raw_payloads)

    filename = os.path.join(output_dir, f"{event_id}_match_payloads.csv")
    payloads_df.to_csv(filename, index=False)

    # Only count the event once its file has actually been written.
    successful_event_ids.append(event_id)

    sleep_duration = random.uniform(0.3, 0.8)
    display(f"{len(payloads_df)} match codes obtained for event ID:{event_id} 🔄({number}/{event_total}) - now pausing for {sleep_duration:.1f}s to give the API a break \n---")
    # wait=True defers the clear until the next output arrives, so the progress
    # line stays visible during the pause and is then replaced in place.
    clear_output(wait=True)
    time.sleep(sleep_duration)

print(f"✅ Finished! \nMatch payloads found for {len(successful_event_ids)}/{event_total} events.")
print(f"Successful Event IDs: {successful_event_ids}")

✅ Finished! 
Match payloads found for 9/9 events.
Successful Event IDs: [2536, 2629, 2904, 2932, 2942, 3085, 3082, 3128, 3098]


In [126]:
# Number of events recorded as successfully fetched.
# NOTE(review): the saved output of this cell does not match the count reported
# by the fetch loop's saved output, so the list appears to come from a
# different run - confirm with Restart & Run All.
len(successful_event_ids)

155

In [127]:
# Every event ID listed in the events CSV, as a plain Python list.
all_event_ids = list(events_df["EventId"])

In [128]:
# Event IDs from the CSV that produced no payload file, in their original order.
# A set is built once so each membership test is O(1) instead of a list scan.
successful_event_id_lookup = set(successful_event_ids)
unsuccessful_event_ids = [event for event in all_event_ids if event not in successful_event_id_lookup]

In [129]:
# Show the IDs of the events for which no payloads were obtained.
print (unsuccessful_event_ids)

[3099, 3100, 3066, 3191, 3065, 3059, 3112, 3110, 3176, 2263, 2265]


In [134]:
# Rows of the events table whose EventId is in the unsuccessful list.
unsuccessful_events_df = events_df[events_df["EventId"].isin(unsuccessful_event_ids)]

In [135]:
# Count the directory entries (files and sub-folders alike) in the payload folder.
# NOTE(review): this path differs from the `output_dir` the fetch loop writes to
# ("Data/Raw/Smash_match_payloads") - confirm which folder is intended.
succ_dir="Data/Raw/Match_payloads"
print (len(os.listdir(succ_dir)))

156


In [136]:
# Compare against the directory entry count printed in the previous cell.
len(successful_event_ids)

155

In [137]:
# Sorts the list in place: the original fetch order is lost, and cells that ran
# earlier now refer to the reordered list.
successful_event_ids.sort()

In [146]:
# Sorting does not change the length; this re-displays the same count.
len(successful_event_ids)

155

In [150]:
# Reduce every entry name in `succ_dir` to the digits it contains.
# A name with no digits at all collapses to an empty string, and digits from
# anywhere in the name are concatenated.
success_event_file_numbers = [
    re.sub(r'[^0-9]', '', entry_name)
    for entry_name in os.listdir(succ_dir)
]
print(len(success_event_file_numbers))

# The elements are strings, so this in-place sort is lexicographic, not numeric.
success_event_file_numbers.sort()

    
    

156


In [153]:
# Last element after the in-place sort. The entries are digit strings, so this
# is the lexicographically greatest value, not necessarily the largest number.
success_event_file_numbers[-1]

'3199'

In [7]:
def get_singles_match_payloads(event_id):
    """
    Fetch the raw match payloads for an event and keep only the singles matches.

    Args:
        event_id (int or str): The unique identifier for the WTT event.

    Returns:
        The filtered singles payloads as returned by the filter helper, or an
        empty list when the event has no payloads, the request failed, or the
        filter found nothing.
    """
    match_payloads = get_raw_match_payloads(event_id)
    if not match_payloads:
        # None (request failed) or an empty result: there is nothing to filter.
        print(f"0 match payloads found for event {event_id}")
        return []
    print(f"✅ Obtained {len(match_payloads)} match payloads")

    # NOTE(review): the filter helper is not defined in the visible part of the
    # notebook; its name is kept exactly as originally referenced - confirm it
    # matches the real definition.
    singles_match_payloads = filter_singles_match__payloads(match_payloads) or []
    if singles_match_payloads:
        print(f"✅ Obtained {len(singles_match_payloads)} singles match payloads")
    else:
        print(f"{len(singles_match_payloads)} singles matches found")
    return singles_match_payloads

In [120]:
def get_event_ids(event_file):
    """
    Read the events CSV and return its ``EventId`` column.

    Args:
        event_file: Path (or anything ``pandas.read_csv`` accepts) of the events CSV.

    Returns:
        pandas.Series: The ``EventId`` column, one entry per row of the file.
    """
    return pd.read_csv(event_file)["EventId"]

def get_event_match_payloads(event_file,output_dir = "./Data/Raw/Smash_events"):
    """
    Fetch the singles match payloads for every event in ``event_file`` and
    write them to one CSV per event inside ``output_dir``.

    Args:
        event_file: Path of the events CSV; must contain an ``EventId`` column.
        output_dir (str): Folder that receives the per-event CSV files. It is
            created if it does not exist yet.

    Returns:
        None. The results are the files written to ``output_dir``.
    """
    event_ids = get_event_ids(event_file)
    event_count = len(event_ids)

    # to_csv does not create missing folders, so make sure the target exists.
    os.makedirs(output_dir, exist_ok=True)

    # Count from 1 so the progress read-out ends at event_count/event_count.
    for number, event_id in enumerate(event_ids, start=1):

        print (f"obtaining match codes for event {event_id} ({number}/{event_count})")

        match_payloads = pd.DataFrame(get_singles_match_payloads(event_id))

        file_name = f"event_{event_id}_match_codes.csv"
        match_payloads.to_csv(os.path.join(output_dir, file_name), index=False)

        # Random pause between requests to avoid hammering the API.
        sleep_duration = random.uniform(0.5,1.5)
        print (f"Match codes obtained - now pausing for {sleep_duration:.1f}s to give the API a break \n --- {event_id} ")
        time.sleep(sleep_duration)

    return None
        
        
        
    

                        

In [189]:
import os

# Named descriptively: calling this `dir` would shadow Python's built-in dir()
# for the rest of the kernel session.
match_codes_dir = "Data/Raw/Match_codes"

# Number of directory entries (files and sub-folders alike) in the match-codes folder.
len(os.listdir(match_codes_dir))

167

In [119]:
# Probe the API with event ID 1000. In the saved run the call returned an empty
# list rather than None, i.e. the request itself succeeded but held no matches.
get_raw_match_payloads(1000)

[]