In [181]:
import requests
import json
import pandas as pd
import os
from datetime import datetime
import re
import random 
import time 
from IPython.display import display, clear_output, HTML
import glob

In [108]:
# Sample identifiers for a manual check: the final match of the men's China Smash 2025.
event_id, match_code = "3098", "TTEMSINGLES-----------FNL-000100----------"

In [182]:
def get_match_details(event_id, match_code, timeout=30):
    """
    Fetches the detailed card for a single match using its event ID and match code.

    Args:
        event_id (int or str): The ID of the event.
        match_code (str): The unique document code for the match.
        timeout (float or tuple, optional): Seconds to wait for the server,
            passed straight to ``requests.get``. Defaults to 30 so that a
            stalled connection cannot block a long scraping run forever.

    Returns:
        dict: The JSON response data as a Python dictionary, or None if an error occurs
        (HTTP error status, connection problem, timeout, or a body that is not valid JSON).
    """
    # The URL is an f-string, allowing us to insert the arguments dynamically
    url = f"https://liveeventsapi.worldtabletennis.com/api/cms/GetMatchCardDetails/{event_id}/{match_code}?&use_live_match_cache=false"

    # Browser-like headers sent with the request.
    headers = {
        'Accept': 'application/json',
        'Accept-Language': 'en-GB,en;q=0.9,es;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'DNT': '1',
        'Origin': 'https://www.worldtabletennis.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.worldtabletennis.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36',
        'sec-ch-ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"'
    }

    try:
        # Without a timeout, requests waits indefinitely on an unresponsive server.
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()  # Raise an error for bad status codes
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error for match {match_code}: {err}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred for match {match_code}: {err}")
    except ValueError as err:
        # Older requests versions raise a plain ValueError subclass when the
        # body is not valid JSON; keep the "None on error" contract for those too.
        print(f"Invalid JSON in response for match {match_code}: {err}")

    return None



  
    
    

In [183]:
# Load the processed events table from its CSV file.
events_df = pd.read_csv(filepath_or_buffer="Data/Processed/Events/20251007_events.csv")


In [184]:
# Find every singles payload CSV and report how many there are.
payload_files = glob.glob("Data/Processed/Match_payloads/*singles_payloads.csv")
print(len(payload_files))

# Keep only the paths that contain none of the strings in smash_ids_list.
# NOTE(review): smash_ids_list is not defined in any cell visible here, so this
# raises NameError on a fresh kernel whenever payload_files is non-empty — confirm
# where it is meant to come from.
# NOTE(review): filtered_payload_files is not referenced by the later cells visible
# here (the scraper loop iterates payload_files) — confirm which list is intended.
filtered_payload_files = []
for candidate_path in payload_files:
    if any(smash_id in candidate_path for smash_id in smash_ids_list):
        continue
    filtered_payload_files.append(candidate_path)

# Root folder for the raw match-detail JSON files; created if it is missing.
output_dir = "Data/Raw/Match_details"
os.makedirs(output_dir, exist_ok=True)


155


In [185]:
# Download the match-detail card for every match listed in every payload CSV
# and store each card as its own JSON file under output_dir.
start_time = time.perf_counter()

print("--- 🟢 Commencing Match Detail Scraper 🟢---")

# Problems are collected here and reported after the loop, because
# clear_output() below wipes anything printed while the loop is running
# (including the error messages printed by get_match_details()).
failed_matches = []   # (event_id, match_code) pairs whose request returned None
skipped_files = []    # payload CSVs that contained no rows

# NOTE(review): this iterates payload_files, not filtered_payload_files — confirm
# that scraping the unfiltered list is intended.
total_files = len(payload_files)
for file_number, file in enumerate(payload_files):
    payloads_df = pd.read_csv(file)
    total_matches = len(payloads_df)

    # An empty payload file has no first row to read the event id from; skip it
    # instead of letting .iloc[0] raise IndexError and abort the whole run.
    if payloads_df.empty:
        skipped_files.append(file)
        continue

    # The event id is read from the first row only.
    event_id = payloads_df["eventId"].iloc[0]
    match_codes = payloads_df["documentCode"]

    # One sub-folder per event.
    output_path = f"{output_dir}/{event_id}_match_details"
    os.makedirs(output_path, exist_ok=True)

    for match_number, match_code in enumerate(match_codes):
        # Short random delay between requests.
        pause = random.uniform(0.01, 0.1)
        match_details = get_match_details(event_id, match_code)

        if match_details is None:
            # The request failed: do not write a "null" JSON file that would
            # later look like a successfully downloaded match.
            failed_matches.append((event_id, match_code))
            status_line = f"Failed to obtain match details for {event_id}/{match_code}, pausing for {pause:.2f} s."
        else:
            file_path = f"{output_path}/{event_id}_{match_code}.json"
            with open(file_path, "w", encoding="utf-8") as fp:
                json.dump(match_details, fp)
            # .2f rather than .1f: the pause is between 0.01 and 0.1 s, which
            # one decimal place would mostly display as "0.0".
            status_line = f"Obtained Match Code for {event_id}/{match_code}, pausing for {pause:.2f} s."
        time.sleep(pause)

        end_time = time.perf_counter()
        duration = end_time - start_time
        minutes, seconds = divmod(duration, 60)

        # Redraw the status block in place instead of appending to the output.
        clear_output(wait=True)
        print("--- 🟢 Commencing Match Detail Scraper 🟢---")
        print(f"Processing Event {event_id}: {file_number + 1}/{total_files}")
        print(f"Processing Match:      {match_number + 1}/{total_matches}")
        print(status_line)
        print(f"Total run time = {int(minutes)} m and {seconds:.0f} s.")

print(f"✅ Finished obtaining match deatails for {total_files} events")
if skipped_files:
    print(f"Skipped {len(skipped_files)} payload file(s) with no matches: {skipped_files}")
if failed_matches:
    print(f"{len(failed_matches)} match(es) could not be downloaded; see `failed_matches` for the (event_id, match_code) pairs.")




--- 🟢 Commencing Match Detail Scraper 🟢---
Processing Event 2534: 155/155
Processing Match:      180/180
Obtained Match Code for 2534/TTEMSINGLES-----------RND1001800----------, pausing for 0.1 s.
Total run time = 315 m and 1 s.
✅ Finished obtaining match deatails for 155 events


In [86]:
# Display the Python type of match_details (the name the scraper loop assigns
# from get_match_details()).
# NOTE(review): this cell's execution count (86) is lower than the scraper
# cell's (185), so it was last run out of order — confirm it is still needed.
type(match_details)

dict

In [154]:
# For each entry directly under output_dir, record its path and how many
# items it contains.
events = [
    {"event": event_dir, "count": len(os.listdir(event_dir))}
    for event_dir in glob.glob(f"{output_dir}/*")
]

    

In [155]:
# Show the per-event tallies ordered by folder path.
sorted(events, key=lambda entry: entry["event"])

[{'event': 'Data/Raw/Match_details/2536_match_details', 'count': 210},
 {'event': 'Data/Raw/Match_details/2629_match_details', 'count': 237},
 {'event': 'Data/Raw/Match_details/2904_match_details', 'count': 237},
 {'event': 'Data/Raw/Match_details/2932_match_details', 'count': 238},
 {'event': 'Data/Raw/Match_details/2942_match_details', 'count': 236},
 {'event': 'Data/Raw/Match_details/3082_match_details', 'count': 264},
 {'event': 'Data/Raw/Match_details/3085_match_details', 'count': 238},
 {'event': 'Data/Raw/Match_details/3098_match_details', 'count': 237},
 {'event': 'Data/Raw/Match_details/3128_match_details', 'count': 238}]