In [107]:
import requests
import json
import pandas as pd
import os
from datetime import datetime
import re
import random 
import time 
from IPython.display import display, clear_output, HTML
import glob

In [108]:
# Test for the final match of men's China Smash 2025 
event_id = "3098"
match_code = "TTEMSINGLES-----------FNL-000100----------"

In [109]:
def get_match_details(event_id, match_code):
    """
    Fetches the detailed card for a single match using its event ID and match code.

    Args:
        event_id (int or str): The ID of the event.
        match_code (str): The unique document code for the match.

    Returns:
        dict: The JSON response data as a Python dictionary, or None if an error occurs.
    """
    # The URL is an f-string, allowing us to insert the arguments dynamically
    url = f"https://liveeventsapi.worldtabletennis.com/api/cms/GetMatchCardDetails/{event_id}/{match_code}?&use_live_match_cache=false"
    
    headers = {
        'Accept': 'application/json',
        'Accept-Language': 'en-GB,en;q=0.9,es;q=0.8',
        'Cache-Control': 'no-cache',
        'Connection': 'keep-alive',
        'Content-Type': 'application/json',
        'DNT': '1',
        'Origin': 'https://www.worldtabletennis.com',
        'Pragma': 'no-cache',
        'Referer': 'https://www.worldtabletennis.com/',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-site',
        'User-Agent': 'Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36',
        'sec-ch-ua': '"Chromium";v="140", "Not=A?Brand";v="24", "Google Chrome";v="140"',
        'sec-ch-ua-mobile': '?1',
        'sec-ch-ua-platform': '"Android"'
    }
    
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status() # Raise an error for bad status codes
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error for match {match_code}: {err}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred for match {match_code}: {err}")
        
    return None



  
    
    

In [117]:
payload_files = glob.glob("Data/Processed/Match_payloads/*singles_payloads.csv")


output_dir = "Data/Raw/Match_details"
os.makedirs(output_dir, exist_ok=True)

print(payload_files)

['Data/Processed/Match_payloads/2629_singles_payloads.csv', 'Data/Processed/Match_payloads/3085_singles_payloads.csv', 'Data/Processed/Match_payloads/2904_singles_payloads.csv', 'Data/Processed/Match_payloads/3128_singles_payloads.csv', 'Data/Processed/Match_payloads/2942_singles_payloads.csv', 'Data/Processed/Match_payloads/3098_singles_payloads.csv', 'Data/Processed/Match_payloads/2536_singles_payloads.csv', 'Data/Processed/Match_payloads/3082_singles_payloads.csv', 'Data/Processed/Match_payloads/2932_singles_payloads.csv']


In [123]:
start_time = time.perf_counter()

print("--- 🟢 Commencing Match Detail Scraper 🟢---")

total_files = len(payload_files)

for file_number, file in enumerate(payload_files):   

    
    
    
    payloads_df = pd.read_csv(file)
    total_matches = len(payloads_df)   
  

    event_id = payloads_df["eventId"].iloc[0]    
    match_codes = payloads_df["documentCode"]
   

    output_path = f"{output_dir}/{event_id}_match_details"
    os.makedirs(output_path, exist_ok=True)
  

    for match_number, match_code in enumerate(match_codes):
        pause = random.uniform(0.3,0.8)
        match_details = get_match_details(event_id, match_code)
        
        file_path= f"{output_path}/{event_id}_{match_code}.json"
        with open (file_path,"w") as fp:
            json.dump(match_details,fp)
        time.sleep(pause)


        end_time = time.perf_counter()
        duration = end_time - start_time
        minutes, seconds = divmod(duration, 60)
        
        clear_output(wait=True)
        print("--- 🟢 Commencing Match Detail Scraper 🟢---")
        print(f"Processing Event {event_id}: {file_number + 1}/{total_files}")
        print(f"Processing Match:      {match_number + 1}/{total_matches}")       
        print (f"Obtained Match Code for {event_id}/{match_code}, pausing for {pause:.1f} s.")
         print(f"Total run time = {int(minutes)} and {seconds:1f} seconds.")
        
        

       
        
        




--- 🟢 Commencing Match Detail Scraper 🟢---
Processing Event 2629: 1/9
Processing Match:      5/237
Total run time = 0 and 7.164108 seconds.
Obtained Match Code for 2629/TTEWSINGLES-----------SFNL000100----------, pausing for 0.7 s.


KeyboardInterrupt: 

In [86]:
type(match_details)

dict

In [None]:
wds