In [16]:
import asyncio
import json
import logging
import os
import random
import time
from typing import Optional, Dict, Any, Tuple

import aiohttp
import pandas as pd


# Lower/upper bounds (seconds) of the random pause taken before each commentary request.
MIN_PAUSE_COMMENTARY = 0.05
MAX_PAUSE_COMMENTARY = 0.20
# Timeout (seconds) applied to each commentary request.
REQUEST_TIMEOUT_S = 30
# Value sent in the `secapimkey` request header. Prefer supplying it through the
# WTT_SECAPIMKEY environment variable so the key can be rotated without editing
# the notebook.
# NOTE(review): the literal fallback is kept only so the notebook keeps running
# as before; drop it once the environment variable is provisioned.
SECAPIMKEY = os.environ.get("WTT_SECAPIMKEY", "S_WTT_882jjh7basdj91834783mds8j2jsd81")


async def fetch_match_commentary(session: aiohttp.ClientSession, 
                                 match_tuple: Tuple[int, str],
                                 min_pause: float,
                                 max_pause: float
                                ) -> Optional[Dict[str, Any]]:
    """
    (Async Worker) Fetches the raw match commentary JSON for a specific match.

    Sleeps for a random duration between ``min_pause`` and ``max_pause``
    seconds, then issues a single GET request for the match and returns the
    decoded JSON body.

    Args:
        session (aiohttp.ClientSession): The active session for making requests.
        match_tuple (Tuple[int, str]): The (event_id, match_code) to scrape.
        min_pause (float): Minimum politeness pause, in seconds.
        max_pause (float): Maximum politeness pause, in seconds.

    Returns:
        Optional[Dict[str, Any]]: The parsed JSON response (already decoded,
        not a JSON string), or None when the request fails, times out,
        returns an error status, or the body cannot be decoded.
    """
    # Random politeness pause BEFORE the request
    await asyncio.sleep(random.uniform(min_pause, max_pause))

    event_id, match_code = match_tuple

    url = (
        "https://wtt-website-api-prod-3-frontdoor-bddnb2haduafdze9.a01.azurefd.net"
        f"/api/match/commentary/{event_id}/{match_code}"
    )

    headers = {
        'accept': 'application/json, text/plain, */*',
        'origin': 'https://www.worldtabletennis.com',
        'referer': 'https://www.worldtabletennis.com/',
        # Single source of truth: reuse the module-level key instead of a second literal.
        'secapimkey': SECAPIMKEY,
        'user-agent': 'Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/141.0.0.0 Mobile Safari/537.36'
    }

    # aiohttp expects a ClientTimeout object; passing a bare number is deprecated.
    request_timeout = aiohttp.ClientTimeout(total=REQUEST_TIMEOUT_S)

    try:
        async with session.get(url, headers=headers, timeout=request_timeout) as response:
            response.raise_for_status()  # Raise an error for bad status codes

            # Return the full raw JSON payload, already parsed
            return await response.json()

    except (aiohttp.ClientError, asyncio.TimeoutError, ValueError) as exc:
        # Best-effort worker: network/HTTP errors, timeouts and undecodable
        # bodies yield None, but the cause is recorded instead of being lost.
        logging.getLogger(__name__).warning(
            "Commentary fetch failed for %s/%s: %s: %s",
            event_id, match_code, type(exc).__name__, exc,
        )
        return None

In [17]:
# (event_id, match_code) pair passed to fetch_match_commentary as the example request.
match_tuple = (3090, "TTEMSINGLES-----------FNL-000100----------")

In [18]:
async with aiohttp.ClientSession() as session:
    match_com = await fetch_match_commentary(session, match_tuple, MIN_PAUSE_COMMENTARY, MAX_PAUSE_COMMENTARY)  

In [43]:
# fetch_match_commentary returns None on failure; fail with a clear message
# instead of an opaque error further down.
if match_com is None:
    raise RuntimeError(
        "Commentary fetch returned None; re-run the fetch cell before parsing."
    )

# The fetcher returns the body already parsed by `response.json()`, and
# json.loads() raises TypeError on a dict/list. Only decode when a raw JSON
# string/bytes payload is held.
if isinstance(match_com, (str, bytes, bytearray)):
    match_json = json.loads(match_com)
else:
    match_json = match_com

In [44]:
# Build a frame from the parsed payload; later cells read its
# `event_doc_code`, `id` and `data` columns.
match_df = pd.DataFrame(match_json)

In [45]:
# Drop every row that has a missing value in any column, mutating match_df in place.
# NOTE(review): in-place mutation makes the earlier state of match_df unrecoverable
# without re-running the DataFrame construction cell.
match_df.dropna(inplace=True)

In [46]:
# Split `event_doc_code` on "_" and expose its first two pieces as columns.
match_df = match_df.assign(
    eventId=lambda frame: frame["event_doc_code"].str.split("_").str[0],
    matchCode=lambda frame: frame["event_doc_code"].str.split("_").str[1],
)

In [50]:
# Remove the two columns that are no longer needed.
match_df = match_df.drop(columns=["id", "event_doc_code"])

In [51]:
# Display the current state of the frame (the notebook truncates long frames).
match_df


Unnamed: 0,data,_ts,eventId,matchCode
0,"{'ACTIONTYPE': 'SL', 'UTCDATE': '2025-06-15T00...",1749996202,3090,TTEMSINGLES-----------FNL-000100----------
1,"{'ACTIONTYPE': 'SSD', 'UTCDATE': '2025-06-15T0...",1749996331,3090,TTEMSINGLES-----------FNL-000100----------
2,"{'ACTIONTYPE': 'SSR', 'UTCDATE': '2025-06-15T0...",1749996332,3090,TTEMSINGLES-----------FNL-000100----------
3,"{'ACTIONTYPE': 'SSR', 'UTCDATE': '2025-06-15T0...",1749996333,3090,TTEMSINGLES-----------FNL-000100----------
4,"{'ACTIONTYPE': 'LV', 'UTCDATE': '2025-06-15T00...",1749996419,3090,TTEMSINGLES-----------FNL-000100----------
...,...,...,...,...
291,"{'ACTIONTYPE': 'MPOINT', 'UTCDATE': '2025-06-1...",1749999277,3090,TTEMSINGLES-----------FNL-000100----------
292,"{'ACTIONTYPE': 'SSR', 'UTCDATE': '2025-06-15T0...",1749999278,3090,TTEMSINGLES-----------FNL-000100----------
293,"{'ACTIONTYPE': 'STP', 'UTCDATE': '2025-06-15T0...",1749999291,3090,TTEMSINGLES-----------FNL-000100----------
294,"{'ACTIONTYPE': 'PT', 'UTCDATE': '2025-06-15T00...",1749999296,3090,TTEMSINGLES-----------FNL-000100----------


In [56]:
# Pull the 'ACTIONTYPE' entry out of each row's `data` value.
# NOTE(review): despite the `_df` suffix this is a Series (single-column
# selection followed by `.str.get`); it is not referenced in the cells visible here.
match_actions_df= match_df['data'].str.get('ACTIONTYPE')

In [68]:
# 1. Lift the action type and commentary text out of each `data` payload
#    into their own columns, leaving match_df untouched.
df_working = match_df[["data"]].assign(
    ACTIONTYPE=lambda frame: frame["data"].str.get("ACTIONTYPE"),
    COMMENTARYTEXT=lambda frame: frame["data"].str.get("COMMENTARYTEXT"),
)

# 2. Narrow down to the two derived columns.
df_unique_actions = df_working.loc[:, ["ACTIONTYPE", "COMMENTARYTEXT"]].copy()

# 3. One row per action type: the first row encountered for each type is kept,
#    so each type is shown with its first comment.
unique_actions_df = df_unique_actions.drop_duplicates(subset="ACTIONTYPE")

# 4. Plain-text dump of the result, without the index column.
print("--- Unique Action Types and Their First Corresponding Comment ---")
print(unique_actions_df.to_string(index=False))

--- Unique Action Types and Their First Corresponding Comment ---
ACTIONTYPE                                                                                                                                                 COMMENTARYTEXT
        SL                                           Start of Men's Singles - Final - Match 1 between WEN Ruibo and DUDA Benedikt in Sports Center Jane Sandanski Table 1
       SSD                                                                                                                        No commentary available for this action
       SSR                                                                                                                             Service change: WEN Ruibo to serve
        LV                                                                                                                       Men's Singles - Final - Match 1 Live now
       STP                                                                          

In [84]:
# Print every commentary entry whose text mentions "duda" or "net"
# (case-insensitive substring match).
for item in match_df["data"]:
    num, action, text = (
        item.get(key) for key in ("SEQUENCENUMBER", "ACTIONTYPE", "COMMENTARYTEXT")
    )
    # Entries without commentary text are skipped.
    if not text:
        continue
    if any(term in text.lower() for term in ("duda", "net")):
        print(f"Sequence Number: {num} | Action Type: {action} | Commentary Text: {text}")

Sequence Number: 1 | Action Type: SL | Commentary Text: Start of Men's Singles - Final - Match 1 between WEN Ruibo and DUDA Benedikt in Sports Center Jane Sandanski Table 1
Sequence Number: 10 | Action Type: SSR | Commentary Text: Service change: DUDA Benedikt to serve
Sequence Number: 12 | Action Type: PT | Commentary Text: Point won by DUDA Benedikt, Game G1, Game Scores 2-1,0-0,0-0,0-0,0-0,0-0,0-0, Match Score 0-0
Sequence Number: 14 | Action Type: PT | Commentary Text: Point won by DUDA Benedikt, Game G1, Game Scores 2-2,0-0,0-0,0-0,0-0,0-0,0-0, Match Score 0-0
Sequence Number: 20 | Action Type: SSR | Commentary Text: Service change: DUDA Benedikt to serve
Sequence Number: 22 | Action Type: PT | Commentary Text: Point won by DUDA Benedikt, Game G1, Game Scores 4-3,0-0,0-0,0-0,0-0,0-0,0-0, Match Score 0-0
Sequence Number: 30 | Action Type: SSR | Commentary Text: Service change: DUDA Benedikt to serve
Sequence Number: 32 | Action Type: PT | Commentary Text: Point won by DUDA Benedikt