In [25]:
import requests
import json
import pandas as pd
import os
from datetime import datetime
import re
import random 
import time 
from IPython.display import display, clear_output
import glob

In [26]:
def get_event_equipment(event_id):
    """
    Fetches the equipment details (like table and ball sponsor) for a specific event.

    Args:
        event_id (int or str): The ID of the event to fetch data for.

    Returns:
        dict: The JSON response data as a Python dictionary, or None if an error occurs.
    """
    # The URL is an f-string, allowing us to insert the event_id dynamically
    url = f"https://wtt-website-api-prod-3-frontdoor-bddnb2haduafdze9.a01.azurefd.net/api/cms/GetEventEquipmentwithLogo/{event_id}"
    
    # All the headers from your cURL command
    headers = {
        "accept": "application/json, text/plain, */*",
        "accept-language": "en-GB,en;q=0.9,es=q=0.8",
        "cache-control": "no-cache",
        "dnt": "1",
        "origin": "https://www.worldtabletennis.com",
        "pragma": "no-cache",
        "priority": "u=1, i",
        "referer": "https://www.worldtabletennis.com/",
        "sec-ch-ua": "\"Chromium\";v=\"140\", \"Not=A?Brand\";v=\"24\", \"Google Chrome\";v=\"140\"",
        "sec-ch-ua-mobile": "?1",
        "sec-ch-ua-platform": "\"Android\"",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "cross-site",
        "secapimkey": "S_WTT_882jjh7basdj91834783mds8j2jsd81",
        "user-agent": "Mozilla/5.0 (Linux; Android 11.0; Surface Duo) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/140.0.0.0 Mobile Safari/537.36"
    }
    
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status() # Raise an error for bad status codes (4xx or 5xx)
        return response.json()
    except requests.exceptions.HTTPError as err:
        print(f"HTTP Error occurred for event {event_id}: {err}")
    except requests.exceptions.RequestException as err:
        print(f"An error occurred for event {event_id}: {err}")
    
    return None

In [27]:
#Search folder with match payloads in in. Obtain all csv files. 
singles_payload_directory = "Data/Processed/Match_payloads"
payload_files = glob.glob(f"{singles_payload_directory}/*.csv")


#Obtain the event_ids from the file names!
successful_event_ids = []

for file in payload_files:    
    payloads_df = pd.read_csv(file)
    event_id = payloads_df["eventId"][0]
    successful_event_ids.append((event_id))  


#Obtain the events shortlist and extract only the succuessful events from successful_event_ids.
events_shortlist = "Data/Processed/Events/shortlist_events.csv"
shortlist_df = pd.read_csv(events_shortlist)
final_events_df = shortlist_df[shortlist_df["EventId"].isin(successful_event_ids)]


#Now add event table and ball sponsors to each event 

additional_data_list = []
event_total = len(final_events_df)

print(f"---🟢Commencing obtaining event sponsors from {events_shortlist}")

for number, (index, event) in enumerate(final_events_df.iterrows()):
    event_id = event["EventId"]

    match_payloads_file = f"{singles_payload_directory}/{event_id}_singles_payloads.csv"
    match_payloads_df = pd.read_csv(match_payloads_file)
    
    womens_matches = payloads_df[payloads_df["subEventType"] == "Women Singles"]
    num_womens_matches = len(womens_matches)
    mens_matches = payloads_df[payloads_df["subEventType"] == "Men Singles"]
    num_mens_matches = len(mens_matches)
   

    
    
    equipment_json = get_event_equipment(event_id)
    sleep_duration = random.uniform(0.3, 0.8)
    display(f"Obtaining sponsors for event ID:{event_id} 🔄({number+1}/{event_total}) - pausing for {sleep_duration:.1f}s to give the API a break ---")
    clear_output(wait=True)
    time.sleep(sleep_duration)
    
    

    table_sponsor = None
    ball_sponsor = None 
    if equipment_json:
        try: 
            sponsors_df = pd.DataFrame(equipment_json)

            table_sponsor_series = sponsors_df[sponsors_df["sponsorTypeName"] == "Official Table"]["sponsorLink"]
            ball_sponsor_series = sponsors_df[sponsors_df["sponsorTypeName"] == "Official Ball"]["sponsorLink"]
            table_sponsor = table_sponsor_series.iloc[0]
            ball_sponsor = ball_sponsor_series.iloc[0]
            
        except(KeyError,IndexError):
            print(f"    -> Could not parse sponsor data for event {event_id}.")
        
        

    sponsor_data_list.append({
    "EventId": event_id,
    "TableSponsorLink": table_sponsor,
    "BallSponsorLink": ball_sponsor,
    "Mens Match Count":num_mens_matches,
    "Womens Match Count": num_womens_matches    
    })

print("✅ Completed getting sponsors")
sponsors_to_add_df = pd.DataFrame(sponsor_data_list)

enriched_events_df = pd.merge(final_events_df,sponsors_to_add_df)  





✅ Completed getting sponsors


In [None]:
enri

In [22]:
enriched_events_df = test_df.copy()

# Manually determined map for sponsors, needs to be manually updated as required. 
sponsor_map = {
    'https://www.dhs-sportsglobal.com/': 'DHS',
    'http://www.dhs-sports.com/': 'DHS',
    'https://en.dhs-sports.com/': 'DHS',
    'https://www.doublefish.com/': 'Double Fish',
    'https://www.doublefish.com': 'Double Fish',
    'http://www.yinhe1986.cn/': 'Yinhe',
    'http://www.dhs-sportsglobal.com': 'DHS',
    'https://joola.com/pages/table-tennis?srsltid=AfmBOoquv7qYw0rrAhtEXdZaAsWcFDBkpTNksiGhnOxFif6xCoAajsx_': 'Joola'
}

sponsored_events_df["BallSponsor"] = sponsored_events_df["BallSponsorLink"].map(sponsor_map)
sponsored_events_df["TableSponsor"] = sponsored_events_df["TableSponsorLink"].map(sponsor_map)


#convert time to datetime object and print in readable form as only date is required. 
sponsored_events_df["StartDate"] = pd.to_datetime(sponsored_events_df["StartDateTime"])
sponsored_events_df["StartDate"] = sponsored_events_df["StartDate"].dt.normalize()


#drop columns I don't need starting with any empty columns

sponsored_events_df.dropna(axis=1, how='all', inplace=True)
sponsored_events_df.reset_index()

columns_to_drop = [ 
    "StartDateTime"
    "EndDateTime",
    "Event_Tier_Name",
    "BallSponsorLink",
    "TableSponsorLink",
    "StartDateTime",
    "EndDateTime",
    "EventTypeId",
    
]

sponsored_events_df.drop(columns = columns_to_drop, inplace = True, errors = 'ignore')

In [23]:
#### Save the final events df with sponsor information
date = datetime.now()
date = date.strftime("%Y%m%d")

output_dir = "Data/Processed/Events"
file_name = f"{output_dir}/{date}_events.csv"



sponsored_events_df.to_csv(file_name,index=False)


In [24]:
sponsored_events_df

Unnamed: 0,EventName,EventType,Country,City,ContinentCode,Subcontinent,EventId,Mens Match Count,Womens Match Count,BallSponsor,TableSponsor,StartDate
0,Singapore Smash 2022,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2536,,,Double Fish,DHS,2022-03-07
1,Singapore Smash 2022,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2536,119.0,119.0,Double Fish,DHS,2022-03-07
2,Singapore Smash 2023,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2629,,,Double Fish,DHS,2023-03-07
3,Singapore Smash 2023,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2629,119.0,119.0,Double Fish,DHS,2023-03-07
4,Singapore Smash 2024,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2904,,,Yinhe,DHS,2024-03-07
5,Singapore Smash 2024,WTT Grand Smash,Singapore,Singapore,asia,south east asia,2904,119.0,119.0,Yinhe,DHS,2024-03-07
6,Saudi Smash 2024,WTT Grand Smash,Saudi Arabia,Jeddah,asia,west asia,2932,,,Double Fish,DHS,2024-05-01
7,Saudi Smash 2024,WTT Grand Smash,Saudi Arabia,Jeddah,asia,west asia,2932,119.0,119.0,Double Fish,DHS,2024-05-01
8,China Smash 2024 Presented by Zhonghui Life,WTT Grand Smash,China,Beijing,asia,east asia,2942,,,Double Fish,DHS,2024-09-26
9,China Smash 2024 Presented by Zhonghui Life,WTT Grand Smash,China,Beijing,asia,east asia,2942,119.0,119.0,Double Fish,DHS,2024-09-26
