In [344]:
import pandas as pd
import os
import sys
import glob
import requests
import json
from typing import List, Dict, Union, Tuple, Any, Optional
from datetime import datetime
import re

In [350]:
# ----------- Configuration --------------------
#
# The events shortlist is processed to reformat dates for better readability.
# Only the desired columns are kept.
# Placeholder columns are added for the ball and table sponsors; these are
# filled in later by 06_gets_sponsors_and_finalise_events.

# Input file from script 02 (processed and tagged events).
SHORTLIST_FILE = "../Data/Processed/Events/shortlist_events.csv"

# Payloads directory, where all singles match payloads are stored.
SINGLES_PAYLOADS_DIR = "../Data/Processed/Singles_match_payloads"

# Run date (YYYYMMDD) used to stamp the output file name.
date = datetime.now().strftime("%Y%m%d")

# Destination of the intermediate events file written by this notebook.
INTERMEDIATE_DIR = "../Data/Processed/Events/Intermediate"
INTERMEDIATE_NAME = f"{date}_intermediate_events.csv"

# Columns that are dropped after all processing is done.
# Each name appears exactly once.
COLUMNS_TO_DROP = [
    # Replaced by StartDate / EndDate.
    'StartDateTime',
    'EndDateTime',
    'Subcontinent',
    'ContinentCode',
    'EventCode',
    'Tags',
    'EventTypeId',
    # Columns describing rearranged events. They were blank for the relevant
    # events at the time of writing, so they are simply discarded.
    'EventDateChangeId',
    'FromStartDate',
    'FromEndDate',
    'ToStartDate',
    'ToEndDate',
    'PageLink',
    'Comments',
    'Type',
    'ShowInCalendar',
    'Event_Tier_Name',
]

In [351]:
def filter_by_payloads(shortlist_df: pd.DataFrame, singles_payloads_dir: str) -> pd.DataFrame:
    """
    Keep only the shortlisted events for which a singles match payload file exists.

    Every ``*.csv`` file in ``singles_payloads_dir`` is expected to carry an
    ``eventId`` column; the value in its first data row identifies the event
    the file belongs to. Files that cannot be read, are empty, or lack that
    column are reported and skipped so one bad file does not abort the run.

    Args:
        shortlist_df: Events shortlist; must contain an ``eventId`` column.
        singles_payloads_dir: Directory holding the payload CSV files.

    Returns:
        A new DataFrame (fresh 0..n-1 index) with the rows of ``shortlist_df``
        whose ``eventId`` was found in at least one payload file.

    Raises:
        KeyError: If ``shortlist_df`` has no ``eventId`` column.
    """
    print("--- 🟠 Filtering Events By Payloads 🟠 ---")

    # A set, because several payload files may belong to the same event.
    valid_event_ids = set()

    # Sorted so that any skip messages appear in a reproducible order.
    payload_files = sorted(glob.glob(os.path.join(singles_payloads_dir, "*.csv")))
    for file in payload_files:
        try:
            # Only the first row's eventId is needed, so avoid parsing the whole file.
            first_row = pd.read_csv(file, usecols=["eventId"], nrows=1)
            valid_event_ids.add(first_row["eventId"].iloc[0])
        except Exception as e:
            # Deliberately best-effort: report the offending file and carry on.
            print(f"--- ⚠️ Skipping payload file {file}: {e} ---")

    mask = shortlist_df["eventId"].isin(valid_event_ids)
    # .copy() so callers can add columns without a SettingWithCopyWarning.
    valid_events_df = shortlist_df[mask].copy().reset_index(drop=True)

    print(f"\n--- ✅ Found {len(valid_events_df)} events with valid payloads ---")
    return valid_events_df

In [352]:
# Scratch cell intentionally left without code: it previously evaluated the
# undefined name `d`, which raised NameError and broke Restart Kernel -> Run All.

In [353]:
if __name__ == "__main__":

    # Load the events shortlist. Without it there is nothing to process,
    # so report the missing file and stop.
    try:
        shortlist_df = pd.read_csv(SHORTLIST_FILE)
    except FileNotFoundError:
        print(f"--- ❌ ERROR: Shortlist file not found: {SHORTLIST_FILE}. ---")
        sys.exit(1)

    # Valid events are events where singles match payloads exist.
    valid_events_df = filter_by_payloads(shortlist_df, SINGLES_PAYLOADS_DIR)

    # Strip the time-of-day from start and end so only the calendar date remains.
    valid_events_df['StartDate'] = pd.to_datetime(valid_events_df['StartDateTime']).dt.normalize()
    valid_events_df['EndDate'] = pd.to_datetime(valid_events_df['EndDateTime']).dt.normalize()

    # Create placeholders for the sponsor details that are filled in later.
    valid_events_df["BallSponsor"] = "TBC"
    valid_events_df["TableSponsor"] = "TBC"

    # A column that is already absent needs no dropping; report it instead of
    # failing with KeyError so a schema change upstream stays visible.
    missing_columns = [col for col in COLUMNS_TO_DROP if col not in valid_events_df.columns]
    if missing_columns:
        print(f"--- ⚠️ Columns to drop not found (skipped): {missing_columns} ---")
    valid_events_df = valid_events_df.drop(columns=COLUMNS_TO_DROP, errors="ignore")

    # Make sure the destination exists; to_csv does not create directories.
    os.makedirs(INTERMEDIATE_DIR, exist_ok=True)
    output_path = os.path.join(INTERMEDIATE_DIR, INTERMEDIATE_NAME)

    # date_format pins the yyyy-mm-dd representation of StartDate / EndDate.
    valid_events_df.to_csv(output_path, index=False, date_format="%Y-%m-%d")

    print(f"--- ✅ Intermediate Events File saved to {output_path}")

    


--- 🟠 Filtering Events By Payloads 🟠 ---

--- ✅ Found 183 events with valid payloads ---
--- ✅ Intermediate Events File saved to ../Data/Processed/Events/Intermediate/20251027_intermediate_events.csv


In [349]:
# Display the resulting intermediate events table for a quick visual check.
valid_events_df

Unnamed: 0,EventName,EventType,Country,City,eventId,Completed,StartDate,EndDate,BallSponsor,TableSponsor
0,WTT Contender Doha 2021,WTT Contender,Qatar,Doha,2410,True,2021-02-28,2021-03-06,TBC,TBC
1,WTT Star Contender Doha 2021,WTT Star Contender,Qatar,Doha,2411,True,2021-03-05,2021-03-13,TBC,TBC
2,Tokyo 2020 Olympic Games,Olympic Games,Japan,Tokyo,2345,True,2021-07-23,2021-08-08,TBC,TBC
3,WTT Contender Budapest 2021,WTT Contender,Hungary,Budapest,2487,True,2021-08-15,2021-08-20,TBC,TBC
4,2021 ITTF Czech International Open,ITTF International Open,Czechia,Olomouc,2480,True,2021-08-21,2021-08-25,TBC,TBC
...,...,...,...,...,...,...,...,...,...,...
178,WTT Champions Macao 2025 Presented by Galaxy E...,WTT Champions,"Macao, China",Macao,3097,True,2025-09-09,2025-09-14,TBC,TBC
179,WTT Feeder Istanbul 2025,WTT Feeder,Türkiye,Istanbul,3199,True,2025-09-11,2025-09-15,TBC,TBC
180,WTT Feeder Cappadocia II 2025,WTT Feeder,Türkiye,Cappadocia - Nevsehir,3031,True,2025-09-16,2025-09-20,TBC,TBC
181,China Smash 2025 Presented by Beijing Shijings...,WTT Grand Smash,China,Beijing,3098,True,2025-09-25,2025-10-05,TBC,TBC
