# SACEOS Information crawler.
This notebook pulls and consolidates the datasets published on the SACEOS website: https://saceos.org.sg/

Library imports

In [1]:
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver 
from selenium.common.exceptions import NoSuchElementException

from datetime import datetime
from selenium.webdriver.firefox.options import Options
import pandas as pd
import time

The Firefox profile options below are set to speed up page loading.

In [2]:
# Build a Firefox profile with a handful of loading-related preferences and
# attach it to the `options` object that is later passed to the driver.
options = Options()
firefox_option = webdriver.FirefoxProfile()

# Preferences that are actually applied, in the order they are set.
# NOTE(review): the network.http.pipelining* preferences are believed to be
# ignored by current Firefox releases - confirm whether they still have an effect.
active_preferences = {
    "network.http.pipelining": True,
    "network.http.proxy.pipelining": True,
    "network.http.pipelining.maxrequests": 8,
    "content.notify.interval": 500000,
    "content.notify.ontimer": True,
    "content.switch.threshold": 250000,
}
for pref_name, pref_value in active_preferences.items():
    firefox_option.set_preference(pref_name, pref_value)

# Candidate preferences that are currently disabled. Move an entry into
# `active_preferences` above to enable it.
#   "browser.cache.memory.capacity": 65536,                 # Increase the cache capacity.
#   "browser.startup.homepage": "about:blank",
#   "reader.parse-on-load.enabled": False,                  # Disable reader, not needed here.
#   "browser.pocket.enabled": False,                        # Disable Pocket as well.
#   "loop.enabled": False,
#   "browser.chrome.toolbar_style": 1,                      # Text on toolbar instead of icons.
#   "browser.display.show_image_placeholders": False,       # No thumbnails for unloaded images.
#   "browser.display.use_document_colors": False,           # Don't show document colors.
#   "browser.display.use_document_fonts": 0,                # Don't load document fonts.
#   "browser.display.use_system_colors": True,              # Use system colors.
#   "browser.formfill.enable": False,                       # Autofill on forms disabled.
#   "browser.helperApps.deleteTempFileOnExit": True,        # Delete temporary files.
#   "browser.shell.checkDefaultBrowser": False,
#   "browser.startup.page": 0,                              # Blank start page.
#   "browser.tabs.forceHide": True,                         # Disable tabs, not needed here.
#   "browser.urlbar.autoFill": False,                       # Disable autofill on URL bar.
#   "browser.urlbar.autocomplete.enabled": False,           # Disable autocomplete on URL bar.
#   "browser.urlbar.showPopup": False,                      # No URL list when typing in URL bar.
#   "browser.urlbar.showSearch": False,                     # Disable search bar.
#   "extensions.checkCompatibility": False,                 # Addon update disabled.
#   "extensions.checkUpdateSecurity": False,
#   "extensions.update.autoUpdateEnabled": False,
#   "extensions.update.enabled": False,
#   "general.startup.browser": False,
#   "plugin.default_plugin_disabled": False,
#   "permissions.default.image": 2,                         # Image loading disabled.
#   "javascript.enabled": True,

options.profile = firefox_option

### Getting the upcoming Singapore MICE events calendar from SACEOS: https://saceos.org.sg/mice-events-calendar-listing/


# MAIN execution

In [14]:
# --- Configuration -----------------------------------------------------------
BASELINE_INFO_SCRAPE = False  # not referenced anywhere in this cell

URL = "https://saceos.org.sg/mice-events-calendar-listing/"

# Link inside the first child <div> of every 'elementor-toggle-item' block;
# these are the collapsible section headers that get clicked open below.
expand_xpath = "//div[@class='elementor-toggle-item']/div[1]/a"

# Elements inside an <h2> carrying the underlined/black inline style; each one
# is treated as an event title.
events_xpath = "//h2//*[contains(@style, 'text-decoration: underline; color: #000000;')]"

# Detail lookups, evaluated relative to an event title element.
# NOTE(review): `following-sibling::ul` matches every later sibling list, so an
# event that lacks a field may pick up the value of a later event. Consider
# `following-sibling::ul[1]` once the page structure has been confirmed.
event_date_xpath = "./../following-sibling::ul//li//*[contains(text(), 'Date')]"
event_location_xpath = "./../following-sibling::ul//li//*[contains(text(), 'Location')]"
href_location_xpath = "./../following-sibling::ul//li//*[contains(text(), 'https')]"


def _find_text_or_blank(element, xpath, missing_message):
    """Return the text of the first match of `xpath` under `element`.

    Prints `missing_message` and returns "" when nothing matches.
    """
    try:
        return element.find_element(by=By.XPATH, value=xpath).text
    except NoSuchElementException:
        print(missing_message)
        return ""


def _title_from_url(url_text):
    """Derive a fallback event title from the host name inside `url_text`.

    The scheme and any path/query/fragment are removed, a leading "www" label
    is dropped, and the first remaining host label is returned, e.g.
    "https://www.foo.com/x" -> "foo" and "https://foo.com/x" -> "foo".
    Returns "" when no host label can be extracted.
    """
    remainder = url_text.split("//", 1)[-1]
    for separator in ("/", "?", "#"):
        remainder = remainder.split(separator, 1)[0]
    tokens = remainder.split()  # ignore any free text following the URL
    host = tokens[0] if tokens else ""
    labels = [label for label in host.split(".") if label]
    if len(labels) > 1 and labels[0].lower() == "www":
        labels = labels[1:]
    return labels[0] if labels else ""


# --- Scrape --------------------------------------------------------------------
driver = webdriver.Firefox(options=options)
try:
    driver.get(URL)

    # Wait (up to 5 seconds) until at least one expandable header is present.
    WebDriverWait(driver, 5).until(EC.presence_of_element_located((By.XPATH, expand_xpath)))

    expandable_tabs = driver.find_elements(by=By.XPATH, value=expand_xpath)
    print("Clicking monthly events")
    for tab in expandable_tabs:
        print(tab.text)
        print("Clicking it...")
        tab.click()
        time.sleep(2)  # pause after each click before moving to the next header

    print("Starting to trace events")
    event_tracking_list = []
    total_events_list = driver.find_elements(by=By.XPATH, value=events_xpath)
    print("Total events: ", len(total_events_list))
    for event in total_events_list:
        event_title = event.text
        event_date = _find_text_or_blank(event, event_date_xpath, "Date unknown")
        event_location = _find_text_or_blank(event, event_location_xpath, "Location unknown")
        event_href = _find_text_or_blank(event, href_location_xpath, "Ref unknown")

        # Some titles render as empty text; fall back to a name taken from the URL.
        if event_title == "" and event_href:
            event_title = _title_from_url(event_href)

        event_metadata_list = [event_title, event_date, event_location, event_href]
        print(event_metadata_list)
        event_tracking_list.append(event_metadata_list)
except BaseException:
    # Do not leave an orphaned browser behind when the scrape fails or is
    # interrupted; on success the driver stays open for the following cell.
    driver.quit()
    raise


Clicking monthly events
January 2024
Clicking it...
February 2024
Clicking it...
March 2024
Clicking it...
April 2024
Clicking it...
May 2024
Clicking it...
June 2024
Clicking it...
July 2024
Clicking it...
August 2024
Clicking it...
September 2024
Clicking it...
October 2024
Clicking it...
November 2024
Clicking it...
December 2024
Clicking it...
April 2025
Clicking it...
May 2025
Clicking it...
Starting to trace events
Total events:  181


## Save data to csv

In [None]:
# Convert the scraped rows (a list of [title, date, location, url] lists) into
# a DataFrame and write it to a timestamped CSV file. The browser is closed
# afterwards even if building or saving the dataset fails.
try:
    if event_tracking_list:
        # Take the timestamp once so the column value and the file name agree.
        checked_at = datetime.now()
        datetime_now = checked_at.strftime("%d/%m/%Y %H:%M:%S")
        file_name_date = checked_at.strftime("%d%m%Y_%H%M%S")

        # Rows sharing the same date/location/url are collapsed, keeping the last.
        # NOTE(review): rows where all three fields are empty also collapse into a
        # single row - confirm this is intended.
        df = (
            pd.DataFrame(event_tracking_list, columns=["Event Title", "Date", "Location", "URL"])
            .drop_duplicates(subset=["Date", "Location", "URL"], keep="last")
            .assign(Date_of_check=datetime_now)
        )

        # NOTE(review): the "Eventbrite_" prefix looks like a leftover, since the
        # data comes from SACEOS; it is kept unchanged in case other tooling
        # relies on the file name.
        dataset_filename = f"Eventbrite_dataset_{file_name_date}.csv"
        df.to_csv(dataset_filename, index=False, encoding='utf-8')
finally:
    # Close the driver whether or not saving succeeded.
    driver.quit()
