# EventBrite Information crawler.
This notebook pulls and consolidates the event listings published on the Eventbrite website.

Library imports

In [4]:
from datetime import datetime

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

The Firefox profile options below are set to speed up page loading.

In [5]:
# Firefox profile preferences, applied in one pass below.
# Keeping them in a single mapping makes each key appear exactly once
# (the homepage preference used to be set twice) and makes the list easy to tune.
# NOTE(review): some of these keys (e.g. the network.http.pipelining family) may be
# ignored by current Firefox releases — confirm against the installed version.
FIREFOX_PREFERENCES = {
    # Networking
    "network.http.pipelining": True,
    "network.http.proxy.pipelining": True,
    "network.http.pipelining.maxrequests": 8,
    # Rendering / reflow throttling
    "content.notify.interval": 500000,
    "content.notify.ontimer": True,
    "content.switch.threshold": 250000,
    "browser.cache.memory.capacity": 65536,  # Increase the cache capacity.
    # Start-up behaviour
    "browser.startup.homepage": "about:blank",
    "browser.startup.page": 0,  # blank
    "browser.shell.checkDefaultBrowser": False,
    "general.startup.browser": False,
    # Browser features the crawler does not need
    "reader.parse-on-load.enabled": False,  # Disable reader view.
    "browser.pocket.enabled": False,  # Disable Pocket.
    "loop.enabled": False,
    "browser.formfill.enable": False,  # Autofill on forms disabled.
    "browser.helperApps.deleteTempFileOnExit": True,  # Delete temporary files.
    "browser.tabs.forceHide": True,  # Disable tabs.
    # Display
    "browser.chrome.toolbar_style": 1,  # Text on toolbar instead of icons.
    "browser.display.show_image_placeholders": False,  # No placeholders for unloaded images.
    "browser.display.use_document_colors": False,  # Don't show document colors.
    "browser.display.use_document_fonts": 0,  # Don't load document fonts.
    "browser.display.use_system_colors": True,  # Use system colors.
    "permissions.default.image": 2,  # Do not load images.
    # URL bar
    "browser.urlbar.autoFill": False,  # Disable autofill on URL bar.
    "browser.urlbar.autocomplete.enabled": False,  # Disable autocomplete on URL bar.
    "browser.urlbar.showPopup": False,  # Disable list of URLs when typing on URL bar.
    "browser.urlbar.showSearch": False,  # Disable search bar.
    # Extensions: skip compatibility and update checks
    "extensions.checkCompatibility": False,
    "extensions.checkUpdateSecurity": False,
    "extensions.update.autoUpdateEnabled": False,
    "extensions.update.enabled": False,
    "plugin.default_plugin_disabled": False,
    # NOTE(review): the crawl cell clicks a "view more details" button and waits for
    # elements to appear; confirm the pages still render with JavaScript disabled.
    "javascript.enabled": False,
}

options = Options()
firefox_option = webdriver.FirefoxProfile()
for preference_name, preference_value in FIREFOX_PREFERENCES.items():
    firefox_option.set_preference(preference_name, preference_value)
options.profile = firefox_option

### Getting SG upcoming calendar of events from Eventbrite https://www.eventbrite.sg/d/singapore--singapore/all-events/?page=1
Because the event listing is paginated and each page can be reached by changing the `page` parameter in the URL, scraping is straightforward.

# MAIN execution


In [6]:
# Crawl every Eventbrite listing page for Singapore and collect one row per event:
# [title, date, location, url]. Rows are accumulated in `event_tracking_list`.
# The main `driver` is intentionally left open; the save cell closes it.

LISTING_URL_TEMPLATE = "https://www.eventbrite.sg/d/singapore--singapore/all-events/?page={page}"
LISTING_WAIT_SECONDS = 3  # max wait for the pagination element on a listing page
DETAIL_WAIT_SECONDS = 5   # max wait for the full date on an event detail page

# XPATH
page_results_xpath = ".//*/li[contains(@class, 'eds-pagination__navigation-minimal')]"
event_card_xpath = (
    ".//div[contains(@class, 'discover-search-desktop-card discover-search-desktop-card--hiddeable')]"
    "/section/div/section[@class='event-card-details']"
)
title_xpath = "./a"
date_location_xpath = "./p"
button_info_xpath = ".//div[@class='event-listing__view-more-details-btn-container']/button"
date_info_xpath = ".//span[@class='date-info__full-datetime']"

# Relative date labels; a card showing one of these gets its full date from the detail page.
upcoming_dates_representation_list = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday", "Tomorrow"]


def _wait_for(browser, xpath, timeout):
    """Wait until `xpath` is present in `browser`.

    Returns True when the element appeared, False when `timeout` seconds
    elapsed first (instead of letting TimeoutException abort the crawl).
    """
    try:
        WebDriverWait(browser, timeout).until(EC.presence_of_element_located((By.XPATH, xpath)))
    except TimeoutException:
        return False
    return True


def _total_pages(pagination_text):
    """Return the largest integer found in the pagination label, or 0 if there is none.

    NOTE(review): assumes the label looks like "1 of 50", so the largest
    number is the page count — confirm against the live page.
    """
    numbers = [int(token) for token in pagination_text.split() if token.isdigit()]
    return max(numbers) if numbers else 0


def _read_full_date(browser, event_url):
    """Open an event detail page and return its full date text, or "" if unavailable."""
    browser.get(event_url)

    # The full details may be collapsed behind a button; its absence is not an error.
    try:
        browser.find_element(by=By.XPATH, value=button_info_xpath).click()
    except NoSuchElementException:
        print("No view all event details button found")

    if not _wait_for(browser, date_info_xpath, DETAIL_WAIT_SECONDS):
        print("No date info found")
        return ""
    try:
        return browser.find_element(by=By.XPATH, value=date_info_xpath).text
    except NoSuchElementException:
        print("No date info found")
        return ""


URL = LISTING_URL_TEMPLATE.format(page=1)
driver = webdriver.Firefox(options=options)
driver.get(URL)

# Read the number of listing pages from the pagination element of page 1.
total_page = 0
if _wait_for(driver, page_results_xpath, LISTING_WAIT_SECONDS):
    try:
        total_page = _total_pages(driver.find_element(by=By.XPATH, value=page_results_xpath).text)
    except NoSuchElementException:
        total_page = 0
if total_page == 0:
    print("Unable to find required page information")

event_tracking_list = []
detail_driver = None  # second browser for detail pages, started only when first needed
try:
    for page_number in range(1, total_page + 1):
        # Page 1 is already loaded from the page-count lookup above.
        if page_number > 1:
            URL = LISTING_URL_TEMPLATE.format(page=page_number)
            driver.get(URL)
            if not _wait_for(driver, page_results_xpath, LISTING_WAIT_SECONDS):
                print(f"Listing page {page_number} did not load in time, skipping")
                continue

        # For each event card, extract the title, link, date and location.
        for event_info in driver.find_elements(by=By.XPATH, value=event_card_xpath):
            try:
                title_link = event_info.find_element(by=By.XPATH, value=title_xpath)
            except NoSuchElementException:
                print("No title info found, skipping to next")
                continue
            event_title = title_link.text
            href = title_link.get_attribute("href") or ""

            # Reset per card so values from the previous card can never leak into this row.
            date_info = ""
            location_info = ""

            date_location_list = event_info.find_elements(by=By.XPATH, value=date_location_xpath)
            if not date_location_list:
                print("No date or location info found. Continuing to next event")
                continue
            if len(date_location_list) == 1:
                # A single paragraph is treated as the location.
                location_info = date_location_list[0].text
            elif len(date_location_list) == 2:
                date_info = date_location_list[0].text
                location_info = date_location_list[1].text
            else:
                # Row is still recorded, with blank date/location.
                print("Excessive information found. Unsure how to handle")

            # Relative dates ("Tomorrow", weekday names) are resolved on the detail page.
            if href and any(day in date_info for day in upcoming_dates_representation_list):
                if detail_driver is None:
                    detail_driver = webdriver.Firefox(options=options)
                full_date = _read_full_date(detail_driver, href)
                if full_date:
                    date_info = full_date

            event_metadata_list = [event_title, date_info, location_info, href]
            print(event_metadata_list)
            event_tracking_list.append(event_metadata_list)
finally:
    # Always release the detail browser, even if the crawl fails part-way.
    if detail_driver is not None:
        detail_driver.quit()

# Find total entries based on date and location regardless of multiple same event information
print("Upcoming eventbrute events:",len(event_tracking_list))

SyntaxError: 'break' outside loop (3189244789.py, line 19)

## Save data to csv

In [None]:
## Convert the collected event rows (a list of lists) to a dataframe and save it as CSV
DATASET_COLUMNS = ["Event Title", "Date", "Location", "URL"]

try:
    if event_tracking_list:
        # Take one timestamp so the column value and the file name refer to the same instant.
        checked_at = datetime.now()
        datetime_now = checked_at.strftime("%d/%m/%Y %H:%M:%S")
        file_name_date = checked_at.strftime("%d%m%Y_%H%M%S")

        df = pd.DataFrame(event_tracking_list, columns=DATASET_COLUMNS)
        df["Date_of_check"] = datetime_now

        dataset_filename = f"Eventbrite_dataset_{file_name_date}.csv"
        df.to_csv(dataset_filename, index=False, encoding='utf-8')
    else:
        # Keep `df` defined so the preview below works even when nothing was collected.
        df = pd.DataFrame(columns=DATASET_COLUMNS + ["Date_of_check"])
        print("No events collected; no CSV file written")
finally:
    # Close driver whether or not the file was saved
    driver.quit()

df.head()

Unnamed: 0,Event Title,Date,Location,Status,URL,Date_of_check
0,West Mall BT21-Themed CNY Activities & Promos,05 Jan - 24 Feb 2024,"1 Bukit Batok Central, Singapore 658713",ENDED,https://thesmartlocal.com/read/west-mall-bt21-...,15/04/2024 16:35:29
1,Comma Creative Arts Festival 2024,12 - 28 Jan 2024,"2 Orchard Link, Singapore 237978",ENDED,https://thesmartlocal.com/read/comma-creative-...,15/04/2024 16:35:29
2,Light To Night 2024,19 Jan - 08 Feb 2024,Civic District,ENDED,https://thesmartlocal.com/read/light-to-night-...,15/04/2024 16:35:29
3,Creative Intersections: Traces Of Dragons,19 Jan - 25 Feb 2024,Funan,ENDED,https://thesmartlocal.com/read/singapore-art-w...,15/04/2024 16:35:29
4,Artbox Avenue 2024,26 Jan - 04 Feb 2024,"Singapore Expo Hall 2, 1 Expo Drive, Singapore...",ENDED,https://thesmartlocal.com/read/artbox-avenue-2...,15/04/2024 16:35:29
