In [1]:
import urllib.request  # For fetching the webpage content
from bs4 import BeautifulSoup  # For parsing HTML content
from datetime import datetime
import csv
import os

In [2]:
# Helper functions
def clean_text(text):
    """Collapse every run of whitespace in ``text`` into a single space.

    Leading and trailing whitespace is dropped. Any falsy input
    (``None``, empty string) yields an empty string.
    """
    if not text:
        return ""
    words = text.strip().split()
    return " ".join(words)

In [3]:
# Site host, kept scheme-less; it is prepended to the relative event links.
VLR_MAIN_URL = "www.vlr.gg"
# Listing page that is downloaded and parsed for event cards.
vct_event_url = "https://www.vlr.gg/events"
# CSS class of the <div> whose first match is searched for event cards.
main_completed_classname = "events-container"


In [None]:
# Download the events listing. The context manager closes the HTTP response,
# and the timeout keeps a stalled connection from hanging the kernel.
with urllib.request.urlopen(vct_event_url, timeout=30) as response:
    contents = response.read()
soup = BeautifulSoup(contents, "html.parser")

links_divs = soup.find_all("div", {"class": main_completed_classname})
if not links_divs:
    # Fail with a descriptive message instead of a bare IndexError below.
    raise RuntimeError(
        "No <div class='{}'> found at {}; the page layout may have changed.".format(
            main_completed_classname, vct_event_url
        )
    )
# Only the first matching container is scanned. The multi-word class string
# is matched against the exact value of the anchor's class attribute.
links = links_divs[0].find_all("a", {"class": "wf-card mod-flex event-item"})


def _first_text(elem):
    """Return the cleaned first child of ``elem``, or "" if it is missing or empty."""
    # NOTE(review): assumes the first child is a text node, not a nested tag.
    return clean_text(elem.contents[0]) if elem and elem.contents else ""


# The first row is the CSV header; one row per event card is appended below.
# NOTE(review): "price_pool" looks like a typo for "prize_pool"; it is left
# unchanged because it is part of the written CSV schema.
VCT_EVENTS = [
    ["event_title", "vct_url", "status", "price_pool", "timeline", "region", "logo_url"]
]

for link in links:
    title_elem = link.find("div", {"class": "event-item-title"})
    title = clean_text(title_elem.text) if title_elem else ""

    # The card's href is joined to the scheme-less host as-is.
    vct_url = VLR_MAIN_URL + link["href"] if "href" in link.attrs else ""

    # The status span carries classes such as
    # "event-item-desc-item-status mod-completed".
    status_ele = link.find("span", {"class": "event-item-desc-item-status"})
    status = _first_text(status_ele).upper()

    prize_elem = link.find("div", {"class": "event-item-desc-item mod-prize"})
    prize_pool = _first_text(prize_elem)

    date_elem = link.find("div", {"class": "event-item-desc-item mod-dates"})
    timeline = _first_text(date_elem)

    # The region code is the last two characters of the flag icon's final
    # CSS class, upper-cased.
    # NOTE(review): presumably that class looks like "mod-xx"; confirm against the page.
    region_elem = link.find("i", {"class": "flag"})
    region = region_elem["class"][-1][-2:].upper() if region_elem else ""

    # Guard every step: a card without a thumbnail wrapper, <img>, or src
    # attribute yields an empty logo URL instead of raising.
    thumb_elem = link.find("div", {"class": "event-item-thumb"})
    logo_elem = thumb_elem.find("img") if thumb_elem is not None else None
    logo_src = logo_elem.get("src", "") if logo_elem is not None else ""
    if logo_src.startswith("//"):
        # Protocol-relative URL: drop the leading "//" to match the
        # scheme-less form used for vct_url.
        logo_url = logo_src[2:]
    elif logo_src.startswith("/"):
        # Root-relative path: anchor it to the site host, as done for href.
        logo_url = VLR_MAIN_URL + logo_src
    else:
        logo_url = logo_src

    VCT_EVENTS.append([title, vct_url, status, prize_pool, timeline, region, logo_url])

In [None]:
# The output lives at <parent of the current working directory>/dataset/vct_events.csv;
# the dataset directory is created on first run.
parent_dir = os.path.dirname(os.getcwd())
file_path = os.path.join(parent_dir, "dataset", "vct_events.csv")
os.makedirs(os.path.dirname(file_path), exist_ok=True)

# astimezone() attaches the local timezone so that %Z renders a zone name;
# on a naive datetime %Z is empty and leaves a trailing space.
fetch_timestamp = datetime.now().astimezone().strftime("%Y-%m-%d %H:%M:%S %Z")

# Append mode: every run adds a timestamp row followed by the header and
# event rows, so earlier fetches are kept in the file.
# An explicit UTF-8 encoding keeps non-ASCII event titles from failing on
# platforms whose default encoding is not UTF-8.
with open(file_path, "a", newline="", encoding="utf-8") as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["Fetch Timestamp", fetch_timestamp])
    writer.writerows(VCT_EVENTS)