
 # Valorant Masters Toronto 2025 Player Statistics Scraper
 This script scrapes player statistics from the Valorant Masters Toronto 2025 event page on vlr.gg, processes the data, and saves it to a CSV file.

In [7]:
from urllib.request import urlopen
from urllib.error import URLError
from bs4 import BeautifulSoup  # For parsing HTML content
from datetime import datetime
import logging
import csv
import os

In [8]:
# LOGGING SETUP
log_filename = os.path.join(
    os.path.dirname(os.getcwd()),
    "scraping",
    f"vct_processing_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log",
)

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(
            log_filename, mode="w"
        ),  # Use 'w' to overwrite for each run
        logging.StreamHandler(),
    ],
    force=True,
)
logger = logging.getLogger(os.getlogin())

In [9]:
# Helper functions
def clean_text(text):
    """Remove unwanted characters and extra whitespace from text."""
    return ' '.join(text.strip().split())

In [10]:
def fetchStats(event_url, output_file_path):
    main_table_classname = "wf-table mod-stats mod-scroll"

    # Fetch and parse HTML
    with urlopen(event_url) as response:
        soup = BeautifulSoup(response, "html.parser")

    # Find stats table
    stats_table = soup.find("table", class_=main_table_classname)
    if not stats_table:
        return  # Exit if table not found

    rows = stats_table.find_all("tr")
    if not rows:
        return  # Exit if no rows found

    # Initialize data matrix
    players_stat_mat = []

    # Process headers
    headers = [clean_text(th.text) for th in rows[0].find_all("th")]
    headers.insert(1, "Team")  # Insert 'Team' after 'Player'
    players_stat_mat.append(headers)

    # Process data rows
    for row in rows[1:]:
        tds = row.find_all("td")
        if not tds:
            continue  # Skip empty rows

        data = []
        for td in tds:
            if "class" in td.attrs:
                if "mod-player" in td["class"]:
                    name_team = td.get_text(",", True).split(",")
                    data.extend([clean_text(ele) for ele in name_team])
                    continue
                elif "mod-agents" in td["class"]:
                    agents = [
                        img["src"].split("/")[-1][:-4] for img in td.find_all("img")
                    ]
                    data.append(agents or [""])  # Handle empty agent list
                    continue
            data.append(clean_text(td.text))

        players_stat_mat.append(data)

    # Write to CSV
    with open(
        output_file_path, "w", newline=""
    ) as csvfile:  # Use 'w' to overwrite for fresh data
        writer = csv.writer(csvfile)
        writer.writerow(
            ["Fetch Timestamp", datetime.now().strftime("%Y-%m-%d %H:%M:%S %Z")]
        )
        writer.writerows(players_stat_mat)

In [None]:
parent_dir = os.path.dirname(os.getcwd())
dataset_dir = os.path.join(parent_dir, "dataset")
vct_evnt_file = os.path.join(dataset_dir, "vct_events.csv")
logger.info("Starting VCT events processing")

try:
    with open(vct_evnt_file, "r", newline="") as csvfile:
        logger.debug(f"Reading CSV file: {vct_evnt_file}")
        reader = csv.reader(csvfile)
        rows = list(reader)
        data = [row[:3] for row in rows[2:]]
        data = list(filter(lambda row: row[2] == "COMPLETED", data))
        print(data)
        logger.info(f"Extracted {len(data)} events from CSV")
except FileNotFoundError as e:
    logger.error(f"Failed to open CSV file {vct_evnt_file}: {str(e)}")
    data = []

for event_name, event_url in data:
    try:
        logger.debug(f"Processing event: {event_name} ({event_url})")
        output_file_path = os.path.join(dataset_dir, event_name, "player_stats.csv")
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)
        logger.debug(f"Created directory for {output_file_path}")

        x = event_url.split("/")
        x.insert(2, "stats")
        stats_url = "https://" + "/".join(x) + "?exclude=32232&min_rounds=0&agent=all"
        logger.debug(f"Constructed stats URL: {stats_url}")

        fetchStats(stats_url, output_file_path)
        logger.info(f"Successfully processed event: {event_name}")
    except (URLError, ValueError, IndexError) as e:
        logger.error(f"Error processing {event_name} ({event_url}): {str(e)}")
        continue
    except Exception as e:
        logger.exception(
            f"Unexpected error processing {event_name} ({event_url}): {str(e)}"
        )
        continue

logger.info("VCT events processing completed")

2025-07-06 16:52:14,246 - root - INFO - Starting VCT events processing


<filter object at 0x1057c7940>


TypeError: object of type 'filter' has no len()