## Introduction
This notebook scrapes climber names from the International Federation of Sport Climbing (IFSC) rankings website. It extracts data for both boulder and lead categories, for men and women.

### Setup and Imports
Let's start by importing the necessary libraries.

In [10]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import pandas as pd
import os

### Scraping IFSC

In [11]:
def _parse_climber_names(html, category_name):
    """Extract climber name records from rendered rankings-page HTML.

    Args:
        html: Page source as a string.
        category_name: Label used only in the progress messages.

    Returns:
        A list of ``{"name": "<first> <surname>"}`` dicts, possibly empty.
    """
    soup = BeautifulSoup(html, "html.parser")

    # Spans with these classes are treated as first names / surnames.
    # NOTE(review): a multi-class string passed to class_ matches the exact
    # attribute value only, so a change in class order on the site would make
    # the surname lookup return nothing — confirm against the live markup.
    first_names = soup.find_all("span", class_="font-normal")
    surnames = soup.find_all("span", class_="font-bold uppercase")

    print(f"Found {len(first_names)} first names for {category_name}")
    print(f"Found {len(surnames)} surnames for {category_name}")

    climbers = []
    if len(first_names) == len(surnames) and len(first_names) > 0:
        # Equal, non-zero counts: pair the two lists positionally.
        for fname, sname in zip(first_names, surnames):
            full_name = f"{fname.text.strip()} {sname.text.strip()}"
            climbers.append({"name": full_name})
    else:
        # Counts differ (or nothing was found): fall back to reading both
        # name parts from inside each athlete link so they stay paired.
        print(f"Mismatch in {category_name}—trying athlete links")
        athlete_links = soup.find_all("a", class_="hover:text-blue-aa")
        for link in athlete_links:
            name_span = link.find("span")
            if name_span:
                fname = name_span.find("span", class_="font-normal")
                sname = name_span.find("span", class_="font-bold uppercase")
                if fname and sname:
                    full_name = f"{fname.text.strip()} {sname.text.strip()}"
                    climbers.append({"name": full_name})

    print(f"Collected {len(climbers)} climbers for {category_name}")
    return climbers


def scrape_ifsc_names(url, category_name):
    """Scrape climber names using Selenium for dynamic content.

    Opens ``url`` in a Chrome session, tries to accept the cookie popup and
    click the discipline tab, then parses the resulting page source.

    Args:
        url: Rankings page to load.
        category_name: Label for progress messages; the "Lead" tab is chosen
            when it contains ``"lead"``, otherwise the "Boulder" tab.

    Returns:
        A list of ``{"name": "<first> <surname>"}`` dicts, possibly empty.

    Raises:
        Exception: Errors from starting the driver, loading ``url`` or reading
            the page source propagate to the caller. Failures while handling
            the cookie popup or the tab are printed and do not abort the scrape.
    """
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)

    # Everything that touches the browser runs inside try/finally so the
    # Chrome process is always shut down, even if loading the page fails.
    try:
        print(f"Scraping {category_name} from {url}")
        driver.get(url)

        # Handle cookie popup (best effort: continue if it is not shown)
        try:
            accept_button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept')]"))
            )
            accept_button.click()
            print(f"Accepted cookie popup for {category_name}")
        except Exception as e:
            print(f"No cookie popup found or error accepting it for {category_name}: {e}")

        # Determine which discipline tab to open from category_name
        tab_name = "Lead" if "lead" in category_name else "Boulder"

        # Click the appropriate tab (best effort: fall back to the page as loaded)
        try:
            tab = WebDriverWait(driver, 15).until(
                EC.element_to_be_clickable((By.XPATH, f"//a[contains(@class, 'd3-ty-navigation-large') and contains(text(), '{tab_name}')]"))
            )
            tab.click()
            print(f"Clicked '{tab_name}' tab for {category_name}")

            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.CLASS_NAME, "font-normal"))
            )
            print(f"{tab_name} rankings data loaded for {category_name}!")
        except Exception as e:
            print(f"Could not click '{tab_name}' tab or load data for {category_name}: {e}")
            print("Attempting to proceed with URL as-is...")

        # Capture the rendered page before the browser is closed
        html = driver.page_source
    finally:
        driver.quit()

    return _parse_climber_names(html, category_name)

In [12]:
def save_to_csv(data, filename, columns, output_dir="ifsc_data"):
    """Save data to a CSV file inside ``output_dir``.

    Args:
        data: Records accepted by ``pd.DataFrame`` (e.g. a list of dicts).
        filename: Name of the CSV file to write inside ``output_dir``.
        columns: Column names passed to ``pd.DataFrame``.
        output_dir: Destination folder, created if missing. Defaults to
            ``"ifsc_data"``.

    Returns:
        None. The file is written and a summary line is printed.
    """
    # Create the output folder (and any missing parents) if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)
    filepath = os.path.join(output_dir, filename)
    df = pd.DataFrame(data, columns=columns)
    df.to_csv(filepath, index=False)
    # Report the number of rows actually written to the file
    print(f"Saved {len(df)} rows to {filepath}")

In [13]:
def main():
    """Scrape every discipline/gender ranking page and write one CSV per category."""
    print("Starting scraping process...")

    # Build (category_name, url) pairs in the order:
    # boulder_men, boulder_women, lead_men, lead_women
    base_url = "https://www.ifsc-climbing.org/rankings/index"
    targets = [
        (f"{discipline}_{gender}", f"{base_url}?discipline={discipline}&category={gender}")
        for discipline in ("boulder", "lead")
        for gender in ("men", "women")
    ]

    # Scrape one category at a time and save it under its own file name
    for label, page_url in targets:
        rows = scrape_ifsc_names(page_url, label)
        save_to_csv(rows, f"{label}.csv", ["name"])
        print(f"Finished {label}\n")

In [14]:
# Run the full scrape only when this code executes as the main module.
if __name__ == "__main__":
    main()

Starting scraping process...
Scraping boulder_men from https://www.ifsc-climbing.org/rankings/index?discipline=boulder&category=men
Accepted cookie popup for boulder_men
Clicked 'Boulder' tab for boulder_men
Boulder rankings data loaded for boulder_men!
Found 210 first names for boulder_men
Found 210 surnames for boulder_men
Collected 210 climbers for boulder_men
Saved 210 rows to ifsc_data/boulder_men.csv
Finished boulder_men

Scraping boulder_women from https://www.ifsc-climbing.org/rankings/index?discipline=boulder&category=women
Accepted cookie popup for boulder_women
Clicked 'Boulder' tab for boulder_women
Boulder rankings data loaded for boulder_women!
Found 186 first names for boulder_women
Found 186 surnames for boulder_women
Collected 186 climbers for boulder_women
Saved 186 rows to ifsc_data/boulder_women.csv
Finished boulder_women

Scraping lead_men from https://www.ifsc-climbing.org/rankings/index?discipline=lead&category=men
Accepted cookie popup for lead_men
Clicked 'Lead