## Introduction
This notebook scrapes climber names from the International Federation of Sport Climbing (IFSC) rankings website. It extracts data for both boulder and lead categories, for men and women.

### Setup and Imports
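Let's start by importing the necessary libraries: Selenium (with webdriver-manager) to drive the browser, BeautifulSoup to parse the rendered HTML, and pandas to write the results to CSV.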

In [4]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

### Scraping IFSC

In [6]:
def _fetch_rankings_html(driver, url, category_name):
    """Load the rankings page in ``driver`` and return the rendered HTML.

    Dismisses the cookie popup and clicks the discipline tab on a
    best-effort basis: a failure in either step is printed and the page
    is captured in whatever state it is in.

    Args:
        driver: An already-started Selenium WebDriver.
        url: Rankings page URL to open.
        category_name: Label used in log messages; a name containing
            ``"lead"`` selects the "Lead" tab, anything else "Boulder".

    Returns:
        The page source as a string.
    """
    print(f"Scraping {category_name} from {url}")
    driver.get(url)

    # Handle cookie popup
    try:
        accept_button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, "//button[contains(text(), 'Accept')]"))  # Adjust if needed
        )
        accept_button.click()
        print(f"Accepted cookie popup for {category_name}")
    except Exception as e:
        # Deliberately broad: a missing popup must not abort the scrape.
        print(f"No cookie popup found or error accepting it for {category_name}: {e}")

    # Determine which discipline tab to click from category_name
    tab_name = "Lead" if "lead" in category_name else "Boulder"

    # Click the appropriate tab
    try:
        # Locate the tab (Boulder or Lead) using its class and text
        tab = WebDriverWait(driver, 15).until(
            EC.element_to_be_clickable((By.XPATH, f"//a[contains(@class, 'd3-ty-navigation-large') and contains(text(), '{tab_name}')]"))
        )
        tab.click()
        print(f"Clicked '{tab_name}' tab for {category_name}")

        # Wait for the rankings data to load (check for names)
        WebDriverWait(driver, 15).until(
            EC.presence_of_element_located((By.CLASS_NAME, "font-normal"))  # Wait for a name to appear
        )
        print(f"{tab_name} rankings data loaded for {category_name}!")
    except Exception as e:
        # Deliberately broad: fall back to whatever the URL itself rendered.
        print(f"Could not click '{tab_name}' tab or load data for {category_name}: {e}")
        print("Attempting to proceed with URL as-is...")

    return driver.page_source


def _extract_climbers(soup, category_name):
    """Build the list of ``{"name": ...}`` records from the parsed page.

    First-name and surname spans are paired by position when both lists
    have the same non-zero length. Otherwise each athlete link is
    inspected individually and only links containing both spans are used.
    """
    first_names = soup.find_all("span", class_="font-normal")
    # A multi-class string makes BeautifulSoup match the exact class
    # attribute value, so the class order here matters.
    surnames = soup.find_all("span", class_="font-bold uppercase")

    print(f"Found {len(first_names)} first names for {category_name}")
    print(f"Found {len(surnames)} surnames for {category_name}")

    climbers = []
    if len(first_names) == len(surnames) and len(first_names) > 0:
        for fname, sname in zip(first_names, surnames):
            full_name = f"{fname.text.strip()} {sname.text.strip()}"
            climbers.append({"name": full_name})
            print(f"Added to {category_name}: {full_name}")
    else:
        print(f"Mismatch in {category_name}—trying athlete links")
        athlete_links = soup.find_all("a", class_="hover:text-blue-aa")
        for link in athlete_links:
            name_span = link.find("span")
            if name_span:
                fname = name_span.find("span", class_="font-normal")
                sname = name_span.find("span", class_="font-bold uppercase")
                if fname and sname:
                    full_name = f"{fname.text.strip()} {sname.text.strip()}"
                    climbers.append({"name": full_name})
                    print(f"Added to {category_name} from link: {full_name}")

    return climbers


def scrape_ifsc_names(url, category_name):
    """Scrape climber names using Selenium for dynamic content.

    Starts a Chrome session, loads ``url``, saves the rendered HTML to
    ``{category_name}_page.html`` and extracts the climber names from it.

    Args:
        url: Rankings page URL to scrape.
        category_name: Label for the category; used in log messages, in
            the saved HTML filename, and to pick the discipline tab.

    Returns:
        A list of dicts of the form ``{"name": "<first> <surname>"}``;
        empty when no names could be extracted.
    """
    # Use webdriver-manager to handle chromedriver automatically
    service = Service(ChromeDriverManager().install())
    driver = webdriver.Chrome(service=service)
    try:
        html = _fetch_rankings_html(driver, url, category_name)
    finally:
        # Always close the browser, even when loading the page raises,
        # so a failed run does not leave a Chrome process behind.
        driver.quit()

    # Keep a copy of the page so selector problems can be debugged offline
    with open(f"{category_name}_page.html", "w", encoding="utf-8") as f:
        f.write(html)
    print(f"Saved HTML to {category_name}_page.html")

    soup = BeautifulSoup(html, "html.parser")
    return _extract_climbers(soup, category_name)

In [7]:
def save_to_csv(data, filename, columns):
    """Write ``data`` to ``filename`` as a CSV file and print the row count.

    Args:
        data: Records to write; passed to ``pd.DataFrame`` as-is.
        filename: Destination path of the CSV file.
        columns: Column names for the DataFrame (and CSV header).
    """
    frame = pd.DataFrame(data, columns=columns)
    # index=False keeps the DataFrame's row index out of the file.
    frame.to_csv(filename, index=False)
    row_count = len(data)
    print(f"Saved {row_count} rows to {filename}")

In [8]:
def main():
    """Scrape every ranking category and write one CSV file per category.

    For each category the climber names are scraped with
    ``scrape_ifsc_names`` and saved to ``<category>.csv`` with a single
    ``name`` column.
    """
    print("Starting scraping process...")

    # Category label -> rankings URL. Dicts preserve insertion order, so
    # the categories are processed in the order listed here.
    ranking_urls = {
        "boulder_men": "https://www.ifsc-climbing.org/rankings/index?discipline=boulder&category=men",
        "boulder_women": "https://www.ifsc-climbing.org/rankings/index?discipline=boulder&category=women",
        "lead_men": "https://www.ifsc-climbing.org/rankings/index?discipline=lead&category=men",
        "lead_women": "https://www.ifsc-climbing.org/rankings/index?discipline=lead&category=women",
    }

    # Scrape one category at a time and persist it before moving on
    for label, page_url in ranking_urls.items():
        names = scrape_ifsc_names(page_url, label)
        save_to_csv(names, f"{label}.csv", ["name"])
        print(f"Finished {label}\n")

In [9]:
# Run the full scrape only when this code executes as the main module
# (not when it is imported from elsewhere).
if __name__ == "__main__":
    main()

Starting scraping process...
Scraping boulder_men from https://www.ifsc-climbing.org/rankings/index?discipline=boulder&category=men
Accepted cookie popup for boulder_men
Clicked 'Boulder' tab for boulder_men
Boulder rankings data loaded for boulder_men!
Saved HTML to boulder_men_page.html
Found 210 first names for boulder_men
Found 210 surnames for boulder_men
Added to boulder_men: sorato ANRAKU
Added to boulder_men: dohyun LEE
Added to boulder_men: meichi NARASAKI
Added to boulder_men: tomoa NARASAKI
Added to boulder_men: sohta AMAGASA
Added to boulder_men: toby ROBERTS
Added to boulder_men: sam AVEZOU
Added to boulder_men: maximillian MILNE
Added to boulder_men: jongwon CHON
Added to boulder_men: manuel CORNU
Added to boulder_men: anze PEHARC
Added to boulder_men: dayan AKHTAR
Added to boulder_men: ritsu KAYOTANI
Added to boulder_men: yuji FUJIWAKI
Added to boulder_men: hannes VAN DUYSEN
Added to boulder_men: jakob SCHUBERT
Added to boulder_men: mejdi SCHALCK
Added to boulder_men: ad