In [1]:
import time
from datetime import datetime
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By

In [3]:
def create_csv(content, csv_name):
    """Extract the records table from a page's HTML and write it to a CSV file.

    The table is located by its CSS class, every ``<tr>`` of its ``<tbody>``
    is turned into one record, and the result is written with the columns
    DISCIPLINE, PERF, COMPETITOR, DOB, COUNTRY, VENUE and DATE (no index).

    Parameters
    ----------
    content : str
        HTML source of the page (e.g. ``driver.page_source``).
    csv_name : str or pathlib.Path
        Destination of the CSV file. When it is a path, missing parent
        directories are created before writing. Any other target accepted by
        ``DataFrame.to_csv`` is passed through untouched.

    Raises
    ------
    ValueError
        If the records table or its ``<tbody>`` cannot be found in ``content``.
    """
    columns = ['DISCIPLINE', 'PERF', 'COMPETITOR', 'DOB', 'COUNTRY', 'VENUE', 'DATE']
    # Annotation markers removed from the performance text, applied in this order.
    perf_markers = ["*", "Mx", "Wo", "h"]
    # The highest cell index read below is 8, so a usable row has at least 9 cells.
    min_cells = 9

    # Parse the page content with BeautifulSoup
    soup = BeautifulSoup(content, 'html.parser')

    # Find the table containing the records; find() returns None when nothing
    # matches, so check explicitly instead of failing with an AttributeError.
    table = soup.find(attrs={'class': 'Table_table__2zsdR RecordsTable_table__3X8lL'})
    table_body = table.find('tbody') if table is not None else None
    if table_body is None:
        raise ValueError(
            "Records table not found in page content; "
            "the page may not have finished rendering or its markup changed."
        )

    # Build one dict per table row instead of seven parallel lists.
    records = []
    for table_row in table_body.find_all('tr'):
        cells = table_row.find_all('td')

        # Rows without the expected cells cannot be indexed safely; skip them.
        if len(cells) < min_cells:
            continue

        perf = cells[2].text.strip()
        for marker in perf_markers:
            perf = perf.replace(marker, "")

        records.append({
            'DISCIPLINE': cells[0].text.strip(),
            'PERF': perf.strip(),
            'COMPETITOR': cells[4].text.strip(),
            'DOB': cells[5].text.strip(),
            'COUNTRY': cells[6].text.strip(),
            # Remove the "(i)" marker first, then strip, so no trailing
            # whitespace is left where the marker used to be.
            'VENUE': cells[7].text.replace("(i)", "").strip(),
            'DATE': cells[8].text.strip(),
        })

    # Passing `columns` keeps the header (and its order) even with zero rows.
    frame = pd.DataFrame(records, columns=columns)

    # to_csv does not create directories, so make sure the target folder exists.
    if isinstance(csv_name, (str, Path)):
        Path(csv_name).parent.mkdir(parents=True, exist_ok=True)

    frame.to_csv(csv_name, index=False)

In [4]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def click_button(driver, xpath):
    """Wait until the element at ``xpath`` is clickable, then click it.

    Parameters
    ----------
    driver
        Selenium WebDriver used to locate and click the element.
    xpath : str
        XPath expression identifying the button.

    Raises
    ------
    AssertionError
        If waiting for or clicking the element fails for any reason. The
        error message is printed, ``driver.quit()`` is called, and the
        original exception is attached as ``__cause__``.
    """
    try:
        # Wait up to 10 seconds for the element to become clickable.
        button = WebDriverWait(driver, 10).until(
            EC.element_to_be_clickable((By.XPATH, xpath))
        )
        button.click()
    except Exception as e:
        print(f"Erreur lors du clic sur le bouton : {e}")
        driver.quit()
        # Raise explicitly rather than `assert False`: assert statements are
        # removed under `python -O`, which would let the caller carry on with
        # a page where the click never happened. Chaining keeps the root cause.
        raise AssertionError(f"Erreur lors du clic sur le bouton : {e}") from e

def get_content_and_create_csv(driver, url, button_selector, csv_name):
    """Load ``url``, optionally click a button, and save the page's records as CSV.

    Parameters
    ----------
    driver
        Selenium WebDriver used to load the page.
    url : str
        Address of the page to open.
    button_selector : str or None
        XPath handed to ``click_button``; when falsy, no click is performed
        and the page is used as loaded.
    csv_name : str
        Output path forwarded to ``create_csv``.
    """
    driver.get(url)

    # A falsy selector (None or "") means the page is scraped exactly as loaded.
    needs_click = bool(button_selector)
    if needs_click:
        click_button(driver, button_selector)

    # NOTE(review): the page source is read immediately after the click, with
    # no explicit wait for the table to refresh — confirm the page updates
    # synchronously, otherwise the previous view may be captured.
    create_csv(driver.page_source, csv_name)

In [7]:
# Each entry is (URL slug of the records category, stem of the output CSV files).
categories = [
    ("world-records", "world_records"),
    ("olympic-games-records", "olympic_games_records"),
    ("african-records", "african_records"),
    ("asian-records", "asian_records"),
    ("european-records", "european_records"),
    ("nacac-records", "nacac_records"),
    ("oceanian-records", "oceanian_records"),
    ("south-american-records", "south_american_records")
]

# XPath of the button clicked before saving the men's file.
men_button = '//*[@id="__next"]/div[3]/div/div/div[2]/ul/li[2]/button'

# Common prefix of every category page; it does not change between iterations.
base_url = "https://worldathletics.org/records/by-category/"

with webdriver.Chrome() as driver:
    for category, file_name in categories:
        category_url = f"{base_url}{category}"

        # The page as loaded is saved as the women's file; the men's file is
        # saved after reloading the page and clicking `men_button`.
        get_content_and_create_csv(driver, category_url, None, f"data/women_{file_name}.csv")
        get_content_and_create_csv(driver, category_url, men_button, f"data/men_{file_name}.csv")

In [9]:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Same scrape as the previous cell, with Chrome started in headless mode.
chrome_options = Options()
chrome_options.add_argument("--headless")  # Enable headless mode

# Common prefix of every category page; it does not change between iterations.
base_url = "https://worldathletics.org/records/by-category/"

with webdriver.Chrome(options=chrome_options) as driver:
    for category, file_name in categories:
        category_url = f"{base_url}{category}"

        # The page as loaded is saved as the women's file; the men's file is
        # saved after reloading the page and clicking `men_button`.
        get_content_and_create_csv(driver, category_url, None, f"data/women_{file_name}.csv")
        get_content_and_create_csv(driver, category_url, men_button, f"data/men_{file_name}.csv")

        # Progress message printed once both files of the category are written.
        print(f"Les fichiers {file_name}.csv ont été créés avec succès.")

Les fichiers world_records.csv ont été créés avec succès.
Les fichiers olympic_games_records.csv ont été créés avec succès.
Les fichiers african_records.csv ont été créés avec succès.
Les fichiers asian_records.csv ont été créés avec succès.
Les fichiers european_records.csv ont été créés avec succès.
Les fichiers nacac_records.csv ont été créés avec succès.
Les fichiers oceanian_records.csv ont été créés avec succès.
Les fichiers south_american_records.csv ont été créés avec succès.
